Please help us get to know our user community better by answering the following short survey: https://forms.gle/wpyrxWi18ox9Z5ae9
Eigen  3.4.0
 
Loading...
Searching...
No Matches
GenericPacketMath.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_GENERIC_PACKET_MATH_H
12#define EIGEN_GENERIC_PACKET_MATH_H
13
14namespace Eigen {
15
16namespace internal {
17
// Debugging hooks for packet loads and stores.
// Each macro defaults to an empty expansion unless it has already been
// defined before this header is included.
#if !defined(EIGEN_DEBUG_ALIGNED_LOAD)
#  define EIGEN_DEBUG_ALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_LOAD)
#  define EIGEN_DEBUG_UNALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_ALIGNED_STORE)
#  define EIGEN_DEBUG_ALIGNED_STORE
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_STORE)
#  define EIGEN_DEBUG_UNALIGNED_STORE
#endif
41
/** \internal
  * Default values of the Has* capability flags (1 or 0).
  * packet_traits derives from this struct and may override individual flags.
  */
struct default_packet_traits
{
  enum {
    HasHalfPacket = 0,

    // Basic arithmetic and element-wise operations.
    HasAdd       = 1,
    HasSub       = 1,
    HasShift     = 1,
    HasMul       = 1,
    HasNegate    = 1,
    HasAbs       = 1,
    HasArg       = 0,
    HasAbs2      = 1,
    HasAbsDiff   = 0,
    HasMin       = 1,
    HasMax       = 1,
    HasConj      = 1,
    HasSetLinear = 1,
    HasBlend     = 0,
    // Indicates whether packet comparison is supported.
    // pcmp_eq, pcmp_lt and pcmp_le must all be defined for this to be true.
    HasCmp       = 0,

    // Division, roots, exponentials and logarithms.
    HasDiv    = 0,
    HasSqrt   = 0,
    HasRsqrt  = 0,
    HasExp    = 0,
    HasExpm1  = 0,
    HasLog    = 0,
    HasLog1p  = 0,
    HasLog10  = 0,
    HasPow    = 0,

    // Trigonometric, hyperbolic and special functions.
    HasSin                 = 0,
    HasCos                 = 0,
    HasTan                 = 0,
    HasASin                = 0,
    HasACos                = 0,
    HasATan                = 0,
    HasSinh                = 0,
    HasCosh                = 0,
    HasTanh                = 0,
    HasLGamma              = 0,
    HasDiGamma             = 0,
    HasZeta                = 0,
    HasPolygamma           = 0,
    HasErf                 = 0,
    HasErfc                = 0,
    HasNdtri               = 0,
    HasBessel              = 0,
    HasIGamma              = 0,
    HasIGammaDerA          = 0,
    HasGammaSampleDerAlpha = 0,
    HasIGammac             = 0,
    HasBetaInc             = 0,

    // Rounding and sign.
    HasRound = 0,
    HasRint  = 0,
    HasFloor = 0,
    HasCeil  = 0,
    HasSign  = 0
  };
};
105
106template<typename T> struct packet_traits : default_packet_traits
107{
108 typedef T type;
109 typedef T half;
110 enum {
111 Vectorizable = 0,
112 size = 1,
113 AlignedOnScalar = 0,
114 HasHalfPacket = 0
115 };
116 enum {
117 HasAdd = 0,
118 HasSub = 0,
119 HasMul = 0,
120 HasNegate = 0,
121 HasAbs = 0,
122 HasAbs2 = 0,
123 HasMin = 0,
124 HasMax = 0,
125 HasConj = 0,
126 HasSetLinear = 0
127 };
128};
129
130template<typename T> struct packet_traits<const T> : packet_traits<T> { };
131
/** \internal
  * Generic "reverse" traits mapping a packet type back to its scalar.
  * In this unspecialized version \c T is treated as a packet of one element:
  * its scalar type and half packet are \c T itself, and masked loads/stores
  * are flagged as unavailable.
  */
template<typename T>
struct unpacket_traits
{
  typedef T type;
  typedef T half;

  enum {
    size                   = 1,
    alignment              = 1,
    vectorizable           = false,
    masked_load_available  = false,
    masked_store_available = false
  };
};

/** \internal A const-qualified type has the same traits as the unqualified type. */
template<typename T>
struct unpacket_traits<const T> : unpacket_traits<T> {};
147
/** \internal
  * Default traits for casting from \c Src to \c Tgt: the cast is flagged as
  * not vectorized and both coefficient ratios are 1.
  */
template<typename Src, typename Tgt>
struct type_casting_traits
{
  enum {
    VectorizedCast = 0,
    SrcCoeffRatio  = 1,
    TgtCoeffRatio  = 1
  };
};
155
158template<typename T, int unique_id = 0>
159struct eigen_packet_wrapper
160{
161 EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
162 EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
163 EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
164 EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
165 EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
166 m_val = v;
167 return *this;
168 }
169
170 T m_val;
171};
172
173
177template<typename Packet>
178struct is_scalar {
179 typedef typename unpacket_traits<Packet>::type Scalar;
180 enum {
181 value = internal::is_same<Packet, Scalar>::value
182 };
183};
184
186template <typename SrcPacket, typename TgtPacket>
187EIGEN_DEVICE_FUNC inline TgtPacket
188pcast(const SrcPacket& a) {
189 return static_cast<TgtPacket>(a);
190}
191template <typename SrcPacket, typename TgtPacket>
192EIGEN_DEVICE_FUNC inline TgtPacket
193pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
194 return static_cast<TgtPacket>(a);
195}
196template <typename SrcPacket, typename TgtPacket>
197EIGEN_DEVICE_FUNC inline TgtPacket
198pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
199 return static_cast<TgtPacket>(a);
200}
201template <typename SrcPacket, typename TgtPacket>
202EIGEN_DEVICE_FUNC inline TgtPacket
203pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
204 const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
205 return static_cast<TgtPacket>(a);
206}
207
209template <typename Target, typename Packet>
210EIGEN_DEVICE_FUNC inline Target
211preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
212
214template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
215padd(const Packet& a, const Packet& b) { return a+b; }
216// Avoid compiler warning for boolean algebra.
217template<> EIGEN_DEVICE_FUNC inline bool
218padd(const bool& a, const bool& b) { return a || b; }
219
221template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
222psub(const Packet& a, const Packet& b) { return a-b; }
223
225template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
226pnegate(const Packet& a) { return -a; }
227
228template<> EIGEN_DEVICE_FUNC inline bool
229pnegate(const bool& a) { return !a; }
230
232template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
233pconj(const Packet& a) { return numext::conj(a); }
234
236template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
237pmul(const Packet& a, const Packet& b) { return a*b; }
238// Avoid compiler warning for boolean algebra.
239template<> EIGEN_DEVICE_FUNC inline bool
240pmul(const bool& a, const bool& b) { return a && b; }
241
243template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
244pdiv(const Packet& a, const Packet& b) { return a/b; }
245
246// In the generic case, memset to all one bits.
247template<typename Packet, typename EnableIf = void>
248struct ptrue_impl {
249 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
250 Packet b;
251 memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
252 return b;
253 }
254};
255
256// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
257// Although this is technically not a valid bitmask, the scalar path for pselect
258// uses a comparison to zero, so this should still work in most cases. We don't
259// have another option, since the scalar type requires initialization.
260template<typename T>
261struct ptrue_impl<T,
262 typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
263 static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
264 return T(1);
265 }
266};
267
269template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
270ptrue(const Packet& a) {
271 return ptrue_impl<Packet>::run(a);
272}
273
274// In the general case, memset to zero.
275template<typename Packet, typename EnableIf = void>
276struct pzero_impl {
277 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
278 Packet b;
279 memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
280 return b;
281 }
282};
283
284// For scalars, explicitly set to Scalar(0), since the underlying representation
285// for zero may not consist of all-zero bits.
286template<typename T>
287struct pzero_impl<T,
288 typename internal::enable_if<is_scalar<T>::value>::type> {
289 static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
290 return T(0);
291 }
292};
293
295template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
296pzero(const Packet& a) {
297 return pzero_impl<Packet>::run(a);
298}
299
301template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
302pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
303
305template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
306pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
307
309template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
310pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
311
313template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
314pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
315
316template<typename T>
317struct bit_and {
318 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
319 return a & b;
320 }
321};
322
323template<typename T>
324struct bit_or {
325 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
326 return a | b;
327 }
328};
329
330template<typename T>
331struct bit_xor {
332 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
333 return a ^ b;
334 }
335};
336
337template<typename T>
338struct bit_not {
339 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
340 return ~a;
341 }
342};
343
344// Use operators &, |, ^, ~.
345template<typename T>
346struct operator_bitwise_helper {
347 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
348 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
349 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
350 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
351};
352
353// Apply binary operations byte-by-byte
354template<typename T>
355struct bytewise_bitwise_helper {
356 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
357 return binary(a, b, bit_and<unsigned char>());
358 }
359 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
360 return binary(a, b, bit_or<unsigned char>());
361 }
362 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
363 return binary(a, b, bit_xor<unsigned char>());
364 }
365 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
366 return unary(a,bit_not<unsigned char>());
367 }
368
369 private:
370 template<typename Op>
371 EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
372 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
373 T c;
374 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
375 for (size_t i = 0; i < sizeof(T); ++i) {
376 *c_ptr++ = op(*a_ptr++);
377 }
378 return c;
379 }
380
381 template<typename Op>
382 EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
383 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
384 const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
385 T c;
386 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
387 for (size_t i = 0; i < sizeof(T); ++i) {
388 *c_ptr++ = op(*a_ptr++, *b_ptr++);
389 }
390 return c;
391 }
392};
393
394// In the general case, use byte-by-byte manipulation.
395template<typename T, typename EnableIf = void>
396struct bitwise_helper : public bytewise_bitwise_helper<T> {};
397
398// For integers or non-trivial scalars, use binary operators.
399template<typename T>
400struct bitwise_helper<T,
401 typename internal::enable_if<
402 is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
403 > : public operator_bitwise_helper<T> {};
404
406template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
407pand(const Packet& a, const Packet& b) {
408 return bitwise_helper<Packet>::bitwise_and(a, b);
409}
410
412template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
413por(const Packet& a, const Packet& b) {
414 return bitwise_helper<Packet>::bitwise_or(a, b);
415}
416
418template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
419pxor(const Packet& a, const Packet& b) {
420 return bitwise_helper<Packet>::bitwise_xor(a, b);
421}
422
424template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
425pnot(const Packet& a) {
426 return bitwise_helper<Packet>::bitwise_not(a);
427}
428
430template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
431pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
432
433// In the general case, use bitwise select.
434template<typename Packet, typename EnableIf = void>
435struct pselect_impl {
436 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
437 return por(pand(a,mask),pandnot(b,mask));
438 }
439};
440
441// For scalars, use ternary select.
442template<typename Packet>
443struct pselect_impl<Packet,
444 typename internal::enable_if<is_scalar<Packet>::value>::type > {
445 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
446 return numext::equal_strict(mask, Packet(0)) ? b : a;
447 }
448};
449
451template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
452pselect(const Packet& mask, const Packet& a, const Packet& b) {
453 return pselect_impl<Packet>::run(mask, a, b);
454}
455
456template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
457 const bool& cond, const bool& a, const bool& b) {
458 return cond ? a : b;
459}
460
463template<int NaNPropagation>
464struct pminmax_impl {
465 template <typename Packet, typename Op>
466 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
467 return op(a,b);
468 }
469};
470
473template<>
474struct pminmax_impl<PropagateNaN> {
475 template <typename Packet, typename Op>
476 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
477 Packet not_nan_mask_a = pcmp_eq(a, a);
478 Packet not_nan_mask_b = pcmp_eq(b, b);
479 return pselect(not_nan_mask_a,
480 pselect(not_nan_mask_b, op(a, b), b),
481 a);
482 }
483};
484
488template<>
489struct pminmax_impl<PropagateNumbers> {
490 template <typename Packet, typename Op>
491 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
492 Packet not_nan_mask_a = pcmp_eq(a, a);
493 Packet not_nan_mask_b = pcmp_eq(b, b);
494 return pselect(not_nan_mask_a,
495 pselect(not_nan_mask_b, op(a, b), a),
496 b);
497 }
498};
499
500
// EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) yields a binary callable for Func.
// When SYCL_DEVICE_ONLY is defined, Func is wrapped in a lambda taking two
// `const Type&` arguments; otherwise Func is passed through unchanged.
#ifdef SYCL_DEVICE_ONLY
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
[](const Type& a, const Type& b) { \
  return Func(a, b);}
#else
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
#endif
508
511template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
512pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
513
516template <int NaNPropagation, typename Packet>
517EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
518 return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
519}
520
523template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
524pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
525
528template <int NaNPropagation, typename Packet>
529EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
530 return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
531}
532
534template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
535pabs(const Packet& a) { return numext::abs(a); }
536template<> EIGEN_DEVICE_FUNC inline unsigned int
537pabs(const unsigned int& a) { return a; }
538template<> EIGEN_DEVICE_FUNC inline unsigned long
539pabs(const unsigned long& a) { return a; }
540template<> EIGEN_DEVICE_FUNC inline unsigned long long
541pabs(const unsigned long long& a) { return a; }
542
544template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
545paddsub(const Packet& a, const Packet& b) {
546 return pselect(peven_mask(a), padd(a, b), psub(a, b));
547 }
548
550template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
551parg(const Packet& a) { using numext::arg; return arg(a); }
552
553
555template<int N> EIGEN_DEVICE_FUNC inline int
556parithmetic_shift_right(const int& a) { return a >> N; }
557template<int N> EIGEN_DEVICE_FUNC inline long int
558parithmetic_shift_right(const long int& a) { return a >> N; }
559
561template<int N> EIGEN_DEVICE_FUNC inline int
562plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
563template<int N> EIGEN_DEVICE_FUNC inline long int
564plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
565
567template<int N> EIGEN_DEVICE_FUNC inline int
568plogical_shift_left(const int& a) { return a << N; }
569template<int N> EIGEN_DEVICE_FUNC inline long int
570plogical_shift_left(const long int& a) { return a << N; }
571
575template <typename Packet>
576EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
577 int exp;
578 EIGEN_USING_STD(frexp);
579 Packet result = static_cast<Packet>(frexp(a, &exp));
580 exponent = static_cast<Packet>(exp);
581 return result;
582}
583
587template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
588pldexp(const Packet &a, const Packet &exponent) {
589 EIGEN_USING_STD(ldexp)
590 return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
591}
592
594template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
595pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
596
598template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
599pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
600
602template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
603ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
604
609template<typename Packet> EIGEN_DEVICE_FUNC inline
610typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
611ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
612
614template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
615pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
616
618template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
619pset1frombits(BitsType a);
620
622template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
623pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
624
630template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
631ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
632
639template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
640ploadquad(const typename unpacket_traits<Packet>::type* from)
641{ return pload1<Packet>(from); }
642
652template<typename Packet> EIGEN_DEVICE_FUNC
653inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
654 Packet& a0, Packet& a1, Packet& a2, Packet& a3)
655{
656 a0 = pload1<Packet>(a+0);
657 a1 = pload1<Packet>(a+1);
658 a2 = pload1<Packet>(a+2);
659 a3 = pload1<Packet>(a+3);
660}
661
669template<typename Packet> EIGEN_DEVICE_FUNC
670inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
671 Packet& a0, Packet& a1)
672{
673 a0 = pload1<Packet>(a+0);
674 a1 = pload1<Packet>(a+1);
675}
676
678template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
679plset(const typename unpacket_traits<Packet>::type& a) { return a; }
680
683template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
684peven_mask(const Packet& /*a*/) {
685 typedef typename unpacket_traits<Packet>::type Scalar;
686 const size_t n = unpacket_traits<Packet>::size;
687 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
688 for(size_t i = 0; i < n; ++i) {
689 memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
690 }
691 return ploadu<Packet>(elements);
692}
693
694
696template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
697{ (*to) = from; }
698
700template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
701{ (*to) = from; }
702
707template<typename Scalar, typename Packet>
708EIGEN_DEVICE_FUNC inline
709typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
710pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
711
712 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
713 { return ploadu<Packet>(from); }
714
715 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
716 { pstore(to, from); }
717
719template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
720{
721#if defined(EIGEN_HIP_DEVICE_COMPILE)
722 // do nothing
723#elif defined(EIGEN_CUDA_ARCH)
724#if defined(__LP64__) || EIGEN_OS_WIN64
725 // 64-bit pointer operand constraint for inlined asm
726 asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
727#else
728 // 32-bit pointer operand constraint for inlined asm
729 asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
730#endif
731#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
732 __builtin_prefetch(addr);
733#endif
734}
735
737template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
738{ return a; }
739
741template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
742{
743 return Packet(numext::imag(a),numext::real(a));
744}
745
746/**************************
747* Special math functions
748***************************/
749
751template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
752Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
753
755template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
756Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
757
759template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
760Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
761
763template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
764Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
765
767template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
768Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
769
771template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
772Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
773
775template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
776Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
777
779template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
780Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
781
783template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
784Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
785
787template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
788Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
789
791template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
792Packet pexpm1(const Packet& a) { return numext::expm1(a); }
793
795template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
796Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
797
799template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
800Packet plog1p(const Packet& a) { return numext::log1p(a); }
801
803template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
804Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
805
807template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
808Packet plog2(const Packet& a) {
809 typedef typename internal::unpacket_traits<Packet>::type Scalar;
810 return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
811}
812
814template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
815Packet psqrt(const Packet& a) { return numext::sqrt(a); }
816
818template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
819Packet prsqrt(const Packet& a) {
820 typedef typename internal::unpacket_traits<Packet>::type Scalar;
821 return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
822}
823
825template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
826Packet pround(const Packet& a) { using numext::round; return round(a); }
827
829template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
830Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
831
834template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
835Packet print(const Packet& a) { using numext::rint; return rint(a); }
836
838template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
839Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
840
842template<typename Packet>
843EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
844pfirst(const Packet& a)
845{ return a; }
846
851template<typename Packet>
852EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
853predux_half_dowto4(const Packet& a)
854{ return a; }
855
856// Slow generic implementation of Packet reduction.
857template <typename Packet, typename Op>
858EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
859predux_helper(const Packet& a, Op op) {
860 typedef typename unpacket_traits<Packet>::type Scalar;
861 const size_t n = unpacket_traits<Packet>::size;
862 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
863 pstoreu<Scalar>(elements, a);
864 for(size_t k = n / 2; k > 0; k /= 2) {
865 for(size_t i = 0; i < k; ++i) {
866 elements[i] = op(elements[i], elements[i + k]);
867 }
868 }
869 return elements[0];
870}
871
873template<typename Packet>
874EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
875predux(const Packet& a)
876{
877 return a;
878}
879
881template <typename Packet>
882EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
883 const Packet& a) {
884 typedef typename unpacket_traits<Packet>::type Scalar;
885 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
886}
887
889template <typename Packet>
890EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
891 const Packet &a) {
892 typedef typename unpacket_traits<Packet>::type Scalar;
893 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
894}
895
896template <int NaNPropagation, typename Packet>
897EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
898 const Packet& a) {
899 typedef typename unpacket_traits<Packet>::type Scalar;
900 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
901}
902
904template <typename Packet>
905EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
906 const Packet &a) {
907 typedef typename unpacket_traits<Packet>::type Scalar;
908 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
909}
910
911template <int NaNPropagation, typename Packet>
912EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
913 const Packet& a) {
914 typedef typename unpacket_traits<Packet>::type Scalar;
915 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
916}
917
918#undef EIGEN_BINARY_OP_NAN_PROPAGATION
919
923// not needed yet
924// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
925// { return bool(a); }
926
930template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
931{
932 // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
933 // It is expected that "true" is either:
934 // - Scalar(1)
935 // - bits full of ones (NaN for floats),
936 // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
937 // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
938 typedef typename unpacket_traits<Packet>::type Scalar;
939 return numext::not_equal_strict(predux(a), Scalar(0));
940}
941
942/***************************************************************************
943* The following functions might not have to be overwritten for vectorized types
944***************************************************************************/
945
947// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
948template<typename Packet>
949inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
950{
951 pstore(to, pset1<Packet>(a));
952}
953
955template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
956pmadd(const Packet& a,
957 const Packet& b,
958 const Packet& c)
959{ return padd(pmul(a, b),c); }
960
963template<typename Packet, int Alignment>
964EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
965{
966 if(Alignment >= unpacket_traits<Packet>::alignment)
967 return pload<Packet>(from);
968 else
969 return ploadu<Packet>(from);
970}
971
974template<typename Scalar, typename Packet, int Alignment>
975EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
976{
977 if(Alignment >= unpacket_traits<Packet>::alignment)
978 pstore(to, from);
979 else
980 pstoreu(to, from);
981}
982
988template<typename Packet, int LoadMode>
989EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
990{
991 return ploadt<Packet, LoadMode>(from);
992}
993
994/***************************************************************************
995* Fast complex products (GCC generates a function call which is very slow)
996***************************************************************************/
997
998// Eigen+CUDA does not support complexes.
999#if !defined(EIGEN_GPUCC)
1000
1001template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
1002{ return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1003
1004template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
1005{ return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1006
1007#endif
1008
1009
1010/***************************************************************************
1011 * PacketBlock, that is a collection of N packets where the number of words
1012 * in the packet is a multiple of N.
1013***************************************************************************/
1014template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
1015 Packet packet[N];
1016};
1017
1018template<typename Packet> EIGEN_DEVICE_FUNC inline void
1019ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
1020 // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1021}
1022
1023/***************************************************************************
1024 * Selector, i.e. vector of N boolean values used to select (i.e. blend)
1025 * words from 2 packets.
1026***************************************************************************/
1027template <size_t N> struct Selector {
1028 bool select[N];
1029};
1030
1031template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
1032pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
1033 return ifPacket.select[0] ? thenPacket : elsePacket;
1034}
1035
1036} // end namespace internal
1037
1038} // end namespace Eigen
1039
1040#endif // EIGEN_GENERIC_PACKET_MATH_H
@ PropagateNaN
Definition: Constants.h:343
@ PropagateNumbers
Definition: Constants.h:345
Namespace containing all symbols from the Eigen library.
Definition: Core:141
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)