16#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
17#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
23template<
typename T>
struct make_integer;
24template<>
struct make_integer<float> {
typedef numext::int32_t type; };
25template<>
struct make_integer<double> {
typedef numext::int64_t type; };
26template<>
struct make_integer<half> {
typedef numext::int16_t type; };
27template<>
struct make_integer<bfloat16> {
typedef numext::int16_t type; };
29template<
typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
30Packet pfrexp_generic_get_biased_exponent(
const Packet& a) {
31 typedef typename unpacket_traits<Packet>::type Scalar;
32 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
33 enum { mantissa_bits = numext::numeric_limits<Scalar>::digits - 1};
34 return pcast<PacketI, Packet>(plogical_shift_right<mantissa_bits>(preinterpret<PacketI>(pabs(a))));
39template<
typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
40Packet pfrexp_generic(
const Packet& a, Packet& exponent) {
41 typedef typename unpacket_traits<Packet>::type Scalar;
42 typedef typename make_unsigned<typename make_integer<Scalar>::type>::type ScalarUI;
44 TotalBits =
sizeof(Scalar) * CHAR_BIT,
45 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
46 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
49 EIGEN_CONSTEXPR ScalarUI scalar_sign_mantissa_mask =
50 ~(((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1)) <<
int(MantissaBits));
51 const Packet sign_mantissa_mask = pset1frombits<Packet>(
static_cast<ScalarUI
>(scalar_sign_mantissa_mask));
52 const Packet half = pset1<Packet>(Scalar(0.5));
53 const Packet zero = pzero(a);
54 const Packet normal_min = pset1<Packet>((numext::numeric_limits<Scalar>::min)());
57 const Packet is_denormal = pcmp_lt(pabs(a), normal_min);
58 EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(
int(MantissaBits) + 1);
60 const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) <<
int(scalar_normalization_offset));
61 const Packet normalization_factor = pset1<Packet>(scalar_normalization_factor);
62 const Packet normalized_a = pselect(is_denormal, pmul(a, normalization_factor), a);
65 const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(
int(ExponentBits)-1)) - ScalarUI(2));
66 Packet exponent_offset = pset1<Packet>(scalar_exponent_offset);
67 const Packet normalization_offset = pset1<Packet>(-Scalar(scalar_normalization_offset));
68 exponent_offset = pselect(is_denormal, padd(exponent_offset, normalization_offset), exponent_offset);
71 exponent = pfrexp_generic_get_biased_exponent(normalized_a);
74 const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) <<
int(ExponentBits)) - ScalarUI(1));
75 const Packet non_finite_exponent = pset1<Packet>(scalar_non_finite_exponent);
76 const Packet is_zero_or_not_finite = por(pcmp_eq(a, zero), pcmp_eq(exponent, non_finite_exponent));
77 const Packet m = pselect(is_zero_or_not_finite, a, por(pand(normalized_a, sign_mantissa_mask), half));
78 exponent = pselect(is_zero_or_not_finite, zero, padd(exponent, exponent_offset));
84template<
typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
85Packet pldexp_generic(
const Packet& a,
const Packet& exponent) {
108 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
109 typedef typename unpacket_traits<Packet>::type Scalar;
110 typedef typename unpacket_traits<PacketI>::type ScalarI;
112 TotalBits =
sizeof(Scalar) * CHAR_BIT,
113 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
114 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
117 const Packet max_exponent = pset1<Packet>(Scalar((ScalarI(1)<<
int(ExponentBits)) + ScalarI(
int(MantissaBits) - 1)));
118 const PacketI bias = pset1<PacketI>((ScalarI(1)<<(
int(ExponentBits)-1)) - ScalarI(1));
119 const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
120 PacketI b = parithmetic_shift_right<2>(e);
121 Packet c = preinterpret<Packet>(plogical_shift_left<
int(MantissaBits)>(padd(b, bias)));
122 Packet out = pmul(pmul(pmul(a, c), c), c);
123 b = psub(psub(psub(e, b), b), b);
124 c = preinterpret<Packet>(plogical_shift_left<
int(MantissaBits)>(padd(b, bias)));
138template<
typename Packet>
139struct pldexp_fast_impl {
140 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
141 typedef typename unpacket_traits<Packet>::type Scalar;
142 typedef typename unpacket_traits<PacketI>::type ScalarI;
144 TotalBits =
sizeof(Scalar) * CHAR_BIT,
145 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
146 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
149 static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
150 Packet run(
const Packet& a,
const Packet& exponent) {
151 const Packet bias = pset1<Packet>(Scalar((ScalarI(1)<<(
int(ExponentBits)-1)) - ScalarI(1)));
152 const Packet limit = pset1<Packet>(Scalar((ScalarI(1)<<
int(ExponentBits)) - ScalarI(1)));
154 const PacketI e = pcast<Packet, PacketI>(pmin(pmax(padd(exponent, bias), pzero(limit)), limit));
156 return pmul(a, preinterpret<Packet>(plogical_shift_left<
int(MantissaBits)>(e)));
166template <
typename Packet,
bool base2>
167EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
169Packet plog_impl_float(
const Packet _x)
173 const Packet cst_1 = pset1<Packet>(1.0f);
174 const Packet cst_neg_half = pset1<Packet>(-0.5f);
176 const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u);
177 const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u);
178 const Packet cst_pos_inf = pset1frombits<Packet>( 0x7f800000u);
181 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
182 const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
183 const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
184 const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
185 const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);
186 const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);
187 const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);
188 const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
189 const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
190 const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
193 x = pmax(x, cst_min_norm_pos);
206 Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
207 Packet tmp = pand(x, mask);
209 e = psub(e, pand(cst_1, mask));
212 Packet x2 = pmul(x, x);
213 Packet x3 = pmul(x2, x);
218 y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
219 y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
220 y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
221 y = pmadd(y, x, cst_cephes_log_p2);
222 y1 = pmadd(y1, x, cst_cephes_log_p5);
223 y2 = pmadd(y2, x, cst_cephes_log_p8);
224 y = pmadd(y, x3, y1);
225 y = pmadd(y, x3, y2);
228 y = pmadd(cst_neg_half, x2, y);
233 const Packet cst_log2e = pset1<Packet>(
static_cast<float>(EIGEN_LOG2E));
234 x = pmadd(x, cst_log2e, e);
236 const Packet cst_ln2 = pset1<Packet>(
static_cast<float>(EIGEN_LN2));
237 x = pmadd(e, cst_ln2, x);
240 Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
241 Packet iszero_mask = pcmp_eq(_x,pzero(_x));
242 Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);
247 return pselect(iszero_mask, cst_minus_inf,
248 por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
251template <
typename Packet>
252EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
254Packet plog_float(
const Packet _x)
256 return plog_impl_float<Packet,
false>(_x);
259template <
typename Packet>
260EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
262Packet plog2_float(
const Packet _x)
264 return plog_impl_float<Packet,
true>(_x);
276template <
typename Packet,
bool base2>
277EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
279Packet plog_impl_double(
const Packet _x)
283 const Packet cst_1 = pset1<Packet>(1.0);
284 const Packet cst_neg_half = pset1<Packet>(-0.5);
286 const Packet cst_min_norm_pos = pset1frombits<Packet>(
static_cast<uint64_t
>(0x0010000000000000ull));
287 const Packet cst_minus_inf = pset1frombits<Packet>(
static_cast<uint64_t
>(0xfff0000000000000ull));
288 const Packet cst_pos_inf = pset1frombits<Packet>(
static_cast<uint64_t
>(0x7ff0000000000000ull));
293 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
294 const Packet cst_cephes_log_p0 = pset1<Packet>(1.01875663804580931796E-4);
295 const Packet cst_cephes_log_p1 = pset1<Packet>(4.97494994976747001425E-1);
296 const Packet cst_cephes_log_p2 = pset1<Packet>(4.70579119878881725854E0);
297 const Packet cst_cephes_log_p3 = pset1<Packet>(1.44989225341610930846E1);
298 const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
299 const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);
301 const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
302 const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
303 const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
304 const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);
305 const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
306 const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
309 x = pmax(x, cst_min_norm_pos);
322 Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
323 Packet tmp = pand(x, mask);
325 e = psub(e, pand(cst_1, mask));
328 Packet x2 = pmul(x, x);
329 Packet x3 = pmul(x2, x);
334 y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
335 y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
336 y = pmadd(y, x, cst_cephes_log_p2);
337 y1 = pmadd(y1, x, cst_cephes_log_p5);
338 y_ = pmadd(y, x3, y1);
340 y = pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1);
341 y1 = pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4);
342 y = pmadd(y, x, cst_cephes_log_q2);
343 y1 = pmadd(y1, x, cst_cephes_log_q5);
344 y = pmadd(y, x3, y1);
349 y = pmadd(cst_neg_half, x2, y);
354 const Packet cst_log2e = pset1<Packet>(
static_cast<double>(EIGEN_LOG2E));
355 x = pmadd(x, cst_log2e, e);
357 const Packet cst_ln2 = pset1<Packet>(
static_cast<double>(EIGEN_LN2));
358 x = pmadd(e, cst_ln2, x);
361 Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
362 Packet iszero_mask = pcmp_eq(_x,pzero(_x));
363 Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);
368 return pselect(iszero_mask, cst_minus_inf,
369 por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
372template <
typename Packet>
373EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
375Packet plog_double(
const Packet _x)
377 return plog_impl_double<Packet,
false>(_x);
380template <
typename Packet>
381EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
383Packet plog2_double(
const Packet _x)
385 return plog_impl_double<Packet,
true>(_x);
391template<
typename Packet>
392Packet generic_plog1p(
const Packet& x)
394 typedef typename unpacket_traits<Packet>::type ScalarType;
395 const Packet one = pset1<Packet>(ScalarType(1));
396 Packet xp1 = padd(x, one);
397 Packet small_mask = pcmp_eq(xp1, one);
398 Packet log1 = plog(xp1);
399 Packet inf_mask = pcmp_eq(xp1, log1);
400 Packet log_large = pmul(x, pdiv(log1, psub(xp1, one)));
401 return pselect(por(small_mask, inf_mask), x, log_large);
407template<
typename Packet>
408Packet generic_expm1(
const Packet& x)
410 typedef typename unpacket_traits<Packet>::type ScalarType;
411 const Packet one = pset1<Packet>(ScalarType(1));
412 const Packet neg_one = pset1<Packet>(ScalarType(-1));
414 Packet one_mask = pcmp_eq(u, one);
415 Packet u_minus_one = psub(u, one);
416 Packet neg_one_mask = pcmp_eq(u_minus_one, neg_one);
417 Packet logu = plog(u);
422 Packet pos_inf_mask = pcmp_eq(logu, u);
423 Packet expm1 = pmul(u_minus_one, pdiv(x, logu));
424 expm1 = pselect(pos_inf_mask, u, expm1);
425 return pselect(one_mask,
427 pselect(neg_one_mask,
436template <
typename Packet>
437EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
439Packet pexp_float(
const Packet _x)
441 const Packet cst_1 = pset1<Packet>(1.0f);
442 const Packet cst_half = pset1<Packet>(0.5f);
443 const Packet cst_exp_hi = pset1<Packet>( 88.723f);
444 const Packet cst_exp_lo = pset1<Packet>(-88.723f);
446 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
447 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);
448 const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);
449 const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);
450 const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);
451 const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
452 const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);
455 Packet x = pmax(pmin(_x, cst_exp_hi), cst_exp_lo);
459 Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half));
464 const Packet cst_cephes_exp_C1 = pset1<Packet>(-0.693359375f);
465 const Packet cst_cephes_exp_C2 = pset1<Packet>(2.12194440e-4f);
466 Packet r = pmadd(m, cst_cephes_exp_C1, x);
467 r = pmadd(m, cst_cephes_exp_C2, r);
469 Packet r2 = pmul(r, r);
470 Packet r3 = pmul(r2, r);
474 y = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);
475 y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4);
477 y = pmadd(y, r, cst_cephes_exp_p2);
478 y1 = pmadd(y1, r, cst_cephes_exp_p5);
479 y = pmadd(y, r3, y1);
480 y = pmadd(y, r2, y2);
484 return pmax(pldexp(y,m), _x);
487template <
typename Packet>
488EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
490Packet pexp_double(
const Packet _x)
494 const Packet cst_1 = pset1<Packet>(1.0);
495 const Packet cst_2 = pset1<Packet>(2.0);
496 const Packet cst_half = pset1<Packet>(0.5);
498 const Packet cst_exp_hi = pset1<Packet>(709.784);
499 const Packet cst_exp_lo = pset1<Packet>(-709.784);
501 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
502 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
503 const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
504 const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);
505 const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6);
506 const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3);
507 const Packet cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1);
508 const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0);
509 const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125);
510 const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6);
515 x = pmax(pmin(x, cst_exp_hi), cst_exp_lo);
517 fx = pmadd(cst_cephes_LOG2EF, x, cst_half);
525 tmp = pmul(fx, cst_cephes_exp_C1);
526 Packet z = pmul(fx, cst_cephes_exp_C2);
530 Packet x2 = pmul(x, x);
533 Packet px = cst_cephes_exp_p0;
534 px = pmadd(px, x2, cst_cephes_exp_p1);
535 px = pmadd(px, x2, cst_cephes_exp_p2);
539 Packet qx = cst_cephes_exp_q0;
540 qx = pmadd(qx, x2, cst_cephes_exp_q1);
541 qx = pmadd(qx, x2, cst_cephes_exp_q2);
542 qx = pmadd(qx, x2, cst_cephes_exp_q3);
547 x = pdiv(px, psub(qx, px));
548 x = pmadd(cst_2, x, cst_1);
553 return pmax(pldexp(x,fx), _x);
565inline float trig_reduce_huge (
float xf,
int *quadrant)
567 using Eigen::numext::int32_t;
568 using Eigen::numext::uint32_t;
569 using Eigen::numext::int64_t;
570 using Eigen::numext::uint64_t;
572 const double pio2_62 = 3.4061215800865545e-19;
573 const uint64_t zero_dot_five = uint64_t(1) << 61;
577 static const uint32_t two_over_pi [] =
579 0x00000028, 0x000028be, 0x0028be60, 0x28be60db,
580 0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a,
581 0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4,
582 0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770,
583 0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566,
584 0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410,
585 0x10e41000, 0xe4100000
588 uint32_t xi = numext::bit_cast<uint32_t>(xf);
593 uint32_t e = (xi >> 23) - 118;
595 xi = ((xi & 0x007fffffu)| 0x00800000u) << (e & 0x7);
598 uint32_t twoopi_1 = two_over_pi[i-1];
599 uint32_t twoopi_2 = two_over_pi[i+3];
600 uint32_t twoopi_3 = two_over_pi[i+7];
604 p = uint64_t(xi) * twoopi_3;
605 p = uint64_t(xi) * twoopi_2 + (p >> 32);
606 p = (uint64_t(xi * twoopi_1) << 32) + p;
609 uint64_t q = (p + zero_dot_five) >> 62;
616 return float(
double(int64_t(p)) * pio2_62);
619template<
bool ComputeSine,
typename Packet>
620EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
622#if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT
623__attribute__((optimize(
"-fno-unsafe-math-optimizations")))
625Packet psincos_float(
const Packet& _x)
627 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
629 const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f);
630 const Packet cst_rounding_magic = pset1<Packet>(12582912);
631 const PacketI csti_1 = pset1<PacketI>(1);
632 const Packet cst_sign_mask = pset1frombits<Packet>(0x80000000u);
637 Packet y = pmul(x, cst_2oPI);
640 Packet y_round = padd(y, cst_rounding_magic);
641 EIGEN_OPTIMIZATION_BARRIER(y_round)
642 PacketI y_int = preinterpret<PacketI>(y_round);
643 y = psub(y_round, cst_rounding_magic);
647 #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
650 const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;
651 x = pmadd(y, pset1<Packet>(-1.57079601287841796875f), x);
652 x = pmadd(y, pset1<Packet>(-3.1391647326017846353352069854736328125e-07f), x);
653 x = pmadd(y, pset1<Packet>(-5.390302529957764765544681040410068817436695098876953125e-15f), x);
661 const float huge_th = ComputeSine ? 25966.f : 18838.f;
662 x = pmadd(y, pset1<Packet>(-1.5703125), x);
663 EIGEN_OPTIMIZATION_BARRIER(x)
664 x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x);
665 EIGEN_OPTIMIZATION_BARRIER(x)
666 x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x);
667 x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x);
683 if(predux_any(pcmp_le(pset1<Packet>(huge_th),pabs(_x))))
685 const int PacketSize = unpacket_traits<Packet>::size;
686 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(Packet))
float vals[PacketSize];
687 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(Packet))
float x_cpy[PacketSize];
688 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(Packet))
int y_int2[PacketSize];
689 pstoreu(vals, pabs(_x));
691 pstoreu(y_int2, y_int);
692 for(
int k=0; k<PacketSize;++k)
695 if(val>=huge_th && (numext::isfinite)(val))
696 x_cpy[k] = trig_reduce_huge(val,&y_int2[k]);
698 x = ploadu<Packet>(x_cpy);
699 y_int = ploadu<PacketI>(y_int2);
705 Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)))
706 : preinterpret<Packet>(plogical_shift_left<30>(padd(y_int,csti_1)));
707 sign_bit = pand(sign_bit, cst_sign_mask);
711 Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int)));
713 Packet x2 = pmul(x,x);
716 Packet y1 = pset1<Packet>(2.4372266125283204019069671630859375e-05f);
717 y1 = pmadd(y1, x2, pset1<Packet>(-0.00138865201734006404876708984375f ));
718 y1 = pmadd(y1, x2, pset1<Packet>(0.041666619479656219482421875f ));
719 y1 = pmadd(y1, x2, pset1<Packet>(-0.5f));
720 y1 = pmadd(y1, x2, pset1<Packet>(1.f));
730 Packet y2 = pset1<Packet>(-0.0001959234114083702898469196984621021329076029360294342041015625f);
731 y2 = pmadd(y2, x2, pset1<Packet>( 0.0083326873655616851693794799871284340042620897293090820312500000f));
732 y2 = pmadd(y2, x2, pset1<Packet>(-0.1666666203982298255503735617821803316473960876464843750000000000f));
734 y2 = pmadd(y2, x, x);
737 y = ComputeSine ? pselect(poly_mask,y2,y1)
738 : pselect(poly_mask,y1,y2);
741 return pxor(y, sign_bit);
744template<
typename Packet>
745EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
747Packet psin_float(
const Packet& x)
749 return psincos_float<true>(x);
752template<
typename Packet>
753EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
755Packet pcos_float(
const Packet& x)
757 return psincos_float<false>(x);
761template<
typename Packet>
762EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
764Packet psqrt_complex(
const Packet& a) {
765 typedef typename unpacket_traits<Packet>::type Scalar;
766 typedef typename Scalar::value_type RealScalar;
767 typedef typename unpacket_traits<Packet>::as_real RealPacket;
805 RealPacket a_abs = pabs(a.v);
806 RealPacket a_abs_flip = pcplxflip(Packet(a_abs)).v;
807 RealPacket a_max = pmax(a_abs, a_abs_flip);
808 RealPacket a_min = pmin(a_abs, a_abs_flip);
809 RealPacket a_min_zero_mask = pcmp_eq(a_min, pzero(a_min));
810 RealPacket a_max_zero_mask = pcmp_eq(a_max, pzero(a_max));
811 RealPacket r = pdiv(a_min, a_max);
812 const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
813 RealPacket l = pmul(a_max, psqrt(padd(cst_one, pmul(r, r))));
815 l = pselect(a_min_zero_mask, a_max, l);
820 const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));
822 rho.v = psqrt(pmul(cst_half, padd(a_abs, l)));
827 RealPacket eta = pandnot(pmul(cst_half, pdiv(a.v, pcplxflip(rho).v)), a_max_zero_mask);
828 RealPacket real_mask = peven_mask(a.v);
829 Packet positive_real_result;
831 positive_real_result.v = pselect(real_mask, rho.v, eta);
835 const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));
836 const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;
837 RealPacket imag_signs = pand(a.v, cst_imag_sign_mask);
838 Packet negative_real_result;
840 negative_real_result.v = por(pabs(pcplxflip(positive_real_result).v), imag_signs);
843 Packet negative_real_mask;
844 negative_real_mask.v = pcmp_lt(pand(real_mask, a.v), pzero(a.v));
845 negative_real_mask.v = por(negative_real_mask.v, pcplxflip(negative_real_mask).v);
846 Packet result = pselect(negative_real_mask, negative_real_result, positive_real_result);
853 const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
855 is_inf.v = pcmp_eq(a_abs, cst_pos_inf);
857 is_real_inf.v = pand(is_inf.v, real_mask);
858 is_real_inf = por(is_real_inf, pcplxflip(is_real_inf));
860 Packet real_inf_result;
861 real_inf_result.v = pmul(a_abs, pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0))).v);
862 real_inf_result.v = pselect(negative_real_mask.v, pcplxflip(real_inf_result).v, real_inf_result.v);
865 is_imag_inf.v = pandnot(is_inf.v, real_mask);
866 is_imag_inf = por(is_imag_inf, pcplxflip(is_imag_inf));
867 Packet imag_inf_result;
868 imag_inf_result.v = por(pand(cst_pos_inf, real_mask), pandnot(a.v, real_mask));
870 return pselect(is_imag_inf, imag_inf_result,
871 pselect(is_real_inf, real_inf_result,result));
881template<
typename Packet>
883void absolute_split(
const Packet& x, Packet& n, Packet& r) {
890template<
typename Packet>
892void fast_twosum(
const Packet& x,
const Packet& y, Packet& s_hi, Packet& s_lo) {
894 const Packet t = psub(s_hi, x);
898#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
903template<
typename Packet>
905void twoprod(
const Packet& x,
const Packet& y,
906 Packet& p_hi, Packet& p_lo) {
908 p_lo = pmadd(x, y, pnegate(p_hi));
918template<
typename Packet>
920void veltkamp_splitting(
const Packet& x, Packet& x_hi, Packet& x_lo) {
921 typedef typename unpacket_traits<Packet>::type Scalar;
922 EIGEN_CONSTEXPR
int shift = (NumTraits<Scalar>::digits() + 1) / 2;
923 const Scalar shift_scale = Scalar(uint64_t(1) << shift);
924 const Packet gamma = pmul(pset1<Packet>(shift_scale + Scalar(1)), x);
925 Packet rho = psub(x, gamma);
926 x_hi = padd(rho, gamma);
927 x_lo = psub(x, x_hi);
934template<
typename Packet>
936void twoprod(
const Packet& x,
const Packet& y,
937 Packet& p_hi, Packet& p_lo) {
938 Packet x_hi, x_lo, y_hi, y_lo;
939 veltkamp_splitting(x, x_hi, x_lo);
940 veltkamp_splitting(y, y_hi, y_lo);
943 p_lo = pmadd(x_hi, y_hi, pnegate(p_hi));
944 p_lo = pmadd(x_hi, y_lo, p_lo);
945 p_lo = pmadd(x_lo, y_hi, p_lo);
946 p_lo = pmadd(x_lo, y_lo, p_lo);
958template<
typename Packet>
960 void twosum(
const Packet& x_hi,
const Packet& x_lo,
961 const Packet& y_hi,
const Packet& y_lo,
962 Packet& s_hi, Packet& s_lo) {
963 const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi));
964 Packet r_hi_1, r_lo_1;
965 fast_twosum(x_hi, y_hi,r_hi_1, r_lo_1);
966 Packet r_hi_2, r_lo_2;
967 fast_twosum(y_hi, x_hi,r_hi_2, r_lo_2);
968 const Packet r_hi = pselect(x_greater_mask, r_hi_1, r_hi_2);
970 const Packet s1 = padd(padd(y_lo, r_lo_1), x_lo);
971 const Packet s2 = padd(padd(x_lo, r_lo_2), y_lo);
972 const Packet s = pselect(x_greater_mask, s1, s2);
974 fast_twosum(r_hi, s, s_hi, s_lo);
979template<
typename Packet>
981 void fast_twosum(
const Packet& x_hi,
const Packet& x_lo,
982 const Packet& y_hi,
const Packet& y_lo,
983 Packet& s_hi, Packet& s_lo) {
985 fast_twosum(x_hi, y_hi, r_hi, r_lo);
986 const Packet s = padd(padd(y_lo, r_lo), x_lo);
987 fast_twosum(r_hi, s, s_hi, s_lo);
993template<
typename Packet>
995void fast_twosum(
const Packet& x,
996 const Packet& y_hi,
const Packet& y_lo,
997 Packet& s_hi, Packet& s_lo) {
999 fast_twosum(x, y_hi, r_hi, r_lo);
1000 const Packet s = padd(y_lo, r_lo);
1001 fast_twosum(r_hi, s, s_hi, s_lo);
1012template<
typename Packet>
1014void twoprod(
const Packet& x_hi,
const Packet& x_lo,
const Packet& y,
1015 Packet& p_hi, Packet& p_lo) {
1017 twoprod(x_hi, y, c_hi, c_lo1);
1018 const Packet c_lo2 = pmul(x_lo, y);
1020 fast_twosum(c_hi, c_lo2, t_hi, t_lo1);
1021 const Packet t_lo2 = padd(t_lo1, c_lo1);
1022 fast_twosum(t_hi, t_lo2, p_hi, p_lo);
1031template<
typename Packet>
1033void twoprod(
const Packet& x_hi,
const Packet& x_lo,
1034 const Packet& y_hi,
const Packet& y_lo,
1035 Packet& p_hi, Packet& p_lo) {
1036 Packet p_hi_hi, p_hi_lo;
1037 twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
1038 Packet p_lo_hi, p_lo_lo;
1039 twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);
1040 fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);
1045template <
typename Packet>
1046void doubleword_reciprocal(
const Packet& x, Packet& recip_hi, Packet& recip_lo) {
1047 typedef typename unpacket_traits<Packet>::type Scalar;
1049 Packet approx_recip = prsqrt(x);
1050 approx_recip = pmul(approx_recip, approx_recip);
1057 Packet t1_hi, t1_lo;
1058 twoprod(pnegate(x), approx_recip, t1_hi, t1_lo);
1060 Packet t2_hi, t2_lo;
1061 fast_twosum(pset1<Packet>(Scalar(2)), t1_hi, t2_hi, t2_lo);
1062 Packet t3_hi, t3_lo;
1063 fast_twosum(t2_hi, padd(t2_lo, t1_lo), t3_hi, t3_lo);
1065 twoprod(t3_hi, t3_lo, approx_recip, recip_hi, recip_lo);
1070template <
typename Scalar>
1071struct accurate_log2 {
1072 template <
typename Packet>
1074 void operator()(
const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
1075 log2_x_hi = plog2(x);
1076 log2_x_lo = pzero(x);
1087struct accurate_log2<float> {
1088 template <
typename Packet>
1090 void operator()(
const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
1106 const Packet p6 = pset1<Packet>( 9.703654795885e-2f);
1107 const Packet p5 = pset1<Packet>(-0.1690667718648f);
1108 const Packet p4 = pset1<Packet>( 0.1720575392246f);
1109 const Packet p3 = pset1<Packet>(-0.1789081543684f);
1110 const Packet p2 = pset1<Packet>( 0.2050433009862f);
1111 const Packet p1 = pset1<Packet>(-0.2404672354459f);
1112 const Packet p0 = pset1<Packet>( 0.2885761857032f);
1114 const Packet C3_hi = pset1<Packet>(-0.360674142838f);
1115 const Packet C3_lo = pset1<Packet>(-6.13283912543e-09f);
1116 const Packet C2_hi = pset1<Packet>(0.480897903442f);
1117 const Packet C2_lo = pset1<Packet>(-1.44861207474e-08f);
1118 const Packet C1_hi = pset1<Packet>(-0.721347510815f);
1119 const Packet C1_lo = pset1<Packet>(-4.84483164698e-09f);
1120 const Packet C0_hi = pset1<Packet>(1.44269502163f);
1121 const Packet C0_lo = pset1<Packet>(2.01711713999e-08f);
1122 const Packet one = pset1<Packet>(1.0f);
1124 const Packet x = psub(z, one);
1128 Packet x2 = pmul(x,x);
1129 Packet p_even = pmadd(p6, x2, p4);
1130 p_even = pmadd(p_even, x2, p2);
1131 p_even = pmadd(p_even, x2, p0);
1132 Packet p_odd = pmadd(p5, x2, p3);
1133 p_odd = pmadd(p_odd, x2, p1);
1134 Packet p = pmadd(p_odd, x, p_even);
1143 twoprod(p, x, t_hi, t_lo);
1144 fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo);
1146 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1147 fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo);
1149 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1150 fast_twosum(C1_hi, C1_lo, t_hi, t_lo, q_hi, q_lo);
1152 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1153 fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi, q_lo);
1156 twoprod(q_hi, q_lo, x, log2_x_hi, log2_x_lo);
1168struct accurate_log2<double> {
1169 template <
typename Packet>
1171 void operator()(
const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
1193 const Packet q12 = pset1<Packet>(2.87074255468000586e-9);
1194 const Packet q10 = pset1<Packet>(2.38957980901884082e-8);
1195 const Packet q8 = pset1<Packet>(2.31032094540014656e-7);
1196 const Packet q6 = pset1<Packet>(2.27279857398537278e-6);
1197 const Packet q4 = pset1<Packet>(2.31271023278625638e-5);
1198 const Packet q2 = pset1<Packet>(2.47556738444535513e-4);
1199 const Packet q0 = pset1<Packet>(2.88543873228900172e-3);
1200 const Packet C_hi = pset1<Packet>(0.0400377511598501157);
1201 const Packet C_lo = pset1<Packet>(-4.77726582251425391e-19);
1202 const Packet one = pset1<Packet>(1.0);
1204 const Packet cst_2_log2e_hi = pset1<Packet>(2.88539008177792677);
1205 const Packet cst_2_log2e_lo = pset1<Packet>(4.07660016854549667e-17);
1207 Packet num_hi, num_lo;
1208 twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), num_hi, num_lo);
1212 Packet denom_hi, denom_lo;
1213 doubleword_reciprocal(padd(x, one), denom_hi, denom_lo);
1216 twoprod(num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo);
1218 Packet r2_hi, r2_lo;
1219 twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);
1221 Packet r4_hi, r4_lo;
1222 twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);
1226 Packet q_even = pmadd(q12, r4_hi, q8);
1227 Packet q_odd = pmadd(q10, r4_hi, q6);
1228 q_even = pmadd(q_even, r4_hi, q4);
1229 q_odd = pmadd(q_odd, r4_hi, q2);
1230 q_even = pmadd(q_even, r4_hi, q0);
1231 Packet q = pmadd(q_odd, r2_hi, q_even);
1239 twoprod(r2_hi, r2_lo, q, p_hi, p_lo);
1241 Packet p1_hi, p1_lo;
1242 fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);
1244 Packet p2_hi, p2_lo;
1245 twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);
1247 Packet p3_hi, p3_lo;
1248 fast_twosum(one, p2_hi, p2_lo, p3_hi, p3_lo);
1251 twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);
1256template <
typename Scalar>
1257struct fast_accurate_exp2 {
1258 template <
typename Packet>
1260 Packet operator()(
const Packet& x) {
1262 return pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), x));
1271struct fast_accurate_exp2<float> {
1272 template <
typename Packet>
1274 Packet operator()(
const Packet& x) {
1286 const Packet p4 = pset1<Packet>(1.539513905e-4f);
1287 const Packet p3 = pset1<Packet>(1.340007293e-3f);
1288 const Packet p2 = pset1<Packet>(9.618283249e-3f);
1289 const Packet p1 = pset1<Packet>(5.550328270e-2f);
1290 const Packet p0 = pset1<Packet>(0.2402264923f);
1292 const Packet C_hi = pset1<Packet>(0.6931471825f);
1293 const Packet C_lo = pset1<Packet>(2.36836577e-08f);
1294 const Packet one = pset1<Packet>(1.0f);
1299 Packet x2 = pmul(x,x);
1300 Packet p_even = pmadd(p4, x2, p2);
1301 Packet p_odd = pmadd(p3, x2, p1);
1302 p_even = pmadd(p_even, x2, p0);
1303 Packet p = pmadd(p_odd, x, p_even);
1309 twoprod(p, x, p_hi, p_lo);
1311 Packet q1_hi, q1_lo;
1312 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1314 Packet q2_hi, q2_lo;
1315 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
1317 Packet q3_hi, q3_lo;
1320 fast_twosum(one, q2_hi, q3_hi, q3_lo);
1321 return padd(q3_hi, padd(q2_lo, q3_lo));
1329struct fast_accurate_exp2<double> {
1330 template <
typename Packet>
1332 Packet operator()(
const Packet& x) {
1344 const Packet p9 = pset1<Packet>(4.431642109085495276e-10);
1345 const Packet p8 = pset1<Packet>(7.073829923303358410e-9);
1346 const Packet p7 = pset1<Packet>(1.017822306737031311e-7);
1347 const Packet p6 = pset1<Packet>(1.321543498017646657e-6);
1348 const Packet p5 = pset1<Packet>(1.525273342728892877e-5);
1349 const Packet p4 = pset1<Packet>(1.540353045780084423e-4);
1350 const Packet p3 = pset1<Packet>(1.333355814685869807e-3);
1351 const Packet p2 = pset1<Packet>(9.618129107593478832e-3);
1352 const Packet p1 = pset1<Packet>(5.550410866481961247e-2);
1353 const Packet p0 = pset1<Packet>(0.240226506959101332);
1354 const Packet C_hi = pset1<Packet>(0.693147180559945286);
1355 const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);
1356 const Packet one = pset1<Packet>(1.0);
1361 Packet x2 = pmul(x,x);
1362 Packet p_even = pmadd(p8, x2, p6);
1363 Packet p_odd = pmadd(p9, x2, p7);
1364 p_even = pmadd(p_even, x2, p4);
1365 p_odd = pmadd(p_odd, x2, p5);
1366 p_even = pmadd(p_even, x2, p2);
1367 p_odd = pmadd(p_odd, x2, p3);
1368 p_even = pmadd(p_even, x2, p0);
1369 p_odd = pmadd(p_odd, x2, p1);
1370 Packet p = pmadd(p_odd, x, p_even);
1376 twoprod(p, x, p_hi, p_lo);
1378 Packet q1_hi, q1_lo;
1379 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1381 Packet q2_hi, q2_lo;
1382 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
1384 Packet q3_hi, q3_lo;
1387 fast_twosum(one, q2_hi, q3_hi, q3_lo);
1388 return padd(q3_hi, padd(q2_lo, q3_lo));
1397template <
typename Packet>
1398EIGEN_STRONG_INLINE Packet generic_pow_impl(
const Packet& x,
const Packet& y) {
1399 typedef typename unpacket_traits<Packet>::type Scalar;
1402 Packet m_x = pfrexp(x, e_x);
1405 EIGEN_CONSTEXPR Scalar sqrt_half = Scalar(0.70710678118654752440);
1406 const Packet m_x_scale_mask = pcmp_lt(m_x, pset1<Packet>(sqrt_half));
1407 m_x = pselect(m_x_scale_mask, pmul(pset1<Packet>(Scalar(2)), m_x), m_x);
1408 e_x = pselect(m_x_scale_mask, psub(e_x, pset1<Packet>(Scalar(1))), e_x);
1411 Packet rx_hi, rx_lo;
1412 accurate_log2<Scalar>()(m_x, rx_hi, rx_lo);
1416 Packet f1_hi, f1_lo, f2_hi, f2_lo;
1417 twoprod(e_x, y, f1_hi, f1_lo);
1418 twoprod(rx_hi, rx_lo, y, f2_hi, f2_lo);
1426 fast_twosum(f1_hi, f1_lo, f2_hi, f2_lo, f_hi, f_lo);
1430 absolute_split(f_hi, n_z, r_z);
1431 r_z = padd(r_z, f_lo);
1433 absolute_split(r_z, n_r, r_z);
1434 n_z = padd(n_z, n_r);
1441 const Packet e_r = fast_accurate_exp2<Scalar>()(r_z);
1442 return pldexp(e_r, n_z);
1446template<
typename Packet>
1447EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
1449Packet generic_pow(
const Packet& x,
const Packet& y) {
1450 typedef typename unpacket_traits<Packet>::type Scalar;
1452 const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
1453 const Packet cst_zero = pset1<Packet>(Scalar(0));
1454 const Packet cst_one = pset1<Packet>(Scalar(1));
1455 const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());
1457 const Packet abs_x = pabs(x);
1459 const Packet x_is_zero = pcmp_eq(x, cst_zero);
1460 const Packet x_is_neg = pcmp_lt(x, cst_zero);
1461 const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf);
1462 const Packet abs_x_is_one = pcmp_eq(abs_x, cst_one);
1463 const Packet abs_x_is_gt_one = pcmp_lt(cst_one, abs_x);
1464 const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one);
1465 const Packet x_is_one = pandnot(abs_x_is_one, x_is_neg);
1466 const Packet x_is_neg_one = pand(abs_x_is_one, x_is_neg);
1467 const Packet x_is_nan = pandnot(ptrue(x), pcmp_eq(x, x));
1470 const Packet y_is_one = pcmp_eq(y, cst_one);
1471 const Packet y_is_zero = pcmp_eq(y, cst_zero);
1472 const Packet y_is_neg = pcmp_lt(y, cst_zero);
1473 const Packet y_is_pos = pandnot(ptrue(y), por(y_is_zero, y_is_neg));
1474 const Packet y_is_nan = pandnot(ptrue(y), pcmp_eq(y, y));
1475 const Packet abs_y_is_inf = pcmp_eq(pabs(y), cst_pos_inf);
1476 EIGEN_CONSTEXPR Scalar huge_exponent =
1477 (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) /
1478 NumTraits<Scalar>::epsilon();
1479 const Packet abs_y_is_huge = pcmp_le(pset1<Packet>(huge_exponent), pabs(y));
1482 const Packet y_is_int = pcmp_eq(pfloor(y), y);
1483 const Packet y_div_2 = pmul(y, pset1<Packet>(Scalar(0.5)));
1484 const Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2);
1487 const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf),
1490 const Packet pow_is_one = por(por(x_is_one, y_is_zero),
1492 por(abs_y_is_inf, pandnot(y_is_even, invalid_negative_x))));
1493 const Packet pow_is_nan = por(invalid_negative_x, por(x_is_nan, y_is_nan));
1494 const Packet pow_is_zero = por(por(por(pand(x_is_zero, y_is_pos),
1495 pand(abs_x_is_inf, y_is_neg)),
1496 pand(pand(abs_x_is_lt_one, abs_y_is_huge),
1498 pand(pand(abs_x_is_gt_one, abs_y_is_huge),
1500 const Packet pow_is_inf = por(por(por(pand(x_is_zero, y_is_neg),
1501 pand(abs_x_is_inf, y_is_pos)),
1502 pand(pand(abs_x_is_lt_one, abs_y_is_huge),
1504 pand(pand(abs_x_is_gt_one, abs_y_is_huge),
1508 const Packet negate_pow_abs = pandnot(x_is_neg, y_is_even);
1509 const Packet pow_abs = generic_pow_impl(abs_x, y);
1510 return pselect(y_is_one, x,
1511 pselect(pow_is_one, cst_one,
1512 pselect(pow_is_nan, cst_nan,
1513 pselect(pow_is_inf, cst_pos_inf,
1514 pselect(pow_is_zero, cst_zero,
1515 pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))));
1559template <
typename Packet,
int N>
1561 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet& x,
const typename unpacket_traits<Packet>::type coeff[]) {
1562 EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
1563 return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
1567template <
typename Packet>
1568struct ppolevl<Packet, 0> {
1569 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet& x,
const typename unpacket_traits<Packet>::type coeff[]) {
1570 EIGEN_UNUSED_VARIABLE(x);
1571 return pset1<Packet>(coeff[0]);
1627template <
typename Packet,
int N>
1630 static EIGEN_STRONG_INLINE Packet run(Packet x,
const typename unpacket_traits<Packet>::type coef[]) {
1631 typedef typename unpacket_traits<Packet>::type Scalar;
1632 Packet b0 = pset1<Packet>(coef[0]);
1633 Packet b1 = pset1<Packet>(
static_cast<Scalar
>(0.f));
1636 for (
int i = 1; i < N; i++) {
1639 b0 = psub(pmadd(x, b1, pset1<Packet>(coef[i])), b2);
1642 return pmul(pset1<Packet>(
static_cast<Scalar
>(0.5f)), psub(b0, b2));
Namespace containing all symbols from the Eigen library.
Definition: Core:141