Please help us learn more about our user community by answering the following short survey: https://forms.gle/wpyrxWi18ox9Z5ae9
Eigen  3.4.0
 
Loading...
Searching...
No Matches
MSA/PacketMath.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2018 Wave Computing, Inc.
5// Written by:
6// Chris Larsen
7// Alexey Frunze (afrunze@wavecomp.com)
8//
9// This Source Code Form is subject to the terms of the Mozilla
10// Public License v. 2.0. If a copy of the MPL was not distributed
11// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
13#ifndef EIGEN_PACKET_MATH_MSA_H
14#define EIGEN_PACKET_MATH_MSA_H
15
16#include <iostream>
17#include <string>
18
19namespace Eigen {
20
21namespace internal {
22
// Architecture defaults for this backend. Each macro is only defined here
// when the including translation unit has not already defined it.
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif

// Flag macro (no value): defined unless it already exists.
#if !defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif

#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
34
// Call tracing hook placed at the top of the packet primitives in this file.
// Flip the `#if 0` to `#if 1` to make each function print its file, line and
// name to std::cout the first time it runs; otherwise the macro is empty.
#if 0
#define EIGEN_MSA_DEBUG                                                             \
  static bool firstTime = true;                                                     \
  do {                                                                              \
    if (firstTime) {                                                                \
      std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \
      firstTime = false;                                                            \
    }                                                                               \
  } while (0)
#else
#define EIGEN_MSA_DEBUG
#endif
47
// Packs four 2-bit values into one 8-bit immediate:
// `a` -> bits 1:0, `b` -> bits 3:2, `c` -> bits 5:4, `d` -> bits 7:6.
// Used below as the immediate operand of __builtin_msa_shf_w.
#define EIGEN_MSA_SHF_I8(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6))
49
// Packet aliases for the four-lane MSA vector types used by the 32-bit
// primitives below: float lanes, signed 32-bit lanes, unsigned 32-bit lanes.
50typedef v4f32 Packet4f;
51typedef v4i32 Packet4i;
52typedef v4u32 Packet4ui;
53
// Each macro declares a constant packet named p4f_/p4i_/p4ui_<NAME> whose
// four lanes are all initialized to X.
54#define _EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X }
55#define _EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X }
56#define _EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X }
57
58inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
59 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
60 return os;
61}
62
63inline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {
64 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
65 return os;
66}
67
68inline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {
69 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
70 return os;
71}
72
73template <>
74struct packet_traits<float> : default_packet_traits {
75 typedef Packet4f type;
76 typedef Packet4f half; // Packet2f intrinsics not implemented yet
77 enum {
78 Vectorizable = 1,
79 AlignedOnScalar = 1,
80 size = 4,
81 HasHalfPacket = 0, // Packet2f intrinsics not implemented yet
82 // FIXME check the Has*
83 HasDiv = 1,
84 HasSin = EIGEN_FAST_MATH,
85 HasCos = EIGEN_FAST_MATH,
86 HasTanh = EIGEN_FAST_MATH,
87 HasErf = EIGEN_FAST_MATH,
88 HasLog = 1,
89 HasExp = 1,
90 HasSqrt = 1,
91 HasRsqrt = 1,
92 HasRound = 1,
93 HasFloor = 1,
94 HasCeil = 1,
95 HasBlend = 1
96 };
97};
98
99template <>
100struct packet_traits<int32_t> : default_packet_traits {
101 typedef Packet4i type;
102 typedef Packet4i half; // Packet2i intrinsics not implemented yet
103 enum {
104 Vectorizable = 1,
105 AlignedOnScalar = 1,
106 size = 4,
107 HasHalfPacket = 0, // Packet2i intrinsics not implemented yet
108 // FIXME check the Has*
109 HasDiv = 1,
110 HasBlend = 1
111 };
112};
113
114template <>
115struct unpacket_traits<Packet4f> {
116 typedef float type;
117 enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
118 typedef Packet4f half;
119};
120
121template <>
122struct unpacket_traits<Packet4i> {
123 typedef int32_t type;
124 enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
125 typedef Packet4i half;
126};
127
128template <>
129EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
130 EIGEN_MSA_DEBUG;
131
132 Packet4f v = { from, from, from, from };
133 return v;
134}
135
136template <>
137EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) {
138 EIGEN_MSA_DEBUG;
139
140 return __builtin_msa_fill_w(from);
141}
142
143template <>
144EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float* from) {
145 EIGEN_MSA_DEBUG;
146
147 float f = *from;
148 Packet4f v = { f, f, f, f };
149 return v;
150}
151
152template <>
153EIGEN_STRONG_INLINE Packet4i pload1<Packet4i>(const int32_t* from) {
154 EIGEN_MSA_DEBUG;
155
156 return __builtin_msa_fill_w(*from);
157}
158
159template <>
160EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {
161 EIGEN_MSA_DEBUG;
162
163 return __builtin_msa_fadd_w(a, b);
164}
165
166template <>
167EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
168 EIGEN_MSA_DEBUG;
169
170 return __builtin_msa_addv_w(a, b);
171}
172
173template <>
174EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
175 EIGEN_MSA_DEBUG;
176
177 static const Packet4f countdown = { 0.0f, 1.0f, 2.0f, 3.0f };
178 return padd(pset1<Packet4f>(a), countdown);
179}
180
181template <>
182EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) {
183 EIGEN_MSA_DEBUG;
184
185 static const Packet4i countdown = { 0, 1, 2, 3 };
186 return padd(pset1<Packet4i>(a), countdown);
187}
188
189template <>
190EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {
191 EIGEN_MSA_DEBUG;
192
193 return __builtin_msa_fsub_w(a, b);
194}
195
196template <>
197EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
198 EIGEN_MSA_DEBUG;
199
200 return __builtin_msa_subv_w(a, b);
201}
202
203template <>
204EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {
205 EIGEN_MSA_DEBUG;
206
207 return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);
208}
209
210template <>
211EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
212 EIGEN_MSA_DEBUG;
213
214 return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);
215}
216
217template <>
218EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) {
219 EIGEN_MSA_DEBUG;
220
221 return a;
222}
223
224template <>
225EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
226 EIGEN_MSA_DEBUG;
227
228 return a;
229}
230
231template <>
232EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {
233 EIGEN_MSA_DEBUG;
234
235 return __builtin_msa_fmul_w(a, b);
236}
237
238template <>
239EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
240 EIGEN_MSA_DEBUG;
241
242 return __builtin_msa_mulv_w(a, b);
243}
244
245template <>
246EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
247 EIGEN_MSA_DEBUG;
248
249 return __builtin_msa_fdiv_w(a, b);
250}
251
252template <>
253EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {
254 EIGEN_MSA_DEBUG;
255
256 return __builtin_msa_div_s_w(a, b);
257}
258
259template <>
260EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
261 EIGEN_MSA_DEBUG;
262
263 return __builtin_msa_fmadd_w(c, a, b);
264}
265
266template <>
267EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
268 EIGEN_MSA_DEBUG;
269
270 // Use "asm" construct to avoid __builtin_msa_maddv_w GNU C bug.
271 Packet4i value = c;
272 __asm__("maddv.w %w[value], %w[a], %w[b]\n"
273 // Outputs
274 : [value] "+f"(value)
275 // Inputs
276 : [a] "f"(a), [b] "f"(b));
277 return value;
278}
279
280template <>
281EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {
282 EIGEN_MSA_DEBUG;
283
284 return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);
285}
286
287template <>
288EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
289 EIGEN_MSA_DEBUG;
290
291 return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);
292}
293
294template <>
295EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {
296 EIGEN_MSA_DEBUG;
297
298 return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);
299}
300
301template <>
302EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
303 EIGEN_MSA_DEBUG;
304
305 return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);
306}
307
308template <>
309EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {
310 EIGEN_MSA_DEBUG;
311
312 return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
313}
314
315template <>
316EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
317 EIGEN_MSA_DEBUG;
318
319 return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
320}
321
322template <>
323EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {
324 EIGEN_MSA_DEBUG;
325
326 return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));
327}
328
329template <>
330EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
331 EIGEN_MSA_DEBUG;
332
333 return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));
334}
335
336template <>
337EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
338 EIGEN_MSA_DEBUG;
339
340#if EIGEN_FAST_MATH
341 // This prefers numbers to NaNs.
342 return __builtin_msa_fmin_w(a, b);
343#else
344 // This prefers NaNs to numbers.
345 Packet4i aNaN = __builtin_msa_fcun_w(a, a);
346 Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);
347 return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
348#endif
349}
350
351template <>
352EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
353 EIGEN_MSA_DEBUG;
354
355 return __builtin_msa_min_s_w(a, b);
356}
357
358template <>
359EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
360 EIGEN_MSA_DEBUG;
361
362#if EIGEN_FAST_MATH
363 // This prefers numbers to NaNs.
364 return __builtin_msa_fmax_w(a, b);
365#else
366 // This prefers NaNs to numbers.
367 Packet4i aNaN = __builtin_msa_fcun_w(a, a);
368 Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);
369 return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
370#endif
371}
372
373template <>
374EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
375 EIGEN_MSA_DEBUG;
376
377 return __builtin_msa_max_s_w(a, b);
378}
379
380template <>
381EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
382 EIGEN_MSA_DEBUG;
383
384 EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
385}
386
387template <>
388EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) {
389 EIGEN_MSA_DEBUG;
390
391 EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
392}
393
394template <>
395EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
396 EIGEN_MSA_DEBUG;
397
398 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
399}
400
401template <>
402EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) {
403 EIGEN_MSA_DEBUG;
404
405 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
406}
407
408template <>
409EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
410 EIGEN_MSA_DEBUG;
411
412 float f0 = from[0], f1 = from[1];
413 Packet4f v0 = { f0, f0, f0, f0 };
414 Packet4f v1 = { f1, f1, f1, f1 };
415 return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
416}
417
418template <>
419EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) {
420 EIGEN_MSA_DEBUG;
421
422 int32_t i0 = from[0], i1 = from[1];
423 Packet4i v0 = { i0, i0, i0, i0 };
424 Packet4i v1 = { i1, i1, i1, i1 };
425 return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
426}
427
428template <>
429EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
430 EIGEN_MSA_DEBUG;
431
432 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
433}
434
435template <>
436EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {
437 EIGEN_MSA_DEBUG;
438
439 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);
440}
441
442template <>
443EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
444 EIGEN_MSA_DEBUG;
445
446 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
447}
448
449template <>
450EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) {
451 EIGEN_MSA_DEBUG;
452
453 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);
454}
455
456template <>
457EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
458 EIGEN_MSA_DEBUG;
459
460 float f = *from;
461 Packet4f v = { f, f, f, f };
462 v[1] = from[stride];
463 v[2] = from[2 * stride];
464 v[3] = from[3 * stride];
465 return v;
466}
467
468template <>
469EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) {
470 EIGEN_MSA_DEBUG;
471
472 int32_t i = *from;
473 Packet4i v = { i, i, i, i };
474 v[1] = from[stride];
475 v[2] = from[2 * stride];
476 v[3] = from[3 * stride];
477 return v;
478}
479
480template <>
481EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from,
482 Index stride) {
483 EIGEN_MSA_DEBUG;
484
485 *to = from[0];
486 to += stride;
487 *to = from[1];
488 to += stride;
489 *to = from[2];
490 to += stride;
491 *to = from[3];
492}
493
494template <>
495EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from,
496 Index stride) {
497 EIGEN_MSA_DEBUG;
498
499 *to = from[0];
500 to += stride;
501 *to = from[1];
502 to += stride;
503 *to = from[2];
504 to += stride;
505 *to = from[3];
506}
507
508template <>
509EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
510 EIGEN_MSA_DEBUG;
511
512 __builtin_prefetch(addr);
513}
514
515template <>
516EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) {
517 EIGEN_MSA_DEBUG;
518
519 __builtin_prefetch(addr);
520}
521
522template <>
523EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
524 EIGEN_MSA_DEBUG;
525
526 return a[0];
527}
528
529template <>
530EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) {
531 EIGEN_MSA_DEBUG;
532
533 return a[0];
534}
535
536template <>
537EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
538 EIGEN_MSA_DEBUG;
539
540 return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
541}
542
543template <>
544EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
545 EIGEN_MSA_DEBUG;
546
547 return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
548}
549
550template <>
551EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {
552 EIGEN_MSA_DEBUG;
553
554 return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);
555}
556
557template <>
558EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {
559 EIGEN_MSA_DEBUG;
560
561 Packet4i zero = __builtin_msa_ldi_w(0);
562 return __builtin_msa_add_a_w(zero, a);
563}
564
565template <>
566EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
567 EIGEN_MSA_DEBUG;
568
569 Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
570 s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
571 return s[0];
572}
573
574
575template <>
576EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) {
577 EIGEN_MSA_DEBUG;
578
579 Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
580 s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
581 return s[0];
582}
583
584// Other reduction functions:
585// mul
586template <>
587EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
588 EIGEN_MSA_DEBUG;
589
590 Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
591 p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
592 return p[0];
593}
594
595template <>
596EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) {
597 EIGEN_MSA_DEBUG;
598
599 Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
600 p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
601 return p[0];
602}
603
604// min
605template <>
606EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
607 EIGEN_MSA_DEBUG;
608
609 // Swap 64-bit halves of a.
610 Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
611#if !EIGEN_FAST_MATH
612 // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
613 // masks of all zeroes/ones in low 64 bits.
614 v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
615 // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
616 unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
617#endif
618 // Continue with min computation.
619 Packet4f v = __builtin_msa_fmin_w(a, swapped);
620 v = __builtin_msa_fmin_w(
621 v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
622#if !EIGEN_FAST_MATH
623 // Based on the mask select between v and 4 qNaNs.
624 v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
625 v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
626#endif
627 return v[0];
628}
629
630template <>
631EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) {
632 EIGEN_MSA_DEBUG;
633
634 Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
635 m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
636 return m[0];
637}
638
639// max
640template <>
641EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
642 EIGEN_MSA_DEBUG;
643
644 // Swap 64-bit halves of a.
645 Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
646#if !EIGEN_FAST_MATH
647 // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
648 // masks of all zeroes/ones in low 64 bits.
649 v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
650 // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
651 unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
652#endif
653 // Continue with max computation.
654 Packet4f v = __builtin_msa_fmax_w(a, swapped);
655 v = __builtin_msa_fmax_w(
656 v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
657#if !EIGEN_FAST_MATH
658 // Based on the mask select between v and 4 qNaNs.
659 v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
660 v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
661#endif
662 return v[0];
663}
664
665template <>
666EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
667 EIGEN_MSA_DEBUG;
668
669 Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
670 m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
671 return m[0];
672}
673
674inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
675 os << "[ " << value.packet[0] << "," << std::endl
676 << " " << value.packet[1] << "," << std::endl
677 << " " << value.packet[2] << "," << std::endl
678 << " " << value.packet[3] << " ]";
679 return os;
680}
681
682EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
683 EIGEN_MSA_DEBUG;
684
685 v4i32 tmp1, tmp2, tmp3, tmp4;
686
687 tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
688 tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
689 tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
690 tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
691
692 kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
693 kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
694 kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
695 kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
696}
697
698inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {
699 os << "[ " << value.packet[0] << "," << std::endl
700 << " " << value.packet[1] << "," << std::endl
701 << " " << value.packet[2] << "," << std::endl
702 << " " << value.packet[3] << " ]";
703 return os;
704}
705
706EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
707 EIGEN_MSA_DEBUG;
708
709 v4i32 tmp1, tmp2, tmp3, tmp4;
710
711 tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);
712 tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);
713 tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);
714 tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);
715
716 kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
717 kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
718 kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
719 kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
720}
721
722template <>
723EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {
724 EIGEN_MSA_DEBUG;
725
726 return __builtin_msa_fsqrt_w(a);
727}
728
729template <>
730EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {
731 EIGEN_MSA_DEBUG;
732
733#if EIGEN_FAST_MATH
734 return __builtin_msa_frsqrt_w(a);
735#else
736 Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));
737 return pdiv(ones, psqrt(a));
738#endif
739}
740
741template <>
742EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) {
743 Packet4f v = a;
744 int32_t old_mode, new_mode;
745 asm volatile(
746 "cfcmsa %[old_mode], $1\n"
747 "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
748 "ctcmsa $1, %[new_mode]\n"
749 "frint.w %w[v], %w[v]\n"
750 "ctcmsa $1, %[old_mode]\n"
751 : // outputs
752 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
753 [v] "+f"(v)
754 : // inputs
755 : // clobbers
756 );
757 return v;
758}
759
760template <>
761EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
762 Packet4f v = a;
763 int32_t old_mode, new_mode;
764 asm volatile(
765 "cfcmsa %[old_mode], $1\n"
766 "ori %[new_mode], %[old_mode], 3\n"
767 "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
768 "ctcmsa $1, %[new_mode]\n"
769 "frint.w %w[v], %w[v]\n"
770 "ctcmsa $1, %[old_mode]\n"
771 : // outputs
772 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
773 [v] "+f"(v)
774 : // inputs
775 : // clobbers
776 );
777 return v;
778}
779
780template <>
781EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
782 Packet4f v = a;
783 int32_t old_mode, new_mode;
784 asm volatile(
785 "cfcmsa %[old_mode], $1\n"
786 "ori %[new_mode], %[old_mode], 3\n"
787 "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
788 "ctcmsa $1, %[new_mode]\n"
789 "frint.w %w[v], %w[v]\n"
790 "ctcmsa $1, %[old_mode]\n"
791 : // outputs
792 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
793 [v] "+f"(v)
794 : // inputs
795 : // clobbers
796 );
797 return v;
798}
799
800template <>
801EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
802 const Packet4f& elsePacket) {
803 Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
804 ifPacket.select[3] };
805 Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
806 return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
807}
808
809template <>
810EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
811 const Packet4i& elsePacket) {
812 Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
813 ifPacket.select[3] };
814 Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
815 return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
816}
817
818//---------- double ----------
819
// Packet aliases for the two-lane MSA vector types used by the 64-bit
// primitives below: double lanes, signed 64-bit lanes, unsigned 64-bit lanes.
820typedef v2f64 Packet2d;
821typedef v2i64 Packet2l;
822typedef v2u64 Packet2ul;
823
// Each macro declares a constant packet named p2d_/p2l_/p2ul_<NAME> whose
// two lanes are both initialized to X.
824#define _EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X }
825#define _EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X }
826#define _EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X }
827
828inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
829 os << "[ " << value[0] << ", " << value[1] << " ]";
830 return os;
831}
832
833inline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {
834 os << "[ " << value[0] << ", " << value[1] << " ]";
835 return os;
836}
837
838inline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {
839 os << "[ " << value[0] << ", " << value[1] << " ]";
840 return os;
841}
842
843template <>
844struct packet_traits<double> : default_packet_traits {
845 typedef Packet2d type;
846 typedef Packet2d half;
847 enum {
848 Vectorizable = 1,
849 AlignedOnScalar = 1,
850 size = 2,
851 HasHalfPacket = 0,
852 // FIXME check the Has*
853 HasDiv = 1,
854 HasExp = 1,
855 HasSqrt = 1,
856 HasRsqrt = 1,
857 HasRound = 1,
858 HasFloor = 1,
859 HasCeil = 1,
860 HasBlend = 1
861 };
862};
863
864template <>
865struct unpacket_traits<Packet2d> {
866 typedef double type;
867 enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
868 typedef Packet2d half;
869};
870
871template <>
872EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
873 EIGEN_MSA_DEBUG;
874
875 Packet2d value = { from, from };
876 return value;
877}
878
879template <>
880EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) {
881 EIGEN_MSA_DEBUG;
882
883 return __builtin_msa_fadd_d(a, b);
884}
885
886template <>
887EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) {
888 EIGEN_MSA_DEBUG;
889
890 static const Packet2d countdown = { 0.0, 1.0 };
891 return padd(pset1<Packet2d>(a), countdown);
892}
893
894template <>
895EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) {
896 EIGEN_MSA_DEBUG;
897
898 return __builtin_msa_fsub_d(a, b);
899}
900
901template <>
902EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {
903 EIGEN_MSA_DEBUG;
904
905 return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);
906}
907
908template <>
909EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) {
910 EIGEN_MSA_DEBUG;
911
912 return a;
913}
914
915template <>
916EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) {
917 EIGEN_MSA_DEBUG;
918
919 return __builtin_msa_fmul_d(a, b);
920}
921
922template <>
923EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) {
924 EIGEN_MSA_DEBUG;
925
926 return __builtin_msa_fdiv_d(a, b);
927}
928
929template <>
930EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
931 EIGEN_MSA_DEBUG;
932
933 return __builtin_msa_fmadd_d(c, a, b);
934}
935
936// Bitwise logical operations are not available for floating-point vectors, so the
937// operands are reinterpreted as byte vectors (v16u8) for the MSA logical intrinsics.
938template <>
939EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) {
940 EIGEN_MSA_DEBUG;
941
942 return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);
943}
944
945template <>
946EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) {
947 EIGEN_MSA_DEBUG;
948
949 return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);
950}
951
952template <>
953EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) {
954 EIGEN_MSA_DEBUG;
955
956 return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
957}
958
959template <>
960EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) {
961 EIGEN_MSA_DEBUG;
962
963 return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));
964}
965
966template <>
967EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
968 EIGEN_MSA_DEBUG;
969
970 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
971}
972
973template <>
974EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
975 EIGEN_MSA_DEBUG;
976
977#if EIGEN_FAST_MATH
978 // This prefers numbers to NaNs.
979 return __builtin_msa_fmin_d(a, b);
980#else
981 // This prefers NaNs to numbers.
982 v2i64 aNaN = __builtin_msa_fcun_d(a, a);
983 v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);
984 return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
985#endif
986}
987
988template <>
989EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {
990 EIGEN_MSA_DEBUG;
991
992#if EIGEN_FAST_MATH
993 // This prefers numbers to NaNs.
994 return __builtin_msa_fmax_d(a, b);
995#else
996 // This prefers NaNs to numbers.
997 v2i64 aNaN = __builtin_msa_fcun_d(a, a);
998 v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);
999 return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
1000#endif
1001}
1002
1003template <>
1004EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
1005 EIGEN_MSA_DEBUG;
1006
1007 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
1008}
1009
1010template <>
1011EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
1012 EIGEN_MSA_DEBUG;
1013
1014 Packet2d value = { *from, *from };
1015 return value;
1016}
1017
1018template <>
1019EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1020 EIGEN_MSA_DEBUG;
1021
1022 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1023}
1024
1025template <>
1026EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1027 EIGEN_MSA_DEBUG;
1028
1029 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1030}
1031
1032template <>
1033EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
1034 EIGEN_MSA_DEBUG;
1035
1036 Packet2d value;
1037 value[0] = *from;
1038 from += stride;
1039 value[1] = *from;
1040 return value;
1041}
1042
1043template <>
1044EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from,
1045 Index stride) {
1046 EIGEN_MSA_DEBUG;
1047
1048 *to = from[0];
1049 to += stride;
1050 *to = from[1];
1051}
1052
1053template <>
1054EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1055 EIGEN_MSA_DEBUG;
1056
1057 __builtin_prefetch(addr);
1058}
1059
1060template <>
1061EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
1062 EIGEN_MSA_DEBUG;
1063
1064 return a[0];
1065}
1066
1067template <>
1068EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
1069 EIGEN_MSA_DEBUG;
1070
1071 return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1072}
1073
1074template <>
1075EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {
1076 EIGEN_MSA_DEBUG;
1077
1078 return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);
1079}
1080
1081template <>
1082EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
1083 EIGEN_MSA_DEBUG;
1084
1085 Packet2d s = padd(a, preverse(a));
1086 return s[0];
1087}
1088
1089// Other reduction functions:
1090// mul
1091template <>
1092EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {
1093 EIGEN_MSA_DEBUG;
1094
1095 Packet2d p = pmul(a, preverse(a));
1096 return p[0];
1097}
1098
1099// min
1100template <>
1101EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {
1102 EIGEN_MSA_DEBUG;
1103
1104#if EIGEN_FAST_MATH
1105 Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1106 Packet2d v = __builtin_msa_fmin_d(a, swapped);
1107 return v[0];
1108#else
1109 double a0 = a[0], a1 = a[1];
1110 return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;
1111#endif
1112}
1113
1114// max
1115template <>
1116EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {
1117 EIGEN_MSA_DEBUG;
1118
1119#if EIGEN_FAST_MATH
1120 Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1121 Packet2d v = __builtin_msa_fmax_d(a, swapped);
1122 return v[0];
1123#else
1124 double a0 = a[0], a1 = a[1];
1125 return ((numext::isnan)(a0) || a0 > a1) ? a0 : a1;
1126#endif
1127}
1128
1129template <>
1130EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& a) {
1131 EIGEN_MSA_DEBUG;
1132
1133 return __builtin_msa_fsqrt_d(a);
1134}
1135
1136template <>
1137EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
1138 EIGEN_MSA_DEBUG;
1139
1140#if EIGEN_FAST_MATH
1141 return __builtin_msa_frsqrt_d(a);
1142#else
1143 Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));
1144 return pdiv(ones, psqrt(a));
1145#endif
1146}
1147
1148inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
1149 os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
1150 return os;
1151}
1152
1153EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
1154 EIGEN_MSA_DEBUG;
1155
1156 Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1157 Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1158 kernel.packet[0] = trn1;
1159 kernel.packet[1] = trn2;
1160}
1161
1162template <>
1163EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) {
1164 Packet2d v = a;
1165 int32_t old_mode, new_mode;
1166 asm volatile(
1167 "cfcmsa %[old_mode], $1\n"
1168 "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
1169 "ctcmsa $1, %[new_mode]\n"
1170 "frint.d %w[v], %w[v]\n"
1171 "ctcmsa $1, %[old_mode]\n"
1172 : // outputs
1173 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1174 [v] "+f"(v)
1175 : // inputs
1176 : // clobbers
1177 );
1178 return v;
1179}
1180
1181template <>
1182EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
1183 Packet2d v = a;
1184 int32_t old_mode, new_mode;
1185 asm volatile(
1186 "cfcmsa %[old_mode], $1\n"
1187 "ori %[new_mode], %[old_mode], 3\n"
1188 "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
1189 "ctcmsa $1, %[new_mode]\n"
1190 "frint.d %w[v], %w[v]\n"
1191 "ctcmsa $1, %[old_mode]\n"
1192 : // outputs
1193 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1194 [v] "+f"(v)
1195 : // inputs
1196 : // clobbers
1197 );
1198 return v;
1199}
1200
1201template <>
1202EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
1203 Packet2d v = a;
1204 int32_t old_mode, new_mode;
1205 asm volatile(
1206 "cfcmsa %[old_mode], $1\n"
1207 "ori %[new_mode], %[old_mode], 3\n"
1208 "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
1209 "ctcmsa $1, %[new_mode]\n"
1210 "frint.d %w[v], %w[v]\n"
1211 "ctcmsa $1, %[old_mode]\n"
1212 : // outputs
1213 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1214 [v] "+f"(v)
1215 : // inputs
1216 : // clobbers
1217 );
1218 return v;
1219}
1220
1221template <>
1222EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
1223 const Packet2d& elsePacket) {
1224 Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
1225 Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);
1226 return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
1227}
1228
1229} // end namespace internal
1230
1231} // end namespace Eigen
1232
1233#endif // EIGEN_PACKET_MATH_MSA_H
@ Aligned16
Definition: Constants.h:235
Namespace containing all symbols from the Eigen library.
Definition: Core:141
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74