Please, help us to better know about our user community by answering the following short survey: https://forms.gle/wpyrxWi18ox9Z5ae9
Eigen  3.4.0
 
Loading...
Searching...
No Matches
Redux.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_REDUX_H
12#define EIGEN_REDUX_H
13
14namespace Eigen {
15
16namespace internal {
17
18// TODO
19// * implement other kind of vectorization
20// * factorize code
21
22/***************************************************************************
23* Part 1 : the logic deciding a strategy for vectorization and unrolling
24***************************************************************************/
25
26template<typename Func, typename Evaluator>
27struct redux_traits
28{
29public:
30 typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
31 enum {
32 PacketSize = unpacket_traits<PacketType>::size,
33 InnerMaxSize = int(Evaluator::IsRowMajor)
34 ? Evaluator::MaxColsAtCompileTime
35 : Evaluator::MaxRowsAtCompileTime,
36 OuterMaxSize = int(Evaluator::IsRowMajor)
37 ? Evaluator::MaxRowsAtCompileTime
38 : Evaluator::MaxColsAtCompileTime,
39 SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
40 : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
41 : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
42 };
43
44 enum {
45 MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
46 && (functor_traits<Func>::PacketAccess),
47 MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
48 MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
49 };
50
51public:
52 enum {
53 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
54 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
55 : int(DefaultTraversal)
56 };
57
58public:
59 enum {
60 Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
61 : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
62 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
63 };
64
65public:
66 enum {
67 Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
68 };
69
70#ifdef EIGEN_DEBUG_ASSIGN
71 static void debug()
72 {
73 std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
74 std::cerr.setf(std::ios::hex, std::ios::basefield);
75 EIGEN_DEBUG_VAR(Evaluator::Flags)
76 std::cerr.unsetf(std::ios::hex);
77 EIGEN_DEBUG_VAR(InnerMaxSize)
78 EIGEN_DEBUG_VAR(OuterMaxSize)
79 EIGEN_DEBUG_VAR(SliceVectorizedWork)
80 EIGEN_DEBUG_VAR(PacketSize)
81 EIGEN_DEBUG_VAR(MightVectorize)
82 EIGEN_DEBUG_VAR(MayLinearVectorize)
83 EIGEN_DEBUG_VAR(MaySliceVectorize)
84 std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
85 EIGEN_DEBUG_VAR(UnrollingLimit)
86 std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
87 std::cerr << std::endl;
88 }
89#endif
90};
91
92/***************************************************************************
93* Part 2 : unrollers
94***************************************************************************/
95
96/*** no vectorization ***/
97
98template<typename Func, typename Evaluator, int Start, int Length>
99struct redux_novec_unroller
100{
101 enum {
102 HalfLength = Length/2
103 };
104
105 typedef typename Evaluator::Scalar Scalar;
106
107 EIGEN_DEVICE_FUNC
108 static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
109 {
110 return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
111 redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
112 }
113};
114
115template<typename Func, typename Evaluator, int Start>
116struct redux_novec_unroller<Func, Evaluator, Start, 1>
117{
118 enum {
119 outer = Start / Evaluator::InnerSizeAtCompileTime,
120 inner = Start % Evaluator::InnerSizeAtCompileTime
121 };
122
123 typedef typename Evaluator::Scalar Scalar;
124
125 EIGEN_DEVICE_FUNC
126 static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
127 {
128 return eval.coeffByOuterInner(outer, inner);
129 }
130};
131
132// This is actually dead code and will never be called. It is required
133// to prevent false warnings regarding failed inlining though
134// for 0 length run() will never be called at all.
135template<typename Func, typename Evaluator, int Start>
136struct redux_novec_unroller<Func, Evaluator, Start, 0>
137{
138 typedef typename Evaluator::Scalar Scalar;
139 EIGEN_DEVICE_FUNC
140 static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
141};
142
143/*** vectorization ***/
144
145template<typename Func, typename Evaluator, int Start, int Length>
146struct redux_vec_unroller
147{
148 template<typename PacketType>
149 EIGEN_DEVICE_FUNC
150 static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
151 {
152 enum {
153 PacketSize = unpacket_traits<PacketType>::size,
154 HalfLength = Length/2
155 };
156
157 return func.packetOp(
158 redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
159 redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
160 }
161};
162
163template<typename Func, typename Evaluator, int Start>
164struct redux_vec_unroller<Func, Evaluator, Start, 1>
165{
166 template<typename PacketType>
167 EIGEN_DEVICE_FUNC
168 static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
169 {
170 enum {
171 PacketSize = unpacket_traits<PacketType>::size,
172 index = Start * PacketSize,
173 outer = index / int(Evaluator::InnerSizeAtCompileTime),
174 inner = index % int(Evaluator::InnerSizeAtCompileTime),
175 alignment = Evaluator::Alignment
176 };
177 return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
178 }
179};
180
181/***************************************************************************
182* Part 3 : implementation of all cases
183***************************************************************************/
184
185template<typename Func, typename Evaluator,
186 int Traversal = redux_traits<Func, Evaluator>::Traversal,
187 int Unrolling = redux_traits<Func, Evaluator>::Unrolling
188>
189struct redux_impl;
190
191template<typename Func, typename Evaluator>
192struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
193{
194 typedef typename Evaluator::Scalar Scalar;
195
196 template<typename XprType>
197 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
198 Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
199 {
200 eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
201 Scalar res;
202 res = eval.coeffByOuterInner(0, 0);
203 for(Index i = 1; i < xpr.innerSize(); ++i)
204 res = func(res, eval.coeffByOuterInner(0, i));
205 for(Index i = 1; i < xpr.outerSize(); ++i)
206 for(Index j = 0; j < xpr.innerSize(); ++j)
207 res = func(res, eval.coeffByOuterInner(i, j));
208 return res;
209 }
210};
211
212template<typename Func, typename Evaluator>
213struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
214 : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
215{
216 typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
217 typedef typename Evaluator::Scalar Scalar;
218 template<typename XprType>
219 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
220 Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
221 {
222 return Base::run(eval,func);
223 }
224};
225
226template<typename Func, typename Evaluator>
227struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
228{
229 typedef typename Evaluator::Scalar Scalar;
230 typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
231
232 template<typename XprType>
233 static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
234 {
235 const Index size = xpr.size();
236
237 const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
238 const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
239 enum {
240 alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
241 alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
242 };
243 const Index alignedStart = internal::first_default_aligned(xpr);
244 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
245 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
246 const Index alignedEnd2 = alignedStart + alignedSize2;
247 const Index alignedEnd = alignedStart + alignedSize;
248 Scalar res;
249 if(alignedSize)
250 {
251 PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
252 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
253 {
254 PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
255 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
256 {
257 packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
258 packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
259 }
260
261 packet_res0 = func.packetOp(packet_res0,packet_res1);
262 if(alignedEnd>alignedEnd2)
263 packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
264 }
265 res = func.predux(packet_res0);
266
267 for(Index index = 0; index < alignedStart; ++index)
268 res = func(res,eval.coeff(index));
269
270 for(Index index = alignedEnd; index < size; ++index)
271 res = func(res,eval.coeff(index));
272 }
273 else // too small to vectorize anything.
274 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
275 {
276 res = eval.coeff(0);
277 for(Index index = 1; index < size; ++index)
278 res = func(res,eval.coeff(index));
279 }
280
281 return res;
282 }
283};
284
285// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
286template<typename Func, typename Evaluator, int Unrolling>
287struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
288{
289 typedef typename Evaluator::Scalar Scalar;
290 typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
291
292 template<typename XprType>
293 EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
294 {
295 eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
296 const Index innerSize = xpr.innerSize();
297 const Index outerSize = xpr.outerSize();
298 enum {
299 packetSize = redux_traits<Func, Evaluator>::PacketSize
300 };
301 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
302 Scalar res;
303 if(packetedInnerSize)
304 {
305 PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
306 for(Index j=0; j<outerSize; ++j)
307 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
308 packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));
309
310 res = func.predux(packet_res);
311 for(Index j=0; j<outerSize; ++j)
312 for(Index i=packetedInnerSize; i<innerSize; ++i)
313 res = func(res, eval.coeffByOuterInner(j,i));
314 }
315 else // too small to vectorize anything.
316 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
317 {
318 res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
319 }
320
321 return res;
322 }
323};
324
325template<typename Func, typename Evaluator>
326struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
327{
328 typedef typename Evaluator::Scalar Scalar;
329
330 typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
331 enum {
332 PacketSize = redux_traits<Func, Evaluator>::PacketSize,
333 Size = Evaluator::SizeAtCompileTime,
334 VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
335 };
336
337 template<typename XprType>
338 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
339 Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
340 {
341 EIGEN_ONLY_USED_FOR_DEBUG(xpr)
342 eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
343 if (VectorizedSize > 0) {
344 Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
345 if (VectorizedSize != Size)
346 res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
347 return res;
348 }
349 else {
350 return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
351 }
352 }
353};
354
355// evaluator adaptor
356template<typename _XprType>
357class redux_evaluator : public internal::evaluator<_XprType>
358{
359 typedef internal::evaluator<_XprType> Base;
360public:
361 typedef _XprType XprType;
362 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
363 explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
364
365 typedef typename XprType::Scalar Scalar;
366 typedef typename XprType::CoeffReturnType CoeffReturnType;
367 typedef typename XprType::PacketScalar PacketScalar;
368
369 enum {
370 MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
371 MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
372 // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
373 Flags = Base::Flags & ~DirectAccessBit,
374 IsRowMajor = XprType::IsRowMajor,
375 SizeAtCompileTime = XprType::SizeAtCompileTime,
376 InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
377 };
378
379 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
380 CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
381 { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
382
383 template<int LoadMode, typename PacketType>
384 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
385 PacketType packetByOuterInner(Index outer, Index inner) const
386 { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
387
388};
389
390} // end namespace internal
391
392/***************************************************************************
393* Part 4 : public API
394***************************************************************************/
395
396
406template<typename Derived>
407template<typename Func>
408EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
409DenseBase<Derived>::redux(const Func& func) const
410{
411 eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
412
413 typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
414 ThisEvaluator thisEval(derived());
415
416 // The initial expression is passed to the reducer as an additional argument instead of
417 // passing it as a member of redux_evaluator to help
418 return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
419}
420
428template<typename Derived>
429template<int NaNPropagation>
430EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
432{
433 return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
434}
435
443template<typename Derived>
444template<int NaNPropagation>
445EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
448 return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
449}
457template<typename Derived>
458EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
460{
461 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
462 return Scalar(0);
463 return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
464}
465
470template<typename Derived>
471EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
473{
474#ifdef __INTEL_COMPILER
475 #pragma warning push
476 #pragma warning ( disable : 2259 )
477#endif
478 return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
479#ifdef __INTEL_COMPILER
480 #pragma warning pop
481#endif
482}
483
491template<typename Derived>
492EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
494{
495 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
496 return Scalar(1);
497 return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
498}
499
506template<typename Derived>
507EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
509{
510 return derived().diagonal().sum();
511}
512
513} // end namespace Eigen
514
515#endif // EIGEN_REDUX_H
Base class for all dense matrices, vectors, and arrays.
Definition: DenseBase.h:47
internal::traits< Derived >::Scalar minCoeff() const
Definition: Redux.h:431
Scalar mean() const
Definition: Redux.h:472
internal::traits< Derived >::Scalar Scalar
Definition: DenseBase.h:66
internal::traits< Derived >::Scalar maxCoeff() const
Definition: Redux.h:446
Scalar sum() const
Definition: Redux.h:459
Scalar prod() const
Definition: Redux.h:493
Scalar trace() const
Definition: Redux.h:508
const unsigned int ActualPacketAccessBit
Definition: Constants.h:105
const unsigned int LinearAccessBit
Definition: Constants.h:130
Namespace containing all symbols from the Eigen library.
Definition: Core:141
const int HugeCost
Definition: Constants.h:44
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
const int Dynamic
Definition: Constants.h:22