#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

namespace Eigen {
// Generic evaluator
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef Derived XprType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  // NumDimensions is -1 for variable-dimension tensors.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;
  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
        m_dims(m.dimensions()),
        m_device(device)
  { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
      m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }
  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }
  // Return a packet starting at `index`, where `umask` specifies which packet
  // lanes must actually be loaded (masked load).
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
  coeffRef(const array<DenseIndex, NumCoords>& coords) {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }
  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(m_data != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
                                            Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(m_dims), m_data,
                                  desc.offset()),
        block.expr());
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
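
// Usage sketch (illustrative, not part of the original file): the generic
// evaluator above is what the expression machinery instantiates for terminal
// nodes such as Tensor and TensorMap. Assuming the public Tensor headers are
// included:
//
//   float buf[6] = {0, 1, 2, 3, 4, 5};
//   Eigen::TensorMap<Eigen::Tensor<float, 2> > t(buf, 2, 3);
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<decltype(t), Eigen::DefaultDevice> eval(t, device);
//   eval.evalSubExprsIfNeeded(NULL);  // nothing to do: data is already materialized
//   float v = eval.coeff(3);          // linear index into the underlying buffer
//   eval.cleanup();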
namespace {
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}
// Use the texture cache on CUDA devices whenever possible.
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
#ifdef EIGEN_USE_SYCL
// Overload of loadConstant for SYCL range accessors.
template <cl::sycl::access::mode AcMd, typename T>
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
  return *address;
}
#endif
}  // namespace
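
// Usage sketch (illustrative, not part of the original file): inside a CUDA
// kernel compiled for sm_35 or newer, loadConstant() routes reads of
// immutable inputs through the read-only (texture) cache via __ldg:
//
//   __global__ void scale(const float* in, float* out, int n, float s) {
//     int i = blockIdx.x * blockDim.x + threadIdx.x;
//     if (i < n) out[i] = s * loadConstant(in + i);  // __ldg(in + i) on sm_35+
//   }
//
// On older architectures (and on the host) the generic overload simply
// dereferences the pointer.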
// Default evaluator for rvalues
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef const Derived XprType;
  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
  typedef StorageMemory<const Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  // NumDimensions is -1 for variable-dimension tensors.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<ScalarNoConst>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
      m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return loadConstant(m_data + index);
  }
  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }
  // Masked load of a partial packet; see the non-const evaluator above.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor))
                            ? m_dims.IndexOfColMajor(coords)
                            : m_dims.IndexOfRowMajor(coords);
    return loadConstant(m_data + index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
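
// Usage sketch (illustrative, not part of the original file): read-only
// expression leaves are evaluated through this const specialization, so every
// coefficient read goes through loadConstant():
//
//   const float buf[6] = {0, 1, 2, 3, 4, 5};
//   Eigen::TensorMap<const Eigen::Tensor<float, 2> > src(buf, 2, 3);
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<const decltype(src), Eigen::DefaultDevice>
//       eval(src, device);
//   eval.evalSubExprsIfNeeded(NULL);
//   float v = eval.coeff(0);  // routed through loadConstant()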
// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  enum {
    IsAligned = true,
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
    #ifdef EIGEN_USE_SYCL
    && (PacketType<CoeffReturnType, Device>::size > 1)
    #endif
    ,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    done(true);
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_wrapper(m_functor, index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
};
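
// Usage sketch (illustrative, not part of the original file): nullary
// expressions synthesize coefficients from the functor alone, so data()
// returns NULL and nothing is ever read from memory:
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   t = t.constant(1.5f);  // TensorCwiseNullaryOp with a constant functor
//   t = t.random();        // TensorCwiseNullaryOp with a random generator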
// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<UnaryOp>::PacketAccess),
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };
  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.getResourceRequirements().addCostPerCoeff(
        {0, 0, functor_cost / PacketSize});
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
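
// Usage sketch (illustrative, not part of the original file): a coefficient-
// wise unary expression applies m_functor to each coefficient (or packet) of
// the nested expression:
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
//   a.setRandom();
//   b = a.abs().sqrt();  // two nested TensorCwiseUnaryOp nodes, no temporaries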
// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  enum {
    IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                        int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                        int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<BinaryOp>::PacketAccess),
    BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                        int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    Layout            = TensorEvaluator<LeftArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };
  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) ||
                         internal::traits<XprType>::NumDimensions <= 1),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<
      typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
      LeftTensorBlock;
  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;

  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
                                           RightTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use right impl instead if right impl dimensions are known at compile time.
    return m_leftImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    // TODO(ezhulenev): Evaluate the two expressions in parallel?
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
                                            [done](bool) { done(true); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index),
                              m_rightImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return internal::TensorBlockResourceRequirements::merge(
               m_leftImpl.getResourceRequirements(),
               m_rightImpl.getResourceRequirements())
        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    desc.DropDestinationBuffer();
    return TensorBlock(m_leftImpl.block(desc, scratch),
                       m_rightImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
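
// Usage sketch (illustrative, not part of the original file): a binary cwise
// expression walks both operands in lock-step; packet() combines one packet
// from each side through m_functor.packetOp():
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3), c(2, 3);
//   a.setRandom();
//   b.setRandom();
//   c = a + b * b;  // TensorCwiseBinaryOp nodes, fused into one evaluation pass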
// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  enum {
    IsAligned         = TensorEvaluator<Arg1Type, Device>::IsAligned &
                        TensorEvaluator<Arg2Type, Device>::IsAligned &
                        TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg2Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg3Type, Device>::PacketAccess &&
                        internal::functor_traits<TernaryOp>::PacketAccess,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<Arg1Type, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };
  TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_arg1Impl(op.arg1Expression(), device),
      m_arg2Impl(op.arg2Expression(), device),
      m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) ||
                         internal::traits<XprType>::NumDimensions <= 1),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) &&
                 dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    return m_arg1Impl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_arg1Impl.bind(cgh);
    m_arg2Impl.bind(cgh);
    m_arg3Impl.bind(cgh);
  }
#endif

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
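
// Usage sketch (illustrative, not part of the original file): betainc() is
// the canonical ternary cwise expression in the Tensor module:
//
//   Eigen::Tensor<float, 1> a(5), b(5), x(5), r(5);
//   a.setConstant(0.5f);
//   b.setConstant(0.7f);
//   x.setConstant(0.25f);
//   r = Eigen::betainc(a, b, x);  // TensorCwiseTernaryOp with the betainc functor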
// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  enum {
    IsAligned         = TensorEvaluator<ThenArgType, Device>::IsAligned &
                        TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ThenArgType, Device>::PacketAccess &
                        TensorEvaluator<ElseArgType, Device>::PacketAccess &
                        PacketType<Scalar, Device>::HasBlend,
    BlockAccess       = TensorEvaluator<IfArgType, Device>::BlockAccess &&
                        TensorEvaluator<ThenArgType, Device>::BlockAccess &&
                        TensorEvaluator<ElseArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<IfArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };
  TensorEvaluator(const XprType& op, const Device& device)
    : m_condImpl(op.ifExpression(), device),
      m_thenImpl(op.thenExpression(), device),
      m_elseImpl(op.elseExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
      IfArgTensorBlock;
  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
      ThenArgTensorBlock;
  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
      ElseArgTensorBlock;
  struct TensorSelectOpBlockFactory {
    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    struct XprType {
      typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    };

    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
        const IfArgXprType& if_expr, const ThenArgXprType& then_expr,
        const ElseArgXprType& else_expr) const {
      return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
    }
  };

  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
                                           IfArgTensorBlock, ThenArgTensorBlock,
                                           ElseArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use then or else impl instead if they happen to be known at compile time.
    return m_condImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr,
                                             [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    EIGEN_UNROLL_LOOP
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
               .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    auto then_req = m_thenImpl.getResourceRequirements();
    auto else_req = m_elseImpl.getResourceRequirements();

    auto merged_req =
        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    merged_req.cost_per_coeff =
        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);

    return internal::TensorBlockResourceRequirements::merge(
        m_condImpl.getResourceRequirements(), merged_req);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // It's unsafe to pass a destination buffer to the underlying expressions,
    // because the output might be aliased with one of the inputs.
    desc.DropDestinationBuffer();

    return TensorBlock(
        m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
        m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_condImpl.bind(cgh);
    m_thenImpl.bind(cgh);
    m_elseImpl.bind(cgh);
  }
#endif

 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};
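
// Usage sketch (illustrative, not part of the original file): select()
// evaluates the condition per coefficient and blends the two branches; the
// vectorized path builds a Selector and uses internal::pblend():
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3), r(2, 3);
//   a.setRandom();
//   b.setRandom();
//   r = (a > 0.0f).select(a, b);  // TensorSelectOp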
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H