#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H

namespace Eigen {

/** \class TensorConversionOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor conversion class. This class makes it possible to vectorize
  * type casting operations when the number of scalars per packet in the source
  * and the destination type differ.
  */
namespace internal {
template<typename TargetType, typename XprType>
struct traits<TensorConversionOp<TargetType, XprType> >
{
  // Type promotion to handle the case where the types of the lhs and the rhs are different.
  typedef TargetType Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = traits<XprType>::NumDimensions;
  static const int Layout = traits<XprType>::Layout;
  enum { Flags = 0 };
  typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
};
template<typename TargetType, typename XprType>
struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
{
  typedef const TensorConversionOp<TargetType, XprType>& type;
};
template<typename TargetType, typename XprType>
struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
{
  typedef TensorConversionOp<TargetType, XprType> type;
};

}  // end namespace internal
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket,
          int SrcCoeffRatio, int TgtCoeffRatio>
struct PacketConverter;
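// The (SrcCoeffRatio, TgtCoeffRatio) pair mirrors
// internal::type_casting_traits<SrcType, TgtType>: it describes how many
// source packets feed how many target packets. For example, casting double to
// float typically combines two double packets into one float packet
// (SrcCoeffRatio == 2, TgtCoeffRatio == 1), while casting float to double
// yields two double packets per float packet (SrcCoeffRatio == 1,
// TgtCoeffRatio == 2). The exact ratios depend on the packet sizes of the
// target architecture.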
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
  }

 private:
  const TensorEvaluator& m_impl;
};
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
    SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize);
    SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
    SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
    SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize);
    SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize);
    SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize);
    SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
    // Only call m_impl.packet() when we have direct access to the underlying data. This
    // ensures that we don't compute the subexpression twice. We may however load some
    // coefficients twice, but in practice this doesn't negatively impact performance.
    if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
      // Force unaligned memory loads since we can't ensure alignment anymore.
      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
    } else {
      const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
      typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
      typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
      internal::scalar_cast_op<SrcType, TgtType> converter;
      EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
      for (int i = 0; i < TgtPacketSize; ++i) {
        values[i] = converter(m_impl.coeff(index+i));
      }
      TgtPacket rslt = internal::pload<TgtPacket>(values);
      return rslt;
    }
  }

 private:
  const TensorEvaluator& m_impl;
  const typename TensorEvaluator::Index m_maxIndex;
};
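// The specialization above (SrcCoeffRatio == 1, arbitrary TgtCoeffRatio)
// covers widening casts such as float to double, where a single source packet
// spans several target packets. When the source exposes directly addressable
// data it re-reads (possibly overlapping) source packets with unaligned loads;
// otherwise it converts coefficients one at a time through scalar_cast_op and
// reassembles the target packet with pload.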
template<typename TargetType, typename XprType>
class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
    typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorConversionOp>::Index Index;
    typedef typename internal::nested<TensorConversionOp>::type Nested;
    typedef Scalar CoeffReturnType;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
        : m_xpr(xpr) {}

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
};
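// A minimal usage sketch (assuming the unsupported/Eigen/CXX11/Tensor header
// is included): a TensorConversionOp is normally obtained through
// TensorBase::cast<NewType>() rather than constructed directly, and the
// conversion is performed lazily when the expression is evaluated.
//
//   Eigen::Tensor<float, 2> a(4, 4);
//   a.setRandom();
//   Eigen::Tensor<double, 2> b = a.cast<double>();  // wraps `a` in a TensorConversionOp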
template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval {
  static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
    impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
};

template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> {
  static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
    return impl.evalSubExprsIfNeeded(data);
  }
};
#ifdef EIGEN_USE_THREADS
template <bool SameType, typename Eval, typename EvalPointerType,
          typename EvalSubExprsCallback>
struct ConversionSubExprEvalAsync {
  static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
    impl.evalSubExprsIfNeededAsync(nullptr, std::move(done));
  }
};

template <typename Eval, typename EvalPointerType,
          typename EvalSubExprsCallback>
struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType,
                                  EvalSubExprsCallback> {
  static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
    impl.evalSubExprsIfNeededAsync(data, std::move(done));
  }
};
#endif  // EIGEN_USE_THREADS
namespace internal {

template <typename SrcType, typename TargetType, bool IsSameT>
struct CoeffConv {
  template <typename ArgType, typename Device>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl,
                                                              Index index) {
    internal::scalar_cast_op<SrcType, TargetType> converter;
    return converter(impl.coeff(index));
  }
};
template <typename SrcType, typename TargetType>
struct CoeffConv<SrcType, TargetType, true> {
  template <typename ArgType, typename Device>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl,
                                                              Index index) {
    return impl.coeff(index);
  }
};
template <typename SrcPacket, typename TargetPacket, int LoadMode,
          bool ActuallyVectorize, bool IsSameT>
struct PacketConv {
  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;

  static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;

  template <typename ArgType, typename Device>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
                                                                Index index) {
    internal::scalar_cast_op<SrcType, TargetType> converter;
    EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = converter(impl.coeff(index+i));
    }
    TargetPacket rslt = internal::pload<TargetPacket>(values);
    return rslt;
  }
};
template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT>
struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;

  template <typename ArgType, typename Device>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
                                                                Index index) {
    const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
    const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
    PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket,
                    SrcCoeffRatio, TgtCoeffRatio> converter(impl);
    return converter.template packet<LoadMode>(index);
  }
};
template <typename SrcPacket, typename TargetPacket, int LoadMode>
struct PacketConv<SrcPacket, TargetPacket, LoadMode, false, true> {
  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
  static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;

  template <typename ArgType, typename Device>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
                                                                Index index) {
    EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
    for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
    return internal::pload<TargetPacket>(values);
  }
};
template <typename SrcPacket, typename TargetPacket, int LoadMode>
struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, true> {
  template <typename ArgType, typename Device>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
                                                                Index index) {
    return impl.template packet<LoadMode>(index);
  }
};

}  // namespace internal
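// Dispatch summary for the internal::PacketConv specializations above:
//  - same type, packet access available            -> forward the source packet as-is
//  - same type, no packet access                   -> gather coefficients, then pload
//  - different types, vectorized cast available    -> delegate to PacketConverter
//  - different types, no vectorized cast (primary) -> scalar_cast_op per coefficient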
// Eval as rvalue
template<typename TargetType, typename ArgType, typename Device>
struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
{
  typedef TensorConversionOp<TargetType, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef TargetType Scalar;
  typedef TargetType CoeffReturnType;
  typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename PacketType<SrcType, Device>::type PacketSourceType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
    PacketAccess =
    #ifndef EIGEN_USE_SYCL
        true,
    #else
        TensorEvaluator<ArgType, Device>::PacketAccess &
        internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
    #endif
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  struct TensorConversionOpBlockFactory {
    template <typename ArgXprType>
    struct XprType {
      typedef TensorConversionOp<TargetType, const ArgXprType> type;
    };

    template <typename ArgXprType>
    typename XprType<ArgXprType>::type expr(const ArgXprType& expr) const {
      return typename XprType<ArgXprType>::type(expr);
    }
  };

  typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory,
                                         ArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_impl(op.expression(), device)
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
  {
    return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType data, EvalSubExprsCallback done) {
    ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>,
                               EvaluatorPointerType,
                               EvalSubExprsCallback>::run(m_impl, data, std::move(done));
  }
#endif
  EIGEN_STRONG_INLINE void cleanup()
  {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl, index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    // If we are not going to do the cast, we just need to check that the base
    // TensorEvaluator has packet access. Otherwise we also need to make sure
    // that a vectorized cast is available for this type pair.
    const bool Vectorizable =
        IsSameType
        ? TensorEvaluator<ArgType, Device>::PacketAccess
        : int(TensorEvaluator<ArgType, Device>::PacketAccess) &
          int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast);

    return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
                                Vectorizable, IsSameType>::run(m_impl, index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
    if (vectorized) {
      const double SrcCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const double TgtCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
             TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
    } else {
      return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return m_impl.getResourceRequirements();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool = false) const {
    return TensorBlock(m_impl.block(desc, scratch),
                       TensorConversionOpBlockFactory());
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

  /// required by sycl in order to extract the sycl accessor
  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a SYCL command group handler
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H