#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType> EIGEN_DEVICE_REF type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};
template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
    typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};
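
// A minimal usage sketch (assuming the public Tensor API of the unsupported
// Eigen Tensor module; variable names are illustrative only):
//
//   Eigen::Tensor<float, 2> input(20, 30);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 3> new_dims{{10, 2, 30}};
//   // Builds a TensorReshapingOp expression; no data is copied or moved
//   // until the result is assigned or otherwise evaluated.
//   Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);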
// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;

  static const int NumOutputDims = internal::array_size<Dimensions>::value;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  enum ReshapingKind {
    OneByN = 0,   // expr.reshape(1, N)
    NByOne = 1,   // expr.reshape(N, 1)
    Runtime = 2   // reshape dimensions are only known at runtime
  };

  static const ReshapingKind kind =
#if defined(EIGEN_HAS_INDEX_LIST)
      (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(0, 1)) ? OneByN
      : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(1, 1)) ? NByOne
      : Runtime;
#else
      Runtime;
#endif
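
  // For example (an illustrative sketch, assuming the EIGEN_HAS_INDEX_LIST
  // path): reshaping to an Eigen::IndexList whose first entry is the
  // compile-time constant Eigen::type2index<1> lets `kind` resolve to OneByN
  // at compile time, so block() below can hand out a simple view into the
  // underlying buffer instead of materializing anything.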
  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    // For trivial reshapes with raw access to the underlying data we can
    // provide zero-overhead block access.
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess &&
                        NumInputDims > 0 && NumOutputDims > 0,
    PreferBlockAccess = false,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType data, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }
  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_impl.data() != NULL);
    eigen_assert((kind == Runtime) ||
                 (kind == OneByN && desc.dimensions()[0] == 1) ||
                 (kind == NByOne && desc.dimensions()[1] == 1));

    if (kind == OneByN || kind == NByOne) {
      // We can guarantee at compile time that the block is just a contiguous
      // slice of the underlying expression memory buffer.
      return TensorBlock(internal::TensorBlockKind::kView,
                         m_impl.data() + desc.offset(), desc.dimensions());
    } else {
      // This does additional runtime checks: the result may still be a view,
      // or it may be a block materialized in a temporary buffer.
      return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
                                      scratch);
    }
  }
  EIGEN_DEVICE_FUNC typename Storage::Type data() const {
    return constCast(m_impl.data());
  }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a SYCL command group handler
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};
// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = false,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index>
      TensorBlockDesc;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<
        Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(this->dimensions()),
                                  this->m_impl.data(), desc.offset()),
        block.expr());
  }
};
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType> EIGEN_DEVICE_REF type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};
template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
    typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
        : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};
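
// A minimal usage sketch (assuming the public Tensor API; names are
// illustrative only):
//
//   Eigen::Tensor<float, 2> input(10, 8);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{2, 1}};
//   Eigen::array<Eigen::Index, 2> extents{{4, 3}};
//   // A lazy view of the 4x3 sub-block starting at (2, 1); the slice can
//   // also be assigned to, because TensorSlicingOp exposes write accessors.
//   Eigen::Tensor<float, 2> patch = input.slice(offsets, extents);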
// FIXME: figure out the exact threshold
template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
    const bool prefer_block_evaluation = BlockAccess && total > 32*1024;
    return !prefer_block_evaluation && contiguous > threshold_;
  }

 private:
  Index threshold_;
};
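
// For intuition (an illustrative reading of the trigger above, not a
// normative rule): on a host device reporting 4 threads, threshold_ is 8, so
// a slice takes the memcpy path only when each contiguous run is longer than
// 8 coefficients and block evaluation is not preferred, i.e. BlockAccess is
// false or the total slice size is at most 32*1024 coefficients.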
// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif

// Starting the memcpy kernel on SYCL devices is similarly expensive: only use
// it for large copies.
#ifdef EIGEN_USE_SYCL
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif
// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess &&
                        // FIXME: Temporary workaround for bug in slicing of bool tensors.
                        !internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,
    RawAccess         = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  // Tensor slicing does not change the block type.
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    m_is_identity = true;
    for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >=
                   op.sizes()[i] + op.startIndices()[i]);
      if (m_impl.dimensions()[i] != op.sizes()[i] ||
          op.startIndices()[i] != 0) {
        m_is_identity = false;
      }
    }

    // No strides for scalars.
    if (NumDims == 0) return;

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
        && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
      if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
        EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
        for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src + offset), contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    if (m_is_identity) {
      return m_impl.template packet<LoadMode>(index);
    }

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      // The packet maps to a contiguous run in the input: load it directly.
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    } else {
      // Gather the coefficients one by one and assemble the packet.
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
    TensorBlock block = m_impl.block(arg_desc, scratch);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
    return block;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (result) {
      Index offset = 0;
      // Raw access is only possible when the slice degenerates to a
      // contiguous run in the input buffer.
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }
#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a SYCL command group handler
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }
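
  // A small worked example of the mapping above (illustrative numbers only):
  // for a column-major 10x8 input sliced with offsets (2, 1) and extents
  // (4, 3), the output coefficient at linear index 5 has coordinates (1, 1),
  // so srcCoeff returns (1 + 1) * 10 + (1 + 2) = 23, i.e. input coordinate
  // (3, 2).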
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  Dimensions m_dimensions;
  bool m_is_identity;
  const StartIndices m_offsets;
};
// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
    : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  enum {
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,
    RawAccess         = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    if (this->m_is_identity) {
      this->m_impl.template writePacket<StoreMode>(index, x);
      return;
    }

    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      // The packet maps to a contiguous run in the input: store it directly.
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    } else {
      // Scatter the packet coefficient by coefficient.
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
    this->m_impl.writeBlock(arg_desc, block);
  }
};
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> EIGEN_DEVICE_REF type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base;
    typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
        const XprType& expr, const StartIndices& startIndices,
        const StopIndices& stopIndices, const Strides& strides)
        : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
          m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};
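
// A minimal usage sketch (assuming the public Tensor API; names are
// illustrative only):
//
//   Eigen::Tensor<int, 1> input(12);
//   input.setValues({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
//   Eigen::array<Eigen::Index, 1> start{{1}}, stop{{11}}, strides{{3}};
//   // Picks coefficients 1, 4, 7 and 10, analogous to input[1:11:3] in NumPy.
//   Eigen::Tensor<int, 1> every_third = input.stridedSlice(start, stop, strides);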
// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef Strides Dimensions;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned         = false,
    PacketAccess      = false,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess         = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_device(device),
        m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing
    // m_dimensions to be zero.
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] =
            clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] =
            clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i] < 0 by the assert above */
        startIndicesClamped[i] =
            clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] =
            clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
    const InputDimensions& input_dims = m_impl.dimensions();

    // Check for degenerate intervals and compute the output tensor shape.
    m_is_identity = true;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
      } else {
        m_dimensions[i] =
            (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
      if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
        m_is_identity = false;
      }
    }

    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    return NULL;
  }

#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a SYCL command group handler
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
#ifndef SYCL_DEVICE_ONLY
    return numext::maxi(min, numext::mini(max, value));
#else
    return cl::sycl::clamp(value, min, max);
#endif
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  bool m_is_identity;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  DSizes<Index, NumDims> m_startIndices;  // clamped start indices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets;       // offsets in the flattened input
  const Strides m_strides;
};
// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned         = false,
    PacketAccess      = false,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess         = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }
};

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H