TensorShuffling.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H

namespace Eigen {

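/** \class TensorShufflingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor shuffling class.
  *
  *
  */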
namespace internal {
template<typename Shuffle, typename XprType>
struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Shuffle, typename XprType>
struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>
{
  typedef const TensorShufflingOp<Shuffle, XprType>& type;
};

template<typename Shuffle, typename XprType>
struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type>
{
  typedef TensorShufflingOp<Shuffle, XprType> type;
};

} // end namespace internal


template<typename Shuffle, typename XprType>
class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> >
{
  public:
    typedef TensorBase<TensorShufflingOp<Shuffle, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
      : m_xpr(expr), m_shuffle(shfl) {}

    EIGEN_DEVICE_FUNC
    const Shuffle& shufflePermutation() const { return m_shuffle; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorShufflingOp)


  protected:
    typename XprType::Nested m_xpr;
    const Shuffle m_shuffle;
};
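
// Editorial usage sketch (not part of the original header). A shuffle
// expression permutes the order of a tensor's dimensions, so that
// output.dimension(i) == input.dimension(shuffle[i]); indices into the
// output are permuted accordingly. Assuming a hypothetical 3-d float tensor:
//
//   Eigen::Tensor<float, 3> input(20, 30, 50);
//   input.setRandom();
//   Eigen::array<int, 3> shuffle{{1, 2, 0}};
//   Eigen::Tensor<float, 3> output = input.shuffle(shuffle);
//   // output has dimensions (30, 50, 20), and
//   // output(3, 7, 11) == input(11, 3, 7).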


// Eval as rvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Self;
  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };
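  // Editorial note: BlockAccess mirrors the input evaluator's RawAccess
  // because block() below streams directly from m_impl.data() through
  // TensorBlockIO; without raw access to the input buffer the block path
  // cannot be taken.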

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device),
        m_impl(op.expression(), device)
  {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Shuffle& shuffle = op.shufflePermutation();

    // Output dimension i is input dimension shuffle[i]. Also detect the
    // identity permutation, which lets coeff()/packet() forward directly to
    // the input evaluator.
    m_is_identity = true;
    for (int i = 0; i < NumDims; ++i) {
      m_shuffle[i] = static_cast<int>(shuffle[i]);
      m_dimensions[i] = input_dims[shuffle[i]];
      m_inverseShuffle[shuffle[i]] = i;
      if (m_is_identity && shuffle[i] != i) {
        m_is_identity = false;
      }
    }

    // Precompute strides for the (unshuffled) input and for the output.
    // m_fastOutputStrides wraps each output stride in TensorIntDivisor so
    // that srcCoeff() can replace integer division with multiplication.
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_unshuffledInputStrides[0] = 1;
      m_outputStrides[0] = 1;

      for (int i = 1; i < NumDims; ++i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    } else {
      m_unshuffledInputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    }

    // The stride of output dimension i is the input stride of the input
    // dimension it came from.
    for (int i = 0; i < NumDims; ++i) {
      m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
    }
  }
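
  // Editorial worked example (not in the original source): for a column-major
  // input with dimensions (2, 3, 4) and shuffle {2, 0, 1}, the constructor
  // above computes
  //   m_dimensions             = (4, 2, 3)  // input_dims[shuffle[i]]
  //   m_unshuffledInputStrides = (1, 2, 6)
  //   m_outputStrides          = (1, 4, 8)
  //   m_inputStrides           = (6, 1, 2)  // input strides, permuted
  // so output(i, j, k) reads the input at linear offset 6*i + j + 2*k,
  // i.e. output(i, j, k) == input(j, k, i).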

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  // Generic case: gather PacketSize coefficients one at a time through
  // coeff(), since a shuffle generally breaks the contiguity required for a
  // direct packet load.
  template <int LoadMode, typename Self, bool ImplPacketAccess>
  struct PacketLoader {
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    static PacketReturnType Run(const Self& self, Index index) {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = self.coeff(index + i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  };

  // Specialization for input evaluators with packet access: forward whole
  // packets when the shuffle is the identity, otherwise fall back to
  // coefficient-wise gathering.
  template <int LoadMode, typename Self>
  struct PacketLoader<LoadMode, Self, true> {
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    static PacketReturnType Run(const Self& self, Index index) {
      if (self.m_is_identity) {
        return self.m_impl.template packet<LoadMode>(index);
      } else {
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = self.coeff(index + i);
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  };

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const int inner_dim =
        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;

    const size_t target_size = m_device.firstLevelCacheSize();
    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;

    // A shuffled inner dimension leads to random memory accesses, which are
    // not captured by the default cost model's bytes loaded/stored. We add
    // this cost explicitly; the number of cycles was picked based on
    // benchmarks.
    // TODO(ezhulenev): This number was picked based on very questionable
    // benchmarks; add benchmarks that are representative of real workloads.
    using BlockRequirements = internal::TensorBlockResourceRequirements;
    if (inner_dim_shuffled) {
      return BlockRequirements::uniform<Scalar>(target_size)
          .addCostPerCoeff({0, 0, NumDims * 28});
    } else {
      return BlockRequirements::skewed<Scalar>(target_size);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    assert(m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
        TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(
            desc, scratch, /*allow_strided_storage=*/root_of_expr_ast);

    typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
    TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.offset()));

    TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(),
                         block_storage.data());

    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
                                NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                           2 * TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>());
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize);
  }
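  // Editorial note: the compute cost above models the index remapping in
  // srcCoeff(), which performs roughly one division, two multiplications,
  // and two additions per dimension when the shuffle is not the identity.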

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
      Index input_index,
      const DSizes<Index, NumDims>& input_block_strides,
      const DSizes<Index, NumDims>& output_block_strides,
      const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides) const {
    Index output_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index *
             output_block_strides[m_inverseShuffle[0]];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index *
             output_block_strides[m_inverseShuffle[NumDims - 1]];
    }
  }

  // Maps a linear index into the (shuffled) output tensor to the
  // corresponding linear index into the input tensor: decompose the output
  // index into per-dimension coordinates with the precomputed fast divisors,
  // then re-accumulate those coordinates with the permuted input strides.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[0];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[NumDims - 1];
    }
  }
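
  // Editorial trace (continuing the (2, 3, 4) / {2, 0, 1} example above,
  // where m_outputStrides = (1, 4, 8) and m_inputStrides = (6, 1, 2)):
  // for output index 10, i.e. output(2, 0, 1), the column-major loop peels
  //   i = 2: idx = 10 / 8 = 1, inputIndex += 1 * 2, remainder 2
  //   i = 1: idx =  2 / 4 = 0, inputIndex += 0 * 1, remainder 2
  // and the final step adds 2 * 6, so srcCoeff(10) == 14, which is
  // input(0, 1, 2): exactly the coefficient output(2, 0, 1) should see.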

  Dimensions m_dimensions;
  bool m_is_identity;
  array<int, NumDims> m_shuffle;
  array<Index, NumDims> m_inverseShuffle;  // TODO(ezhulenev): Make it int type.
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  array<Index, NumDims> m_unshuffledInputStrides;

  const Device EIGEN_DEVICE_REF m_device;
  TensorEvaluator<ArgType, Device> m_impl;
};


// Eval as lvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
    : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base;

  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum {
    IsAligned = false,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index + i) = values[i];
    }
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    eigen_assert(this->m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
        TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const Scalar* block_buffer = block.data();

    // TODO(ezhulenev): TensorBlockIO should be able to read from any Eigen
    // expression with coefficient and packet access as `src`.
    void* mem = NULL;
    if (block_buffer == NULL) {
      // Materialize the block expression into a temporary buffer first.
      mem = this->m_device.allocate(desc.size() * sizeof(Scalar));
      ScalarNoConst* buf = static_cast<ScalarNoConst*>(mem);

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumDims, typename TensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              desc.dimensions(), internal::strides<Layout>(desc.dimensions()),
              buf),
          block.expr());

      block_buffer = buf;
    }

    // Read from block.
    TensorBlockIOSrc src(internal::strides<Layout>(desc.dimensions()),
                         block_buffer);

    // Write to the output buffer.
    typename TensorBlockIO::Dimensions output_strides(
        this->m_unshuffledInputStrides);
    typename TensorBlockIO::Dimensions output_dimensions;
    for (int i = 0; i < NumDims; ++i) {
      output_dimensions[this->m_shuffle[i]] = desc.dimension(i);
    }
    TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
                         this->srcCoeff(desc.offset()));

    // Reorder dimensions according to the shuffle.
    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
    for (int i = 0; i < NumDims; ++i) {
      dst_to_src_dim_map[i] = static_cast<int>(this->m_inverseShuffle[i]);
    }
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);

    // Deallocate temporary buffer used for the block materialization.
    if (mem != NULL) this->m_device.deallocate(mem);
  }
};
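
// Editorial usage sketch (not part of the original header): because the
// lvalue evaluator above provides coeffRef/writePacket/writeBlock, a shuffle
// can also appear on the left-hand side of an assignment. Assuming a
// hypothetical 3-d float tensor:
//
//   Eigen::Tensor<float, 3> input(20, 30, 50);
//   input.setRandom();
//   Eigen::Tensor<float, 3> output(30, 50, 20);
//   Eigen::array<int, 3> shuffle{{2, 0, 1}};
//   output.shuffle(shuffle) = input;
//   // output(j, k, i) == input(i, j, k)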


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H