TensorPadding.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H

namespace Eigen {

/** \class TensorPadding
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor padding class.
  * At the moment only padding with a constant value is supported.
  *
  */
namespace internal {
template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
};

template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
};

}  // end namespace internal


template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
 public:
  typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
      : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return m_padding_dims; }
  EIGEN_DEVICE_FUNC
  Scalar padding_value() const { return m_padding_value; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

 protected:
  typename XprType::Nested m_xpr;
  const PaddingDimensions m_padding_dims;
  const Scalar m_padding_value;
};
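
// A short usage sketch (hypothetical sizes and padding amounts): pad() on
// TensorBase is the entry point that builds a TensorPaddingOp; the optional
// second argument is forwarded as this op's padding value.
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setConstant(1.0f);
//   Eigen::array<std::pair<int, int>, 2> paddings;
//   paddings[0] = std::make_pair(1, 2);  // dim 0: 1 before, 2 after
//   paddings[1] = std::make_pair(0, 1);  // dim 1: 0 before, 1 after
//   Eigen::Tensor<float, 2> padded = input.pad(paddings, 0.0f);
//   // padded has dimensions (1+2+2) x (0+3+1) = 5 x 4.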


// Eval as rvalue
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
  {
    // The padding op doesn't change the rank of the tensor. Directly padding a
    // scalar would produce a vector, which doesn't make sense. Instead, reshape
    // the scalar into a one-element vector first and then pad.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute dimensions
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }
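
  // Worked stride example (a sketch, col-major): a 2x3 input padded by (1, 2)
  // in dim 0 and (0, 1) in dim 1 yields a 5x4 output, so
  //   m_inputStrides  = {1, 2}       (input is 2x3)
  //   m_outputStrides = {1, 5, 20}   (output is 5x4; the extra trailing entry
  //                                   is the total output size)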

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }
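
  // Worked coeff() example (a sketch, continuing the 2x3 -> 5x4 col-major case
  // above): for output index 7, dim 1 gives idx = 7 / 5 = 1 (not padding, since
  // dim 1 has no left padding), leaving index 2 in dim 0; coordinate 2 is past
  // the left padding of 1, so inputIndex = (1 - 0) * 2 + (2 - 1) = 3, i.e. the
  // input coefficient at (1, 1).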

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // If one of the dimensions is zero, return an empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL,
                         desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;

    Index offset = desc.offset();

    // Compute offsets in the output tensor corresponding to the desc.offset().
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;

    // Offsets in the input corresponding to the output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }

    // Compute the offset into the input buffer. At this point it might be
    // invalid and point outside of the input buffer, because we don't check
    // for negative offsets; it is corrected in the block iteration loop below.
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }

    // The destination buffer and the scratch buffer are both indexed from 0
    // and have the same dimensions as the requested block (for the destination
    // buffer this property is guaranteed by `desc.destination()`).
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
    // dimensions, skipping the innermost dimension. In theory it should be
    // possible to squeeze matching innermost dimensions, but in practice that
    // did not show any improvement in benchmarks. Also, in practice the first
    // outer dimension usually has padding, which prevents squeezing.

    // Initialize the output block iterator state. Dimensions in this array are
    // always in innermost -> outermost order (col-major layout).
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
      it[i].count = 0;
      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);

    // Total output size.
    const Index output_size = desc.size();

    // We will fill an inner dimension of this size in the output. It might be
    // larger than the inner dimension of the input, so we might have to pad
    // before/after we copy values from the input inner dimension.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input inner
    // dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values we can actually copy from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    eigen_assert(output_inner_copy_size >= 0);

    // How many values to fill with padding AFTER reading from the input inner
    // dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // Sanity check: the sum of all sizes must be equal to the output size.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));
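
    // Worked split example (a sketch): with an input inner dimension of size 2
    // padded by (1, 2) to 5, and a block covering the full inner dimension,
    // input_offsets[inner] = -1, so pad_before = 1, copy = 2, pad_after = 2,
    // and indeed 1 + 2 + 2 == 5 == output_inner_dim_size.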

    // Keep track of the current coordinates and padding in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    DSizes<Index, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a
    // single logical inner dimension.

    // When possible we squeeze writes for the innermost dimension (only if it
    // is non-padded) with the first padded dimension. This reduces the number
    // of calls to LinCopy and makes better use of vector instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // inner dimension is not padded
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // and equal to the block inner dimension
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on the squeeze dimension that we can write to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // max non-padded element in the input
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // max element in the output buffer
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);
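
    // Squeeze example (a sketch): a 4x3 input padded only in dim 1 to 4x5
    // (col-major) keeps the inner dim 0 unpadded at size 4, so consecutive
    // non-padded columns can be written with a single LinCopy of length
    // 4 * squeeze_num instead of one call per column.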

    // Iterate, copying data from `m_impl.data()` to the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in the padded region (excluding the innermost
      // dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill a single innermost dimension with the padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);

      } else if (squeeze_writes) {
        // Squeeze multiple reads from the innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from the input to the output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Advance the iteration state by only `squeeze_num - 1` processed
        // inner dimensions, because the state update at the end of the loop
        // accounts for the last one.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);

      } else {
        // Single read from the innermost dimension.
        size += output_inner_dim_size;

        {  // Fill with padding before copying from the input inner dimension.
          const Index out = output_offset;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        {  // Copy data from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        {  // Fill with padding after copying from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }

      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        }
        it[j].count = 0;
        input_offset -= it[j].input_span;
        output_offset -= it[j].output_span;
        output_coord[dim] -= it[j].size - 1;
        output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0),
          size(0),
          input_stride(0),
          input_span(0),
          output_stride(0),
          output_span(0) {}

    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
           (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
    return (index < m_padding[dim_index].first) ||
           (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
  }
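
  // The index-list checks above let padding that is known to be zero at
  // compile time be elided. A sketch of how a caller could encode that
  // (assuming EIGEN_HAS_INDEX_LIST; the exact pair types are illustrative):
  //
  //   // dim 0 statically unpadded, dim 1 padded at runtime
  //   Eigen::IndexPairList<Eigen::type2indexpair<0, 0>,
  //                        Eigen::IndexPair<Eigen::DenseIndex> > paddings;
  //   paddings.set(1, Eigen::IndexPair<Eigen::DenseIndex>(2, 3));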

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }


  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                                 reduction * (2 * TensorOpCost::MulCost<Index>() +
                                              1 * TensorOpCost::DivCost<Index>()));
    }
  }
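
  // Cost example (a sketch): padding a dimension from in = 8 to out = 10 gives
  // reduction = 0.8, i.e. only 80% of the output coefficients actually read
  // from the input; the added terms account for the per-coefficient index
  // arithmetic performed in coeff().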

 protected:

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }
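
  // Fallback example (a sketch, 2x3 input padded to 5x4 col-major, PacketSize
  // 4): a packet load at output index 5 resolves dim 1 to column 1, but the
  // remaining dim-0 range [0, 3] straddles both the left padding (row 0) and
  // the right padding (row 3), so it takes the packetWithPossibleZero path.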

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;
  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  PaddingDimensions m_padding;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H