Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels_UQ_PCE.hpp
Go to the documentation of this file.
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
45#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
46
47//----------------------------------------------------------------------------
48// Specializations of Tpetra::MultiVector pack/unpack kernels for UQ::PCE
49//----------------------------------------------------------------------------
50
51#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
54
55namespace Tpetra {
56namespace KokkosRefactor {
57namespace Details {
58
59 // Functors for implementing packAndPrepare and unpackAndCombine
60 // through parallel_for
61
62 template <typename DstView, typename SrcView, typename IdxView>
63 struct PackArraySingleColumn<
64 DstView, SrcView, IdxView,
65 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
66 Kokkos::is_view_uq_pce<SrcView>::value >::type >
67 {
68 typedef typename DstView::execution_space execution_space;
69 typedef typename execution_space::size_type size_type;
70
71 static const unsigned BlockSize = 32;
72
73 DstView dst;
74 SrcView src;
75 IdxView idx;
76 size_t col;
77
78 PackArraySingleColumn(const DstView& dst_,
79 const SrcView& src_,
80 const IdxView& idx_,
81 size_t col_) :
82 dst(dst_), src(src_), idx(idx_), col(col_) {}
83
84 KOKKOS_INLINE_FUNCTION
85 void operator()( const size_type k ) const {
86 dst(k) = src(idx(k), col);
87 }
88
89 KOKKOS_INLINE_FUNCTION
90 void operator()( const size_type k, const size_type tidx ) const {
91 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
92 dst(k).fastAccessCoeff(i) = src(idx(k), col).fastAccessCoeff(i);
93 }
94
95 static void pack(const DstView& dst,
96 const SrcView& src,
97 const IdxView& idx,
98 size_t col) {
99 Kokkos::parallel_for(
100 Kokkos::MPVectorWorkConfig<execution_space>( idx.size(), BlockSize ),
101 PackArraySingleColumn(dst,src,idx,col) );
102 }
103 };
104
105 template <typename DstView, typename SrcView, typename IdxView>
106 struct PackArrayMultiColumn<
107 DstView, SrcView, IdxView,
108 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
109 Kokkos::is_view_uq_pce<SrcView>::value >::type >
110 {
111 typedef typename DstView::execution_space execution_space;
112 typedef typename execution_space::size_type size_type;
113
114 static const unsigned BlockSize = 32;
115
116 DstView dst;
117 SrcView src;
118 IdxView idx;
119 size_t numCols;
120
121 PackArrayMultiColumn(const DstView& dst_,
122 const SrcView& src_,
123 const IdxView& idx_,
124 size_t numCols_) :
125 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
126
127 KOKKOS_INLINE_FUNCTION
128 void operator()( const size_type k ) const {
129 const typename IdxView::value_type localRow = idx(k);
130 const size_t offset = k*numCols;
131 for (size_t j = 0; j < numCols; ++j)
132 dst(offset + j) = src(localRow, j);
133 }
134
135 KOKKOS_INLINE_FUNCTION
136 void operator()( const size_type k, const size_type tidx ) const {
137 const typename IdxView::value_type localRow = idx(k);
138 const size_t offset = k*numCols;
139 for (size_t j = 0; j < numCols; ++j)
140 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
141 dst(offset + j).fastAccessCoeff(i) =
142 src(localRow, j).fastAccessCoeff(i);
143 }
144
145 static void pack(const DstView& dst,
146 const SrcView& src,
147 const IdxView& idx,
148 size_t numCols) {
149 Kokkos::parallel_for(
150 Kokkos::MPVectorWorkConfig<execution_space>( idx.size(), BlockSize ),
151 PackArrayMultiColumn(dst,src,idx,numCols) );
152 }
153 };
154
155 template <typename DstView, typename SrcView, typename IdxView,
156 typename ColView>
157 struct PackArrayMultiColumnVariableStride<
158 DstView, SrcView, IdxView, ColView,
159 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
160 Kokkos::is_view_uq_pce<SrcView>::value >::type >
161 {
162 typedef typename DstView::execution_space execution_space;
163 typedef typename execution_space::size_type size_type;
164
165 static const unsigned BlockSize = 32;
166
167 DstView dst;
168 SrcView src;
169 IdxView idx;
170 ColView col;
171 size_t numCols;
172
174 const SrcView& src_,
175 const IdxView& idx_,
176 const ColView& col_,
177 size_t numCols_) :
178 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
179
180 KOKKOS_INLINE_FUNCTION
181 void operator()( const size_type k ) const {
182 const typename IdxView::value_type localRow = idx(k);
183 const size_t offset = k*numCols;
184 for (size_t j = 0; j < numCols; ++j)
185 dst(offset + j) = src(localRow, col(j));
186 }
187
188 KOKKOS_INLINE_FUNCTION
189 void operator()( const size_type k, const size_type tidx ) const {
190 const typename IdxView::value_type localRow = idx(k);
191 const size_t offset = k*numCols;
192 for (size_t j = 0; j < numCols; ++j)
193 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
194 dst(offset + j).fastAccessCoeff(i) =
195 src(localRow, col(j)).fastAccessCoeff(i);
196 }
197
198 static void pack(const DstView& dst,
199 const SrcView& src,
200 const IdxView& idx,
201 const ColView& col,
202 size_t numCols) {
203 Kokkos::parallel_for(
204 Kokkos::MPVectorWorkConfig<execution_space>( idx.size(), BlockSize ),
205 PackArrayMultiColumnVariableStride(dst,src,idx,col,numCols) );
206 }
207 };
208
209 template <typename ExecutionSpace,
210 typename DstView,
211 typename SrcView,
212 typename IdxView,
213 typename Op>
214 struct UnpackArrayMultiColumn<
215 ExecutionSpace, DstView, SrcView, IdxView, Op,
216 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
217 Kokkos::is_view_uq_pce<SrcView>::value >::type >
218 {
219 typedef typename ExecutionSpace::execution_space execution_space;
220 typedef typename execution_space::size_type size_type;
221
222 static const unsigned BlockSize = 32;
223
224 DstView dst;
225 SrcView src;
226 IdxView idx;
227 Op op;
228 size_t numCols;
229
230 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
231 const DstView& dst_,
232 const SrcView& src_,
233 const IdxView& idx_,
234 const Op& op_,
235 const size_t numCols_) :
236 dst (dst_),
237 src (src_),
238 idx (idx_),
239 op (op_),
240 numCols (numCols_)
241 {}
242
243 template <class TagType>
244 KOKKOS_INLINE_FUNCTION void
245 operator() (TagType tag, const size_type k) const
246 {
247 const typename IdxView::value_type localRow = idx(k);
248 const size_t offset = k*numCols;
249 for (size_t j = 0; j < numCols; ++j) {
250 op (tag, dst(localRow, j), src(offset+j));
251 }
252 }
253
254 template <class TagType>
255 KOKKOS_INLINE_FUNCTION void
256 operator() (TagType tag, const size_type k, const size_type tidx) const
257 {
258 const typename IdxView::value_type localRow = idx(k);
259 const size_t offset = k*numCols;
260 for (size_t j = 0; j < numCols; ++j) {
261 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize) {
262 op (tag, dst(localRow, j).fastAccessCoeff(i),
263 src(offset+j).fastAccessCoeff(i));
264 }
265 }
266 }
267
268 static void
269 unpack (const ExecutionSpace& execSpace,
270 const DstView& dst,
271 const SrcView& src,
272 const IdxView& idx,
273 const Op& op,
274 const size_t numCols,
275 const bool use_atomic_updates)
276 {
277 if (use_atomic_updates) {
278 Kokkos::parallel_for
279 ("Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
281 idx.size (), BlockSize),
282 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
283 }
284 else {
285 Kokkos::parallel_for
286 ("Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
288 idx.size (), BlockSize),
289 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
290 }
291 }
292 };
293
294 template <typename ExecutionSpace,
295 typename DstView,
296 typename SrcView,
297 typename IdxView,
298 typename ColView,
299 typename Op>
300 struct UnpackArrayMultiColumnVariableStride<
301 ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
302 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
303 Kokkos::is_view_uq_pce<SrcView>::value >::type>
304 {
305 typedef typename ExecutionSpace::execution_space execution_space;
306 typedef typename execution_space::size_type size_type;
307
308 static const unsigned BlockSize = 32;
309
310 DstView dst;
311 SrcView src;
312 IdxView idx;
313 ColView col;
314 Op op;
315 size_t numCols;
316
317 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
318 const DstView& dst_,
319 const SrcView& src_,
320 const IdxView& idx_,
321 const ColView& col_,
322 const Op& op_,
323 size_t numCols_) :
324 dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
325
326 template <class TagType>
327 KOKKOS_INLINE_FUNCTION void
328 operator()( TagType tag, const size_type k ) const {
329 const typename IdxView::value_type localRow = idx(k);
330 const size_t offset = k*numCols;
331 for (size_t j = 0; j < numCols; ++j)
332 op( tag, dst(localRow,col(j)), src(offset+j) );
333 }
334
335 template <class TagType>
336 KOKKOS_INLINE_FUNCTION void
337 operator()( TagType tag, const size_type k, const size_type tidx ) const {
338 const typename IdxView::value_type localRow = idx(k);
339 const size_t offset = k*numCols;
340 for (size_t j = 0; j < numCols; ++j)
341 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
342 op( tag, dst(localRow,col(j)).fastAccessCoeff(i),
343 src(offset+j).fastAccessCoeff(i) );
344 }
345
346 static void
347 unpack (const ExecutionSpace& execSpace,
348 const DstView& dst,
349 const SrcView& src,
350 const IdxView& idx,
351 const ColView& col,
352 const Op& op,
353 const size_t numCols,
354 const bool use_atomic_updates)
355 {
356 if (use_atomic_updates) {
357 Kokkos::parallel_for
358 ("Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
360 idx.size (), BlockSize),
361 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
362 idx, col, op, numCols));
363 }
364 else {
365 Kokkos::parallel_for
366 ("Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
368 idx.size (), BlockSize),
369 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
370 idx, col, op, numCols));
371 }
372 }
373 };
374
375 template <typename DstView, typename SrcView,
376 typename DstIdxView, typename SrcIdxView, typename Op>
377 struct PermuteArrayMultiColumn<
378 DstView, SrcView, DstIdxView, SrcIdxView, Op,
379 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
380 Kokkos::is_view_uq_pce<SrcView>::value >::type>
381 {
382 typedef typename DstView::execution_space execution_space;
383 typedef typename execution_space::size_type size_type;
384
385 static const unsigned BlockSize = 32;
386
387 DstView dst;
388 SrcView src;
389 DstIdxView dst_idx;
390 SrcIdxView src_idx;
391 size_t numCols;
392 Op op;
393
394 PermuteArrayMultiColumn(const DstView& dst_,
395 const SrcView& src_,
396 const DstIdxView& dst_idx_,
397 const SrcIdxView& src_idx_,
398 size_t numCols_, const Op& op_) :
399 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
400 numCols(numCols_), op(op_) {}
401
402 KOKKOS_INLINE_FUNCTION
403 void operator()( const size_type k ) const {
404 const typename DstIdxView::value_type toRow = dst_idx(k);
405 const typename SrcIdxView::value_type fromRow = src_idx(k);
406 nonatomic_tag tag; // permute does not need atomics
407 for (size_t j = 0; j < numCols; ++j)
408 op(tag, dst(toRow, j),src(fromRow, j));
409 }
410
411 KOKKOS_INLINE_FUNCTION
412 void operator()( const size_type k, const size_type tidx ) const {
413 const typename DstIdxView::value_type toRow = dst_idx(k);
414 const typename SrcIdxView::value_type fromRow = src_idx(k);
415 nonatomic_tag tag; // permute does not need atomics
416 for (size_t j = 0; j < numCols; ++j)
417 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
418 op(tag, dst(toRow, j).fastAccessCoeff(i),
419 src(fromRow, j).fastAccessCoeff(i));
420 }
421
422 static void permute(const DstView& dst,
423 const SrcView& src,
424 const DstIdxView& dst_idx,
425 const SrcIdxView& src_idx,
426 size_t numCols,
427 const Op& op) {
428 const size_type n = std::min( dst_idx.size(), src_idx.size() );
429 Kokkos::parallel_for(
431 PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols,op) );
432 }
433 };
434
435 template <typename DstView, typename SrcView,
436 typename DstIdxView, typename SrcIdxView,
437 typename DstColView, typename SrcColView, typename Op>
438 struct PermuteArrayMultiColumnVariableStride<
439 DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
440 typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
441 Kokkos::is_view_uq_pce<SrcView>::value >::type >
442 {
443 typedef typename DstView::execution_space execution_space;
444 typedef typename execution_space::size_type size_type;
445
446 static const unsigned BlockSize = 32;
447
448 DstView dst;
449 SrcView src;
450 DstIdxView dst_idx;
451 SrcIdxView src_idx;
452 DstColView dst_col;
453 SrcColView src_col;
454 size_t numCols;
455 Op op;
456
458 const SrcView& src_,
459 const DstIdxView& dst_idx_,
460 const SrcIdxView& src_idx_,
461 const DstColView& dst_col_,
462 const SrcColView& src_col_,
463 size_t numCols_,
464 const Op& op_) :
465 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
466 dst_col(dst_col_), src_col(src_col_),
467 numCols(numCols_), op(op_) {}
468
469 KOKKOS_INLINE_FUNCTION
470 void operator()( const size_type k ) const {
471 const typename DstIdxView::value_type toRow = dst_idx(k);
472 const typename SrcIdxView::value_type fromRow = src_idx(k);
473 nonatomic_tag tag; // permute does not need atomics
474 for (size_t j = 0; j < numCols; ++j)
475 op(tag, dst(toRow, dst_col(j)),src(fromRow, src_col(j)));
476 }
477
478 KOKKOS_INLINE_FUNCTION
479 void operator()( const size_type k, const size_type tidx ) const {
480 const typename DstIdxView::value_type toRow = dst_idx(k);
481 const typename SrcIdxView::value_type fromRow = src_idx(k);
482 nonatomic_tag tag; // permute does not need atomics
483 for (size_t j = 0; j < numCols; ++j)
484 for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
485 op(tag, dst(toRow, dst_col(j)).fastAccessCoeff(i),
486 src(fromRow, src_col(j)).fastAccessCoeff(i));
487 }
488
489 static void permute(const DstView& dst,
490 const SrcView& src,
491 const DstIdxView& dst_idx,
492 const SrcIdxView& src_idx,
493 const DstColView& dst_col,
494 const SrcColView& src_col,
495 size_t numCols,
496 const Op& op) {
497 const size_type n = std::min( dst_idx.size(), src_idx.size() );
498 Kokkos::parallel_for(
500 PermuteArrayMultiColumnVariableStride(
501 dst,src,dst_idx,src_idx,dst_col,src_col,numCols,op) );
502 }
503 };
504
505} // Details namespace
506} // KokkosRefactor namespace
507} // Tpetra namespace
508
509#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Team-based parallel work configuration for Sacado::MP::Vector.