42#ifndef STOKHOS_CRSPRODUCTTENSOR_HPP
43#define STOKHOS_CRSPRODUCTTENSOR_HPP
45#include "Kokkos_Core.hpp"
50#include "Teuchos_ParameterList.hpp"
77template<
typename ValueType,
class ExecutionSpace,
class Memory =
void >
86 typedef typename Kokkos::ViewTraits< size_type*, execution_space,void,void >::host_mirror_space
host_mirror_space ;
87 typedef CrsProductTensor<value_type, host_mirror_space>
HostMirror;
105#if defined( KOKKOS_ENABLE_CUDA )
106 std::is_same<ExecutionSpace,Kokkos::Cuda>::value;
119 typedef Kokkos::View< value_type*, Kokkos::LayoutLeft, execution_space, memory_type >
vec_type;
120 typedef Kokkos::View< size_type*, Kokkos::LayoutLeft, execution_space, memory_type >
coord_array_type;
121 typedef Kokkos::View< size_type*[2], Kokkos::LayoutLeft, execution_space, memory_type >
coord2_array_type;
122 typedef Kokkos::View< value_type*, Kokkos::LayoutLeft, execution_space, memory_type >
value_array_type;
123 typedef Kokkos::View< size_type*, Kokkos::LayoutLeft, execution_space, memory_type >
entry_array_type;
124 typedef Kokkos::View< size_type*, Kokkos::LayoutLeft, execution_space, memory_type >
row_map_array_type;
155 KOKKOS_INLINE_FUNCTION
158 KOKKOS_INLINE_FUNCTION
172 KOKKOS_INLINE_FUNCTION
186 KOKKOS_INLINE_FUNCTION
188 operator = (
const CrsProductTensor<value_type,execution_space,M> & rhs )
204 KOKKOS_INLINE_FUNCTION
208 KOKKOS_INLINE_FUNCTION
212 KOKKOS_INLINE_FUNCTION
217 KOKKOS_INLINE_FUNCTION
222 KOKKOS_INLINE_FUNCTION
227 KOKKOS_INLINE_FUNCTION
232 KOKKOS_INLINE_FUNCTION
237 KOKKOS_INLINE_FUNCTION
242 KOKKOS_INLINE_FUNCTION
247 KOKKOS_INLINE_FUNCTION
252 KOKKOS_INLINE_FUNCTION
257 KOKKOS_INLINE_FUNCTION
262 KOKKOS_INLINE_FUNCTION
266 template <
typename OrdinalType>
270 const Teuchos::ParameterList& params = Teuchos::ParameterList())
285 std::vector< size_t > coord_work(
dimension, (
size_t) 0 );
287 for (
typename Cijk_type::i_iterator i_it=Cijk.
i_begin();
288 i_it!=Cijk.
i_end(); ++i_it) {
289 OrdinalType i = index(i_it);
290 for (
typename Cijk_type::ik_iterator k_it = Cijk.
k_begin(i_it);
291 k_it != Cijk.
k_end(i_it); ++k_it) {
292 OrdinalType k = index(k_it);
293 for (
typename Cijk_type::ikj_iterator j_it = Cijk.
j_begin(k_it);
294 j_it != Cijk.
j_end(k_it); ++j_it) {
295 OrdinalType
j = index(j_it);
312 coord_work[i] += pad;
318 std::vector< CijkRowCount > row_count(
dimension );
320 row_count[i].count = coord_work[i];
321 row_count[i].basis = i;
328 std::vector<size_type> sorted_row_map(
dimension );
330 coord_work[i] = row_count[i].count;
331 sorted_row_map[ row_count[i].basis ] = i;
351 typename coord_array_type::HostMirror
353 typename coord2_array_type::HostMirror
355 typename value_array_type::HostMirror
357 typename entry_array_type::HostMirror
359 typename entry_array_type::HostMirror
366 sum += coord_work[i];
367 host_row_map(i+1) = sum;
368 host_num_entry(i) = 0;
372 coord_work[iCoord] = host_row_map[iCoord];
381 for (
typename Cijk_type::i_iterator i_it=Cijk.
i_begin();
382 i_it!=Cijk.
i_end(); ++i_it) {
383 OrdinalType i = index(i_it);
385 for (
typename Cijk_type::ik_iterator k_it = Cijk.
k_begin(i_it);
386 k_it != Cijk.
k_end(i_it); ++k_it) {
387 OrdinalType k = index(k_it);
388 for (
typename Cijk_type::ikj_iterator j_it = Cijk.
j_begin(k_it);
389 j_it != Cijk.
j_end(k_it); ++j_it) {
390 OrdinalType
j = index(j_it);
391 ValueType c = Stokhos::value(j_it);
393 const size_type n = coord_work[row]; ++coord_work[row];
394 host_value(n) = (
j != k) ? c : 0.5*c;
395 host_coord2(n,0) =
j;
396 host_coord2(n,1) = k;
397 host_coord(n) = ( k << 16 ) |
j;
398 ++host_num_entry(row);
404 host_num_entry(row) =
448 typename coord_array_type::HostMirror
450 typename coord2_array_type::HostMirror
452 typename value_array_type::HostMirror
454 typename entry_array_type::HostMirror
456 typename entry_array_type::HostMirror
462 host_num_entry(0) = 1;
466 host_coord2(0,0) = 0;
467 host_coord2(0,1) = 0;
490 host_tensor.m_dim = tensor.
m_dim;
493 host_tensor.m_nnz = tensor.
m_nnz;
494 host_tensor.m_flops = tensor.
m_flops;
499 template <
class DstDevice,
class DstMemory >
501 deep_copy(
const CrsProductTensor<ValueType,DstDevice,DstMemory>& dst ,
512template<
class Device,
typename OrdinalType,
typename ValueType>
513CrsProductTensor<ValueType, Device>
517 const Teuchos::ParameterList& params = Teuchos::ParameterList())
522template<
class Device,
typename OrdinalType,
typename ValueType,
524CrsProductTensor<ValueType, Device, Memory>
528 const Teuchos::ParameterList& params = Teuchos::ParameterList())
531 basis, Cijk, params );
534template<
class Device,
typename OrdinalType,
typename ValueType>
535CrsProductTensor<ValueType, Device>
541template<
class Device,
typename OrdinalType,
typename ValueType,
543CrsProductTensor<ValueType, Device, Memory>
549template <
class ValueType,
class Device,
class Memory >
557 template <
class ValueType,
558 class DstDevice,
class DstMemory,
559 class SrcDevice,
class SrcMemory >
561deep_copy(
const CrsProductTensor<ValueType,DstDevice,DstMemory> & dst ,
562 const CrsProductTensor<ValueType,SrcDevice,SrcMemory> & src )
567template <
typename ValueType,
typename Device >
573 typedef CrsProductTensor< ValueType , execution_space >
tensor_type ;
578#define USE_AUTO_VECTORIZATION 1
580#define USE_AUTO_VECTORIZATION 0
583#if defined(__INTEL_COMPILER) && USE_AUTO_VECTORIZATION
586 template<
typename MatrixValue ,
typename VectorValue >
587 KOKKOS_INLINE_FUNCTION
588 static void apply(
const tensor_type & tensor ,
589 const MatrixValue *
const a ,
590 const VectorValue *
const x ,
591 VectorValue *
const y ,
592 const VectorValue & alpha = VectorValue(1) )
596 const size_type * cj = &tensor.coord(0,0);
597 const size_type * ck = &tensor.coord(0,1);
598 const size_type nDim = tensor.dimension();
600 for ( size_type iy = 0 ; iy < nDim ; ++iy ) {
601 const size_type nEntry = tensor.num_entry(iy);
602 const size_type iEntryBeg = tensor.entry_begin(iy);
603 const size_type iEntryEnd = iEntryBeg + nEntry;
604 VectorValue ytmp = 0;
606#pragma simd vectorlength(tensor_type::vectorsize)
608#pragma vector aligned
609 for (size_type iEntry = iEntryBeg; iEntry<iEntryEnd; ++iEntry) {
610 const size_type
j = cj[iEntry];
611 const size_type k = ck[iEntry];
612 ytmp += tensor.value(iEntry) * ( a[
j] * x[k] + a[k] * x[
j] );
615 y[iy] += alpha * ytmp ;
619#elif defined(__MIC__)
622 template<
typename MatrixValue ,
typename VectorValue >
623 KOKKOS_INLINE_FUNCTION
624 static void apply(
const tensor_type & tensor ,
625 const MatrixValue *
const a ,
626 const VectorValue *
const x ,
627 VectorValue *
const y ,
628 const VectorValue & alpha = VectorValue(1) )
630 const size_type nDim = tensor.dimension();
631 for ( size_type iy = 0 ; iy < nDim ; ++iy ) {
633 const size_type nEntry = tensor.num_entry(iy);
634 const size_type iEntryBeg = tensor.entry_begin(iy);
635 const size_type iEntryEnd = iEntryBeg + nEntry;
636 size_type iEntry = iEntryBeg;
638 VectorValue ytmp = 0 ;
640 const size_type nBlock = nEntry / tensor_type::vectorsize;
641 const size_type nEntryB = nBlock * tensor_type::vectorsize;
642 const size_type iEnd = iEntryBeg + nEntryB;
644 typedef TinyVec<ValueType,tensor_type::vectorsize,tensor_type::use_intrinsics> TV;
647 for (size_type block=0; block<nBlock; ++block, iEntry+=tensor_type::vectorsize) {
648 const size_type *
j = &tensor.coord(iEntry,0);
649 const size_type *k = &tensor.coord(iEntry,1);
650 TV aj(a,
j), ak(a, k), xj(x,
j), xk(x, k),
651 c(&(tensor.value(iEntry)));
655 aj.multiply_add(ak, xj);
656 vy.multiply_add(c, aj);
663 const size_type rem = iEntryEnd-iEntry;
665 typedef TinyVec<ValueType,8,tensor_type::use_intrinsics> TV2;
666 const size_type *
j = &tensor.coord(iEntry,0);
667 const size_type *k = &tensor.coord(iEntry,1);
668 TV2 aj(a,
j), ak(a, k), xj(x,
j), xk(x, k),
669 c(&(tensor.value(iEntry)));
673 aj.multiply_add(ak, xj);
678 y[iy] += alpha * ytmp ;
685 template<
typename MatrixValue ,
typename VectorValue >
686 KOKKOS_INLINE_FUNCTION
688 const MatrixValue *
const a ,
689 const VectorValue *
const x ,
690 VectorValue *
const y ,
691 const VectorValue & alpha = VectorValue(1) )
693 const size_type nDim = tensor.dimension();
694 for (
size_type iy = 0 ; iy < nDim ; ++iy ) {
696 const size_type nEntry = tensor.num_entry(iy);
697 const size_type iEntryBeg = tensor.entry_begin(iy);
698 const size_type iEntryEnd = iEntryBeg + nEntry;
701 VectorValue ytmp = 0 ;
704 if (tensor_type::vectorsize > 1 && nEntry >= tensor_type::vectorsize) {
705 const size_type nBlock = nEntry / tensor_type::vectorsize;
706 const size_type nEntryB = nBlock * tensor_type::vectorsize;
707 const size_type iEnd = iEntryBeg + nEntryB;
709 typedef TinyVec<ValueType,tensor_type::vectorsize,tensor_type::use_intrinsics> TV;
712 for (; iEntry<iEnd; iEntry+=tensor_type::vectorsize) {
714 const size_type *k = &tensor.coord(iEntry,1);
715 TV aj(a,
j), ak(a, k), xj(x,
j), xk(x, k), c(&(tensor.value(iEntry)));
719 aj.multiply_add(ak, xj);
720 vy.multiply_add(c, aj);
726 for ( ; iEntry<iEntryEnd; ++iEntry) {
728 const size_type k = tensor.coord(iEntry,1);
730 ytmp += tensor.value(iEntry) * ( a[
j] * x[k] + a[k] * x[
j] );
733 y[iy] += alpha * ytmp ;
738 KOKKOS_INLINE_FUNCTION
740 {
return tensor.dimension(); }
742 KOKKOS_INLINE_FUNCTION
744 {
return tensor.dimension(); }
755template<
typename ValueType ,
typename MatrixValue ,
typename VectorValue ,
761 typedef CrsProductTensor< ValueType , execution_space >
tensor_type;
762 typedef StochasticProductTensor< ValueType, tensor_type, execution_space >
BlockSpec;
764 typedef Kokkos::View< VectorValue** , Kokkos::LayoutLeft , execution_space >
block_vector_type ;
765 typedef BlockCrsMatrix< BlockSpec , MatrixValue , execution_space >
matrix_type ;
785 KOKKOS_INLINE_FUNCTION
790 VectorValue *
const y = &
m_y(0,iBlockRow);
792 const size_type iEntryBegin =
m_A.graph.row_map[ iBlockRow ];
793 const size_type iEntryEnd =
m_A.graph.row_map[ iBlockRow + 1 ];
798 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
799 const VectorValue *
const x = &
m_x( 0 ,
m_A.graph.entries(iEntry) );
800 const MatrixValue *
const a = &
m_A.values( 0 , iEntry );
811 KOKKOS_INLINE_FUNCTION
812 std::pair< size_type , size_type >
817 enum { work_align = 64 /
sizeof(VectorValue) };
819 enum { work_mask = work_align - 1 };
822 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
823 thread_count ) << work_shift ;
826 std::min( thread_rank * work_per_thread , work_count );
828 std::min( work_begin + work_per_thread , work_count );
830 return std::make_pair( work_begin , work_end );
837 KOKKOS_INLINE_FUNCTION
838 void operator()(
const typename Kokkos::TeamPolicy< execution_space >::member_type &
device )
const
843 const size_type row_count =
m_A.graph.row_map.extent(0)-1;
844 if (iBlockRow >= row_count)
849 std::pair<size_type,size_type> work_range =
854 VectorValue *
const y = &
m_y(0,iBlockRow);
857 for (
size_type j = work_range.first ;
j < work_range.second ; ++
j )
862 const size_type iBlockEntryBeg =
m_A.graph.row_map[ iBlockRow ];
863 const size_type iBlockEntryEnd =
m_A.graph.row_map[ iBlockRow + 1 ];
866 (iBlockEntryEnd-iBlockEntryBeg+BlockSize-1) / BlockSize;
868 const MatrixValue* sh_A[BlockSize];
869 const VectorValue* sh_x[BlockSize];
872 for (
size_type block = 0; block<numBlock; ++block, iBlockEntry+=BlockSize) {
874 block == numBlock-1 ? iBlockEntryEnd-iBlockEntry : BlockSize;
876 for (
size_type col = 0; col < block_size; ++col ) {
877 const size_type iBlockColumn =
m_A.graph.entries( iBlockEntry + col );
878 sh_x[col] = &
m_x( 0 , iBlockColumn );
879 sh_A[col] = &
m_A.values( 0 , iBlockEntry + col );
882 for (
size_type iy = work_range.first ; iy < work_range.second ; ++iy ) {
884 const size_type nEntry = tensor.num_entry(iy);
885 const size_type iEntryBeg = tensor.entry_begin(iy);
886 const size_type iEntryEnd = iEntryBeg + nEntry;
889 VectorValue ytmp = 0 ;
892 const size_type nBlock = nEntry / tensor_type::vectorsize;
893 const size_type nEntryB = nBlock * tensor_type::vectorsize;
894 const size_type iEnd = iEntryBeg + nEntryB;
896 typedef TinyVec<ValueType,tensor_type::vectorsize,tensor_type::use_intrinsics> ValTV;
897 typedef TinyVec<MatrixValue,tensor_type::vectorsize,tensor_type::use_intrinsics> MatTV;
898 typedef TinyVec<VectorValue,tensor_type::vectorsize,tensor_type::use_intrinsics> VecTV;
901 for (
size_type block=0; block<nBlock; ++block, iEntry+=tensor_type::vectorsize) {
903 const size_type *k = &tensor.coord(iEntry,1);
904 ValTV c(&(tensor.value(iEntry)));
906 for (
size_type col = 0; col < block_size; ++col ) {
907 MatTV aj(sh_A[col],
j), ak(sh_A[col], k);
908 VecTV xj(sh_x[col],
j), xk(sh_x[col], k);
912 aj.multiply_add(ak, xj);
913 vy.multiply_add(c, aj);
922 typedef TinyVec<ValueType,8,tensor_type::use_intrinsics> ValTV2;
923 typedef TinyVec<MatrixValue,8,tensor_type::use_intrinsics> MatTV2;
924 typedef TinyVec<VectorValue,8,tensor_type::use_intrinsics> VecTV2;
926 const size_type *k = &tensor.coord(iEntry,1);
927 ValTV2 c(&(tensor.value(iEntry)));
929 for (
size_type col = 0; col < block_size; ++col ) {
930 MatTV2 aj(sh_A[col],
j), ak(sh_A[col], k);
931 VecTV2 xj(sh_x[col],
j), xk(sh_x[col], k);
935 aj.multiply_add(ak, xj);
957 KOKKOS_INLINE_FUNCTION
958 void operator()(
const typename Kokkos::TeamPolicy< execution_space >::member_type &
device )
const
963 const size_type row_count =
m_A.graph.row_map.extent(0)-1;
964 if (iBlockRow >= row_count)
969 std::pair<size_type,size_type> work_range =
974 VectorValue *
const y = &
m_y(0,iBlockRow);
977 for (
size_type j = work_range.first ;
j < work_range.second ; ++
j )
982 const size_type iBlockEntryBeg =
m_A.graph.row_map[ iBlockRow ];
983 const size_type iBlockEntryEnd =
m_A.graph.row_map[ iBlockRow + 1 ];
986 (iBlockEntryEnd-iBlockEntryBeg+BlockSize-1) / BlockSize;
988 const MatrixValue* sh_A[BlockSize];
989 const VectorValue* sh_x[BlockSize];
992 for (
size_type block = 0; block<numBlock; ++block, iBlockEntry+=BlockSize) {
994 block == numBlock-1 ? iBlockEntryEnd-iBlockEntry : BlockSize;
996 for (
size_type col = 0; col < block_size; ++col ) {
997 const size_type iBlockColumn =
m_A.graph.entries( iBlockEntry + col );
998 sh_x[col] = &
m_x( 0 , iBlockColumn );
999 sh_A[col] = &
m_A.values( 0 , iBlockEntry + col );
1002 for (
size_type iy = work_range.first ; iy < work_range.second ; ++iy ) {
1004 const size_type nEntry = tensor.num_entry(iy);
1005 const size_type iEntryBeg = tensor.entry_begin(iy);
1006 const size_type iEntryEnd = iEntryBeg + nEntry;
1009 VectorValue ytmp = 0 ;
1012 if (tensor_type::vectorsize > 1 && nEntry >= tensor_type::vectorsize) {
1013 const size_type nBlock = nEntry / tensor_type::vectorsize;
1014 const size_type nEntryB = nBlock * tensor_type::vectorsize;
1015 const size_type iEnd = iEntryBeg + nEntryB;
1017 typedef TinyVec<ValueType,tensor_type::vectorsize,tensor_type::use_intrinsics> ValTV;
1018 typedef TinyVec<MatrixValue,tensor_type::vectorsize,tensor_type::use_intrinsics> MatTV;
1019 typedef TinyVec<VectorValue,tensor_type::vectorsize,tensor_type::use_intrinsics> VecTV;
1022 for (; iEntry<iEnd; iEntry+=tensor_type::vectorsize) {
1023 const size_type *
j = &tensor.coord(iEntry,0);
1024 const size_type *k = &tensor.coord(iEntry,1);
1025 ValTV c(&(tensor.value(iEntry)));
1027 for (
size_type col = 0; col < block_size; ++col ) {
1028 MatTV aj(sh_A[col],
j), ak(sh_A[col], k);
1029 VecTV xj(sh_x[col],
j), xk(sh_x[col], k);
1033 aj.multiply_add(ak, xj);
1034 vy.multiply_add(c, aj);
1041 for ( ; iEntry<iEntryEnd; ++iEntry) {
1043 const size_type k = tensor.coord(iEntry,1);
1044 ValueType cijk = tensor.value(iEntry);
1046 for (
size_type col = 0; col < block_size; ++col ) {
1047 ytmp += cijk * ( sh_A[col][
j] * sh_x[col][k] +
1048 sh_A[col][k] * sh_x[col][
j] );
1074 const bool use_block_algorithm =
true;
1076 const bool use_block_algorithm =
false;
1079 const size_t row_count = A.graph.row_map.extent(0) - 1 ;
1080 if (use_block_algorithm) {
1082 const size_t team_size = 4;
1084 const size_t team_size = 2;
1086 const size_t league_size = row_count;
1087 Kokkos::TeamPolicy< execution_space > config(league_size, team_size);
1091 Kokkos::parallel_for( row_count ,
MultiplyImpl(A,x,y) );
static KOKKOS_INLINE_FUNCTION size_type vector_size(const tensor_type &tensor)
CrsProductTensor< ValueType, execution_space > tensor_type
tensor_type::size_type size_type
static KOKKOS_INLINE_FUNCTION size_type matrix_size(const tensor_type &tensor)
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y, const VectorValue &alpha=VectorValue(1))
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry) const
Coordinates of an entry.
Kokkos::View< value_type *, Kokkos::LayoutLeft, execution_space, memory_type > vec_type
KOKKOS_INLINE_FUNCTION size_type num_entry(size_type i) const
Number of entries with a coordinate 'i'.
static void deep_copy(const CrsProductTensor< ValueType, DstDevice, DstMemory > &dst, const CrsProductTensor &src)
row_map_array_type m_row_map
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number of flops per multiply-add.
KOKKOS_INLINE_FUNCTION CrsProductTensor()
static const size_type num_entry_align
Kokkos::ViewTraits< size_type *, execution_space, void, void >::host_mirror_space host_mirror_space
KOKKOS_INLINE_FUNCTION ~CrsProductTensor()
KOKKOS_INLINE_FUNCTION size_type entry_maximum() const
Maximum sparse entries for any coordinate.
Kokkos::View< value_type *, Kokkos::LayoutLeft, execution_space, memory_type > value_array_type
Kokkos::View< size_type *, Kokkos::LayoutLeft, execution_space, memory_type > row_map_array_type
static HostMirror create_mirror_view(const CrsProductTensor &tensor)
KOKKOS_INLINE_FUNCTION bool is_empty() const
Is the tensor empty.
static const size_type cuda_vectorsize
KOKKOS_INLINE_FUNCTION CrsProductTensor & operator=(const CrsProductTensor< value_type, execution_space, M > &rhs)
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry, const size_type c) const
Coordinates of an entry.
KOKKOS_INLINE_FUNCTION size_type entry_begin(size_type i) const
Begin entries with a coordinate 'i'.
Kokkos::View< size_type *[2], Kokkos::LayoutLeft, execution_space, memory_type > coord2_array_type
ExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
CrsProductTensor< value_type, host_mirror_space > HostMirror
KOKKOS_INLINE_FUNCTION CrsProductTensor(const CrsProductTensor< value_type, execution_space, M > &rhs)
KOKKOS_INLINE_FUNCTION size_type entry_end(size_type i) const
End entries with a coordinate 'i'.
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zeros.
Kokkos::View< size_type *, Kokkos::LayoutLeft, execution_space, memory_type > entry_array_type
KOKKOS_INLINE_FUNCTION size_type entry_count() const
Number of sparse entries.
KOKKOS_INLINE_FUNCTION size_type avg_entries_per_row() const
Average number of entries per row.
static CrsProductTensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
Kokkos::View< size_type *, Kokkos::LayoutLeft, execution_space, memory_type > coord_array_type
static const size_type tensor_align
static CrsProductTensor createMeanBased()
coord2_array_type m_coord2
static const size_type host_vectorsize
static const bool is_cuda
static const bool use_intrinsics
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Value of an entry.
entry_array_type m_num_entry
static const size_type vectorsize
size_type m_avg_entries_per_row
BlockCrsMatrix< BlockSpec, MatrixValue, execution_space > matrix_type
BlockSpec::size_type size_type
CrsProductTensor< ValueType, execution_space > tensor_type
Kokkos::View< VectorValue **, Kokkos::LayoutLeft, execution_space > block_vector_type
KOKKOS_INLINE_FUNCTION std::pair< size_type, size_type > compute_work_range(const size_type work_count, const size_type thread_count, const size_type thread_rank) const
const block_vector_type m_y
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const
KOKKOS_INLINE_FUNCTION void operator()(const typename Kokkos::TeamPolicy< execution_space >::member_type &device) const
static void apply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
StochasticProductTensor< ValueType, tensor_type, execution_space > BlockSpec
const block_vector_type m_x
MultiplyImpl(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
virtual ordinal_type size() const =0
Return total size of basis.
Abstract base class for multivariate orthogonal polynomials generated from tensor products of univariate polynomials.
Data structure storing a sparse 3-tensor C(i,j,k) in a compressed format.
kj_iterator j_end(const k_iterator &k) const
Iterator pointing to last j entry for given k.
k_iterator k_begin() const
Iterator pointing to first k entry.
kji_iterator i_begin(const kj_iterator &j) const
Iterator pointing to first i entry for given j and k.
kj_iterator j_begin(const k_iterator &k) const
Iterator pointing to first j entry for given k.
kji_iterator i_end(const kj_iterator &j) const
Iterator pointing to last i entry for given j and k.
k_iterator k_end() const
Iterator pointing to last k entry.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Top-level namespace for Stokhos classes and functions.
CrsProductTensor< ValueType, Device > create_product_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
void deep_copy(const CrsProductTensor< ValueType, DstDevice, DstMemory > &dst, const CrsProductTensor< ValueType, SrcDevice, SrcMemory > &src)
CrsProductTensor< ValueType, Device, Memory >::HostMirror create_mirror_view(const CrsProductTensor< ValueType, Device, Memory > &src)
CrsProductTensor< ValueType, Device > create_mean_based_product_tensor()
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
bool operator()(const CijkRowCount &a, const CijkRowCount &b) const