Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Stokhos_Cuda_CrsMatrix.hpp
// @HEADER
// ***********************************************************************
//
//                           Stokhos Package
//                 Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
//
// ***********************************************************************
// @HEADER

#ifndef STOKHOS_CUDA_CRSMATRIX_HPP
#define STOKHOS_CUDA_CRSMATRIX_HPP

#include "Stokhos_ConfigDefs.h"
#ifdef HAVE_STOKHOS_CUSPARSE

#include <utility>
#include <sstream>
#include <stdexcept>

#include <cuda_runtime.h>
//#include <cusparse.h>
#include <cusparse_v2.h>

#include "Kokkos_Core.hpp"

#include "Stokhos_Multiply.hpp"
#include "Stokhos_CrsMatrix.hpp"
// Use the new cuSPARSE SpMV/SpMM functions only in CUDA 11 or greater
// (while they exist in some versions of CUDA 10, they appear to not always
// produce correct results).
#define USE_NEW_SPMV (CUSPARSE_VERSION >= 11000)
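// Note (clarifying assumption): CUSPARSE_VERSION is defined by the cuSPARSE
// headers themselves in recent CUDA toolkits. If the macro is not defined,
// the preprocessor evaluates the undefined identifier as 0, so the legacy
// csrmv/csrmm paths below are selected automatically on older toolkits.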

namespace Stokhos {

class CudaSparseSingleton {
public:

  cusparseStatus_t status;
  cusparseHandle_t handle;
  cusparseMatDescr_t descra;

  static CudaSparseSingleton & singleton();

private:

  CudaSparseSingleton()
  {
    status = cusparseCreate(&handle);
    if(status != CUSPARSE_STATUS_SUCCESS)
    {
      throw std::runtime_error( std::string("ERROR - CUSPARSE Library Initialization failed" ) );
    }

    status = cusparseCreateMatDescr(&descra);
    if(status != CUSPARSE_STATUS_SUCCESS)
    {
      throw std::runtime_error( std::string("ERROR - CUSPARSE Library Matrix descriptor failed" ) );
    }

    cusparseSetMatType(descra , CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(descra , CUSPARSE_INDEX_BASE_ZERO);
  }

  CudaSparseSingleton( const CudaSparseSingleton & );
  CudaSparseSingleton & operator = ( const CudaSparseSingleton & );
};

CudaSparseSingleton & CudaSparseSingleton::singleton()
{
  static CudaSparseSingleton s ; return s ;
}
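
// Illustrative usage (a sketch, not part of the original header): client
// code shares one cuSPARSE handle and matrix descriptor through the Meyers
// singleton above, e.g.
//
//   CudaSparseSingleton & s = CudaSparseSingleton::singleton();
//   cusparseSetStream( s.handle , stream ); // stream: a cudaStream_t
//
// The handle and descriptor are created once on first use and reused by
// every Multiply specialization in this file.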

template<>
class Multiply<
  CrsMatrix< float , Kokkos::Cuda > ,
  Kokkos::View< float* , Kokkos::Cuda > ,
  Kokkos::View< float* , Kokkos::Cuda > ,
  void,
  IntegralRank<1> >
{
public:
  typedef Kokkos::Cuda execution_space ;
  typedef execution_space::size_type size_type ;
  typedef Kokkos::View< float* , execution_space > vector_type ;
  typedef CrsMatrix< float , execution_space > matrix_type ;

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const vector_type & x ,
                     const vector_type & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const float alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);

#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
    cusparseDnVecDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnVec(&x_cusparse, n, (void*)x.data(), CUDA_R_32F);
    cusparseCreateDnVec(&y_cusparse, n, (void*)y.data(), CUDA_R_32F);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMV_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MV_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMV(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_32F ,
                   CUSPARSE_MV_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnVec(x_cusparse);
    cusparseDestroyDnVec(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseScsrmv( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      x.data() ,
                      &beta ,
                      y.data() );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseScsrmv " ) );
    }
  }
};
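
// Illustrative usage of the rank-1 float specialization above (a sketch;
// A, x, and y are assumed to be an already-filled CrsMatrix and device
// Views of matching dimensions):
//
//   typedef Stokhos::CrsMatrix< float , Kokkos::Cuda > matrix_type;
//   typedef Kokkos::View< float* , Kokkos::Cuda > vector_type;
//   Stokhos::Multiply< matrix_type , vector_type , vector_type ,
//                      void , Stokhos::IntegralRank<1> >::apply( A , x , y );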

template<>
class Multiply<
  CrsMatrix< double , Kokkos::Cuda > ,
  Kokkos::View< double* , Kokkos::Cuda > ,
  Kokkos::View< double* , Kokkos::Cuda > ,
  void,
  IntegralRank<1> >
{
public:
  typedef Kokkos::Cuda execution_space ;
  typedef execution_space::size_type size_type ;
  typedef Kokkos::View< double* , execution_space > vector_type ;
  typedef CrsMatrix< double , execution_space > matrix_type ;

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const vector_type & x ,
                     const vector_type & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const double alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);

#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
    cusparseDnVecDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnVec(&x_cusparse, n, (void*)x.data(), CUDA_R_64F);
    cusparseCreateDnVec(&y_cusparse, n, (void*)y.data(), CUDA_R_64F);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMV_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMV(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_64F ,
                   CUSPARSE_MV_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnVec(x_cusparse);
    cusparseDestroyDnVec(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseDcsrmv( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      x.data() ,
                      &beta ,
                      y.data() );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
    }
  }
};

template <typename Ordinal>
class Multiply<
  CrsMatrix< float , Kokkos::Cuda > ,
  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  std::vector<Ordinal> ,
  IntegralRank<2> >
{
public:
  typedef Kokkos::Cuda execution_space;
  typedef execution_space::size_type size_type;
  typedef Kokkos::View< float*, Kokkos::LayoutLeft, execution_space > vector_type;
  typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
  typedef CrsMatrix< float , execution_space > matrix_type;

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const multi_vector_type & x ,
                     const multi_vector_type & y ,
                     const std::vector<Ordinal> & col_indices )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const float alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);
    const size_t ncol = col_indices.size();

    // Copy columns of x into a contiguous vector
    vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
    vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );

    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type xx_view = Kokkos::subview( xx , span );
      vector_type x_col =
        Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
      Kokkos::deep_copy(xx_view, x_col);
    }

    // Sparse matrix-times-multivector
#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
    cusparseDnMatDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnMat(
      &x_cusparse, n, ncol, n,
      (void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
    cusparseCreateDnMat(
      &y_cusparse, n, ncol, n,
      (void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMM_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
      CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMM(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_32F ,
                   CUSPARSE_MM_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnMat(x_cusparse);
    cusparseDestroyDnMat(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseScsrmm( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , ncol , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      xx.data() ,
                      n ,
                      &beta ,
                      yy.data() ,
                      n );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseScsrmm " ) );
    }

    // Copy columns out of contiguous multivector
    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type yy_view = Kokkos::subview( yy , span );
      vector_type y_col =
        Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
      Kokkos::deep_copy(y_col, yy_view );
    }
  }
};
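
// Sketch of invoking the column-selection specialization above
// (illustrative only): for each k it computes
// y(:,col_indices[k]) = A * x(:,col_indices[k]); the gather into and
// scatter out of contiguous storage is handled internally by apply().
//
//   std::vector<int> col_indices = { 0 , 2 , 5 };
//   Stokhos::Multiply<
//     Stokhos::CrsMatrix< float , Kokkos::Cuda > ,
//     Kokkos::View< float** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
//     Kokkos::View< float** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
//     std::vector<int> ,
//     Stokhos::IntegralRank<2> >::apply( A , x , y , col_indices );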

#define USE_CUSPARSE 1
#if USE_CUSPARSE

template <typename Ordinal>
class Multiply<
  CrsMatrix< double , Kokkos::Cuda > ,
  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  std::vector<Ordinal> ,
  IntegralRank<2> >
{
public:
  typedef Kokkos::Cuda execution_space;
  typedef execution_space::size_type size_type;
  typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
  typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
  typedef CrsMatrix< double , execution_space > matrix_type;

  //--------------------------------------------------------------------------

#define USE_TRANSPOSE 0
#if USE_TRANSPOSE

  // A version that copies the vectors to a transposed 2-D view and calls
  // the newer CUSPARSE function for the transposed layout. It seems to be
  // somewhat slower, though it is not clear why, so it is disabled by
  // default (USE_TRANSPOSE 0 above).
  struct GatherTranspose {
    typedef Kokkos::Cuda execution_space;
    typedef execution_space::size_type size_type;

    multi_vector_type m_xt;
    const multi_vector_type m_x;
    const Kokkos::View<Ordinal*,execution_space> m_col;
    const size_type m_ncol;

    GatherTranspose( multi_vector_type& xt,
                     const multi_vector_type& x,
                     const Kokkos::View<Ordinal*,execution_space>& col ) :
      m_xt(xt), m_x(x), m_col(col), m_ncol(col.extent(0)) {}

    __device__
    inline void operator() (size_type i) const {
      for (size_type j=0; j<m_ncol; ++j)
        m_xt(j,i) = m_x(i,m_col(j));
    }

    static void apply( multi_vector_type& xt,
                       const multi_vector_type& x,
                       const Kokkos::View<Ordinal*,execution_space>& col ) {
      const size_type n = x.extent(0);
      Kokkos::parallel_for( n , GatherTranspose(xt,x,col) );
    }
  };

  static void apply( const matrix_type & A ,
                     const multi_vector_type & x ,
                     const multi_vector_type & y ,
                     const std::vector<Ordinal> & col_indices )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const double alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);
    const size_t ncol = col_indices.size();

    // Copy col_indices to the device
    Kokkos::View<Ordinal*,execution_space> col_indices_dev(
      Kokkos::ViewAllocateWithoutInitializing("col_indices"), ncol);
    typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
      Kokkos::create_mirror_view(col_indices_dev);
    for (size_t i=0; i<ncol; ++i)
      col_indices_host(i) = col_indices[i];
    Kokkos::deep_copy(col_indices_dev, col_indices_host);

    // Copy columns of x into a contiguous multi-vector and transpose
    multi_vector_type xx(
      Kokkos::ViewAllocateWithoutInitializing("xx"), ncol , n );
    GatherTranspose::apply(xx, x, col_indices_dev);

    // Temporary to store result (this is not transposed)
    multi_vector_type yy(
      Kokkos::ViewAllocateWithoutInitializing("yy"), n , ncol );

    // Sparse matrix-times-multivector
    cusparseStatus_t status =
      cusparseDcsrmm2( s.handle ,
                       CUSPARSE_OPERATION_NON_TRANSPOSE ,
                       CUSPARSE_OPERATION_TRANSPOSE ,
                       n , ncol , n , nz ,
                       &alpha ,
                       s.descra ,
                       A.values.data() ,
                       A.graph.row_map.data() ,
                       A.graph.entries.data() ,
                       xx.data() ,
                       ncol ,
                       &beta ,
                       yy.data() ,
                       n );

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseDcsrmm2 " ) );
    }

    // Copy columns out of contiguous multivector
    for (size_t col=0; col<ncol; col++) {
      vector_type yy_view =
        Kokkos::subview( yy , Kokkos::ALL(), col );
      vector_type y_col =
        Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
      Kokkos::deep_copy(y_col, yy_view );
    }
  }
#else
  static void apply( const matrix_type & A ,
                     const multi_vector_type & x ,
                     const multi_vector_type & y ,
                     const std::vector<Ordinal> & col_indices )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const double alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);
    const size_t ncol = col_indices.size();

    // Copy columns of x into a contiguous vector
    vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
    vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );

    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type xx_view = Kokkos::subview( xx , span );
      vector_type x_col =
        Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
      Kokkos::deep_copy(xx_view, x_col);
    }

    // Sparse matrix-times-multivector
#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
    cusparseDnMatDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnMat(
      &x_cusparse, n, ncol, n,
      (void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
    cusparseCreateDnMat(
      &y_cusparse, n, ncol, n,
      (void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMM_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
      CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMM(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_64F ,
                   CUSPARSE_MM_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnMat(x_cusparse);
    cusparseDestroyDnMat(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseDcsrmm( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , ncol , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      xx.data() ,
                      n ,
                      &beta ,
                      yy.data() ,
                      n );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseDcsrmm " ) );
    }

    // Copy columns out of contiguous multivector
    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type yy_view = Kokkos::subview( yy , span );
      vector_type y_col =
        Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
      Kokkos::deep_copy(y_col, yy_view );
    }
  }
#endif
};

template <>
class Multiply<
  CrsMatrix< float , Kokkos::Cuda > ,
  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  void ,
  IntegralRank<2> >
{
public:
  typedef Kokkos::Cuda execution_space;
  typedef execution_space::size_type size_type;
  typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
  typedef CrsMatrix< float , execution_space > matrix_type;

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const multi_vector_type & x ,
                     const multi_vector_type & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const float alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);
    const size_t ncol = x.extent(1);

    // Sparse matrix-times-multivector
#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
    cusparseDnMatDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnMat(
      &x_cusparse, n, ncol, x.stride(1),
      (void*)x.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
    cusparseCreateDnMat(
      &y_cusparse, n, ncol, y.stride(1),
      (void*)y.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMM_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
      CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMM(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_32F ,
                   CUSPARSE_MM_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnMat(x_cusparse);
    cusparseDestroyDnMat(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseScsrmm( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , ncol , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      x.data() ,
                      n ,
                      &beta ,
                      y.data() ,
                      n );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseScsrmm " ) );
    }
  }
};
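
// Illustrative sketch: multiplying against all columns of a multi-vector
// uses the specialization above directly; the dense-matrix descriptors
// (or the csrmm leading dimensions) wrap x and y in place, so no gather
// or scatter copies are made:
//
//   typedef Kokkos::View< float** , Kokkos::LayoutLeft , Kokkos::Cuda >
//     multi_vector_type;
//   Stokhos::Multiply< Stokhos::CrsMatrix< float , Kokkos::Cuda > ,
//                      multi_vector_type , multi_vector_type ,
//                      void , Stokhos::IntegralRank<2> >::apply( A , x , y );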

template <>
class Multiply<
  CrsMatrix< double , Kokkos::Cuda > ,
  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  void ,
  IntegralRank<2> >
{
public:
  typedef Kokkos::Cuda execution_space;
  typedef execution_space::size_type size_type;
  typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
  typedef CrsMatrix< double , execution_space > matrix_type;

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const multi_vector_type & x ,
                     const multi_vector_type & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const double alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);
    const size_t ncol = x.extent(1);

    // Sparse matrix-times-multivector
#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
    cusparseDnMatDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnMat(
      &x_cusparse, n, ncol, x.stride(1),
      (void*)x.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
    cusparseCreateDnMat(
      &y_cusparse, n, ncol, y.stride(1),
      (void*)y.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMM_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
      CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMM(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_64F ,
                   CUSPARSE_MM_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnMat(x_cusparse);
    cusparseDestroyDnMat(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseDcsrmm( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , ncol , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      x.data() ,
                      n ,
                      &beta ,
                      y.data() ,
                      n );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseDcsrmm " ) );
    }
  }
};

#else
// Working on creating a version that doesn't copy vectors to a contiguous
// 2-D view and doesn't call CUSPARSE. Not done yet.
template <typename Ordinal>
class Multiply<
  CrsMatrix< double , Kokkos::Cuda > ,
  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
  std::vector<Ordinal> ,
  IntegralRank<2> >
{
public:
  typedef Kokkos::Cuda execution_space;
  typedef execution_space::size_type size_type;
  typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
  typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
  typedef CrsMatrix< double , execution_space > matrix_type;
  typedef Kokkos::View< size_type*, execution_space > column_indices_type;

  const matrix_type m_A;
  const multi_vector_type m_x;
  multi_vector_type m_y;
  const column_indices_type m_col;
  const size_type m_num_col;
  Multiply( const matrix_type& A,
            const multi_vector_type& x,
            const multi_vector_type& y,
            const column_indices_type& col) :
    m_A(A),
    m_x(x),
    m_y(y),
    m_col(col),
    m_num_col(col.extent(0)) {}

  __device__
  inline void operator() ( const size_type iRow ) const {
    const size_type iEntryBegin = m_A.graph.row_map[iRow];
    const size_type iEntryEnd = m_A.graph.row_map[iRow+1];

    for (size_type j=0; j<m_num_col; j++) {
      const size_type iCol = m_col(j);

      double sum = 0.0;

      for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
        sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
      }

      m_y( iRow, iCol ) = sum;

    }
  }

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const multi_vector_type & x ,
                     const multi_vector_type & y ,
                     const std::vector<Ordinal> & col_indices )
  {
    const size_t ncol = col_indices.size();

    // Copy col_indices to the device
    column_indices_type col_indices_dev(
      Kokkos::ViewAllocateWithoutInitializing("col_indices"), ncol);
    typename column_indices_type::HostMirror col_indices_host =
      Kokkos::create_mirror_view(col_indices_dev);
    for (size_t i=0; i<ncol; ++i)
      col_indices_host(i) = col_indices[i];
    Kokkos::deep_copy(col_indices_dev, col_indices_host);

    const size_t n = A.graph.row_map.extent(0) - 1 ;
    Kokkos::parallel_for( n , Multiply(A,x,y,col_indices_dev) );
  }
};

#endif

template<>
class Multiply<
  CrsMatrix< float , Kokkos::Cuda > ,
  std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
  std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
  void,
  IntegralRank<1> >
{
public:
  typedef Kokkos::Cuda execution_space ;
  typedef execution_space::size_type size_type ;
  typedef Kokkos::View< float* , execution_space > vector_type ;
  typedef CrsMatrix< float , execution_space > matrix_type ;

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const std::vector<vector_type> & x ,
                     const std::vector<vector_type> & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const float alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);
    const size_t ncol = x.size();

    // Copy columns of x into a contiguous vector
    vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
    vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );

    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type xx_view = Kokkos::subview( xx , span );
      Kokkos::deep_copy(xx_view, x[col]);
    }

    // Sparse matrix-times-multivector
#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
    cusparseDnMatDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnMat(
      &x_cusparse, n, ncol, n,
      (void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
    cusparseCreateDnMat(
      &y_cusparse, n, ncol, n,
      (void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMM_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
      CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMM(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_32F ,
                   CUSPARSE_MM_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnMat(x_cusparse);
    cusparseDestroyDnMat(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseScsrmm( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , ncol , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      xx.data() ,
                      n ,
                      &beta ,
                      yy.data() ,
                      n );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseScsrmm " ) );
    }

    // Copy columns out of contiguous multivector
    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type yy_view = Kokkos::subview( yy , span );
      Kokkos::deep_copy(y[col], yy_view );
    }
  }
};
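
// Illustrative sketch for the std::vector-of-Views interface above: each
// x[k] / y[k] is an independent rank-1 View; apply() packs them into one
// contiguous multi-vector, performs a single csrmm/SpMM, and unpacks the
// result.
//
//   std::vector< Kokkos::View< float* , Kokkos::Cuda > > x(ncol), y(ncol);
//   // ... allocate and fill each x[k]; allocate each y[k] ...
//   typedef Stokhos::CrsMatrix< float , Kokkos::Cuda > matrix_type;
//   Stokhos::Multiply< matrix_type , decltype(x) , decltype(y) ,
//                      void , Stokhos::IntegralRank<1> >::apply( A , x , y );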

template<>
class Multiply<
  CrsMatrix< double , Kokkos::Cuda > ,
  std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
  std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
  void,
  IntegralRank<1> >
{
public:
  typedef Kokkos::Cuda execution_space ;
  typedef execution_space::size_type size_type ;
  typedef Kokkos::View< double* , execution_space > vector_type ;
  typedef CrsMatrix< double , execution_space > matrix_type ;

  //--------------------------------------------------------------------------

  static void apply( const matrix_type & A ,
                     const std::vector<vector_type> & x ,
                     const std::vector<vector_type> & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const double alpha = 1 , beta = 0 ;
    const int n = A.graph.row_map.extent(0) - 1 ;
    const int nz = A.graph.entries.extent(0);
    const size_t ncol = x.size();

    // Copy columns of x into a contiguous vector
    vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
    vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );

    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type xx_view = Kokkos::subview( xx , span );
      Kokkos::deep_copy(xx_view, x[col]);
    }

    // Sparse matrix-times-multivector
#if USE_NEW_SPMV
    using offset_type = typename matrix_type::graph_type::size_type;
    using entry_type = typename matrix_type::graph_type::data_type;
    const cusparseIndexType_t myCusparseOffsetType =
      sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    const cusparseIndexType_t myCusparseEntryType =
      sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t A_cusparse;
    cusparseCreateCsr(
      &A_cusparse, n, n, nz,
      (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
      (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
    cusparseDnMatDescr_t x_cusparse, y_cusparse;
    cusparseCreateDnMat(
      &x_cusparse, n, ncol, n,
      (void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
    cusparseCreateDnMat(
      &y_cusparse, n, ncol, n,
      (void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
    size_t bufferSize = 0;
    void* dBuffer = NULL;
    cusparseSpMM_bufferSize(
      s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
      CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
      x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
      &bufferSize);
    cudaMalloc(&dBuffer, bufferSize);
    cusparseStatus_t status =
      cusparseSpMM(s.handle ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   CUSPARSE_OPERATION_NON_TRANSPOSE ,
                   &alpha ,
                   A_cusparse ,
                   x_cusparse ,
                   &beta ,
                   y_cusparse ,
                   CUDA_R_64F ,
                   CUSPARSE_MM_ALG_DEFAULT ,
                   dBuffer );
    cudaFree(dBuffer);
    cusparseDestroyDnMat(x_cusparse);
    cusparseDestroyDnMat(y_cusparse);
    cusparseDestroySpMat(A_cusparse);
#else
    cusparseStatus_t status =
      cusparseDcsrmm( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      n , ncol , n , nz ,
                      &alpha ,
                      s.descra ,
                      A.values.data() ,
                      A.graph.row_map.data() ,
                      A.graph.entries.data() ,
                      xx.data() ,
                      n ,
                      &beta ,
                      yy.data() ,
                      n );
#endif

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      throw std::runtime_error( std::string("ERROR - cusparseDcsrmm " ) );
    }

    // Copy columns out of contiguous multivector
    for (size_t col=0; col<ncol; col++) {
      const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
      vector_type yy_view = Kokkos::subview( yy , span );
      Kokkos::deep_copy(y[col], yy_view );
    }
  }
};

//----------------------------------------------------------------------------

} // namespace Stokhos

#endif /* #ifdef HAVE_STOKHOS_CUSPARSE */

#endif /* #ifndef STOKHOS_CUDA_CRSMATRIX_HPP */