42#ifndef STOKHOS_CUDA_CRSMATRIX_HPP
43#define STOKHOS_CUDA_CRSMATRIX_HPP
46#ifdef HAVE_STOKHOS_CUSPARSE
52#include <cuda_runtime.h>
54#include <cusparse_v2.h>
56#include "Kokkos_Core.hpp"
64#define USE_NEW_SPMV (CUSPARSE_VERSION >= 11000)
68class CudaSparseSingleton {
71 cusparseStatus_t status;
72 cusparseHandle_t handle;
73 cusparseMatDescr_t descra;
75 static CudaSparseSingleton & singleton();
81 status = cusparseCreate(&handle);
82 if(status != CUSPARSE_STATUS_SUCCESS)
84 throw std::runtime_error( std::string(
"ERROR - CUSPARSE Library Initialization failed" ) );
87 status = cusparseCreateMatDescr(&descra);
88 if(status != CUSPARSE_STATUS_SUCCESS)
90 throw std::runtime_error( std::string(
"ERROR - CUSPARSE Library Matrix descriptor failed" ) );
93 cusparseSetMatType(descra , CUSPARSE_MATRIX_TYPE_GENERAL);
94 cusparseSetMatIndexBase(descra , CUSPARSE_INDEX_BASE_ZERO);
97 CudaSparseSingleton(
const CudaSparseSingleton & );
98 CudaSparseSingleton & operator = (
const CudaSparseSingleton & );
101CudaSparseSingleton & CudaSparseSingleton::singleton()
103 static CudaSparseSingleton s ;
return s ;
108 CrsMatrix< float ,
Kokkos::Cuda > ,
109 Kokkos::View< float* , Kokkos::Cuda > ,
110 Kokkos::View< float* , Kokkos::Cuda > ,
116 typedef execution_space::size_type size_type ;
117 typedef Kokkos::View< float* , execution_space > vector_type ;
118 typedef CrsMatrix< float , execution_space > matrix_type ;
122 static void apply(
const matrix_type & A ,
123 const vector_type & x ,
124 const vector_type & y )
126 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
127 const float alpha = 1 , beta = 0 ;
128 const int n = A.graph.row_map.extent(0) - 1 ;
129 const int nz = A.graph.entries.extent(0);
132 using offset_type =
typename matrix_type::graph_type::size_type;
133 using entry_type =
typename matrix_type::graph_type::data_type;
134 const cusparseIndexType_t myCusparseOffsetType =
135 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
136 const cusparseIndexType_t myCusparseEntryType =
137 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
138 cusparseSpMatDescr_t A_cusparse;
140 &A_cusparse, n, n, nz,
141 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
142 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
143 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
144 cusparseDnVecDescr_t x_cusparse, y_cusparse;
145 cusparseCreateDnVec(&x_cusparse, n, (
void*)
x.data(), CUDA_R_32F);
146 cusparseCreateDnVec(&y_cusparse, n, (
void*)
y.data(), CUDA_R_32F);
147 size_t bufferSize = 0;
148 void* dBuffer = NULL;
149 cusparseSpMV_bufferSize(
150 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
151 x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MV_ALG_DEFAULT,
153 cudaMalloc(&dBuffer, bufferSize);
154 cusparseStatus_t status =
155 cusparseSpMV(s.handle ,
156 CUSPARSE_OPERATION_NON_TRANSPOSE ,
163 CUSPARSE_MV_ALG_DEFAULT ,
166 cusparseDestroyDnVec(x_cusparse);
167 cusparseDestroyDnVec(y_cusparse);
168 cusparseDestroySpMat(A_cusparse);
170 cusparseStatus_t status =
171 cusparseScsrmv( s.handle ,
172 CUSPARSE_OPERATION_NON_TRANSPOSE ,
177 A.graph.row_map.data() ,
178 A.graph.entries.data() ,
184 if ( CUSPARSE_STATUS_SUCCESS != status ) {
185 throw std::runtime_error( std::string(
"ERROR - cusparseScsrmv " ) );
193 Kokkos::View< double* , Kokkos::Cuda > ,
194 Kokkos::View< double* , Kokkos::Cuda > ,
200 typedef execution_space::size_type size_type ;
201 typedef Kokkos::View< double* , execution_space > vector_type ;
202 typedef CrsMatrix< double , execution_space > matrix_type ;
206 static void apply(
const matrix_type & A ,
207 const vector_type & x ,
208 const vector_type & y )
210 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
211 const double alpha = 1 , beta = 0 ;
212 const int n = A.graph.row_map.extent(0) - 1 ;
213 const int nz = A.graph.entries.extent(0);
216 using offset_type =
typename matrix_type::graph_type::size_type;
217 using entry_type =
typename matrix_type::graph_type::data_type;
218 const cusparseIndexType_t myCusparseOffsetType =
219 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
220 const cusparseIndexType_t myCusparseEntryType =
221 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
222 cusparseSpMatDescr_t A_cusparse;
224 &A_cusparse, n, n, nz,
225 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
226 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
227 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
228 cusparseDnVecDescr_t x_cusparse, y_cusparse;
229 cusparseCreateDnVec(&x_cusparse, n, (
void*)
x.data(), CUDA_R_64F);
230 cusparseCreateDnVec(&y_cusparse, n, (
void*)
y.data(), CUDA_R_64F);
231 size_t bufferSize = 0;
232 void* dBuffer = NULL;
233 cusparseSpMV_bufferSize(
234 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
235 x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT,
237 cudaMalloc(&dBuffer, bufferSize);
238 cusparseStatus_t status =
239 cusparseSpMV(s.handle ,
240 CUSPARSE_OPERATION_NON_TRANSPOSE ,
247 CUSPARSE_MV_ALG_DEFAULT ,
250 cusparseDestroyDnVec(x_cusparse);
251 cusparseDestroyDnVec(y_cusparse);
252 cusparseDestroySpMat(A_cusparse);
254 cusparseStatus_t status =
255 cusparseDcsrmv( s.handle ,
256 CUSPARSE_OPERATION_NON_TRANSPOSE ,
261 A.graph.row_map.data() ,
262 A.graph.entries.data() ,
268 if ( CUSPARSE_STATUS_SUCCESS != status ) {
269 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
274template <
typename Ordinal>
276 CrsMatrix< float ,
Kokkos::Cuda > ,
277 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
278 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
279 std::vector<Ordinal> ,
284 typedef execution_space::size_type size_type;
285 typedef Kokkos::View< float*, Kokkos::LayoutLeft, execution_space > vector_type;
286 typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
287 typedef CrsMatrix< float , execution_space > matrix_type;
291 static void apply(
const matrix_type & A ,
292 const multi_vector_type & x ,
293 const multi_vector_type & y ,
294 const std::vector<Ordinal> & col_indices )
296 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
297 const float alpha = 1 , beta = 0 ;
298 const int n = A.graph.row_map.extent(0) - 1 ;
299 const int nz = A.graph.entries.extent(0);
300 const size_t ncol = col_indices.size();
303 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
304 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
306 for (
size_t col=0; col<ncol; col++) {
307 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
308 vector_type xx_view = Kokkos::subview( xx , span );
310 Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
316 using offset_type =
typename matrix_type::graph_type::size_type;
317 using entry_type =
typename matrix_type::graph_type::data_type;
318 const cusparseIndexType_t myCusparseOffsetType =
319 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
320 const cusparseIndexType_t myCusparseEntryType =
321 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
322 cusparseSpMatDescr_t A_cusparse;
324 &A_cusparse, n, n, nz,
325 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
326 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
327 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
328 cusparseDnMatDescr_t x_cusparse, y_cusparse;
330 &x_cusparse, n, ncol, n,
331 (
void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
333 &y_cusparse, n, ncol, n,
334 (
void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
335 size_t bufferSize = 0;
336 void* dBuffer = NULL;
337 cusparseSpMM_bufferSize(
338 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
339 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
340 x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
342 cudaMalloc(&dBuffer, bufferSize);
343 cusparseStatus_t status =
344 cusparseSpMM(s.handle ,
345 CUSPARSE_OPERATION_NON_TRANSPOSE ,
346 CUSPARSE_OPERATION_NON_TRANSPOSE ,
353 CUSPARSE_MM_ALG_DEFAULT ,
356 cusparseDestroyDnMat(x_cusparse);
357 cusparseDestroyDnMat(y_cusparse);
358 cusparseDestroySpMat(A_cusparse);
360 cusparseStatus_t status =
361 cusparseScsrmm( s.handle ,
362 CUSPARSE_OPERATION_NON_TRANSPOSE ,
367 A.graph.row_map.data() ,
368 A.graph.entries.data() ,
376 if ( CUSPARSE_STATUS_SUCCESS != status ) {
377 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
381 for (
size_t col=0; col<ncol; col++) {
382 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
383 vector_type yy_view = Kokkos::subview( yy , span );
385 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
391#define USE_CUSPARSE 1
394template <
typename Ordinal>
397 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
398 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
399 std::vector<Ordinal> ,
404 typedef execution_space::size_type size_type;
405 typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
406 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
407 typedef CrsMatrix< double , execution_space > matrix_type;
411#define USE_TRANSPOSE 0
418 struct GatherTranspose {
420 typedef execution_space::size_type size_type;
422 multi_vector_type m_xt;
423 const multi_vector_type m_x;
424 const Kokkos::View<Ordinal*,execution_space> m_col;
425 const size_type m_ncol;
427 GatherTranspose( multi_vector_type& xt,
428 const multi_vector_type& x,
429 const Kokkos::View<Ordinal*,execution_space>& col ) :
430 m_xt(xt), m_x(
x), m_col(col), m_ncol(col.extent(0)) {}
433 inline void operator() (size_type i)
const {
434 for (size_type
j=0;
j<m_ncol; ++
j)
435 m_xt(
j,i) = m_x(i,m_col(
j));
438 static void apply( multi_vector_type& xt,
439 const multi_vector_type& x,
440 const Kokkos::View<Ordinal*,execution_space>& col ) {
441 const size_type n =
x.extent(0);
442 Kokkos::parallel_for( n , GatherTranspose(xt,x,col) );
446 static void apply(
const matrix_type & A ,
447 const multi_vector_type & x ,
448 const multi_vector_type & y ,
449 const std::vector<Ordinal> & col_indices )
451 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
452 const double alpha = 1 , beta = 0 ;
453 const int n = A.graph.row_map.extent(0) - 1 ;
454 const int nz = A.graph.entries.extent(0);
455 const size_t ncol = col_indices.size();
458 Kokkos::View<Ordinal*,execution_space> col_indices_dev(
459 Kokkos::ViewAllocateWithoutInitializing(
"col_indices"), ncol);
460 typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
462 for (
size_t i=0; i<ncol; ++i)
463 col_indices_host(i) = col_indices[i];
467 multi_vector_type xx(
468 Kokkos::ViewAllocateWithoutInitializing(
"xx"), ncol , n );
469 GatherTranspose::apply(xx, x, col_indices_dev);
472 multi_vector_type yy(
473 Kokkos::ViewAllocateWithoutInitializing(
"yy"), n , ncol );
476 cusparseStatus_t status =
477 cusparseDcsrmm2( s.handle ,
478 CUSPARSE_OPERATION_NON_TRANSPOSE ,
479 CUSPARSE_OPERATION_TRANSPOSE ,
484 A.graph.row_map.data() ,
485 A.graph.entries.data() ,
492 if ( CUSPARSE_STATUS_SUCCESS != status ) {
493 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
497 for (
size_t col=0; col<ncol; col++) {
498 vector_type yy_view =
499 Kokkos::subview( yy , Kokkos::ALL(), col );
501 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
506 static void apply(
const matrix_type & A ,
507 const multi_vector_type & x ,
508 const multi_vector_type & y ,
509 const std::vector<Ordinal> & col_indices )
511 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
512 const double alpha = 1 , beta = 0 ;
513 const int n = A.graph.row_map.extent(0) - 1 ;
514 const int nz = A.graph.entries.extent(0);
515 const size_t ncol = col_indices.size();
518 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
519 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
521 for (
size_t col=0; col<ncol; col++) {
522 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
523 vector_type xx_view = Kokkos::subview( xx , span );
525 Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
531 using offset_type =
typename matrix_type::graph_type::size_type;
532 using entry_type =
typename matrix_type::graph_type::data_type;
533 const cusparseIndexType_t myCusparseOffsetType =
534 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
535 const cusparseIndexType_t myCusparseEntryType =
536 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
537 cusparseSpMatDescr_t A_cusparse;
539 &A_cusparse, n, n, nz,
540 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
541 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
542 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
543 cusparseDnMatDescr_t x_cusparse, y_cusparse;
545 &x_cusparse, n, ncol, n,
546 (
void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
548 &y_cusparse, n, ncol, n,
549 (
void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
550 size_t bufferSize = 0;
551 void* dBuffer = NULL;
552 cusparseSpMM_bufferSize(
553 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
554 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
555 x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
557 cudaMalloc(&dBuffer, bufferSize);
558 cusparseStatus_t status =
559 cusparseSpMM(s.handle ,
560 CUSPARSE_OPERATION_NON_TRANSPOSE ,
561 CUSPARSE_OPERATION_NON_TRANSPOSE ,
568 CUSPARSE_MM_ALG_DEFAULT ,
571 cusparseDestroyDnMat(x_cusparse);
572 cusparseDestroyDnMat(y_cusparse);
573 cusparseDestroySpMat(A_cusparse);
575 cusparseStatus_t status =
576 cusparseDcsrmm( s.handle ,
577 CUSPARSE_OPERATION_NON_TRANSPOSE ,
582 A.graph.row_map.data() ,
583 A.graph.entries.data() ,
591 if ( CUSPARSE_STATUS_SUCCESS != status ) {
592 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
596 for (
size_t col=0; col<ncol; col++) {
597 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
598 vector_type yy_view = Kokkos::subview( yy , span );
600 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
609 CrsMatrix< float ,
Kokkos::Cuda > ,
610 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
611 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
617 typedef execution_space::size_type size_type;
618 typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
619 typedef CrsMatrix< float , execution_space > matrix_type;
623 static void apply(
const matrix_type & A ,
624 const multi_vector_type & x ,
625 const multi_vector_type & y )
627 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
628 const float alpha = 1 , beta = 0 ;
629 const int n = A.graph.row_map.extent(0) - 1 ;
630 const int nz = A.graph.entries.extent(0);
631 const size_t ncol =
x.extent(1);
635 using offset_type =
typename matrix_type::graph_type::size_type;
636 using entry_type =
typename matrix_type::graph_type::data_type;
637 const cusparseIndexType_t myCusparseOffsetType =
638 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
639 const cusparseIndexType_t myCusparseEntryType =
640 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
641 cusparseSpMatDescr_t A_cusparse;
643 &A_cusparse, n, n, nz,
644 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
645 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
646 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
647 cusparseDnMatDescr_t x_cusparse, y_cusparse;
649 &x_cusparse, n, ncol,
x.stride(1),
650 (
void*)
x.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
652 &y_cusparse, n, ncol,
y.stride(1),
653 (
void*)
y.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
654 size_t bufferSize = 0;
655 void* dBuffer = NULL;
656 cusparseSpMM_bufferSize(
657 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
658 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
659 x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
661 cudaMalloc(&dBuffer, bufferSize);
662 cusparseStatus_t status =
663 cusparseSpMM(s.handle ,
664 CUSPARSE_OPERATION_NON_TRANSPOSE ,
665 CUSPARSE_OPERATION_NON_TRANSPOSE ,
672 CUSPARSE_MM_ALG_DEFAULT ,
675 cusparseDestroyDnMat(x_cusparse);
676 cusparseDestroyDnMat(y_cusparse);
677 cusparseDestroySpMat(A_cusparse);
679 cusparseStatus_t status =
680 cusparseScsrmm( s.handle ,
681 CUSPARSE_OPERATION_NON_TRANSPOSE ,
686 A.graph.row_map.data() ,
687 A.graph.entries.data() ,
695 if ( CUSPARSE_STATUS_SUCCESS != status ) {
696 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
704 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
705 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
711 typedef execution_space::size_type size_type;
712 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
713 typedef CrsMatrix< double , execution_space > matrix_type;
717 static void apply(
const matrix_type & A ,
718 const multi_vector_type & x ,
719 const multi_vector_type & y )
721 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
722 const double alpha = 1 , beta = 0 ;
723 const int n = A.graph.row_map.extent(0) - 1 ;
724 const int nz = A.graph.entries.extent(0);
725 const size_t ncol =
x.extent(1);
729 using offset_type =
typename matrix_type::graph_type::size_type;
730 using entry_type =
typename matrix_type::graph_type::data_type;
731 const cusparseIndexType_t myCusparseOffsetType =
732 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
733 const cusparseIndexType_t myCusparseEntryType =
734 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
735 cusparseSpMatDescr_t A_cusparse;
737 &A_cusparse, n, n, nz,
738 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
739 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
740 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
741 cusparseDnMatDescr_t x_cusparse, y_cusparse;
743 &x_cusparse, n, ncol,
x.stride(1),
744 (
void*)
x.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
746 &y_cusparse, n, ncol,
y.stride(1),
747 (
void*)
y.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
748 size_t bufferSize = 0;
749 void* dBuffer = NULL;
750 cusparseSpMM_bufferSize(
751 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
752 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
753 x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
755 cudaMalloc(&dBuffer, bufferSize);
756 cusparseStatus_t status =
757 cusparseSpMM(s.handle ,
758 CUSPARSE_OPERATION_NON_TRANSPOSE ,
759 CUSPARSE_OPERATION_NON_TRANSPOSE ,
766 CUSPARSE_MM_ALG_DEFAULT ,
769 cusparseDestroyDnMat(x_cusparse);
770 cusparseDestroyDnMat(y_cusparse);
771 cusparseDestroySpMat(A_cusparse);
773 cusparseStatus_t status =
774 cusparseDcsrmm( s.handle ,
775 CUSPARSE_OPERATION_NON_TRANSPOSE ,
780 A.graph.row_map.data() ,
781 A.graph.entries.data() ,
789 if ( CUSPARSE_STATUS_SUCCESS != status ) {
790 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
798template <
typename Ordinal>
801 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
802 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
803 std::vector<Ordinal> ,
808 typedef execution_space::size_type size_type;
809 typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
810 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
811 typedef CrsMatrix< double , execution_space > matrix_type;
812 typedef Kokkos::View< size_type*, execution_space > column_indices_type;
814 const matrix_type m_A;
815 const multi_vector_type m_x;
816 multi_vector_type m_y;
817 const column_indices_type m_col;
818 const size_type m_num_col;
820 Multiply(
const matrix_type& A,
821 const multi_vector_type& x,
822 multi_vector_type& y,
823 const column_indices_type& col) :
828 m_num_col(col.extent(0)) {}
831 inline void operator() (
const size_type iRow )
const {
832 const size_type iEntryBegin = m_A.graph.row_map[iRow];
833 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
835 for (size_type
j=0;
j<m_num_col;
j++) {
836 size_type iCol = m_col_indices[
j];
840 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
841 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
844 m_y( iRow, iCol ) =
sum;
851 static void apply(
const matrix_type & A ,
852 const multi_vector_type & x ,
853 const multi_vector_type & y ,
854 const std::vector<Ordinal> & col_indices )
857 Kokkos::View<Ordinal*,execution_space> col_indices_dev(
858 Kokkos::ViewAllocateWithoutInitializing(
"col_indices"), ncol);
859 typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
861 for (
size_t i=0; i<ncol; ++i)
862 col_indices_host(i) = col_indices[i];
865 const size_t n = A.graph.row_map.extent(0) - 1 ;
866 Kokkos::parallel_for( n , Multiply(A,x,y,col_indices_dev) );
874 CrsMatrix< float ,
Kokkos::Cuda > ,
875 std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
876 std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
882 typedef execution_space::size_type size_type ;
883 typedef Kokkos::View< float* , execution_space > vector_type ;
884 typedef CrsMatrix< float , execution_space > matrix_type ;
888 static void apply(
const matrix_type & A ,
889 const std::vector<vector_type> & x ,
890 const std::vector<vector_type> & y )
892 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
893 const float alpha = 1 , beta = 0 ;
894 const int n = A.graph.row_map.extent(0) - 1 ;
895 const int nz = A.graph.entries.extent(0);
896 const size_t ncol =
x.size();
899 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
900 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
902 for (
size_t col=0; col<ncol; col++) {
903 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
904 vector_type xx_view = Kokkos::subview( xx , span );
910 using offset_type =
typename matrix_type::graph_type::size_type;
911 using entry_type =
typename matrix_type::graph_type::data_type;
912 const cusparseIndexType_t myCusparseOffsetType =
913 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
914 const cusparseIndexType_t myCusparseEntryType =
915 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
916 cusparseSpMatDescr_t A_cusparse;
918 &A_cusparse, n, n, nz,
919 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
920 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
921 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
922 cusparseDnMatDescr_t x_cusparse, y_cusparse;
924 &x_cusparse, n, ncol, n,
925 (
void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
927 &y_cusparse, n, ncol, n,
928 (
void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
929 size_t bufferSize = 0;
930 void* dBuffer = NULL;
931 cusparseSpMM_bufferSize(
932 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
933 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
934 x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
936 cudaMalloc(&dBuffer, bufferSize);
937 cusparseStatus_t status =
938 cusparseSpMM(s.handle ,
939 CUSPARSE_OPERATION_NON_TRANSPOSE ,
940 CUSPARSE_OPERATION_NON_TRANSPOSE ,
947 CUSPARSE_MM_ALG_DEFAULT ,
950 cusparseDestroyDnMat(x_cusparse);
951 cusparseDestroyDnMat(y_cusparse);
952 cusparseDestroySpMat(A_cusparse);
954 cusparseStatus_t status =
955 cusparseScsrmm( s.handle ,
956 CUSPARSE_OPERATION_NON_TRANSPOSE ,
961 A.graph.row_map.data() ,
962 A.graph.entries.data() ,
970 if ( CUSPARSE_STATUS_SUCCESS != status ) {
971 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
975 for (
size_t col=0; col<ncol; col++) {
976 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
977 vector_type yy_view = Kokkos::subview( yy , span );
986 std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
987 std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
993 typedef execution_space::size_type size_type ;
994 typedef Kokkos::View< double* , execution_space > vector_type ;
995 typedef CrsMatrix< double , execution_space > matrix_type ;
999 static void apply(
const matrix_type & A ,
1000 const std::vector<vector_type> & x ,
1001 const std::vector<vector_type> & y )
1003 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
1004 const double alpha = 1 , beta = 0 ;
1005 const int n = A.graph.row_map.extent(0) - 1 ;
1006 const int nz = A.graph.entries.extent(0);
1007 const size_t ncol =
x.size();
1010 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
1011 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
1013 for (
size_t col=0; col<ncol; col++) {
1014 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
1015 vector_type xx_view = Kokkos::subview( xx , span );
1021 using offset_type =
typename matrix_type::graph_type::size_type;
1022 using entry_type =
typename matrix_type::graph_type::data_type;
1023 const cusparseIndexType_t myCusparseOffsetType =
1024 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
1025 const cusparseIndexType_t myCusparseEntryType =
1026 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
1027 cusparseSpMatDescr_t A_cusparse;
1029 &A_cusparse, n, n, nz,
1030 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
1031 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
1032 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
1033 cusparseDnMatDescr_t x_cusparse, y_cusparse;
1034 cusparseCreateDnMat(
1035 &x_cusparse, n, ncol, n,
1036 (
void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
1037 cusparseCreateDnMat(
1038 &y_cusparse, n, ncol, n,
1039 (
void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
1040 size_t bufferSize = 0;
1041 void* dBuffer = NULL;
1042 cusparseSpMM_bufferSize(
1043 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1044 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
1045 x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
1047 cudaMalloc(&dBuffer, bufferSize);
1048 cusparseStatus_t status =
1049 cusparseSpMM(s.handle ,
1050 CUSPARSE_OPERATION_NON_TRANSPOSE ,
1051 CUSPARSE_OPERATION_NON_TRANSPOSE ,
1058 CUSPARSE_MM_ALG_DEFAULT ,
1061 cusparseDestroyDnMat(x_cusparse);
1062 cusparseDestroyDnMat(y_cusparse);
1063 cusparseDestroySpMat(A_cusparse);
1065 cusparseStatus_t status =
1066 cusparseDcsrmm( s.handle ,
1067 CUSPARSE_OPERATION_NON_TRANSPOSE ,
1072 A.graph.row_map.data() ,
1073 A.graph.entries.data() ,
1081 if ( CUSPARSE_STATUS_SUCCESS != status ) {
1082 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
1086 for (
size_t col=0; col<ncol; col++) {
1087 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
1088 vector_type yy_view = Kokkos::subview( yy , span );
Kokkos::DefaultExecutionSpace execution_space
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Top-level namespace for Stokhos classes and functions.
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y