Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Cholmod_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Amesos2: Templated Direct Sparse Solver Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ***********************************************************************
41//
42// @HEADER
43
53#ifndef AMESOS2_CHOLMOD_DEF_HPP
54#define AMESOS2_CHOLMOD_DEF_HPP
55
56#include <Teuchos_Tuple.hpp>
57#include <Teuchos_ParameterList.hpp>
58#include <Teuchos_StandardParameterEntryValidators.hpp>
59
62
63#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
64#include "KokkosSparse_sptrsv_cholmod.hpp"
65#endif
66
67namespace Amesos2 {
68
69
70template <class Matrix, class Vector>
72 Teuchos::RCP<const Matrix> A,
73 Teuchos::RCP<Vector> X,
74 Teuchos::RCP<const Vector> B )
75 : SolverCore<Amesos2::Cholmod,Matrix,Vector>(A, X, B)
76 , firstsolve(true)
77 , skip_symfact(false)
78 , map()
79 , is_contiguous_(true) // default is set by params
80 , use_triangular_solves_(false) // default is set by params
81 , use_cholmod_int_type_(false) // if true we use CHOLMOD_INT (no GPU support)
82{
83 data_.L = NULL;
84 data_.Y = NULL;
85 data_.E = NULL;
86
87 data_.c.supernodal = CHOLMOD_AUTO;
88 data_.c.quick_return_if_not_posdef = 1;
89}
90
91
92template <class Matrix, class Vector>
94{
95 if(use_cholmod_int_type_) {
96 cholmod_free_factor(&(data_.L), &(data_.c));
97 cholmod_free_dense(&(data_.Y), &data_.c);
98 cholmod_free_dense(&(data_.E), &data_.c);
99 cholmod_finish(&(data_.c));
100 }
101 else {
102 // useful to check if GPU is being used, but very verbose
103 // cholmod_l_gpu_stats(&(data_.c));
104 cholmod_l_free_factor(&(data_.L), &(data_.c));
105 cholmod_l_free_dense(&(data_.Y), &data_.c);
106 cholmod_l_free_dense(&(data_.E), &data_.c);
107 cholmod_l_finish(&(data_.c));
108 }
109}
110
111template<class Matrix, class Vector>
112int
114{
115#ifdef HAVE_AMESOS2_TIMERS
116 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
117#endif
118
119 int info = 0;
120
121 if(use_cholmod_int_type_) {
122 data_.L = cholmod_analyze(&data_.A, &(data_.c));
123 }
124 else {
125 data_.L = cholmod_l_analyze(&data_.A, &(data_.c));
126 }
127
128 info = data_.c.status;
129 skip_symfact = true;
130
131 /* All processes should have the same error code */
132 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
133
134 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
135 std::runtime_error,
136 "Amesos2 cholmod_l_analyze failure in Cholmod preOrdering_impl");
137
138 return(0);
139}
140
141
142template <class Matrix, class Vector>
143int
145{
146 int info = 0;
147
148 if(!skip_symfact) {
149#ifdef HAVE_AMESOS2_TIMERS
150 Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
151#endif
152
153 if(use_cholmod_int_type_) {
154 cholmod_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
155 }
156 else {
157 cholmod_l_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
158 }
159
160 info = data_.c.status;
161 } else {
162 /*
163 * Symbolic factorization has already occured in preOrdering_impl,
164 * but if the user calls this routine directly later, we need to
165 * redo the symbolic factorization.
166 */
167 skip_symfact = false;
168 }
169
170 /* All processes should have the same error code */
171 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
172
173 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
174 std::runtime_error,
175 "Amesos2 cholmod_l_resymbol failure in Cholmod symbolicFactorization_impl");
176
177 if(use_triangular_solves_) {
178 triangular_solve_symbolic();
179 }
180
181 return(0);
182}
183
184
185template <class Matrix, class Vector>
186int
188{
189 int info = 0;
190
191#ifdef HAVE_AMESOS2_DEBUG
192 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.ncol != Teuchos::as<size_t>(this->globalNumCols_),
193 std::runtime_error,
194 "Error in converting to cholmod_sparse: wrong number of global columns." );
195 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.nrow != Teuchos::as<size_t>(this->globalNumRows_),
196 std::runtime_error,
197 "Error in converting to cholmod_sparse: wrong number of global rows." );
198#endif
199
200#ifdef HAVE_AMESOS2_TIMERS
201 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
202#endif
203
204#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
205 // Add print out of views here - need host conversion first
206#endif
207
208 if(use_cholmod_int_type_) {
209 cholmod_factorize(&data_.A, data_.L, &(data_.c));
210 }
211 else {
212 cholmod_l_factorize(&data_.A, data_.L, &(data_.c));
213 }
214
215 info = data_.c.status;
216
217 /* All processes should have the same error code */
218 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
219
220 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_OUT_OF_MEMORY,
221 std::runtime_error,
222 "Amesos2 cholmod_l_factorize error code: CHOLMOD_OUT_OF_MEMORY");
223
224 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_NOT_POSDEF,
225 std::runtime_error,
226 "Amesos2 cholmod_l_factorize error code: CHOLMOD_NOT_POSDEF.");
227
228 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
229 std::runtime_error,
230 "Amesos2 cholmod_l_factorize error code:" << info);
231
232 if(use_triangular_solves_) {
233 triangular_solve_numeric();
234 }
235
236 return(info);
237}
238
239
240template <class Matrix, class Vector>
241int
242Cholmod<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
243 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
244{
245 const global_size_type ld_rhs = X->getGlobalLength();
246 const size_t nrhs = X->getGlobalNumVectors();
247
248 bool bDidAssignX;
249 { // Get values from RHS B
250#ifdef HAVE_AMESOS2_TIMERS
251 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
252 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
253#endif
254
255 // In general we may want to write directly to the x space without a copy.
256 // So we 'get' x which may be a direct view assignment to the MV.
257 const bool initialize_data = true;
258 const bool do_not_initialize_data = false;
259 if(use_triangular_solves_) { // to device
260#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
261 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
262 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
263 Teuchos::as<size_t>(ld_rhs),
264 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
265 this->rowIndexBase_);
266 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
267 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
268 Teuchos::as<size_t>(ld_rhs),
269 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
270 this->rowIndexBase_);
271#endif
272 }
273 else { // to host
274 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
275 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
276 Teuchos::as<size_t>(ld_rhs),
277 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
278 this->rowIndexBase_);
279 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
280 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
281 Teuchos::as<size_t>(ld_rhs),
282 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
283 this->rowIndexBase_);
284 }
285 }
286
287 int ierr = 0; // returned error code
288
289#ifdef HAVE_AMESOS2_TIMERS
290 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
291#endif
292
293 if(use_triangular_solves_) {
294 triangular_solve();
295 }
296 else {
297 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
298 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_bValues_.data(), &data_.b);
299 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
300 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_xValues_.data(), &data_.x);
301
302 cholmod_dense *xtemp = &(data_.x);
303 if(use_cholmod_int_type_) {
304 cholmod_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
305 &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
306 }
307 else {
308 cholmod_l_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
309 &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
310 }
311 }
312
313 ierr = data_.c.status;
314
315 /* All processes should have the same error code */
316 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
317
318 TEUCHOS_TEST_FOR_EXCEPTION(ierr == -2, std::runtime_error, "Ran out of memory" );
319
320 /* Update X's global values */
321
322 // if bDidAssignX, then we solved straight to the adapter's X memory space without
323 // requiring additional memory allocation, so the x data is already in place.
324 if(!bDidAssignX) {
325#ifdef HAVE_AMESOS2_TIMERS
326 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
327#endif
328
329 if(use_triangular_solves_) { // to device
330#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
331 Util::put_1d_data_helper_kokkos_view<
332 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
333 Teuchos::as<size_t>(ld_rhs),
334 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
335 this->rowIndexBase_);
336#endif
337 }
338 else { // to host
339 Util::put_1d_data_helper_kokkos_view<
340 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
341 Teuchos::as<size_t>(ld_rhs),
342 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
343 this->rowIndexBase_);
344 }
345 }
346
347 return(ierr);
348}
349
350
351template <class Matrix, class Vector>
352bool
354{
355 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
356}
357
358
359template <class Matrix, class Vector>
360void
361Cholmod<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
362{
363 // Note that the ordering is very important here. We must do this sequence:
364 // (1) Read parameter for use_cholmod_int_type
365 // (2) Call cholmod_start or cholmod_l_start
366 // (3) Set additional data_.c values
367 use_cholmod_int_type_ = parameterList->get<bool>("CholmodInt", false);
368
369 if(use_cholmod_int_type_) {
370 data_.c.itype = CHOLMOD_INT;
371 cholmod_start(&data_.c);
372 }
373 else {
374 data_.c.itype = CHOLMOD_LONG;
375 cholmod_l_start(&data_.c); // long form required for CUDA
376 }
377
378 using Teuchos::RCP;
379 using Teuchos::getIntegralValue;
380 using Teuchos::ParameterEntryValidator;
381
382 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
383
384 is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
385 use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
386
387 if(use_triangular_solves_) {
388#if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
389 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
390 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_CHOLMOD not configured." );
391#endif
392 }
393
394 data_.c.dbound = parameterList->get<double>("dbound", 0.0);
395 data_.c.prefer_upper = (parameterList->get<bool>("PreferUpper", true)) ? 1 : 0;
396 data_.c.print = parameterList->get<int>("print", 3);
397 data_.c.nmethods = parameterList->get<int>("nmethods", 0); // tries AMD and may try METIS if necessary and available
398
399 // useGPU = 1 means use GPU if possible - note this must be set after cholmod_l_start
400 // since cholmod_l_start sets it to the default value of -1.
401 // Setting to -1 means GPU is only used if env variable CHOLMOD_USE_GPU is set to 1.
402#ifdef KOKKOS_ENABLE_CUDA
403 const int default_gpu_setting = 1; // use gpu by default
404#else
405 // If building Trilinos with Cuda off, Cholmod can stil use GPU and the solver will still work.
406 // But that is likely not what was expected/wanted. If you do still want it, set the param to 1.
407 const int default_gpu_setting = 0; // use gpu by default
408#endif
409
410 data_.c.useGPU = parameterList->get<int>("useGPU", default_gpu_setting);
411
412#ifdef KOKKOS_ENABLE_CUDA
413 TEUCHOS_TEST_FOR_EXCEPTION(data_.c.useGPU != 0 && use_cholmod_int_type_, std::runtime_error,
414 "Amesos2 Cholmod solver must not use GPU (parameter useGPU = 0) if CholmodInt is turned on because Cholmod only supports GPU with long." );
415#endif
416
417 bool bSuperNodal = parameterList->get<bool>("SuperNodal", false);
418 data_.c.supernodal = bSuperNodal ? CHOLMOD_SUPERNODAL : CHOLMOD_AUTO;
419}
420
421
422template <class Matrix, class Vector>
423Teuchos::RCP<const Teuchos::ParameterList>
425{
426 using std::string;
427 using Teuchos::tuple;
428 using Teuchos::ParameterList;
429 using Teuchos::EnhancedNumberValidator;
430 using Teuchos::setStringToIntegralParameter;
431 using Teuchos::stringToIntegralParameterEntryValidator;
432
433 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
434
435 if( is_null(valid_params) ){
436 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
437
438
439 Teuchos::RCP<EnhancedNumberValidator<int> > print_validator
440 = Teuchos::rcp( new EnhancedNumberValidator<int>(0,5));
441
442 Teuchos::RCP<EnhancedNumberValidator<int> > nmethods_validator
443 = Teuchos::rcp( new EnhancedNumberValidator<int>(0,9));
444
445 pl->set("nmethods", 0, "Specifies the number of different ordering methods to try", nmethods_validator);
446
447 pl->set("print", 3, "Specifies the verbosity of the print statements", print_validator);
448
449 pl->set("dbound", 0.0,
450 "Specifies the smallest absolute value on the diagonal D for the LDL factorization");
451
452 pl->set("PreferUpper", true,
453 "Specifies whether the matrix will be "
454 "stored in upper triangular form.");
455
456 pl->set("useGPU", -1, "1: Use GPU is 1, 0: Do not use GPU, -1: ENV CHOLMOD_USE_GPU set GPU usage.");
457
458 pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
459
460 pl->set("CholmodInt", false, "Whether to use cholmod in int form");
461
462 pl->set("IsContiguous", true, "Whether GIDs contiguous");
463
464 pl->set("SuperNodal", false, "Whether to use super nodal");
465
466 valid_params = pl;
467 }
468
469 return valid_params;
470}
471
472
473template <class Matrix, class Vector>
474bool
476{
477#ifdef HAVE_AMESOS2_TIMERS
478 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
479#endif
480
481 // Only the root image needs storage allocated
482 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
483 if(use_cholmod_int_type_) {
484 Kokkos::resize(host_rows_int_view_, this->globalNumNonZeros_);
485 Kokkos::resize(host_col_ptr_int_view_, this->globalNumRows_ + 1);
486 }
487 else {
488 Kokkos::resize(host_rows_long_view_, this->globalNumNonZeros_);
489 Kokkos::resize(host_col_ptr_long_view_, this->globalNumRows_ + 1);
490 }
491
492 {
493#ifdef HAVE_AMESOS2_TIMERS
494 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
495#endif
496
497 TEUCHOS_TEST_FOR_EXCEPTION(this->rowIndexBase_ != this->columnIndexBase_,
498 std::runtime_error,
499 "Row and column maps have different indexbase ");
500
501 if(use_cholmod_int_type_) {
502 int nnz_ret = 0;
503 if ( is_contiguous_ == true ) {
505 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_int_type_array,
506 host_size_int_type_array>::do_get(this->matrixA_.ptr(),
507 host_nzvals_view_, host_rows_int_view_,
508 host_col_ptr_int_view_, nnz_ret, ROOTED,
509 ARBITRARY,
510 this->rowIndexBase_);
511 }
512 else {
514 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_int_type_array,
515 host_size_int_type_array>::do_get(this->matrixA_.ptr(),
516 host_nzvals_view_, host_rows_int_view_,
517 host_col_ptr_int_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
518 ARBITRARY,
519 this->rowIndexBase_);
520 }
521 TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
522 std::runtime_error,
523 "Did not get the expected number of non-zero vals");
524 }
525 else {
526 long nnz_ret = 0;
527 if ( is_contiguous_ == true ) {
529 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_long_type_array,
530 host_size_long_type_array>::do_get(this->matrixA_.ptr(),
531 host_nzvals_view_, host_rows_long_view_,
532 host_col_ptr_long_view_, nnz_ret, ROOTED,
533 ARBITRARY,
534 this->rowIndexBase_);
535 }
536 else {
538 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_long_type_array,
539 host_size_long_type_array>::do_get(this->matrixA_.ptr(),
540 host_nzvals_view_, host_rows_long_view_,
541 host_col_ptr_long_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
542 ARBITRARY,
543 this->rowIndexBase_);
544 }
545 TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
546 std::runtime_error,
547 "Did not get the expected number of non-zero vals");
548 }
549 }
550
551 if(use_cholmod_int_type_) {
552 function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
553 Teuchos::as<size_t>(this->globalNumCols_),
554 Teuchos::as<size_t>(this->globalNumNonZeros_),
555 0,
556 host_col_ptr_int_view_.data(),
557 host_nzvals_view_.data(),
558 host_rows_int_view_.data(),
559 &(data_.A),
560 CHOLMOD_INT);
561 }
562 else {
563 function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
564 Teuchos::as<size_t>(this->globalNumCols_),
565 Teuchos::as<size_t>(this->globalNumNonZeros_),
566 0,
567 host_col_ptr_long_view_.data(),
568 host_nzvals_view_.data(),
569 host_rows_long_view_.data(),
570 &(data_.A),
571 CHOLMOD_LONG);
572 }
573
574 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.stype == 0, std::runtime_error,
575 "CHOLMOD loadA_impl loaded matrix but it is not symmetric.");
576
577 return true;
578}
579
580
581template <class Matrix, class Vector>
582void
584{
585#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
586 // Create handles for U and U^T solves
587 if(use_cholmod_int_type_) {
588 device_int_khL_.create_sptrsv_handle(
589 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
590 device_int_khU_.create_sptrsv_handle(
591 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
592 }
593 else {
594 device_long_khL_.create_sptrsv_handle(
595 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
596 device_long_khU_.create_sptrsv_handle(
597 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
598 }
599
600 // extract etree and iperm from CHOLMOD
601 Kokkos::resize(host_trsv_etree_, data_.L->nsuper);
602 if(use_cholmod_int_type_) {
603 int *int_etree = static_cast<int*>(data_.c.Iwork) + 2 * data_.L->n;
604 for (size_t i = 0 ; i < data_.L->nsuper; ++i) {
605 host_trsv_etree_(i) = int_etree[i];
606 }
607 }
608 else {
609 long *long_etree = static_cast<long*>(data_.c.Iwork) + 2 * data_.L->n;
610 for (size_t i = 0 ; i < data_.L->nsuper; ++i) { // convert long to int array for trsv API
611 host_trsv_etree_(i) = static_cast<int>(long_etree[i]);
612 }
613 }
614
615 // set etree
616 if(use_cholmod_int_type_) {
617 device_int_khL_.set_sptrsv_etree(host_trsv_etree_.data());
618 device_int_khU_.set_sptrsv_etree(host_trsv_etree_.data());
619 }
620 else {
621 device_long_khL_.set_sptrsv_etree(host_trsv_etree_.data());
622 device_long_khU_.set_sptrsv_etree(host_trsv_etree_.data());
623 }
624
625 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
626 Kokkos::resize(host_trsv_perm_, ld_rhs);
627 if(use_cholmod_int_type_) {
628 int *int_iperm = static_cast<int*>(data_.L->Perm);
629 for (size_t i = 0; i < ld_rhs; i++) {
630 host_trsv_perm_(int_iperm[i]) = i;
631 }
632 }
633 else {
634 long *long_iperm = static_cast<long*>(data_.L->Perm);
635 for (size_t i = 0; i < ld_rhs; i++) {
636 host_trsv_perm_(long_iperm[i]) = i;
637 }
638 }
639 deep_copy_or_assign_view(device_trsv_perm_, host_trsv_perm_); // will use device to permute
640
641 // set permutation
642 if(use_cholmod_int_type_) {
643 device_int_khL_.set_sptrsv_perm(host_trsv_perm_.data());
644 device_int_khU_.set_sptrsv_perm(host_trsv_perm_.data());
645 }
646 else {
647 device_long_khL_.set_sptrsv_perm(host_trsv_perm_.data());
648 device_long_khU_.set_sptrsv_perm(host_trsv_perm_.data());
649 }
650
651 // Do symbolic analysis
652 if(use_cholmod_int_type_) {
653 KokkosSparse::Experimental::sptrsv_symbolic<int, kernel_handle_int_type>
654 (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
655 }
656 else {
657 KokkosSparse::Experimental::sptrsv_symbolic<long, kernel_handle_long_type>
658 (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
659 }
660#endif
661}
662
663
664template <class Matrix, class Vector>
665void
666Cholmod<Matrix,Vector>::triangular_solve_numeric()
667{
668#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
669 // Do numerical compute
670 if(use_cholmod_int_type_) {
671 KokkosSparse::Experimental::sptrsv_compute<int, kernel_handle_int_type>
672 (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
673 }
674 else {
675 KokkosSparse::Experimental::sptrsv_compute<long, kernel_handle_long_type>
676 (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
677 }
678#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
679}
680
681
682template <class Matrix, class Vector>
683void
684Cholmod<Matrix,Vector>::triangular_solve() const
685{
686#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
687 size_t ld_rhs = device_xValues_.extent(0);
688 size_t nrhs = device_xValues_.extent(1);
689
690 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
691 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
692
693 // forward pivot
694 auto local_device_bValues = device_bValues_;
695 auto local_device_trsv_perm = device_trsv_perm_;
696 auto local_device_trsv_rhs = device_trsv_rhs_;
697 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
698 KOKKOS_LAMBDA(size_t j) {
699 for(size_t k = 0; k < nrhs; ++k) {
700 local_device_trsv_rhs(local_device_trsv_perm(j),k) = local_device_bValues(j,k);
701 }
702 });
703
704 for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
705 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
706 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
707
708 // do L solve= - numeric (only rhs is modified) on the default device/host space
709 if(use_cholmod_int_type_) {
710 KokkosSparse::Experimental::sptrsv_solve(&device_int_khL_, sub_sol, sub_rhs);
711 }
712 else {
713 KokkosSparse::Experimental::sptrsv_solve(&device_long_khL_, sub_sol, sub_rhs);
714 }
715
716 // do L^T solve - numeric (only rhs is modified) on the default device/host space
717 if(use_cholmod_int_type_) {
718 KokkosSparse::Experimental::sptrsv_solve(&device_int_khU_, sub_rhs, sub_sol);
719 }
720 else {
721 KokkosSparse::Experimental::sptrsv_solve(&device_long_khU_, sub_rhs, sub_sol);
722 }
723
724 } // end loop over rhs vectors
725
726 // backward pivot
727 auto local_device_xValues = device_xValues_;
728 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
729 KOKKOS_LAMBDA(size_t j) {
730 for(size_t k = 0; k < nrhs; ++k) {
731 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm(j),k);
732 }
733 });
734#endif
735}
736
737
738template<class Matrix, class Vector>
739const char* Cholmod<Matrix,Vector>::name = "Cholmod";
740
741
742} // end namespace Amesos2
743
744#endif // AMESOS2_CHOLMOD_DEF_HPP
Amesos2 CHOLMOD declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:127
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:128
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:143
Amesos2 interface to the CHOLMOD package.
Definition Amesos2_Cholmod_decl.hpp:77
~Cholmod()
Destructor.
Definition Amesos2_Cholmod_def.hpp:93
Cholmod(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Cholmod_def.hpp:71
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:106
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:65
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:652