69 Teuchos::RCP<Vector> X,
70 Teuchos::RCP<const Vector> B)
75 , is_contiguous_(true)
80 using Teuchos::MpiComm;
81 using Teuchos::outArg;
82 using Teuchos::ParameterList;
83 using Teuchos::parameterList;
86 using Teuchos::rcp_dynamic_cast;
87 using Teuchos::REDUCE_SUM;
88 using Teuchos::reduceAll;
89 typedef global_ordinal_type GO;
90 typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
96 RCP<const Comm<int> > comm = this->
getComm ();
97 const int myRank = comm->getRank ();
98 const int numProcs = comm->getSize ();
100 SLUD::int_t nprow, npcol;
107 RCP<const Comm<int> > matComm = this->
matrixA_->getComm ();
108 TEUCHOS_TEST_FOR_EXCEPTION(
109 matComm.is_null (), std::logic_error,
"Amesos2::Superlustdist "
110 "constructor: The matrix's communicator is null!");
111 RCP<const MpiComm<int> > matMpiComm =
112 rcp_dynamic_cast<const MpiComm<int> > (matComm);
117 TEUCHOS_TEST_FOR_EXCEPTION(
118 matMpiComm.is_null (), std::logic_error,
"Amesos2::Superlustdist "
119 "constructor: The matrix's communicator is not an MpiComm!");
120 TEUCHOS_TEST_FOR_EXCEPTION(
121 matMpiComm->getRawMpiComm ().is_null (), std::logic_error,
"Amesos2::"
122 "Superlustdist constructor: The matrix's communicator claims to be a "
123 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
124 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
125 "exist, which likely implies that the Teuchos::MpiComm was constructed "
126 "incorrectly. It means something different than if the MPI_Comm were "
128 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
129 data_.mat_comm = rawMpiComm;
135 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
144 SLUD::set_default_options_dist(&data_.options);
146 RCP<ParameterList> default_params =
151 data_.options.Fact = SLUD::DOFACT;
152 data_.equed = SLUD::NOEQUIL;
153 data_.options.SolveInitialized = SLUD::NO;
154 data_.options.RefineInitialized = SLUD::NO;
155 data_.rowequ =
false;
156 data_.colequ =
false;
164 data_.symb_comm = MPI_COMM_NULL;
169 data_.domains = (int) ( pow(2.0, floor(log10((
double)nprow*npcol)/log10(2.0))) );
171 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
172 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
184 int myProcParticipates = 0;
185 if (myRank < nprow * npcol) {
187 myProcParticipates = 1;
192 int numParticipatingProcs = 0;
193 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
194 outArg (numParticipatingProcs));
195 TEUCHOS_TEST_FOR_EXCEPTION(
197 std::logic_error,
"Amesos2::Superludist constructor: The matrix has "
198 << this->
globalNumRows_ <<
" > 0 global row(s), but no processes in the "
199 "communicator want to participate in its factorization! nprow = "
200 << nprow <<
" and npcol = " << npcol <<
".");
203 size_t myNumRows = 0;
206 const GO quotient = (numParticipatingProcs == 0) ?
static_cast<GO
> (0) :
207 GNR /
static_cast<GO
> (numParticipatingProcs);
209 GNR - quotient *
static_cast<GO
> (numParticipatingProcs);
210 const GO lclNumRows = (
static_cast<GO
> (myRank) < remainder) ?
211 (quotient +
static_cast<GO
> (1)) : quotient;
212 myNumRows =
static_cast<size_t> (lclNumRows);
216 const GO indexBase = this->
matrixA_->getRowMap ()->getIndexBase ();
218 rcp (
new map_type (this->
globalNumRows_, myNumRows, indexBase, comm));
224 data_.A.Store = NULL;
226 SLUD::PStatInit(&(data_.stat));
229 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
230 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
246 if ( this->status_.getNumPreOrder() > 0 ){
248#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
249 SUPERLU_FREE( data_.sizes );
250 SUPERLU_FREE( data_.fstVtxSep );
253 free( data_.fstVtxSep );
259 if( data_.A.Store != NULL ){
260 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
264 if ( this->status_.getNumNumericFact() > 0 ){
265 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
267 function_map::LUstructFree(&(data_.lu));
273 if ( this->status_.symbolicFactorizationDone() &&
274 !this->status_.numericFactorizationDone() ){
275 if ( data_.pslu_freeable.xlsub != NULL ){
276#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
277 SUPERLU_FREE( data_.pslu_freeable.xlsub );
278 SUPERLU_FREE( data_.pslu_freeable.lsub );
280 free( data_.pslu_freeable.xlsub );
281 free( data_.pslu_freeable.lsub );
284 if ( data_.pslu_freeable.xusub != NULL ){
285#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
286 SUPERLU_FREE( data_.pslu_freeable.xusub );
287 SUPERLU_FREE( data_.pslu_freeable.usub );
289 free( data_.pslu_freeable.xusub );
290 free( data_.pslu_freeable.usub );
293 if ( data_.pslu_freeable.supno_loc != NULL ){
294#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
295 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
296 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
297 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
299 free( data_.pslu_freeable.supno_loc );
300 free( data_.pslu_freeable.xsup_beg_loc );
301 free( data_.pslu_freeable.xsup_end_loc );
304#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
305 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
307 free( data_.pslu_freeable.globToLoc );
311 SLUD::PStatFree( &(data_.stat) ) ;
316 if ( data_.options.SolveInitialized == SLUD::YES )
317 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
319 SLUD::superlu_gridexit(&(data_.grid));
323 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
335 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
336 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
346 if( this->status_.getNumPreOrder() > 0 ){
347#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
348 SUPERLU_FREE( data_.sizes );
349 SUPERLU_FREE( data_.fstVtxSep );
352 free( data_.fstVtxSep );
357#ifdef HAVE_AMESOS2_TIMERS
358 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
360 info = SLUD::get_perm_c_parmetis( &(data_.A),
361 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
362 data_.grid.nprow * data_.grid.npcol, data_.domains,
363 &(data_.sizes), &(data_.fstVtxSep),
364 &(data_.grid), &(data_.symb_comm) );
366 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
368 "SuperLU_DIST pre-ordering ran out of memory after allocating "
369 << info <<
" bytes of memory" );
391#ifdef HAVE_AMESOS2_TIMERS
392 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
395#if (SUPERLU_DIST_MAJOR_VERSION > 7)
396 info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
397 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
398 data_.perm_r.getRawPtr(), data_.sizes,
399 data_.fstVtxSep, &(data_.pslu_freeable),
400 &(data_.grid.comm), &(data_.symb_comm),
404 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
405 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
406 data_.perm_r.getRawPtr(), data_.sizes,
407 data_.fstVtxSep, &(data_.pslu_freeable),
408 &(data_.grid.comm), &(data_.symb_comm),
412 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
414 "SuperLU_DIST symbolic factorization ran out of memory after"
415 " allocating " << info <<
" bytes of memory" );
417 same_symbolic_ =
false;
418 same_solve_struct_ =
false;
432 if( data_.options.Equil == SLUD::YES ) {
433 SLUD::int_t info = 0;
436 data_.R.resize(this->globalNumRows_);
437 data_.C.resize(this->globalNumCols_);
438 function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
439 &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
442 function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
443 data_.rowcnd, data_.colcnd, data_.amax,
446 data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
447 data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
451 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
452 for(
size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
455 if( same_symbolic_ ){
460#if (SUPERLU_DIST_MAJOR_VERSION > 7)
461 data_.options.Fact = SLUD::SamePattern_SameRowPerm;
462 function_map::pdistribute(&(data_.options),
463 as<SLUD::int_t>(this->globalNumRows_),
464 &(data_.A), &(data_.scale_perm),
465 &(data_.glu_freeable), &(data_.lu),
468 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
469 as<SLUD::int_t>(this->globalNumRows_),
470 &(data_.A), &(data_.scale_perm),
471 &(data_.glu_freeable), &(data_.lu),
475#if (SUPERLU_DIST_MAJOR_VERSION > 7)
476 data_.options.Fact = SLUD::DOFACT;
477 function_map::dist_psymbtonum(&(data_.options),
478 as<SLUD::int_t>(this->globalNumRows_),
479 &(data_.A), &(data_.scale_perm),
480 &(data_.pslu_freeable), &(data_.lu),
483 function_map::dist_psymbtonum(SLUD::DOFACT,
484 as<SLUD::int_t>(this->globalNumRows_),
485 &(data_.A), &(data_.scale_perm),
486 &(data_.pslu_freeable), &(data_.lu),
492 bool notran = (data_.options.Trans == SLUD::NOTRANS);
493 double anorm = function_map::plangs((notran ? (
char *)
"1" : (
char *)
"I"), &(data_.A), &(data_.grid));
497#ifdef HAVE_AMESOS2_TIMERS
498 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
500 function_map::gstrf(&(data_.options), this->globalNumRows_,
501 this->globalNumCols_, anorm, &(data_.lu),
502 &(data_.grid), &(data_.stat), &info);
506 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
508 "L and U factors have been computed but U("
509 << info <<
"," << info <<
") is exactly zero "
510 "(i.e. U is singular)");
517 data_.options.Fact = SLUD::FACTORED;
518 same_symbolic_ =
true;
527 const Teuchos::Ptr<
const MultiVecAdapter<Vector> > B)
const
533 const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
534 const global_size_type nrhs = X->getGlobalNumVectors();
535 const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
538 bvals_.resize(nrhs * local_len_rhs);
539 xvals_.resize(nrhs * local_len_rhs);
545#ifdef HAVE_AMESOS2_TIMERS
546 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
555#ifdef HAVE_AMESOS2_TIMERS
556 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
561 copy_helper::do_get(B,
564 Teuchos::ptrInArg(*superlu_rowmap_));
585 if( !same_solve_struct_ ){
586 if( data_.options.SolveInitialized == SLUD::YES ){
587 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
589 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
590 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
591 &(data_.grid), &(data_.solve_struct));
595 same_solve_struct_ =
true;
599 if (data_.options.Equil == SLUD::YES && data_.rowequ) {
600 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
601 for(global_size_type j = 0; j < nrhs; ++j) {
602 for(
size_t i = 0; i < local_len_rhs; ++i) {
603 bvals_[i + j*ld] *= data_.R[first_global_row_b + i];
611#ifdef HAVE_AMESOS2_TIMERS
612 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
615#if (SUPERLU_DIST_MAJOR_VERSION > 7)
616 function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
617 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
618 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
619 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
620 &(data_.solve_struct), &(data_.stat), &ierr);
622 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
623 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
624 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
625 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
626 &(data_.solve_struct), &(data_.stat), &ierr);
630 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
632 "Argument " << -ierr <<
" to gstrs had an illegal value" );
647#ifdef HAVE_AMESOS2_TIMERS
648 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
650 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
651 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
652 as<SLUD::int_t>(local_len_rhs),
653 data_.solve_struct.row_to_proc,
654 data_.solve_struct.inv_perm_c,
655 bvals_.getRawPtr(), ld,
656 xvals_.getRawPtr(), ld,
662 if (data_.options.Equil == SLUD::YES && data_.colequ) {
663 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
664 for(global_size_type j = 0; j < nrhs; ++j) {
665 for(
size_t i = 0; i < local_len_rhs; ++i) {
666 xvals_[i + j*ld] *= data_.C[first_global_row_b + i];
674#ifdef HAVE_AMESOS2_TIMERS
675 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
678 put_helper::do_put(X,
681 Teuchos::ptrInArg(*superlu_rowmap_));
703 using Teuchos::getIntegralValue;
704 using Teuchos::ParameterEntryValidator;
706 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
708 if( parameterList->isParameter(
"npcol") || parameterList->isParameter(
"nprow") ){
709 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter(
"nprow") &&
710 parameterList->isParameter(
"npcol")),
711 std::invalid_argument,
712 "nprow and npcol must be set together" );
714 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>(
"nprow");
715 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>(
"npcol");
717 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
718 std::invalid_argument,
719 "nprow and npcol combination invalid" );
721 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
723 SLUD::superlu_gridexit(&(data_.grid));
725 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
729 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
730 std::invalid_argument,
731 "SuperLU_DIST does not support solving the tranpose system" );
733 data_.options.Trans = SLUD::NOTRANS;
736 bool equil = parameterList->get<
bool>(
"Equil",
false);
737 data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
739 if( parameterList->isParameter(
"ColPerm") ){
740 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
741 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
743 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList,
"ColPerm");
750 data_.options.RowPerm = SLUD::NOROWPERM;
759 data_.options.IterRefine = SLUD::NOREFINE;
761 bool replace_tiny = parameterList->get<
bool>(
"ReplaceTinyPivot",
true);
762 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
764 if( parameterList->isParameter(
"IsContiguous") ){
765 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
775 using Teuchos::tuple;
776 using Teuchos::ParameterList;
777 using Teuchos::EnhancedNumberValidator;
778 using Teuchos::setStringToIntegralParameter;
779 using Teuchos::stringToIntegralParameterEntryValidator;
781 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
783 if( is_null(valid_params) ){
784 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
786 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
787 = Teuchos::rcp(
new EnhancedNumberValidator<SLUD::int_t>() );
788 col_row_validator->setMin(1);
790 pl->set(
"npcol", data_.grid.npcol,
791 "Number of columns in the processor grid. "
792 "Must be set with nprow", col_row_validator);
793 pl->set(
"nprow", data_.grid.nprow,
794 "Number of rows in the SuperLU_DIST processor grid. "
795 "Must be set together with npcol", col_row_validator);
798 setStringToIntegralParameter<SLUD::trans_t>(
"Trans",
"NOTRANS",
799 "Solve for the transpose system or not",
800 tuple<string>(
"NOTRANS"),
801 tuple<string>(
"Do not solve with transpose"),
802 tuple<SLUD::trans_t>(SLUD::NOTRANS),
806 pl->set(
"Equil",
false,
"Whether to equilibrate the system before solve");
818 pl->set(
"ReplaceTinyPivot",
true,
819 "Specifies whether to replace tiny diagonals during LU factorization");
821 setStringToIntegralParameter<SLUD::colperm_t>(
"ColPerm",
"PARMETIS",
822 "Specifies how to permute the columns of the "
823 "matrix for sparsity preservation",
824 tuple<string>(
"NATURAL",
"PARMETIS"),
825 tuple<string>(
"Natural ordering",
826 "ParMETIS ordering on A^T + A"),
827 tuple<SLUD::colperm_t>(SLUD::NATURAL,
831 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
869 using Teuchos::Array;
870 using Teuchos::ArrayView;
871 using Teuchos::ptrInArg;
876#ifdef HAVE_AMESOS2_TIMERS
877 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
881 if( data_.A.Store != NULL ){
882 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
883 data_.A.Store = NULL;
886 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
887 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
889 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
890 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
891 l_rows = as<int_t>(redist_mat->getLocalNumRows());
892 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
894 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
896 Kokkos::resize(nzvals_view_, l_nnz);
897 Kokkos::resize(colind_view_, l_nnz);
898 Kokkos::resize(rowptr_view_, l_rows + 1);
901#ifdef HAVE_AMESOS2_TIMERS
902 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
906 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
908 nzvals_view_, colind_view_, rowptr_view_,
910 ptrInArg(*superlu_rowmap_),
915 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
917 "Did not get the expected number of non-zero vals");
920 SLUD::Dtype_t dtype = type_map::dtype;
923 function_map::create_CompRowLoc_Matrix(&(data_.A),
925 l_nnz, l_rows, fst_global_row,
930 dtype, SLUD::SLU_GE);