460 typedef impl_scalar_type IST;
461 const char prefix[] =
"Ifpack2::Experimental::RBILUK::compute: ";
466 TEUCHOS_TEST_FOR_EXCEPTION
467 (A_block_.is_null (), std::runtime_error, prefix <<
"The matrix (A_block_, "
468 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
469 "input before calling this method.");
470 TEUCHOS_TEST_FOR_EXCEPTION
471 (! A_block_->isFillComplete (), std::runtime_error, prefix <<
"The matrix "
472 "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
473 "initialize() or compute() with this matrix until the matrix is fill "
474 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
475 "underlying graph is fill complete.");
477 if (! this->isInitialized ()) {
484 if (! A_block_.is_null ()) {
485 Teuchos::RCP<block_crs_matrix_type> A_nc =
486 Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
499 Teuchos::Time timer (
"RBILUK::compute");
500 double startTime = timer.wallTime();
502 Teuchos::TimeMonitor timeMon (timer);
503 this->isComputed_ =
false;
510 initAllValues (*A_block_);
513 LO NumL, NumU, NumURead;
516 const size_t MaxNumEntries =
517 L_block_->getLocalMaxNumRowEntries () + U_block_->getLocalMaxNumRowEntries () + 1;
519 const LO blockMatSize = blockSize_*blockSize_;
524 const LO rowStride = blockSize_;
526 Teuchos::Array<int> ipiv_teuchos(blockSize_);
527 Kokkos::View<
int*, Kokkos::HostSpace,
528 Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
529 Teuchos::Array<IST> work_teuchos(blockSize_);
530 Kokkos::View<IST*, Kokkos::HostSpace,
531 Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
533 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
534 Teuchos::Array<int> colflag(num_cols);
536 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock (
"diagModBlock", blockSize_, blockSize_);
537 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp (
"matTmp", blockSize_, blockSize_);
538 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier (
"multiplier", blockSize_, blockSize_);
546 for (
size_t j = 0; j < num_cols; ++j) {
549 Teuchos::Array<LO> InI(MaxNumEntries, 0);
550 Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
552 const LO numLocalRows = L_block_->getLocalNumRows ();
553 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
557 NumIn = MaxNumEntries;
558 local_inds_host_view_type colValsL;
559 values_host_view_type valsL;
561 L_block_->getLocalRowView(local_row, colValsL, valsL);
562 NumL = (LO) colValsL.size();
563 for (LO j = 0; j < NumL; ++j)
565 const LO matOffset = blockMatSize*j;
566 little_block_host_type lmat((
typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
567 little_block_host_type lmatV((
typename little_block_host_type::value_type*) &InV[matOffset],blockSize_,rowStride);
569 Tpetra::COPY (lmat, lmatV);
570 InI[j] = colValsL[j];
573 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
574 little_block_host_type dmatV((
typename little_block_host_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
576 Tpetra::COPY (dmat, dmatV);
577 InI[NumL] = local_row;
579 local_inds_host_view_type colValsU;
580 values_host_view_type valsU;
581 U_block_->getLocalRowView(local_row, colValsU, valsU);
582 NumURead = (LO) colValsU.size();
584 for (LO j = 0; j < NumURead; ++j)
586 if (!(colValsU[j] < numLocalRows))
continue;
587 InI[NumL+1+j] = colValsU[j];
588 const LO matOffset = blockMatSize*(NumL+1+j);
589 little_block_host_type umat((
typename little_block_host_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
590 little_block_host_type umatV((
typename little_block_host_type::value_type*) &InV[matOffset], blockSize_, rowStride);
592 Tpetra::COPY (umat, umatV);
598 for (
size_t j = 0; j < NumIn; ++j) {
602#ifndef IFPACK2_RBILUK_INITIAL
603 for (LO i = 0; i < blockSize_; ++i)
604 for (LO j = 0; j < blockSize_; ++j){
606 diagModBlock(i,j) = 0;
611 Kokkos::deep_copy (diagModBlock, diagmod);
614 for (LO jj = 0; jj < NumL; ++jj) {
616 little_block_host_type currentVal((
typename little_block_host_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride);
618 Tpetra::COPY (currentVal, multiplier);
620 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j,j);
622#ifndef IFPACK2_RBILUK_INITIAL_NOKK
623 KokkosBatched::Experimental::SerialGemm
624 <KokkosBatched::Experimental::Trans::NoTranspose,
625 KokkosBatched::Experimental::Trans::NoTranspose,
626 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
627 (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
629 Tpetra::GEMM (
"N",
"N", STS::one (), currentVal, dmatInverse,
630 STS::zero (), matTmp);
634 Tpetra::COPY (matTmp, currentVal);
635 local_inds_host_view_type UUI;
636 values_host_view_type UUV;
638 U_block_->getLocalRowView(j, UUI, UUV);
639 NumUU = (LO) UUI.size();
641 if (this->RelaxValue_ == STM::zero ()) {
642 for (LO k = 0; k < NumUU; ++k) {
643 if (!(UUI[k] < numLocalRows))
continue;
644 const int kk = colflag[UUI[k]];
646 little_block_host_type kkval((
typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
647 little_block_host_type uumat((
typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
648#ifndef IFPACK2_RBILUK_INITIAL_NOKK
649 KokkosBatched::Experimental::SerialGemm
650 <KokkosBatched::Experimental::Trans::NoTranspose,
651 KokkosBatched::Experimental::Trans::NoTranspose,
652 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
653 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
655 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
663 for (LO k = 0; k < NumUU; ++k) {
664 if (!(UUI[k] < numLocalRows))
continue;
665 const int kk = colflag[UUI[k]];
666 little_block_host_type uumat((
typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
668 little_block_host_type kkval((
typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
669#ifndef IFPACK2_RBILUK_INITIAL_NOKK
670 KokkosBatched::Experimental::SerialGemm
671 <KokkosBatched::Experimental::Trans::NoTranspose,
672 KokkosBatched::Experimental::Trans::NoTranspose,
673 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
674 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
676 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
682#ifndef IFPACK2_RBILUK_INITIAL_NOKK
683 KokkosBatched::Experimental::SerialGemm
684 <KokkosBatched::Experimental::Trans::NoTranspose,
685 KokkosBatched::Experimental::Trans::NoTranspose,
686 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
687 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
689 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
690 STM::one (), diagModBlock);
699 L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
703 Tpetra::COPY (dmatV, dmat);
705 if (this->RelaxValue_ != STM::zero ()) {
707 Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
721 for (
int k = 0; k < blockSize_; ++k) {
725 Tpetra::GETF2 (dmat, ipiv, lapackInfo);
727 TEUCHOS_TEST_FOR_EXCEPTION(
728 lapackInfo != 0, std::runtime_error,
"Ifpack2::Experimental::RBILUK::compute: "
729 "lapackInfo = " << lapackInfo <<
" which indicates an error in the factorization GETRF.");
731 Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
733 TEUCHOS_TEST_FOR_EXCEPTION(
734 lapackInfo != 0, std::runtime_error,
"Ifpack2::Experimental::RBILUK::compute: "
735 "lapackInfo = " << lapackInfo <<
" which indicates an error in the matrix inverse GETRI.");
738 for (LO j = 0; j < NumU; ++j) {
739 little_block_host_type currentVal((
typename little_block_host_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride);
741#ifndef IFPACK2_RBILUK_INITIAL_NOKK
742 KokkosBatched::Experimental::SerialGemm
743 <KokkosBatched::Experimental::Trans::NoTranspose,
744 KokkosBatched::Experimental::Trans::NoTranspose,
745 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
746 (STS::one (), dmat, currentVal, STS::zero (), matTmp);
748 Tpetra::GEMM (
"N",
"N", STS::one (), dmat, currentVal,
749 STS::zero (), matTmp);
753 Tpetra::COPY (matTmp, currentVal);
758 U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
761#ifndef IFPACK2_RBILUK_INITIAL
763 for (
size_t j = 0; j < NumIn; ++j) {
764 colflag[InI[j]] = -1;
768 for (
size_t j = 0; j < num_cols; ++j) {
791 this->isComputed_ =
true;
792 this->numCompute_ += 1;
793 this->computeTime_ += (timer.wallTime() - startTime);
800apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
801 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
802 Teuchos::ETransp mode,
810 TEUCHOS_TEST_FOR_EXCEPTION(
811 A_block_.is_null (), std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: The matrix is "
812 "null. Please call setMatrix() with a nonnull input, then initialize() "
813 "and compute(), before calling this method.");
814 TEUCHOS_TEST_FOR_EXCEPTION(
815 ! this->isComputed (), std::runtime_error,
816 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
817 "you must call compute() before calling this method.");
818 TEUCHOS_TEST_FOR_EXCEPTION(
819 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
820 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
821 "X.getNumVectors() = " << X.getNumVectors ()
822 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
823 TEUCHOS_TEST_FOR_EXCEPTION(
824 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
825 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
826 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
827 "fixed. There is a FIXME in this file about this very issue.");
829 const LO blockMatSize = blockSize_*blockSize_;
831 const LO rowStride = blockSize_;
833 BMV yBlock (Y, * (A_block_->getGraph ()->getDomainMap ()), blockSize_);
834 const BMV xBlock (X, * (A_block_->getColMap ()), blockSize_);
836 Teuchos::Array<scalar_type> lclarray(blockSize_);
837 little_host_vec_type lclvec((
typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
841 Teuchos::Time timer (
"RBILUK::apply");
842 double startTime = timer.wallTime();
844 Teuchos::TimeMonitor timeMon (timer);
845 if (alpha == one && beta == zero) {
846 if (mode == Teuchos::NO_TRANS) {
853 const LO numVectors = xBlock.getNumVectors();
854 BMV cBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
855 BMV rBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
856 for (LO imv = 0; imv < numVectors; ++imv)
858 for (
size_t i = 0; i < D_block_->getLocalNumRows(); ++i)
861 const_host_little_vec_type xval =
862 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
863 little_host_vec_type cval =
864 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
866 Tpetra::COPY (xval, cval);
868 local_inds_host_view_type colValsL;
869 values_host_view_type valsL;
870 L_block_->getLocalRowView(local_row, colValsL, valsL);
871 LO NumL = (LO) colValsL.size();
873 for (LO j = 0; j < NumL; ++j)
875 LO col = colValsL[j];
876 const_host_little_vec_type prevVal =
877 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
879 const LO matOffset = blockMatSize*j;
880 little_block_host_type lij((
typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
883 Tpetra::GEMV (-one, lij, prevVal, cval);
889 D_block_->applyBlock(cBlock, rBlock);
892 for (LO imv = 0; imv < numVectors; ++imv)
894 const LO numRows = D_block_->getLocalNumRows();
895 for (LO i = 0; i < numRows; ++i)
897 LO local_row = (numRows-1)-i;
898 const_host_little_vec_type rval =
899 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
900 little_host_vec_type yval =
901 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
903 Tpetra::COPY (rval, yval);
905 local_inds_host_view_type colValsU;
906 values_host_view_type valsU;
907 U_block_->getLocalRowView(local_row, colValsU, valsU);
908 LO NumU = (LO) colValsU.size();
910 for (LO j = 0; j < NumU; ++j)
912 LO col = colValsU[NumU-1-j];
913 const_host_little_vec_type prevVal =
914 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
916 const LO matOffset = blockMatSize*(NumU-1-j);
917 little_block_host_type uij((
typename little_block_host_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
920 Tpetra::GEMV (-one, uij, prevVal, yval);
926 TEUCHOS_TEST_FOR_EXCEPTION(
927 true, std::runtime_error,
928 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
939 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
940 apply (X, Y_tmp, mode);
941 Y.update (alpha, Y_tmp, beta);
946 this->numApply_ += 1;
947 this->applyTime_ += (timer.wallTime() - startTime);