496 Build(
Level& currentLevel)
const {
499 typedef Teuchos::ScalarTraits<SC> STS;
500 typedef typename STS::magnitudeType MT;
501 const MT zero = Teuchos::ScalarTraits<MT>::zero();
503 auto A = Get< RCP<Matrix> >(currentLevel,
"A");
523 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,
Exceptions::RuntimeError,
"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
524 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
526 auto amalInfo = Get< RCP<AmalgamationInfo_kokkos> >(currentLevel,
"UnAmalgamationInfo");
528 const ParameterList& pL = GetParameterList();
530 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
532 double threshold = pL.get<
double>(
"aggregation: drop tol");
533 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold
534 <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
536 const typename STS::magnitudeType dirichletThreshold =
537 STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
539 GO numDropped = 0, numTotal = 0;
541 RCP<LWGraph_kokkos> graph;
544 typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
545 boundary_nodes_type boundaryNodes;
547 RCP<Matrix> filteredA;
548 if (blkSize == 1 && threshold == zero) {
555 graph = rcp(
new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(),
"graph of A"));
556 graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes);
558 numTotal = A->getLocalNumEntries();
563 }
else if (blkSize == 1 && threshold != zero) {
566 typedef typename Matrix::local_matrix_type local_matrix_type;
567 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
568 typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type;
569 typedef typename kokkos_graph_type::entries_type::non_const_type cols_type;
570 typedef typename local_matrix_type::values_type::non_const_type vals_type;
572 LO numRows = A->getLocalNumRows();
573 local_matrix_type kokkosMatrix = A->getLocalMatrixDevice();
574 auto nnzA = kokkosMatrix.nnz();
575 auto rowsA = kokkosMatrix.graph.row_map;
578 typedef Kokkos::ArithTraits<SC> ATS;
579 typedef typename ATS::val_type impl_Scalar;
580 typedef Kokkos::ArithTraits<impl_Scalar> impl_ATS;
582 bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
583 bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
585 GetOStream(
Runtime0) <<
"Lumping dropped entries" << std::endl;
587 const bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
590 rows_type
rows (
"FA_rows", numRows+1);
591 cols_type
colsAux(Kokkos::ViewAllocateWithoutInitializing(
"FA_aux_cols"), nnzA);
597 filteredA = MatrixFactory::Build(A->getCrsGraph());
600 RCP<ParameterList> fillCompleteParams(
new ParameterList);
601 fillCompleteParams->set(
"No Nonlocal Changes",
true);
602 filteredA->fillComplete(fillCompleteParams);
605 valsAux = filteredA->getLocalMatrixDevice().values;
609 valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing(
"FA_aux_vals"), nnzA);
612 typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numRows);
616 if (algo ==
"classical") {
618 RCP<Vector> ghostedDiag;
620 kokkosMatrix = local_matrix_type();
623 kokkosMatrix=A->getLocalMatrixDevice();
630 auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite);
634 scalarFunctor(kokkosMatrix, bndNodes, dropFunctor,
rows,
colsAux,
valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet);
636 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:main_loop",
range_type(0,numRows),
637 scalarFunctor, nnzFA);
640 }
else if (algo ==
"distance laplacian") {
641 typedef Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> doubleMultiVector;
642 auto coords = Get<RCP<doubleMultiVector> >(currentLevel,
"Coordinates");
644 auto uniqueMap = A->getRowMap();
645 auto nonUniqueMap = A->getColMap();
648 RCP<const Import> importer;
651 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
653 RCP<doubleMultiVector> ghostedCoords;
656 ghostedCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO>::Build(nonUniqueMap, coords->getNumVectors());
657 ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT);
660 auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite);
664 RCP<Vector> localLaplDiag;
668 localLaplDiag = VectorFactory::Build(uniqueMap);
670 auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
671 auto kokkosGraph = kokkosMatrix.graph;
673 Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag",
range_type(0,numRows),
674 KOKKOS_LAMBDA(
const LO row) {
675 auto rowView = kokkosGraph.rowConst(row);
676 auto length = rowView.length;
678 impl_Scalar d = impl_ATS::zero();
679 for (
decltype(length) colID = 0; colID < length; colID++) {
680 auto col = rowView(colID);
682 d += impl_ATS::one()/distFunctor.distance2(row, col);
684 localLaplDiagView(row,0) = d;
689 RCP<Vector> ghostedLaplDiag;
691 SubFactoryMonitor m2(*
this,
"Ghosted Laplacian diag construction", currentLevel);
692 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
693 ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
700 auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite);
703 dropFunctor(ghostedLaplDiagView, distFunctor, threshold);
705 scalarFunctor(kokkosMatrix, bndNodes, dropFunctor,
rows,
colsAux,
valsAux, reuseGraph, lumping, threshold,
true);
707 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:main_loop",
range_type(0,numRows),
708 scalarFunctor, nnzFA);
713 numDropped = nnzA - nnzFA;
715 boundaryNodes = bndNodes;
721 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:compress_rows",
range_type(0,numRows+1),
722 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
734 cols_type cols(Kokkos::ViewAllocateWithoutInitializing(
"FA_cols"), nnzFA);
737 GetOStream(
Runtime1) <<
"reuse matrix graph for filtering (compress matrix columns only)" << std::endl;
741 Kokkos::parallel_for(
"MueLu:TentativePF:Build:compress_cols",
range_type(0,numRows),
742 KOKKOS_LAMBDA(
const LO i) {
744 LO rowStart =
rows(i);
745 LO rowAStart = rowsA(i);
746 size_t rownnz =
rows(i+1) -
rows(i);
747 for (
size_t j = 0; j < rownnz; j++)
748 cols(rowStart+j) =
colsAux(rowAStart+j);
752 GetOStream(
Runtime1) <<
"new matrix graph for filtering (compress matrix columns and values)" << std::endl;
755 vals = vals_type(Kokkos::ViewAllocateWithoutInitializing(
"FA_vals"), nnzFA);
757 Kokkos::parallel_for(
"MueLu:TentativePF:Build:compress_cols",
range_type(0,numRows),
758 KOKKOS_LAMBDA(
const LO i) {
759 LO rowStart =
rows(i);
760 LO rowAStart = rowsA(i);
761 size_t rownnz =
rows(i+1) -
rows(i);
762 for (
size_t j = 0; j < rownnz; j++) {
763 cols(rowStart+j) =
colsAux(rowAStart+j);
764 vals(rowStart+j) =
valsAux(rowAStart+j);
769 kokkos_graph_type kokkosGraph(cols,
rows);
774 graph = rcp(
new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(),
"filtered graph of A"));
775 graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes);
778 numTotal = A->getLocalNumEntries();
785 local_matrix_type localFA = local_matrix_type(
"A", numRows, A->getLocalMatrixDevice().numCols(), nnzFA, vals,
rows, cols);
786 auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(),
787 A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter());
788 filteredA = rcp(
new CrsMatrixWrap(filteredACrs));
791 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
793 if (pL.get<
bool>(
"filtered matrix: reuse eigenvalue")) {
798 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
800 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
803 }
else if (blkSize > 1 && threshold == zero) {
810 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0,
MueLu::Exceptions::RuntimeError,
"MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() <<
" but should be a multiply of " << blkSize);
812 const RCP<const Map> rowMap = A->getRowMap();
813 const RCP<const Map> colMap = A->getColMap();
820 const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
821 const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
822 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
823 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
825 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
826 rowTranslationView(rowTranslationArray.getRawPtr(),rowTranslationArray.size() );
827 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
828 colTranslationView(colTranslationArray.getRawPtr(),colTranslationArray.size() );
831 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
832 typedef typename Kokkos::View<LocalOrdinal*, DeviceType> id_translation_type;
833 id_translation_type rowTranslation(
"dofId2nodeId",rowTranslationArray.size());
834 id_translation_type colTranslation(
"ov_dofId2nodeId",colTranslationArray.size());
835 Kokkos::deep_copy(rowTranslation, rowTranslationView);
836 Kokkos::deep_copy(colTranslation, colTranslationView);
839 blkSize = A->GetFixedBlockSize();
842 if(A->IsView(
"stridedMaps") ==
true) {
843 const RCP<const Map> myMap = A->getRowMap(
"stridedMaps");
844 const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
846 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
847 blkId = strMap->getStridedBlockId();
849 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
851 auto kokkosMatrix = A->getLocalMatrixDevice();
854 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
855 typedef typename kokkos_graph_type::row_map_type row_map_type;
857 typedef typename kokkos_graph_type::entries_type entries_type;
860 typename row_map_type::non_const_type dofNnz(
"nnz_map", numNodes + 1);
863 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1a",
range_type(0,numNodes), stage1aFunctor, numDofCols);
866 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan",
range_type(0,numNodes+1), scanFunctor);
871 typename entries_type::non_const_type dofcols(
"dofcols", numDofCols);
875 typename row_map_type::non_const_type
rows(
"nnz_nodemap", numNodes + 1);
876 typename boundary_nodes_type::non_const_type bndNodes(
"boundaryNodes", numNodes);
878 CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor <
decltype(kokkosMatrix),
decltype(dofNnz),
decltype(blkPartSize),
decltype(dofcols),
decltype(colTranslation),
decltype(singleEntryRows),
decltype(bndNodes),
bool> stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation,
rows, singleEntryRows, bndNodes, pL.get<
bool>(
"aggregation: greedy Dirichlet"));
879 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c",
range_type(0,numNodes), stage1bcFunctor,numNodeCols);
883 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan",
range_type(0,numNodes+1), scanNodeFunctor);
886 typename entries_type::non_const_type cols(
"nodecols", numNodeCols);
890 Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c",
range_type(0,numNodes), stage1dFunctor);
891 kokkos_graph_type kokkosGraph(cols,
rows);
894 graph = rcp(
new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
896 boundaryNodes = bndNodes;
897 graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes);
898 numTotal = A->getLocalNumEntries();
900 dofsPerNode = blkSize;
905 TEUCHOS_TEST_FOR_EXCEPTION(
true,
Exceptions::RuntimeError,
"MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented");
909 GO numLocalBoundaryNodes = 0;
910 GO numGlobalBoundaryNodes = 0;
912 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:bnd",
range_type(0, boundaryNodes.extent(0)),
913 KOKKOS_LAMBDA(
const LO i, GO& n) {
914 if (boundaryNodes(i))
916 }, numLocalBoundaryNodes);
918 auto comm = A->getRowMap()->getComm();
919 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
920 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
923 if ((GetVerbLevel() &
Statistics1) && threshold != zero) {
924 auto comm = A->getRowMap()->getComm();
926 GO numGlobalTotal, numGlobalDropped;
930 if (numGlobalTotal != 0) {
931 GetOStream(
Statistics1) <<
"Number of dropped entries: "
932 << numGlobalDropped <<
"/" << numGlobalTotal
933 <<
" (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
937 Set(currentLevel,
"DofsPerNode", dofsPerNode);
938 Set(currentLevel,
"Graph", graph);
939 Set(currentLevel,
"A", filteredA);