MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47#define MUELU_COALESCEDROPFACTORY_DEF_HPP
48
49#include <Xpetra_CrsGraphFactory.hpp>
50#include <Xpetra_CrsGraph.hpp>
51#include <Xpetra_ImportFactory.hpp>
52#include <Xpetra_ExportFactory.hpp>
53#include <Xpetra_MapFactory.hpp>
54#include <Xpetra_Map.hpp>
55#include <Xpetra_Matrix.hpp>
56#include <Xpetra_MultiVectorFactory.hpp>
57#include <Xpetra_MultiVector.hpp>
58#include <Xpetra_StridedMap.hpp>
59#include <Xpetra_VectorFactory.hpp>
60#include <Xpetra_Vector.hpp>
61
62#include <Xpetra_IO.hpp>
63
65
66#include "MueLu_AmalgamationFactory.hpp"
67#include "MueLu_AmalgamationInfo.hpp"
68#include "MueLu_Exceptions.hpp"
69#include "MueLu_GraphBase.hpp"
70#include "MueLu_Graph.hpp"
71#include "MueLu_Level.hpp"
72#include "MueLu_LWGraph.hpp"
73#include "MueLu_MasterList.hpp"
74#include "MueLu_Monitor.hpp"
75#include "MueLu_PreDropFunctionBaseClass.hpp"
76#include "MueLu_PreDropFunctionConstVal.hpp"
77#include "MueLu_Utilities.hpp"
78
79#ifdef HAVE_XPETRA_TPETRA
80#include "Tpetra_CrsGraphTransposer.hpp"
81#endif
82
83#include <algorithm>
84#include <cstdlib>
85#include <string>
86
87// If defined, read environment variables.
88// Should be removed once we are confident that this works.
89//#define DJS_READ_ENV_VARIABLES
90
91
92namespace MueLu {
93
94 namespace Details {
95 template<class real_type, class LO>
96 struct DropTol {
97
98 DropTol() = default;
99 DropTol(DropTol const&) = default;
100 DropTol(DropTol &&) = default;
101
102 DropTol& operator=(DropTol const&) = default;
103 DropTol& operator=(DropTol &&) = default;
104
105 DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
106 : val{val_}, diag{diag_}, col{col_}, drop{drop_}
107 {}
108
109 real_type val {Teuchos::ScalarTraits<real_type>::zero()};
110 real_type diag {Teuchos::ScalarTraits<real_type>::zero()};
111 LO col {Teuchos::OrdinalTraits<LO>::invalid()};
112 bool drop {true};
113
114 // CMS: Auxillary information for debugging info
115 // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
116 };
117 }
118
119
120 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
122 RCP<ParameterList> validParamList = rcp(new ParameterList());
123
124#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
125 SET_VALID_ENTRY("aggregation: drop tol");
126 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
127 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
128 SET_VALID_ENTRY("aggregation: row sum drop tol");
129 SET_VALID_ENTRY("aggregation: drop scheme");
130 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
131 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
132 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
133
134 {
135 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
136 // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
137 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("signed classical sa","classical", "distance laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block diagonal colored signed classical"), "aggregation: drop scheme")));
138
139 }
140 SET_VALID_ENTRY("aggregation: distance laplacian algo");
141 SET_VALID_ENTRY("aggregation: classical algo");
142 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
143#undef SET_VALID_ENTRY
144 validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
145
146 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
147 validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
148 validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
149 validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
150
151 return validParamList;
152 }
153
154 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
156
157 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
159 Input(currentLevel, "A");
160 Input(currentLevel, "UnAmalgamationInfo");
161
162 const ParameterList& pL = GetParameterList();
163 if (pL.get<bool>("lightweight wrap") == true) {
164 std::string algo = pL.get<std::string>("aggregation: drop scheme");
165 if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
166 Input(currentLevel, "Coordinates");
167 }
168 if(algo == "signed classical sa")
169 ;
170 else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
171 Input(currentLevel, "BlockNumber");
172 }
173 }
174
175 }
176
177 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
179
180 FactoryMonitor m(*this, "Build", currentLevel);
181
182 typedef Teuchos::ScalarTraits<SC> STS;
183 typedef typename STS::magnitudeType real_type;
184 typedef Xpetra::MultiVector<real_type,LO,GO,NO> RealValuedMultiVector;
185 typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
186
187 if (predrop_ != Teuchos::null)
188 GetOStream(Parameters0) << predrop_->description();
189
190 RCP<Matrix> realA = Get< RCP<Matrix> >(currentLevel, "A");
191 RCP<AmalgamationInfo> amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
192 const ParameterList & pL = GetParameterList();
193 bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
194
195 GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
196 std::string algo = pL.get<std::string>("aggregation: drop scheme");
197 const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
198
199 RCP<RealValuedMultiVector> Coords;
200 RCP<Matrix> A;
201
202 bool use_block_algorithm=false;
203 LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
204 bool useSignedClassicalRS = false;
205 bool useSignedClassicalSA = false;
206 bool generateColoringGraph = false;
207
208 // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
209 // in the block diagonalizaiton). So we'll clobber the rowSumTol with -1.0 in this case
210 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
211 RCP<LocalOrdinalVector> ghostedBlockNumber;
212 ArrayRCP<const LO> g_block_id;
213
214 if(algo == "distance laplacian" ) {
215 // Grab the coordinates for distance laplacian
216 Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
217 A = realA;
218 }
219 else if(algo == "signed classical sa") {
220 useSignedClassicalSA = true;
221 algo = "classical";
222 A = realA;
223 }
224 else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
225 useSignedClassicalRS = true;
226 // if(realA->GetFixedBlockSize() > 1) {
227 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
228 // Ghost the column block numbers if we need to
229 RCP<const Import> importer = realA->getCrsGraph()->getImporter();
230 if(!importer.is_null()) {
231 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
232 ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
233 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
234 }
235 else {
236 ghostedBlockNumber = BlockNumber;
237 }
238 g_block_id = ghostedBlockNumber->getData(0);
239 // }
240 if(algo == "block diagonal colored signed classical")
241 generateColoringGraph=true;
242 algo = "classical";
243 A = realA;
244
245 }
246 else if(algo == "block diagonal") {
247 // Handle the "block diagonal" filtering and then leave
248 BlockDiagonalize(currentLevel,realA,false);
249 return;
250 }
251 else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
252 // Handle the "block diagonal" filtering, and then continue onward
253 use_block_algorithm = true;
254 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel,realA,true);
255 if(algo == "block diagonal distance laplacian") {
256 // We now need to expand the coordinates by the interleaved blocksize
257 RCP<RealValuedMultiVector> OldCoords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
258 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
259 LO dim = (LO) OldCoords->getNumVectors();
260 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim);
261 for(LO k=0; k<dim; k++){
262 ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
263 ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
264 for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) {
265 LO new_base = i*dim;
266 for(LO j=0; j<interleaved_blocksize; j++)
267 new_vec[new_base + j] = old_vec[i];
268 }
269 }
270 }
271 else {
272 Coords = OldCoords;
273 }
274 algo = "distance laplacian";
275 }
276 else if(algo == "block diagonal classical") {
277 algo = "classical";
278 }
279 // All cases
280 A = filteredMatrix;
281 rowSumTol = -1.0;
282 }
283 else {
284 A = realA;
285 }
286
287 // Distance Laplacian weights
288 Array<double> dlap_weights = pL.get<Array<double> >("aggregation: distance laplacian directional weights");
289 enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS};
290 int use_dlap_weights = NO_WEIGHTS;
291 if(algo == "distance laplacian") {
292 LO dim = (LO) Coords->getNumVectors();
293 // If anything isn't 1.0 we need to turn on the weighting
294 bool non_unity = false;
295 for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) {
296 if(dlap_weights[i] != 1.0) {
297 non_unity = true;
298 }
299 }
300 if(non_unity) {
301 LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
302 if((LO)dlap_weights.size() == dim)
303 use_dlap_weights = SINGLE_WEIGHTS;
304 else if((LO)dlap_weights.size() == blocksize * dim)
305 use_dlap_weights = BLOCK_WEIGHTS;
306 else {
307 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
308 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
309 }
310 if (GetVerbLevel() & Statistics1)
311 GetOStream(Statistics1) << "Using distance laplacian weights: "<<dlap_weights<<std::endl;
312 }
313 }
314
315 // decide wether to use the fast-track code path for standard maps or the somewhat slower
316 // code path for non-standard maps
317 /*bool bNonStandardMaps = false;
318 if (A->IsView("stridedMaps") == true) {
319 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
320 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
321 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
322 if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
323 bNonStandardMaps = true;
324 }*/
325
326 if (doExperimentalWrap) {
327 TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
328 TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
329
330 SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
331 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
332 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
333 real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime
334
336 // Remove this bit once we are confident that cut-based dropping works.
337#ifdef HAVE_MUELU_DEBUG
338 int distanceLaplacianCutVerbose = 0;
339#endif
340#ifdef DJS_READ_ENV_VARIABLES
341 if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
342 distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
343 }
344
345 if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
346 auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
347 realThreshold = 1e-4*tmp;
348 }
349
350# ifdef HAVE_MUELU_DEBUG
351 if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
352 distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
353 }
354# endif
355#endif
357
358 enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric};
359
360 decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
361 decisionAlgoType classicalAlgo = defaultAlgo;
362 if (algo == "distance laplacian") {
363 if (distanceLaplacianAlgoStr == "default")
364 distanceLaplacianAlgo = defaultAlgo;
365 else if (distanceLaplacianAlgoStr == "unscaled cut")
366 distanceLaplacianAlgo = unscaled_cut;
367 else if (distanceLaplacianAlgoStr == "scaled cut")
368 distanceLaplacianAlgo = scaled_cut;
369 else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
370 distanceLaplacianAlgo = scaled_cut_symmetric;
371 else
372 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
373 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize()<< std::endl;
374 } else if (algo == "classical") {
375 if (classicalAlgoStr == "default")
376 classicalAlgo = defaultAlgo;
377 else if (classicalAlgoStr == "unscaled cut")
378 classicalAlgo = unscaled_cut;
379 else if (classicalAlgoStr == "scaled cut")
380 classicalAlgo = scaled_cut;
381 else
382 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
383 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
384
385 } else
386 GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
387 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
388
389 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
390
391
392 // NOTE: We don't support signed classical RS or SA with cut drop at present
393 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
394 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
395
396 GO numDropped = 0, numTotal = 0;
397 std::string graphType = "unamalgamated"; //for description purposes only
398
399
400 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
401 BlockSize is the number of storage blocks that must kept together during the amalgamation process.
402
403 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
404
405 numPDEs = BlockSize * storageblocksize.
406
407 If numPDEs==1
408 Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
409 No other values makes sense.
410
411 If numPDEs>1
412 If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
413 If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
414 Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
415 */
416 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
417 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
418
419
420 /************************** RS or SA-style Classical Dropping (and variants) **************************/
421 if (algo == "classical") {
422 if (predrop_ == null) {
423 // ap: this is a hack: had to declare predrop_ as mutable
424 predrop_ = rcp(new PreDropFunctionConstVal(threshold));
425 }
426
427 if (predrop_ != null) {
428 RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
429 TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
430 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
431 // If a user provided a predrop function, it overwrites the XML threshold parameter
432 SC newt = predropConstVal->GetThreshold();
433 if (newt != threshold) {
434 GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
435 threshold = newt;
436 }
437 }
438 // At this points we either have
439 // (predrop_ != null)
440 // Therefore, it is sufficient to check only threshold
441 if ( BlockSize==1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
442 // Case 1: scalar problem, no dropping => just use matrix graph
443 RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
444 // Detect and record rows that correspond to Dirichlet boundary conditions
445 ArrayRCP<bool > boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
446 if (rowSumTol > 0.)
447 Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
448
449 graph->SetBoundaryNodeMap(boundaryNodes);
450 numTotal = A->getLocalNumEntries();
451
452 if (GetVerbLevel() & Statistics1) {
453 GO numLocalBoundaryNodes = 0;
454 GO numGlobalBoundaryNodes = 0;
455 for (LO i = 0; i < boundaryNodes.size(); ++i)
456 if (boundaryNodes[i])
457 numLocalBoundaryNodes++;
458 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
459 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
460 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
461 }
462
463 Set(currentLevel, "DofsPerNode", 1);
464 Set(currentLevel, "Graph", graph);
465
466 } else if ( (BlockSize == 1 && threshold != STS::zero()) ||
467 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
468 (BlockSize == 1 && useSignedClassicalRS) ||
469 (BlockSize == 1 && useSignedClassicalSA) ) {
470 // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
471 // graph's map information, e.g., whether index is local
472 // OR a matrix without a CrsGraph
473
474 // allocate space for the local graph
475 ArrayRCP<LO> rows (A->getLocalNumRows()+1);
476 ArrayRCP<LO> columns(A->getLocalNumEntries());
477
478 using MT = typename STS::magnitudeType;
479 RCP<Vector> ghostedDiag;
480 ArrayRCP<const SC> ghostedDiagVals;
481 ArrayRCP<const MT> negMaxOffDiagonal;
482 // RS style needs the max negative off-diagonal, SA style needs the diagonal
483 if(useSignedClassicalRS) {
484 if(ghostedBlockNumber.is_null()) {
486 if (GetVerbLevel() & Statistics1)
487 GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
488 }
489 else {
490 negMaxOffDiagonal = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber);
491 if (GetVerbLevel() & Statistics1)
492 GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
493 }
494 }
495 else {
497 ghostedDiagVals = ghostedDiag->getData(0);
498 }
499 ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
500 if (rowSumTol > 0.) {
501 if(ghostedBlockNumber.is_null()) {
502 if (GetVerbLevel() & Statistics1)
503 GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
504 Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
505 } else {
506 if (GetVerbLevel() & Statistics1)
507 GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
508 Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
509 }
510 }
511
512 LO realnnz = 0;
513 rows[0] = 0;
514 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
515 size_t nnz = A->getNumEntriesInLocalRow(row);
516 bool rowIsDirichlet = boundaryNodes[row];
517 ArrayView<const LO> indices;
518 ArrayView<const SC> vals;
519 A->getLocalRowView(row, indices, vals);
520
521 if(classicalAlgo == defaultAlgo) {
522 //FIXME the current predrop function uses the following
523 //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
524 //FIXME but the threshold doesn't take into account the rows' diagonal entries
525 //FIXME For now, hardwiring the dropping in here
526
527 LO rownnz = 0;
528 if(useSignedClassicalRS) {
529 // Signed classical RS style
530 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
531 LO col = indices[colID];
532 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
533 MT neg_aij = - STS::real(vals[colID]);
534 /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
535 g_block_id.is_null() ? -1 : g_block_id[row],
536 g_block_id.is_null() ? -1 : g_block_id[col],
537 neg_aij, max_neg_aik);*/
538 if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
539 columns[realnnz++] = col;
540 rownnz++;
541 } else
542 numDropped++;
543 }
544 rows[row+1] = realnnz;
545 }
546 else if(useSignedClassicalSA) {
547 // Signed classical SA style
548 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
549 LO col = indices[colID];
550
551 bool is_nonpositive = STS::real(vals[colID]) <= 0;
552 MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
553 MT aij = is_nonpositive ? STS::magnitude(vals[colID]*vals[colID]) : (-STS::magnitude(vals[colID]*vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
554 /*
555 if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
556 vals[colID],aij, aiiajj);
557 */
558
559 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
560 columns[realnnz++] = col;
561 rownnz++;
562 } else
563 numDropped++;
564 }
565 rows[row+1] = realnnz;
566 }
567 else {
568 // Standard abs classical
569 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
570 LO col = indices[colID];
571 MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
572 MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
573
574 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
575 columns[realnnz++] = col;
576 rownnz++;
577 } else
578 numDropped++;
579 }
580 rows[row+1] = realnnz;
581 }
582 }
583 else {
584 /* Cut Algorithm */
585 //CMS
586 using DropTol = Details::DropTol<real_type,LO>;
587 std::vector<DropTol> drop_vec;
588 drop_vec.reserve(nnz);
589 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
590 const real_type one = Teuchos::ScalarTraits<real_type>::one();
591 LO rownnz = 0;
592 // NOTE: This probably needs to be fixed for rowsum
593
594 // find magnitudes
595 for (LO colID = 0; colID < (LO)nnz; colID++) {
596 LO col = indices[colID];
597 if (row == col) {
598 drop_vec.emplace_back( zero, one, colID, false);
599 continue;
600 }
601
602 // Don't aggregate boundaries
603 if(boundaryNodes[colID]) continue;
604 typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
605 typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
606 drop_vec.emplace_back(aij, aiiajj, colID, false);
607 }
608
609 const size_t n = drop_vec.size();
610
611 if (classicalAlgo == unscaled_cut) {
612 std::sort( drop_vec.begin(), drop_vec.end()
613 , [](DropTol const& a, DropTol const& b) {
614 return a.val > b.val;
615 });
616
617 bool drop = false;
618 for (size_t i=1; i<n; ++i) {
619 if (!drop) {
620 auto const& x = drop_vec[i-1];
621 auto const& y = drop_vec[i];
622 auto a = x.val;
623 auto b = y.val;
624 if (a > realThreshold*b) {
625 drop = true;
626#ifdef HAVE_MUELU_DEBUG
627 if (distanceLaplacianCutVerbose) {
628 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
629 }
630#endif
631 }
632 }
633 drop_vec[i].drop = drop;
634 }
635 } else if (classicalAlgo == scaled_cut) {
636 std::sort( drop_vec.begin(), drop_vec.end()
637 , [](DropTol const& a, DropTol const& b) {
638 return a.val/a.diag > b.val/b.diag;
639 });
640 bool drop = false;
641 // printf("[%d] Scaled Cut: ",(int)row);
642 // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
643 for (size_t i=1; i<n; ++i) {
644 if (!drop) {
645 auto const& x = drop_vec[i-1];
646 auto const& y = drop_vec[i];
647 auto a = x.val/x.diag;
648 auto b = y.val/y.diag;
649 if (a > realThreshold*b) {
650 drop = true;
651
652#ifdef HAVE_MUELU_DEBUG
653 if (distanceLaplacianCutVerbose) {
654 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
655 }
656#endif
657 }
658 // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
659
660 }
661 drop_vec[i].drop = drop;
662 }
663 // printf("\n");
664 }
665 std::sort( drop_vec.begin(), drop_vec.end()
666 , [](DropTol const& a, DropTol const& b) {
667 return a.col < b.col;
668 }
669 );
670
671 for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
672 LO col = indices[drop_vec[idxID].col];
673 // don't drop diagonal
674 if (row == col) {
675 columns[realnnz++] = col;
676 rownnz++;
677 continue;
678 }
679
680 if (!drop_vec[idxID].drop) {
681 columns[realnnz++] = col;
682 rownnz++;
683 } else {
684 numDropped++;
685 }
686 }
687 // CMS
688 rows[row+1] = realnnz;
689
690 }
691 }//end for row
692
693 columns.resize(realnnz);
694 numTotal = A->getLocalNumEntries();
695
696 if (aggregationMayCreateDirichlet) {
697 // If the only element remaining after filtering is diagonal, mark node as boundary
698 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
699 if (rows[row+1]- rows[row] <= 1)
700 boundaryNodes[row] = true;
701 }
702 }
703
704 RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A"));
705 graph->SetBoundaryNodeMap(boundaryNodes);
706 if (GetVerbLevel() & Statistics1) {
707 GO numLocalBoundaryNodes = 0;
708 GO numGlobalBoundaryNodes = 0;
709 for (LO i = 0; i < boundaryNodes.size(); ++i)
710 if (boundaryNodes[i])
711 numLocalBoundaryNodes++;
712 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
713 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
714 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
715 }
716 Set(currentLevel, "Graph", graph);
717 Set(currentLevel, "DofsPerNode", 1);
718
719 // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
720 if(generateColoringGraph) {
721 RCP<GraphBase> colorGraph;
722 RCP<const Import> importer = A->getCrsGraph()->getImporter();
723 BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer);
724 Set(currentLevel, "Coloring Graph",colorGraph);
725 // #define CMS_DUMP
726#ifdef CMS_DUMP
727 {
728 Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
729 Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
730 // int rank = graph->GetDomainMap()->getComm()->getRank();
731 // {
732 // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
733 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
734 // colorGraph->print(*fancy,Debug);
735 // }
736 // {
737 // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
738 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
739 // graph->print(*fancy,Debug);
740 // }
741
742 }
743#endif
744 }//end generateColoringGraph
745 } else if (BlockSize > 1 && threshold == STS::zero()) {
746 // Case 3: Multiple DOF/node problem without dropping
747 const RCP<const Map> rowMap = A->getRowMap();
748 const RCP<const Map> colMap = A->getColMap();
749
750 graphType = "amalgamated";
751
752 // build node row map (uniqueMap) and node column map (nonUniqueMap)
753 // the arrays rowTranslation and colTranslation contain the local node id
754 // given a local dof id. The data is calculated by the AmalgamationFactory and
755 // stored in the variable container "UnAmalgamationInfo"
756 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
757 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
758 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
759 Array<LO> colTranslation = *(amalInfo->getColTranslation());
760
761 // get number of local nodes
762 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
763
764 // Allocate space for the local graph
765 ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
766 ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
767
768 const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
769
770 // Detect and record rows that correspond to Dirichlet boundary conditions
771 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
772 // TODO the array one bigger than the number of local rows, and the last entry can
773 // TODO hold the actual number of boundary nodes. Clever, huh?
774 ArrayRCP<bool > pointBoundaryNodes;
775 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
776 if (rowSumTol > 0.)
777 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
778
779
780 // extract striding information
781 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
782 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
783 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
784 if (A->IsView("stridedMaps") == true) {
785 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
786 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
787 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
788 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
789 blkId = strMap->getStridedBlockId();
790 if (blkId > -1)
791 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
792 }
793
794 // loop over all local nodes
795 LO realnnz = 0;
796 rows[0] = 0;
797 Array<LO> indicesExtra;
798 for (LO row = 0; row < numRows; row++) {
799 ArrayView<const LO> indices;
800 indicesExtra.resize(0);
801
802 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
803 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
804 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
805 // with local ids.
806 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
807 // node.
808 bool isBoundary = false;
809 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
810 for (LO j = 0; j < blkPartSize; j++) {
811 if (pointBoundaryNodes[row*blkPartSize+j]) {
812 isBoundary = true;
813 break;
814 }
815 }
816 } else {
817 isBoundary = true;
818 for (LO j = 0; j < blkPartSize; j++) {
819 if (!pointBoundaryNodes[row*blkPartSize+j]) {
820 isBoundary = false;
821 break;
822 }
823 }
824 }
825
826 // Merge rows of A
827 // The array indicesExtra contains local column node ids for the current local node "row"
828 if (!isBoundary)
829 MergeRows(*A, row, indicesExtra, colTranslation);
830 else
831 indicesExtra.push_back(row);
832 indices = indicesExtra;
833 numTotal += indices.size();
834
835 // add the local column node ids to the full columns array which
836 // contains the local column node ids for all local node rows
837 LO nnz = indices.size(), rownnz = 0;
838 for (LO colID = 0; colID < nnz; colID++) {
839 LO col = indices[colID];
840 columns[realnnz++] = col;
841 rownnz++;
842 }
843
844 if (rownnz == 1) {
845 // If the only element remaining after filtering is diagonal, mark node as boundary
846 // FIXME: this should really be replaced by the following
847 // if (indices.size() == 1 && indices[0] == row)
848 // boundaryNodes[row] = true;
849 // We do not do it this way now because there is no framework for distinguishing isolated
850 // and boundary nodes in the aggregation algorithms
851 amalgBoundaryNodes[row] = true;
852 }
853 rows[row+1] = realnnz;
854 } //for (LO row = 0; row < numRows; row++)
855 columns.resize(realnnz);
856
857 RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
858 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
859
860 if (GetVerbLevel() & Statistics1) {
861 GO numLocalBoundaryNodes = 0;
862 GO numGlobalBoundaryNodes = 0;
863
864 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
865 if (amalgBoundaryNodes[i])
866 numLocalBoundaryNodes++;
867
868 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
869 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
870 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
871 << " agglomerated Dirichlet nodes" << std::endl;
872 }
873
874 Set(currentLevel, "Graph", graph);
875 Set(currentLevel, "DofsPerNode", blkSize); // full block size
876
877 } else if (BlockSize > 1 && threshold != STS::zero()) {
878 // Case 4: Multiple DOF/node problem with dropping
879 const RCP<const Map> rowMap = A->getRowMap();
880 const RCP<const Map> colMap = A->getColMap();
881 graphType = "amalgamated";
882
883 // build node row map (uniqueMap) and node column map (nonUniqueMap)
884 // the arrays rowTranslation and colTranslation contain the local node id
885 // given a local dof id. The data is calculated by the AmalgamationFactory and
886 // stored in the variable container "UnAmalgamationInfo"
887 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
888 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
889 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
890 Array<LO> colTranslation = *(amalInfo->getColTranslation());
891
892 // get number of local nodes
893 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
894
895 // Allocate space for the local graph
896 ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
897 ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
898
899 const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
900
901 // Detect and record rows that correspond to Dirichlet boundary conditions
902 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
903 // TODO the array one bigger than the number of local rows, and the last entry can
904 // TODO hold the actual number of boundary nodes. Clever, huh?
905 ArrayRCP<bool > pointBoundaryNodes;
906 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
907 if (rowSumTol > 0.)
908 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
909
910
911 // extract striding information
912 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
913 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
914 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
915 if (A->IsView("stridedMaps") == true) {
916 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
917 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
918 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
919 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
920 blkId = strMap->getStridedBlockId();
921 if (blkId > -1)
922 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
923 }
924
925 // extract diagonal data for dropping strategy
927 const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
928
929 // loop over all local nodes
930 LO realnnz = 0;
931 rows[0] = 0;
932 Array<LO> indicesExtra;
933 for (LO row = 0; row < numRows; row++) {
934 ArrayView<const LO> indices;
935 indicesExtra.resize(0);
936
937 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
938 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
939 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
940 // with local ids.
941 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
942 // node.
943 bool isBoundary = false;
944 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
945 for (LO j = 0; j < blkPartSize; j++) {
946 if (pointBoundaryNodes[row*blkPartSize+j]) {
947 isBoundary = true;
948 break;
949 }
950 }
951 } else {
952 isBoundary = true;
953 for (LO j = 0; j < blkPartSize; j++) {
954 if (!pointBoundaryNodes[row*blkPartSize+j]) {
955 isBoundary = false;
956 break;
957 }
958 }
959 }
960
961 // Merge rows of A
962 // The array indicesExtra contains local column node ids for the current local node "row"
963 if (!isBoundary)
964 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
965 else
966 indicesExtra.push_back(row);
967 indices = indicesExtra;
968 numTotal += indices.size();
969
970 // add the local column node ids to the full columns array which
971 // contains the local column node ids for all local node rows
972 LO nnz = indices.size(), rownnz = 0;
973 for (LO colID = 0; colID < nnz; colID++) {
974 LO col = indices[colID];
975 columns[realnnz++] = col;
976 rownnz++;
977 }
978
979 if (rownnz == 1) {
980 // If the only element remaining after filtering is diagonal, mark node as boundary
981 // FIXME: this should really be replaced by the following
982 // if (indices.size() == 1 && indices[0] == row)
983 // boundaryNodes[row] = true;
984 // We do not do it this way now because there is no framework for distinguishing isolated
985 // and boundary nodes in the aggregation algorithms
986 amalgBoundaryNodes[row] = true;
987 }
988 rows[row+1] = realnnz;
989 } //for (LO row = 0; row < numRows; row++)
990 columns.resize(realnnz);
991
992 RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
993 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
994
995 if (GetVerbLevel() & Statistics1) {
996 GO numLocalBoundaryNodes = 0;
997 GO numGlobalBoundaryNodes = 0;
998
999 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1000 if (amalgBoundaryNodes[i])
1001 numLocalBoundaryNodes++;
1002
1003 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1004 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1005 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1006 << " agglomerated Dirichlet nodes" << std::endl;
1007 }
1008
1009 Set(currentLevel, "Graph", graph);
1010 Set(currentLevel, "DofsPerNode", blkSize); // full block size
1011 }
1012
1013 } else if (algo == "distance laplacian") {
1014 LO blkSize = A->GetFixedBlockSize();
1015 GO indexBase = A->getRowMap()->getIndexBase();
1016 // [*0*] : FIXME
1017 // ap: somehow, if I move this line to [*1*], Belos throws an error
1018 // I'm not sure what's going on. Do we always have to Get data, if we did
1019 // DeclareInput for it?
1020 // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1021
1022 // Detect and record rows that correspond to Dirichlet boundary conditions
1023 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1024 // TODO the array one bigger than the number of local rows, and the last entry can
1025 // TODO hold the actual number of boundary nodes. Clever, huh?
1026 ArrayRCP<bool > pointBoundaryNodes;
1027 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1028 if (rowSumTol > 0.)
1029 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
1030
1031 if ( (blkSize == 1) && (threshold == STS::zero()) ) {
1032 // Trivial case: scalar problem, no dropping. Can return original graph
1033 RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
1034 graph->SetBoundaryNodeMap(pointBoundaryNodes);
1035 graphType="unamalgamated";
1036 numTotal = A->getLocalNumEntries();
1037
1038 if (GetVerbLevel() & Statistics1) {
1039 GO numLocalBoundaryNodes = 0;
1040 GO numGlobalBoundaryNodes = 0;
1041 for (LO i = 0; i < pointBoundaryNodes.size(); ++i)
1042 if (pointBoundaryNodes[i])
1043 numLocalBoundaryNodes++;
1044 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1045 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1046 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1047 }
1048
1049 Set(currentLevel, "DofsPerNode", blkSize);
1050 Set(currentLevel, "Graph", graph);
1051
1052 } else {
1053 // ap: We make quite a few assumptions here; general case may be a lot different,
1054 // but much much harder to implement. We assume that:
1055 // 1) all maps are standard maps, not strided maps
1056 // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1057 // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1058 //
1059 // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1060 // but as I totally don't understand that code, here is my solution
1061
1062 // [*1*]: see [*0*]
1063
1064 // Check that the number of local coordinates is consistent with the #rows in A
1065 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1066 "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size ("<< blkSize <<").");
1067
1068 const RCP<const Map> colMap = A->getColMap();
1069 RCP<const Map> uniqueMap, nonUniqueMap;
1070 Array<LO> colTranslation;
1071 if (blkSize == 1) {
1072 uniqueMap = A->getRowMap();
1073 nonUniqueMap = A->getColMap();
1074 graphType="unamalgamated";
1075
1076 } else {
1077 uniqueMap = Coords->getMap();
1078 TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1079 "Different index bases for matrix and coordinates");
1080
1081 AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1082
1083 graphType = "amalgamated";
1084 }
1085 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1086
1087 RCP<RealValuedMultiVector> ghostedCoords;
1088 RCP<Vector> ghostedLaplDiag;
1089 Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1090 if (threshold != STS::zero()) {
1091 // Get ghost coordinates
1092 RCP<const Import> importer;
1093 {
1094 SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1095 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1096 GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1097 importer = realA->getCrsGraph()->getImporter();
1098 } else {
1099 GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1100 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1101 }
1102 } //subtimer
1103 ghostedCoords = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(nonUniqueMap, Coords->getNumVectors());
1104 {
1105 SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1106 ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1107 } //subtimer
1108
1109 // Construct Distance Laplacian diagonal
1110 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1111 Array<LO> indicesExtra;
1112 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1113 if (threshold != STS::zero()) {
1114 const size_t numVectors = ghostedCoords->getNumVectors();
1115 coordData.reserve(numVectors);
1116 for (size_t j = 0; j < numVectors; j++) {
1117 Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1118 coordData.push_back(tmpData);
1119 }
1120 }
1121 {
1122 SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1123 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1124 for (LO row = 0; row < numRows; row++) {
1125 ArrayView<const LO> indices;
1126
1127 if (blkSize == 1) {
1128 ArrayView<const SC> vals;
1129 A->getLocalRowView(row, indices, vals);
1130
1131 } else {
1132 // Merge rows of A
1133 indicesExtra.resize(0);
1134 MergeRows(*A, row, indicesExtra, colTranslation);
1135 indices = indicesExtra;
1136 }
1137
1138 LO nnz = indices.size();
1139 bool haveAddedToDiag = false;
1140 for (LO colID = 0; colID < nnz; colID++) {
1141 const LO col = indices[colID];
1142
1143 if (row != col) {
1144 if(use_dlap_weights == SINGLE_WEIGHTS) {
1145 /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1146 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1147 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1148 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1149 }
1150 else if(use_dlap_weights == BLOCK_WEIGHTS) {
1151 int block_id = row % interleaved_blocksize;
1152 int block_start = block_id * interleaved_blocksize;
1153 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1154 }
1155 else {
1156 // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1157 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1158 }
1159 haveAddedToDiag = true;
1160 }
1161 }
1162 // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1163 // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1164 if (!haveAddedToDiag)
1165 localLaplDiagData[row] = STS::rmax();
1166 }
1167 } //subtimer
1168 {
1169 SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1170 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1171 ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1172 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1173 } //subtimer
1174
1175 } else {
1176 GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1177 }
1178
1179 // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1180
1181 // allocate space for the local graph
1182 ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
1183 ArrayRCP<LO> columns = ArrayRCP<LO>(A->getLocalNumEntries());
1184
1185#ifdef HAVE_MUELU_DEBUG
1186 // DEBUGGING
1187 for(LO i=0; i<(LO)columns.size(); i++) columns[i]=-666;
1188#endif
1189
1190 // Extra array for if we're allowing symmetrization with cutting
1191 ArrayRCP<LO> rows_stop;
1192 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1193 if(use_stop_array)
1194 rows_stop.resize(numRows);
1195
1196 const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
1197
1198 LO realnnz = 0;
1199 rows[0] = 0;
1200
1201 Array<LO> indicesExtra;
1202 {
1203 SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1204 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1205 if (threshold != STS::zero()) {
1206 const size_t numVectors = ghostedCoords->getNumVectors();
1207 coordData.reserve(numVectors);
1208 for (size_t j = 0; j < numVectors; j++) {
1209 Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1210 coordData.push_back(tmpData);
1211 }
1212 }
1213
1214 ArrayView<const SC> vals;//CMS hackery
1215 for (LO row = 0; row < numRows; row++) {
1216 ArrayView<const LO> indices;
1217 indicesExtra.resize(0);
1218 bool isBoundary = false;
1219
1220 if (blkSize == 1) {
1221 // ArrayView<const SC> vals;//CMS uncomment
1222 A->getLocalRowView(row, indices, vals);
1223 isBoundary = pointBoundaryNodes[row];
1224 } else {
1225 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1226 for (LO j = 0; j < blkSize; j++) {
1227 if (!pointBoundaryNodes[row*blkSize+j]) {
1228 isBoundary = false;
1229 break;
1230 }
1231 }
1232
1233 // Merge rows of A
1234 if (!isBoundary)
1235 MergeRows(*A, row, indicesExtra, colTranslation);
1236 else
1237 indicesExtra.push_back(row);
1238 indices = indicesExtra;
1239 }
1240 numTotal += indices.size();
1241
1242 LO nnz = indices.size(), rownnz = 0;
1243
1244 if(use_stop_array) {
1245 rows[row+1] = rows[row]+nnz;
1246 realnnz = rows[row];
1247 }
1248
1249 if (threshold != STS::zero()) {
1250 // default
1251 if (distanceLaplacianAlgo == defaultAlgo) {
1252 /* Standard Distance Laplacian */
1253 for (LO colID = 0; colID < nnz; colID++) {
1254
1255 LO col = indices[colID];
1256
1257 if (row == col) {
1258 columns[realnnz++] = col;
1259 rownnz++;
1260 continue;
1261 }
1262
1263 // We do not want the distance Laplacian aggregating boundary nodes
1264 if(isBoundary) continue;
1265
1266 SC laplVal;
1267 if(use_dlap_weights == SINGLE_WEIGHTS) {
1268 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1269 }
1270 else if(use_dlap_weights == BLOCK_WEIGHTS) {
1271 int block_id = row % interleaved_blocksize;
1272 int block_start = block_id * interleaved_blocksize;
1273 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1274 }
1275 else {
1276 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1277 }
1278 real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1279 real_type aij = STS::magnitude(laplVal*laplVal);
1280
1281 if (aij > aiiajj) {
1282 columns[realnnz++] = col;
1283 rownnz++;
1284 } else {
1285 numDropped++;
1286 }
1287 }
1288 } else {
1289 /* Cut Algorithm */
1290 using DropTol = Details::DropTol<real_type,LO>;
1291 std::vector<DropTol> drop_vec;
1292 drop_vec.reserve(nnz);
1293 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1294 const real_type one = Teuchos::ScalarTraits<real_type>::one();
1295
1296 // find magnitudes
1297 for (LO colID = 0; colID < nnz; colID++) {
1298
1299 LO col = indices[colID];
1300
1301 if (row == col) {
1302 drop_vec.emplace_back( zero, one, colID, false);
1303 continue;
1304 }
1305 // We do not want the distance Laplacian aggregating boundary nodes
1306 if(isBoundary) continue;
1307
1308 SC laplVal;
1309 if(use_dlap_weights == SINGLE_WEIGHTS) {
1310 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1311 }
1312 else if(use_dlap_weights == BLOCK_WEIGHTS) {
1313 int block_id = row % interleaved_blocksize;
1314 int block_start = block_id * interleaved_blocksize;
1315 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1316 }
1317 else {
1318 laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1319 }
1320
1321 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1322 real_type aij = STS::magnitude(laplVal*laplVal);
1323
1324 drop_vec.emplace_back(aij, aiiajj, colID, false);
1325 }
1326
1327 const size_t n = drop_vec.size();
1328
1329 if (distanceLaplacianAlgo == unscaled_cut) {
1330
1331 std::sort( drop_vec.begin(), drop_vec.end()
1332 , [](DropTol const& a, DropTol const& b) {
1333 return a.val > b.val;
1334 }
1335 );
1336
1337 bool drop = false;
1338 for (size_t i=1; i<n; ++i) {
1339 if (!drop) {
1340 auto const& x = drop_vec[i-1];
1341 auto const& y = drop_vec[i];
1342 auto a = x.val;
1343 auto b = y.val;
1344 if (a > realThreshold*b) {
1345 drop = true;
1346#ifdef HAVE_MUELU_DEBUG
1347 if (distanceLaplacianCutVerbose) {
1348 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1349 }
1350#endif
1351 }
1352 }
1353 drop_vec[i].drop = drop;
1354 }
1355 }
1356 else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1357
1358 std::sort( drop_vec.begin(), drop_vec.end()
1359 , [](DropTol const& a, DropTol const& b) {
1360 return a.val/a.diag > b.val/b.diag;
1361 }
1362 );
1363
1364 bool drop = false;
1365 for (size_t i=1; i<n; ++i) {
1366 if (!drop) {
1367 auto const& x = drop_vec[i-1];
1368 auto const& y = drop_vec[i];
1369 auto a = x.val/x.diag;
1370 auto b = y.val/y.diag;
1371 if (a > realThreshold*b) {
1372 drop = true;
1373#ifdef HAVE_MUELU_DEBUG
1374 if (distanceLaplacianCutVerbose) {
1375 std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1376 }
1377#endif
1378 }
1379 }
1380 drop_vec[i].drop = drop;
1381 }
1382 }
1383
1384 std::sort( drop_vec.begin(), drop_vec.end()
1385 , [](DropTol const& a, DropTol const& b) {
1386 return a.col < b.col;
1387 }
1388 );
1389
1390 for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
1391 LO col = indices[drop_vec[idxID].col];
1392
1393
1394 // don't drop diagonal
1395 if (row == col) {
1396 columns[realnnz++] = col;
1397 rownnz++;
1398 // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1399 continue;
1400 }
1401
1402 if (!drop_vec[idxID].drop) {
1403 columns[realnnz++] = col;
1404 // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1405 rownnz++;
1406 } else {
1407 // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1408 numDropped++;
1409 }
1410 }
1411 }
1412 } else {
1413 // Skip laplace calculation and threshold comparison for zero threshold
1414 for (LO colID = 0; colID < nnz; colID++) {
1415 LO col = indices[colID];
1416 columns[realnnz++] = col;
1417 rownnz++;
1418 }
1419 }
1420
1421 if ( rownnz == 1) {
1422 // If the only element remaining after filtering is diagonal, mark node as boundary
1423 // FIXME: this should really be replaced by the following
1424 // if (indices.size() == 1 && indices[0] == row)
1425 // boundaryNodes[row] = true;
1426 // We do not do it this way now because there is no framework for distinguishing isolated
1427 // and boundary nodes in the aggregation algorithms
1428 amalgBoundaryNodes[row] = true;
1429 }
1430
1431 if(use_stop_array)
1432 rows_stop[row] = rownnz + rows[row];
1433 else
1434 rows[row+1] = realnnz;
1435 } //for (LO row = 0; row < numRows; row++)
1436
1437 } //subtimer
1438
1439 if (use_stop_array) {
1440 // Do symmetrization of the cut matrix
1441 // NOTE: We assume nested row/column maps here
1442 for (LO row = 0; row < numRows; row++) {
1443 for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1444 LO col = columns[colidx];
1445 if(col >= numRows) continue;
1446
1447 bool found = false;
1448 for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) {
1449 if (columns[t_col] == row)
1450 found = true;
1451 }
1452 // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1453 // into a Dirichlet unknown. In that case don't.
1454 if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) {
1455 LO new_idx = rows_stop[col];
1456 // printf("(%d,%d) SYMADD entry\n",col,row);
1457 columns[new_idx] = row;
1458 rows_stop[col]++;
1459 numDropped--;
1460 }
1461 }
1462 }
1463
1464 // Condense everything down
1465 LO current_start=0;
1466 for (LO row = 0; row < numRows; row++) {
1467 LO old_start = current_start;
1468 for (LO col = rows[row]; col < rows_stop[row]; col++) {
1469 if(current_start != col) {
1470 columns[current_start] = columns[col];
1471 }
1472 current_start++;
1473 }
1474 rows[row] = old_start;
1475 }
1476 rows[numRows] = realnnz = current_start;
1477
1478 }
1479
1480 columns.resize(realnnz);
1481
1482 RCP<GraphBase> graph;
1483 {
1484 SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1485 graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1486 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1487 } //subtimer
1488
1489 if (GetVerbLevel() & Statistics1) {
1490 GO numLocalBoundaryNodes = 0;
1491 GO numGlobalBoundaryNodes = 0;
1492
1493 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1494 if (amalgBoundaryNodes[i])
1495 numLocalBoundaryNodes++;
1496
1497 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1498 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1499 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1500 << " using threshold " << dirichletThreshold << std::endl;
1501 }
1502
1503 Set(currentLevel, "Graph", graph);
1504 Set(currentLevel, "DofsPerNode", blkSize);
1505 }
1506 }
1507
1508 if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1509 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1510 GO numGlobalTotal, numGlobalDropped;
1511 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1512 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1513 GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1514 if (numGlobalTotal != 0)
1515 GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1516 GetOStream(Statistics1) << std::endl;
1517 }
1518
1519 } else {
1520 //what Tobias has implemented
1521
1522 SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1523 //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1524 GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1525 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1526
1527 RCP<const Map> rowMap = A->getRowMap();
1528 RCP<const Map> colMap = A->getColMap();
1529
1530 LO blockdim = 1; // block dim for fixed size blocks
1531 GO indexBase = rowMap->getIndexBase(); // index base of maps
1532 GO offset = 0;
1533
1534 // 1) check for blocking/striding information
1535 if(A->IsView("stridedMaps") &&
1536 Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1537 Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1538 RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1539 TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1540 blockdim = strMap->getFixedBlockSize();
1541 offset = strMap->getOffset();
1542 oldView = A->SwitchToView(oldView);
1543 GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1544 } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1545
1546 // 2) get row map for amalgamated matrix (graph of A)
1547 // with same distribution over all procs as row map of A
1548 RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1549 GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1550
1551 // 3) create graph of amalgamated matrix
1552 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries()*blockdim);
1553
1554 LO numRows = A->getRowMap()->getLocalNumElements();
1555 LO numNodes = nodeMap->getLocalNumElements();
1556 const ArrayRCP<bool> amalgBoundaryNodes(numNodes, false);
1557 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1558 bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1559
1560 // 4) do amalgamation. generate graph of amalgamated matrix
1561 // Note, this code is much more inefficient than the leightwight implementation
1562 // Most of the work has already been done in the AmalgamationFactory
1563 for(LO row=0; row<numRows; row++) {
1564 // get global DOF id
1565 GO grid = rowMap->getGlobalElement(row);
1566
1567 // reinitialize boolean helper variable
1568 bIsDiagonalEntry = false;
1569
1570 // translate grid to nodeid
1571 GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1572
1573 size_t nnz = A->getNumEntriesInLocalRow(row);
1574 Teuchos::ArrayView<const LO> indices;
1575 Teuchos::ArrayView<const SC> vals;
1576 A->getLocalRowView(row, indices, vals);
1577
1578 RCP<std::vector<GO> > cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1579 LO realnnz = 0;
1580 for(LO col=0; col<Teuchos::as<LO>(nnz); col++) {
1581 GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1582
1583 if(vals[col]!=STS::zero()) {
1584 GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1585 cnodeIds->push_back(cnodeId);
1586 realnnz++; // increment number of nnz in matrix row
1587 if (grid == gcid) bIsDiagonalEntry = true;
1588 }
1589 }
1590
1591 if(realnnz == 1 && bIsDiagonalEntry == true) {
1592 LO lNodeId = nodeMap->getLocalElement(nodeId);
1593 numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1594 if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1595 amalgBoundaryNodes[lNodeId] = true;
1596 }
1597
1598 Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp( cnodeIds );
1599
1600 if(arr_cnodeIds.size() > 0 )
1601 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1602 }
1603 // fill matrix graph
1604 crsGraph->fillComplete(nodeMap,nodeMap);
1605
1606 // 5) create MueLu Graph object
1607 RCP<GraphBase> graph = rcp(new Graph(crsGraph, "amalgamated graph of A"));
1608
1609 // Detect and record rows that correspond to Dirichlet boundary conditions
1610 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1611
1612 if (GetVerbLevel() & Statistics1) {
1613 GO numLocalBoundaryNodes = 0;
1614 GO numGlobalBoundaryNodes = 0;
1615 for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1616 if (amalgBoundaryNodes[i])
1617 numLocalBoundaryNodes++;
1618 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1619 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1620 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1621 }
1622
1623 // 6) store results in Level
1624 //graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1625 Set(currentLevel, "DofsPerNode", blockdim);
1626 Set(currentLevel, "Graph", graph);
1627
1628 } //if (doExperimentalWrap) ... else ...
1629
1630
1631 } //Build
1632
1633 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1634 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1635 typedef typename ArrayView<const LO>::size_type size_type;
1636
1637 // extract striding information
1638 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1639 if (A.IsView("stridedMaps") == true) {
1640 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1641 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1642 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1643 if (strMap->getStridedBlockId() > -1)
1644 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1645 }
1646
1647 // count nonzero entries in all dof rows associated with node row
1648 size_t nnz = 0, pos = 0;
1649 for (LO j = 0; j < blkSize; j++)
1650 nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1651
1652 if (nnz == 0) {
1653 cols.resize(0);
1654 return;
1655 }
1656
1657 cols.resize(nnz);
1658
1659 // loop over all local dof rows associated with local node "row"
1660 ArrayView<const LO> inds;
1661 ArrayView<const SC> vals;
1662 for (LO j = 0; j < blkSize; j++) {
1663 A.getLocalRowView(row*blkSize+j, inds, vals);
1664 size_type numIndices = inds.size();
1665
1666 if (numIndices == 0) // skip empty dof rows
1667 continue;
1668
1669 // cols: stores all local node ids for current local node id "row"
1670 cols[pos++] = translation[inds[0]];
1671 for (size_type k = 1; k < numIndices; k++) {
1672 LO nodeID = translation[inds[k]];
1673 // Here we try to speed up the process by reducing the size of an array
1674 // to sort. This works if the column nonzeros belonging to the same
1675 // node are stored consequently.
1676 if (nodeID != cols[pos-1])
1677 cols[pos++] = nodeID;
1678 }
1679 }
1680 cols.resize(pos);
1681 nnz = pos;
1682
1683 // Sort and remove duplicates
1684 std::sort(cols.begin(), cols.end());
1685 pos = 0;
1686 for (size_t j = 1; j < nnz; j++)
1687 if (cols[j] != cols[pos])
1688 cols[++pos] = cols[j];
1689 cols.resize(pos+1);
1690 }
1691
1692 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1693 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1694 typedef typename ArrayView<const LO>::size_type size_type;
1695 typedef Teuchos::ScalarTraits<SC> STS;
1696
1697 // extract striding information
1698 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1699 if (A.IsView("stridedMaps") == true) {
1700 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1701 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1702 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1703 if (strMap->getStridedBlockId() > -1)
1704 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1705 }
1706
1707 // count nonzero entries in all dof rows associated with node row
1708 size_t nnz = 0, pos = 0;
1709 for (LO j = 0; j < blkSize; j++)
1710 nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1711
1712 if (nnz == 0) {
1713 cols.resize(0);
1714 return;
1715 }
1716
1717 cols.resize(nnz);
1718
1719 // loop over all local dof rows associated with local node "row"
1720 ArrayView<const LO> inds;
1721 ArrayView<const SC> vals;
1722 for (LO j = 0; j < blkSize; j++) {
1723 A.getLocalRowView(row*blkSize+j, inds, vals);
1724 size_type numIndices = inds.size();
1725
1726 if (numIndices == 0) // skip empty dof rows
1727 continue;
1728
1729 // cols: stores all local node ids for current local node id "row"
1730 LO prevNodeID = -1;
1731 for (size_type k = 0; k < numIndices; k++) {
1732 LO dofID = inds[k];
1733 LO nodeID = translation[inds[k]];
1734
1735 // we avoid a square root by using squared values
1736 typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj|
1737 typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]);
1738
1739 // check dropping criterion
1740 if (aij > aiiajj || (row*blkSize+j == dofID)) {
1741 // accept entry in graph
1742
1743 // Here we try to speed up the process by reducing the size of an array
1744 // to sort. This works if the column nonzeros belonging to the same
1745 // node are stored consequently.
1746 if (nodeID != prevNodeID) {
1747 cols[pos++] = nodeID;
1748 prevNodeID = nodeID;
1749 }
1750 }
1751 }
1752 }
1753 cols.resize(pos);
1754 nnz = pos;
1755
1756 // Sort and remove duplicates
1757 std::sort(cols.begin(), cols.end());
1758 pos = 0;
1759 for (size_t j = 1; j < nnz; j++)
1760 if (cols[j] != cols[pos])
1761 cols[++pos] = cols[j];
1762 cols.resize(pos+1);
1763
1764 return;
1765 }
1766
1767
1768
1769 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1770 Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level & currentLevel,const RCP<Matrix>& A,bool generate_matrix) const {
1771 typedef Teuchos::ScalarTraits<SC> STS;
1772
1773 const ParameterList & pL = GetParameterList();
1774 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1775 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1776
1777 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
1778 RCP<LocalOrdinalVector> ghostedBlockNumber;
1779 GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"<<std::endl;
1780
1781 // Ghost the column block numbers if we need to
1782 RCP<const Import> importer = A->getCrsGraph()->getImporter();
1783 if(!importer.is_null()) {
1784 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1785 ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
1786 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1787 }
1788 else {
1789 ghostedBlockNumber = BlockNumber;
1790 }
1791
1792 // Accessors for block numbers
1793 Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1794 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1795
1796 // allocate space for the local graph
1797 ArrayRCP<size_t> rows_mat;
1798 ArrayRCP<LO> rows_graph,columns;
1799 ArrayRCP<SC> values;
1800 RCP<CrsMatrixWrap> crs_matrix_wrap;
1801
1802 if(generate_matrix) {
1803 crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1804 crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values);
1805 }
1806 else {
1807 rows_graph.resize(A->getLocalNumRows()+1);
1808 columns.resize(A->getLocalNumEntries());
1809 values.resize(A->getLocalNumEntries());
1810 }
1811
1812 LO realnnz = 0;
1813 GO numDropped = 0, numTotal = 0;
1814 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1815 LO row_block = row_block_number[row];
1816 size_t nnz = A->getNumEntriesInLocalRow(row);
1817 ArrayView<const LO> indices;
1818 ArrayView<const SC> vals;
1819 A->getLocalRowView(row, indices, vals);
1820
1821 LO rownnz = 0;
1822 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1823 LO col = indices[colID];
1824 LO col_block = col_block_number[col];
1825
1826 if(row_block == col_block) {
1827 if(generate_matrix) values[realnnz] = vals[colID];
1828 columns[realnnz++] = col;
1829 rownnz++;
1830 } else
1831 numDropped++;
1832 }
1833 if(generate_matrix) rows_mat[row+1] = realnnz;
1834 else rows_graph[row+1] = realnnz;
1835 }
1836
1837 ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1838 if (rowSumTol > 0.)
1839 Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
1840
1841
1842 if(!generate_matrix) {
1843 // We can't resize an Arrayrcp and pass the checks for setAllValues
1844 values.resize(realnnz);
1845 columns.resize(realnnz);
1846 }
1847 numTotal = A->getLocalNumEntries();
1848
1849 if (GetVerbLevel() & Statistics1) {
1850 GO numLocalBoundaryNodes = 0;
1851 GO numGlobalBoundaryNodes = 0;
1852 for (LO i = 0; i < boundaryNodes.size(); ++i)
1853 if (boundaryNodes[i])
1854 numLocalBoundaryNodes++;
1855 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1856 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1857 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1858
1859 GO numGlobalTotal, numGlobalDropped;
1860 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1861 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1862 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1863 if (numGlobalTotal != 0)
1864 GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1865 GetOStream(Statistics1) << std::endl;
1866 }
1867
1868 Set(currentLevel, "Filtering", true);
1869
1870 if(generate_matrix) {
1871 // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1872 // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1873 // here, which is legit, because we never use them anyway.
1874 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values);
1875 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1876 }
1877 else {
1878 RCP<GraphBase> graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1879 graph->SetBoundaryNodeMap(boundaryNodes);
1880 Set(currentLevel, "Graph", graph);
1881 }
1882
1883
1884 Set(currentLevel, "DofsPerNode", 1);
1885 return crs_matrix_wrap;
1886 }
1887
1888
1889 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1890 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<GraphBase> & inputGraph, const RCP<LocalOrdinalVector> & ghostedBlockNumber, RCP<GraphBase> & outputGraph, RCP<const Import> & importer) const {
1891
1892 TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1893 const ParameterList & pL = GetParameterList();
1894
1895 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1896
1897 GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1898 if (localizeColoringGraph)
1899 GetOStream(Statistics1) << ", with localization" <<std::endl;
1900 else
1901 GetOStream(Statistics1) << ", without localization" <<std::endl;
1902
1903 // Accessors for block numbers
1904 Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1905 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1906
1907 // allocate space for the local graph
1908 ArrayRCP<size_t> rows_mat;
1909 ArrayRCP<LO> rows_graph,columns;
1910
1911 rows_graph.resize(inputGraph->GetNodeNumVertices()+1);
1912 columns.resize(inputGraph->GetNodeNumEdges());
1913
1914 LO realnnz = 0;
1915 GO numDropped = 0, numTotal = 0;
1916 const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1917 if (localizeColoringGraph) {
1918
1919 for (LO row = 0; row < numRows; ++row) {
1920 LO row_block = row_block_number[row];
1921 ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1922
1923 LO rownnz = 0;
1924 for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1925 LO col = indices[colID];
1926 LO col_block = col_block_number[col];
1927
1928 if((row_block == col_block) && (col < numRows)) {
1929 columns[realnnz++] = col;
1930 rownnz++;
1931 } else
1932 numDropped++;
1933 }
1934 rows_graph[row+1] = realnnz;
1935 }
1936 } else {
1937 // ghosting of boundary node map
1938 Teuchos::ArrayRCP<const bool> boundaryNodes = inputGraph->GetBoundaryNodeMap();
1939 auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetDomainMap());
1940 for (size_t i=0; i<inputGraph->GetNodeNumVertices(); i++)
1941 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1942 // Xpetra::IO<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1943 auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetImportMap());
1944 boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT);
1945 auto boundaryColumn = boundaryColumnVector->getData(0);
1946
1947 for (LO row = 0; row < numRows; ++row) {
1948 LO row_block = row_block_number[row];
1949 ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1950
1951 LO rownnz = 0;
1952 for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1953 LO col = indices[colID];
1954 LO col_block = col_block_number[col];
1955
1956 if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1957 columns[realnnz++] = col;
1958 rownnz++;
1959 } else
1960 numDropped++;
1961 }
1962 rows_graph[row+1] = realnnz;
1963 }
1964 }
1965
1966 columns.resize(realnnz);
1967 numTotal = inputGraph->GetNodeNumEdges();
1968
1969 if (GetVerbLevel() & Statistics1) {
1970 RCP<const Teuchos::Comm<int> > comm = inputGraph->GetDomainMap()->getComm();
1971 GO numGlobalTotal, numGlobalDropped;
1972 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1973 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1974 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1975 if (numGlobalTotal != 0)
1976 GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1977 GetOStream(Statistics1) << std::endl;
1978 }
1979
1980 if (localizeColoringGraph) {
1981 outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1982 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1983 } else {
1984 TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1985#ifdef HAVE_XPETRA_TPETRA
1986 auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1987
1988 auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1989 auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal,GlobalOrdinal,Node>(tpGraph));
1990 auto tpGraphSym = sym->symmetrize();
1991
1992 auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor
1993 Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost());
1994
1995 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
1996 ArrayRCP<LO> rows_graphSym;
1997 rows_graphSym.resize(rowsSym.size());
1998 for (size_t row = 0; row < rowsSym.size(); row++)
1999 rows_graphSym[row] = rowsSym[row];
2000 outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2001 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2002#endif
2003 }
2004
2005 }
2006
2007
2008
2009} //namespace MueLu
2010
2011#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
void DeclareInput(Level &currentLevel) const
Input.
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
void BlockDiagonalizeGraph(const RCP< GraphBase > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< GraphBase > &outputGraph, RCP< const Import > &importer) const
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
MueLu utility class.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
static Teuchos::ArrayRCP< Magnitude > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar > > &v, LocalOrdinal i0, LocalOrdinal i1)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Warnings1
Additional warnings.
@ Parameters0
Print class parameters.
DropTol & operator=(DropTol const &)=default
DropTol(DropTol &&)=default
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
DropTol & operator=(DropTol &&)=default
DropTol(DropTol const &)=default