MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_ClassicalPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_CLASSICALPFACTORY_DEF_HPP
47#define MUELU_CLASSICALPFACTORY_DEF_HPP
48
49#include <Xpetra_MultiVectorFactory.hpp>
50#include <Xpetra_VectorFactory.hpp>
51#include <Xpetra_CrsGraphFactory.hpp>
52#include <Xpetra_Matrix.hpp>
53#include <Xpetra_Map.hpp>
54#include <Xpetra_Map.hpp>
55#include <Xpetra_MapFactory.hpp>
56#include <Xpetra_Vector.hpp>
57#include <Xpetra_Import.hpp>
58#include <Xpetra_ImportUtils.hpp>
59#include <Xpetra_IO.hpp>
60#include <Xpetra_StridedMapFactory.hpp>
61
62#include <Teuchos_OrdinalTraits.hpp>
63
64#include "MueLu_MasterList.hpp"
65#include "MueLu_Monitor.hpp"
66#include "MueLu_PerfUtils.hpp"
68#include "MueLu_ClassicalMapFactory.hpp"
69#include "MueLu_Utilities.hpp"
70#include "MueLu_AmalgamationInfo.hpp"
71#include "MueLu_GraphBase.hpp"
72
73
74//#define CMS_DEBUG
75//#define CMS_DUMP
76
77namespace {
78
79template<class SC>
80int Sign(SC val) {
81 using STS = typename Teuchos::ScalarTraits<SC>;
82 typename STS::magnitudeType MT_ZERO = Teuchos::ScalarTraits<typename STS::magnitudeType>::zero();
83 if(STS::real(val) > MT_ZERO) return 1;
84 else if(STS::real(val) < MT_ZERO) return -1;
85 else return 0;
86}
87
88}// anonymous namepsace
89
90namespace MueLu {
91
92
93
94
95 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
97 RCP<ParameterList> validParamList = rcp(new ParameterList());
98#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
99 SET_VALID_ENTRY("aggregation: deterministic");
100 SET_VALID_ENTRY("aggregation: coloring algorithm");
101 SET_VALID_ENTRY("aggregation: classical scheme");
102
103 // To know if we need BlockNumber
104 SET_VALID_ENTRY("aggregation: drop scheme");
105 {
106 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
107 validParamList->getEntry("aggregation: classical scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("direct","ext+i","classical modified"), "aggregation: classical scheme")));
108
109 }
110
111#undef SET_VALID_ENTRY
112 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
113 // validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
114 validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
115 validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
116 validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the CoarseMap");
117 validParamList->set< RCP<const FactoryBase> >("FC Splitting", Teuchos::null, "Generating factory of the FC Splitting");
118 validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for Block Number");
119 // validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
120
121 return validParamList;
122 }
123
124 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
126 Input(fineLevel, "A");
127 Input(fineLevel, "Graph");
128 Input(fineLevel, "DofsPerNode");
129 // Input(fineLevel, "UnAmalgamationInfo");
130 Input(fineLevel, "CoarseMap");
131 Input(fineLevel, "FC Splitting");
132
133 const ParameterList& pL = GetParameterList();
134 std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
135 if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
136 Input(fineLevel, "BlockNumber");
137 }
138
139 }
140
141 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
143 return BuildP(fineLevel, coarseLevel);
144 }
145
146 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
148 FactoryMonitor m(*this, "Build", coarseLevel);
149 // using STS = Teuchos::ScalarTraits<SC>;
150
151 // We start by assuming that someone did a reasonable strength of connection
152 // algorithm before we start to get our Graph, DofsPerNode and UnAmalgamationInfo
153
154 // We begin by getting a MIS (from a graph coloring) and then at that point we need
155 // to start generating entries for the prolongator.
156 RCP<const Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
157 RCP<const Map> ownedCoarseMap = Get<RCP<const Map> >(fineLevel,"CoarseMap");
158 RCP<const LocalOrdinalVector> owned_fc_splitting = Get<RCP<LocalOrdinalVector> >(fineLevel,"FC Splitting");
159 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(fineLevel, "Graph");
160 // LO nDofsPerNode = Get<LO>(fineLevel, "DofsPerNode");
161 // RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
162 RCP<const Import> Importer = A->getCrsGraph()->getImporter();
163 Xpetra::UnderlyingLib lib = ownedCoarseMap->lib();
164
165 // RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
166 RCP<Matrix> P;
167 // SC SC_ZERO = STS::zero();
168 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
171 const ParameterList& pL = GetParameterList();
172
173 // FIXME: This guy doesn't work right now for NumPDEs != 1
174 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError,"ClassicalPFactory: Multiple PDEs per node not supported yet");
175
176 // NOTE: Let's hope we never need to deal with this case
177 TEUCHOS_TEST_FOR_EXCEPTION(!A->getRowMap()->isSameAs(*A->getDomainMap()),Exceptions::RuntimeError,"ClassicalPFactory: MPI Ranks > 1 not supported yet");
178
179 // Do we need ghosts rows of A and myPointType?
180 std::string scheme = pL.get<std::string>("aggregation: classical scheme");
181 bool need_ghost_rows =false;
182 if(scheme == "ext+i")
183 need_ghost_rows=true;
184 else if(scheme == "direct")
185 need_ghost_rows=false;
186 else if(scheme == "classical modified")
187 need_ghost_rows=true;
188 // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
189
190 if (GetVerbLevel() & Statistics1) {
191 GetOStream(Statistics1) << "ClassicalPFactory: scheme = "<<scheme<<std::endl;
192 }
193
194 // Ghost the FC splitting and grab the data (if needed)
195 RCP<const LocalOrdinalVector> fc_splitting;
196 ArrayRCP<const LO> myPointType;
197 if(Importer.is_null()) {
198 fc_splitting = owned_fc_splitting;
199 }
200 else {
201 RCP<LocalOrdinalVector> fc_splitting_nonconst = LocalOrdinalVectorFactory::Build(A->getCrsGraph()->getColMap());
202 fc_splitting_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
203 fc_splitting = fc_splitting_nonconst;
204 }
205 myPointType = fc_splitting->getData(0);
206
207
208 /* Ghost A (if needed) */
209 RCP<const Matrix> Aghost;
210 RCP<const LocalOrdinalVector> fc_splitting_ghost;
211 ArrayRCP<const LO> myPointType_ghost;
212 RCP<const Import> remoteOnlyImporter;
213 if(need_ghost_rows && !Importer.is_null()){
214 ArrayView<const LO> remoteLIDs = Importer->getRemoteLIDs();
215 size_t numRemote = Importer->getNumRemoteIDs();
216 Array<GO> remoteRows(numRemote);
217 for (size_t i = 0; i < numRemote; i++)
218 remoteRows[i] = Importer->getTargetMap()->getGlobalElement(remoteLIDs[i]);
219
220 RCP<const Map> remoteRowMap = MapFactory::Build(lib,Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), remoteRows(),
221 A->getDomainMap()->getIndexBase(), A->getDomainMap()->getComm());
222
223 remoteOnlyImporter = Importer->createRemoteOnlyImport(remoteRowMap);
224 RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
225 RCP<CrsMatrix> Aghost_crs = CrsMatrixFactory::Build(Acrs,*remoteOnlyImporter,A->getDomainMap(),remoteOnlyImporter->getTargetMap());
226 Aghost = rcp(new CrsMatrixWrap(Aghost_crs));
227 // CAG: It seems that this isn't actually needed?
228 // We also may need need to ghost myPointType for Aghost
229 // RCP<const Import> Importer2 = Aghost->getCrsGraph()->getImporter();
230 // if(Importer2.is_null()) {
231 // RCP<LocalOrdinalVector> fc_splitting_ghost_nonconst = LocalOrdinalVectorFactory::Build(Aghost->getColMap());
232 // fc_splitting_ghost_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
233 // fc_splitting_ghost = fc_splitting_ghost_nonconst;
234 // myPointType_ghost = fc_splitting_ghost->getData(0);
235 // }
236 }
237
238 /* Generate the ghosted Coarse map using the "Tuminaro maneuver" (if needed)*/
239 RCP<const Map> coarseMap;
240 if(Importer.is_null())
241 coarseMap = ownedCoarseMap;
242 else {
243 // Generate a domain vector with the coarse ID's as entries for C points
244 GhostCoarseMap(*A,*Importer,myPointType,ownedCoarseMap,coarseMap);
245 }
246
247 /* Get the block number, if we need it (and ghost it) */
248 RCP<LocalOrdinalVector> BlockNumber;
249 std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
250 if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
251 RCP<LocalOrdinalVector> OwnedBlockNumber;
252 OwnedBlockNumber = Get<RCP<LocalOrdinalVector> >(fineLevel, "BlockNumber");
253 if(Importer.is_null())
254 BlockNumber = OwnedBlockNumber;
255 else{
256 BlockNumber = LocalOrdinalVectorFactory::Build(A->getColMap());
257 BlockNumber->doImport(*OwnedBlockNumber,*Importer,Xpetra::INSERT);
258 }
259 }
260
261#if defined(CMS_DEBUG) || defined(CMS_DUMP)
262 {
263 RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
264 int rank = A->getRowMap()->getComm()->getRank();
265 printf("[%d] A local size = %dx%d\n",rank,(int)Acrs->getRowMap()->getLocalNumElements(),(int)Acrs->getColMap()->getLocalNumElements());
266
267 printf("[%d] graph local size = %dx%d\n",rank,(int)graph->GetDomainMap()->getLocalNumElements(),(int)graph->GetImportMap()->getLocalNumElements());
268
269 std::ofstream ofs(std::string("dropped_graph_") + std::to_string(fineLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
270 RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
271 graph->print(*fancy,Debug);
272 std::string out_fc = std::string("fc_splitting_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
273 std::string out_block = std::string("block_number_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
274
275 // We don't support writing LO vectors in Xpetra (boo!) so....
276 using real_type = typename Teuchos::ScalarTraits<SC>::magnitudeType;
277 using RealValuedMultiVector = typename Xpetra::MultiVector<real_type,LO,GO,NO>;
278 typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
279
280 RCP<RealValuedMultiVector> mv = RealValuedMultiVectorFactory::Build(fc_splitting->getMap(),1);
281 ArrayRCP<real_type> mv_data= mv->getDataNonConst(0);
282
283 // FC Splitting
284 ArrayRCP<const LO> fc_data= fc_splitting->getData(0);
285 for(LO i=0; i<(LO)fc_data.size(); i++)
286 mv_data[i] = Teuchos::as<real_type>(fc_data[i]);
287 Xpetra::IO<real_type,LO,GO,NO>::Write(out_fc,*mv);
288
289 // Block Number
290 if(!BlockNumber.is_null()) {
291 RCP<RealValuedMultiVector> mv2 = RealValuedMultiVectorFactory::Build(BlockNumber->getMap(),1);
292 ArrayRCP<real_type> mv_data2= mv2->getDataNonConst(0);
293 ArrayRCP<const LO> b_data= BlockNumber->getData(0);
294 for(LO i=0; i<(LO)b_data.size(); i++) {
295 mv_data2[i] = Teuchos::as<real_type>(b_data[i]);
296 }
297 Xpetra::IO<real_type,LO,GO,NO>::Write(out_block,*mv2);
298 }
299 }
300
301
302#endif
303
304
305 /* Generate reindexing arrays */
306 // Note: cpoint2pcol is ghosted if myPointType is
307 // NOTE: Since the ghosted coarse column map follows the ordering of
308 // the fine column map, this *should* work, because it is in local indices.
309 // pcol2cpoint - Takes a LCID for P and turns in into an LCID for A.
310 // cpoint2pcol - Takes a LCID for A --- if it is a C Point --- and turns in into an LCID for P.
311 Array<LO> cpoint2pcol(myPointType.size(),LO_INVALID);
312 Array<LO> pcol2cpoint(coarseMap->getLocalNumElements(),LO_INVALID);
313 LO num_c_points = 0;
314 LO num_f_points =0;
315 for(LO i=0; i<(LO) myPointType.size(); i++) {
316 if(myPointType[i] == C_PT) {
317 cpoint2pcol[i] = num_c_points;
318 num_c_points++;
319 }
320 else if (myPointType[i] == F_PT)
321 num_f_points++;
322 }
323 for(LO i=0; i<(LO)cpoint2pcol.size(); i++) {
324 if(cpoint2pcol[i] != LO_INVALID)
325 pcol2cpoint[cpoint2pcol[i]] =i;
326 }
327
328 // Generate edge strength flags (this will make everything easier later)
329 // These do *not* need to be ghosted (unlike A)
330 Teuchos::Array<size_t> eis_rowptr;
331 Teuchos::Array<bool> edgeIsStrong;
332 {
333 SubFactoryMonitor sfm(*this,"Strength Flags",coarseLevel);
334 GenerateStrengthFlags(*A,*graph,eis_rowptr,edgeIsStrong);
335 }
336
337 // Phase 3: Generate the P matrix
338 RCP<const Map> coarseColMap = coarseMap;
339 RCP<const Map> coarseDomainMap = ownedCoarseMap;
340 if(scheme == "ext+i") {
341 SubFactoryMonitor sfm(*this,"Ext+i Interpolation",coarseLevel);
342 Coarsen_Ext_Plus_I(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
343 }
344 else if(scheme == "direct") {
345 SubFactoryMonitor sfm(*this,"Direct Interpolation",coarseLevel);
346 Coarsen_Direct(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
347 }
348 else if(scheme == "classical modified") {
349 SubFactoryMonitor sfm(*this,"Classical Modified Interpolation",coarseLevel);
350 Coarsen_ClassicalModified(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,remoteOnlyImporter,P);
351 }
352 // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
353
354#ifdef CMS_DEBUG
355 Xpetra::IO<SC,LO,GO,NO>::Write("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
356 // Xpetra::IO<SC,LO,GO,NO>::WriteLocal("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
357#endif
358
359 // Save output
360 Set(coarseLevel,"P",P);
361 RCP<const CrsGraph> pg = P->getCrsGraph();
362 Set(coarseLevel,"P Graph",pg);
363
364 //RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
365 // P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
366 // Set(coarseLevel, "Nullspace", coarseNullspace);
367
368 if (IsPrint(Statistics1)) {
369 RCP<ParameterList> params = rcp(new ParameterList());
370 params->set("printLoadBalancingInfo", true);
371 GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
372 }
373 }
374/* ************************************************************************* */
375template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
377Coarsen_ClassicalModified(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<const Import> remoteOnlyImporter,RCP<Matrix> & P) const {
378 /* ============================================================= */
379 /* Phase 3 : Classical Modified Interpolation */
380 /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
381 /* interpolation for parallel algebraic multigrid", NLAA 2008 */
382 /* 15:115-139 */
383 /* ============================================================= */
384 /* Definitions: */
385 /* F = F-points */
386 /* C = C-points */
387 /* N_i = non-zero neighbors of node i */
388 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
389 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
390 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
391
392 /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
393 /* This guy has a typo. The paper had a \cap instead of \cup */
394 /* I would note that this set can contain both F-points and */
395 /* C-points. They're just weak neighbors of this guy. */
396 /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
397
398
399 /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
400 /* { a_ij, otherwise */
401
402 /* F_i^s\star = {k\in N_i | C_i^s \cap C_k^s = \emptyset} */
403 /* [set of F-neighbors of i that do not share a strong */
404 /* C-neighbor with i] */
405
406
407 /* Rewritten Equation (9) on p. 120 */
408 /* \tilde{a}_ii = (a_ij + \sum_{k\in{N_i^w \cup F_i^s\star}} a_ik */
409 /* */
410 /* f_ij = \sum_{k\in{F_i^s\setminusF_i^s*}} \frac{a_ik \bar{a}_kj}{\sum_{m\inC_i^s \bar{a}_km}} */
411 /* */
412 /* w_ij = \frac{1}{\tilde{a}_ii} ( a_ij + f_ij) for all j in C_i^s */
413
414 // const point_type F_PT = ClassicalMapFactory::F_PT;
417 using STS = typename Teuchos::ScalarTraits<SC>;
418 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
419 // size_t ST_INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
420 SC SC_ZERO = STS::zero();
421#ifdef CMS_DEBUG
422 int rank = A.getRowMap()->getComm()->getRank();
423#endif
424
425 // Get the block number if we have it.
426 ArrayRCP<const LO> block_id;
427 if(!BlockNumber.is_null())
428 block_id = BlockNumber->getData(0);
429
430 // Initial (estimated) allocation
431 // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
432 // needs to be a copy below.
433 size_t Nrows = A.getLocalNumRows();
434 double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
435 double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
436 // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
437 double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
438
439 size_t nnz_est = std::max(Nrows,std::min((size_t)A.getLocalNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
440 Array<size_t> tmp_rowptr(Nrows+1);
441 Array<LO> tmp_colind(nnz_est);
442
443 // Algorithm (count+realloc)
444 // For each row, i,
445 // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
446 size_t ct=0;
447 for(LO row=0; row < (LO) Nrows; row++) {
448 size_t row_start = eis_rowptr[row];
449 ArrayView<const LO> indices;
450 ArrayView<const SC> vals;
451 std::set<LO> C_hat;
452 if(myPointType[row] == DIRICHLET_PT) {
453 // Dirichlet points get ignored completely
454 }
455 else if(myPointType[row] == C_PT) {
456 // C-Points get a single 1 in their row
457 C_hat.insert(cpoint2pcol[row]);
458 }
459 else {
460 // F-Points have a more complicated interpolation
461
462 // C-neighbors of row
463 A.getLocalRowView(row, indices, vals);
464 for(LO j=0; j<indices.size(); j++)
465 if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
466 C_hat.insert(cpoint2pcol[indices[j]]);
467 }// end else
468
469 // Realloc if needed
470 if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
471 tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
472 }
473
474 // Copy
475 std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
476 ct+=C_hat.size();
477 tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
478 }
479 // Resize down
480 tmp_colind.resize(tmp_rowptr[Nrows]);
481
482 RCP<CrsMatrix> Pcrs = CrsMatrixFactory::Build(A.getRowMap(),coarseColMap,0);
483 ArrayRCP<size_t> P_rowptr;
484 ArrayRCP<LO> P_colind;
485 ArrayRCP<SC> P_values;
486
487 if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
488 P_rowptr = Teuchos::arcpFromArray(tmp_rowptr);
489 P_colind = Teuchos::arcpFromArray(tmp_colind);
490 P_values.resize(P_rowptr[Nrows]);
491 } else {
492 // Make the matrix and then get the graph out of it (necessary for Epetra)
493 // NOTE: The lack of an Epetra_CrsGraph::ExpertStaticFillComplete makes this
494 // impossible to do the obvious way
495 Pcrs->allocateAllValues(tmp_rowptr[Nrows],P_rowptr,P_colind,P_values);
496 std::copy(tmp_rowptr.begin(),tmp_rowptr.end(), P_rowptr.begin());
497 std::copy(tmp_colind.begin(),tmp_colind.end(), P_colind.begin());
498 Pcrs->setAllValues(P_rowptr,P_colind,P_values);
499 Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
500 }
501
502 // Generate a remote-ghosted version of the graph (if we're in parallel)
503 RCP<const CrsGraph> Pgraph;
504 RCP<const CrsGraph> Pghost;
505 // TODO: We might want to be more efficient here and actually use
506 // Pgraph in the matrix constructor.
507 ArrayRCP<const size_t> Pghost_rowptr;
508 ArrayRCP<const LO> Pghost_colind;
509 if(!remoteOnlyImporter.is_null()) {
510 if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
511 RCP<CrsGraph> tempPgraph = CrsGraphFactory::Build(A.getRowMap(),coarseColMap,P_rowptr,P_colind);
512 tempPgraph->fillComplete(coarseDomainMap,A.getDomainMap());
513 Pgraph = tempPgraph;
514 } else {
515 // Epetra does not have a graph constructor that uses rowptr and colind.
516 Pgraph = Pcrs->getCrsGraph();
517 }
518 TEUCHOS_ASSERT(!Pgraph.is_null());
519 Pghost = CrsGraphFactory::Build(Pgraph,*remoteOnlyImporter,Pgraph->getDomainMap(),remoteOnlyImporter->getTargetMap());
520 Pghost->getAllIndices(Pghost_rowptr,Pghost_colind);
521 }
522
523 // Gustavson-style perfect hashing
524 ArrayRCP<LO> Acol_to_Pcol(A.getColMap()->getLocalNumElements(),LO_INVALID);
525
526 // Get a quick reindexing array from Pghost LCIDs to P LCIDs
527 ArrayRCP<LO> Pghostcol_to_Pcol;
528 if(!Pghost.is_null()) {
529 Pghostcol_to_Pcol.resize(Pghost->getColMap()->getLocalNumElements(),LO_INVALID);
530 for(LO i=0; i<(LO) Pghost->getColMap()->getLocalNumElements(); i++)
531 Pghostcol_to_Pcol[i] = Pgraph->getColMap()->getLocalElement(Pghost->getColMap()->getGlobalElement(i));
532 }//end Pghost
533
534 // Get a quick reindexing array from Aghost LCIDs to A LCIDs
535 ArrayRCP<LO> Aghostcol_to_Acol;
536 if(!Aghost.is_null()) {
537 Aghostcol_to_Acol.resize(Aghost->getColMap()->getLocalNumElements(),LO_INVALID);
538 for(LO i=0; i<(LO)Aghost->getColMap()->getLocalNumElements(); i++)
539 Aghostcol_to_Acol[i] = A.getColMap()->getLocalElement(Aghost->getColMap()->getGlobalElement(i));
540 }//end Aghost
541
542
543 // Algorithm (numeric)
544 for(LO i=0; i < (LO)Nrows; i++) {
545 if(myPointType[i] == DIRICHLET_PT) {
546 // Dirichlet points get ignored completely
547#ifdef CMS_DEBUG
548 // DEBUG
549 printf("[%d] ** A(%d,:) is a Dirichlet-Point.\n",rank,i);
550#endif
551 }
552 else if (myPointType[i] == C_PT) {
553 // C Points get a single 1 in their row
554 P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
555#ifdef CMS_DEBUG
556 // DEBUG
557 printf("[%d] ** A(%d,:) is a C-Point.\n",rank,i);
558#endif
559 }
560 else {
561 // F Points get all of the fancy stuff
562#ifdef CMS_DEBUG
563 // DEBUG
564 printf("[%d] ** A(%d,:) is a F-Point.\n",rank,i);
565#endif
566
567 // Get all of the relevant information about this row
568 ArrayView<const LO> A_indices_i, A_indices_k;
569 ArrayView<const SC> A_vals_i, A_vals_k;
570 A.getLocalRowView(i, A_indices_i, A_vals_i);
571 size_t row_start = eis_rowptr[i];
572
573 ArrayView<const LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
574 ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
575
576 // FIXME: Do we need this?
577 for(LO j=0; j<(LO)P_vals_i.size(); j++)
578 P_vals_i[j] = SC_ZERO;
579
580 // Stash the hash: Flag any strong C-points with their index into P_colind
581 // NOTE: We'll consider any points that are LO_INVALID or less than P_rowptr[i] as not strong C-points
582 for(LO j=0; j<(LO)P_indices_i.size(); j++) {
583 Acol_to_Pcol[pcol2cpoint[P_indices_i[j]]] = P_rowptr[i] + j;
584 }
585
586 // Loop over the entries in the row
587 SC first_denominator = SC_ZERO;
588#ifdef CMS_DEBUG
589 SC a_ii = SC_ZERO;
590#endif
591 for(LO k0=0; k0<(LO)A_indices_i.size(); k0++) {
592 LO k = A_indices_i[k0];
593 SC a_ik = A_vals_i[k0];
594 LO pcol_k = Acol_to_Pcol[k];
595
596 if(k == i) {
597 // Case A: Diagonal value (add to first denominator)
598 // FIXME: Add BlockNumber matching here
599 first_denominator += a_ik;
600#ifdef CMS_DEBUG
601 a_ii = a_ik;
602 printf("- A(%d,%d) is the diagonal\n",i,k);
603#endif
604
605 }
606 else if(myPointType[k] == DIRICHLET_PT) {
607 // Case B: Ignore dirichlet connections completely
608#ifdef CMS_DEBUG
609 printf("- A(%d,%d) is a Dirichlet point\n",i,k);
610#endif
611
612 }
613 else if(pcol_k != LO_INVALID && pcol_k >= (LO)P_rowptr[i]) {
614 // Case C: a_ik is strong C-Point (goes directly into the weight)
615 P_values[pcol_k] += a_ik;
616#ifdef CMS_DEBUG
617 printf("- A(%d,%d) is a strong C-Point\n",i,k);
618#endif
619 }
620 else if (!edgeIsStrong[row_start + k0]) {
621 // Case D: Weak non-Dirichlet neighbor (add to first denominator)
622 if(block_id.size() == 0 || block_id[i] == block_id[k]) {
623 first_denominator += a_ik;
624#ifdef CMS_DEBUG
625 printf("- A(%d,%d) is weak adding to diagonal(%d,%d) (%6.4e)\n",i,k,block_id[i],block_id[k],a_ik);
626 }
627 else {
628 printf("- A(%d,%d) is weak but does not match blocks (%d,%d), discarding\n",i,k,block_id[i],block_id[k]);
629#endif
630 }
631
632 }
633 else {//Case E
634 // Case E: Strong F-Point (adds to the first denominator if we don't share a
635 // a strong C-Point with i; adds to the second denominator otherwise)
636#ifdef CMS_DEBUG
637 printf("- A(%d,%d) is a strong F-Point\n",i,k);
638#endif
639
640 SC a_kk = SC_ZERO;
641 SC second_denominator = SC_ZERO;
642 int sign_akk = 0;
643
644 if(k < (LO)Nrows) {
645 // Grab the diagonal a_kk
646 A.getLocalRowView(k, A_indices_k, A_vals_k);
647 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
648 LO m = A_indices_k[m0];
649 if(k == m) {
650 a_kk = A_vals_k[m0];
651 break;
652 }
653 }//end for A_indices_k
654
655 // Compute the second denominator term
656 sign_akk = Sign(a_kk);
657 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
658 LO m = A_indices_k[m0];
659 if(m != k && Acol_to_Pcol[A_indices_k[m0]] >= (LO)P_rowptr[i]) {
660 SC a_km = A_vals_k[m0];
661 second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
662 }
663 }//end for A_indices_k
664
665 // Now we have the second denominator, for this particular strong F point.
666 // So we can now add the sum to the w_ij components for the P values
667 if(second_denominator != SC_ZERO) {
668 for(LO j0=0; j0<(LO)A_indices_k.size(); j0++) {
669 LO j = A_indices_k[j0];
670 // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
671 // printf("Acol_to_Pcol[%d] = %d P_values.size() = %d\n",j,Acol_to_Pcol[j],(int)P_values.size());
672 if(Acol_to_Pcol[j] >= (LO)P_rowptr[i]) {
673 SC a_kj = A_vals_k[j0];
674 SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
675 P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
676#ifdef CMS_DEBUG
677 printf("- - Unscaled P(%d,A-%d) += %6.4e = %5.4e\n",i,j,a_ik * sign_akj_val / second_denominator,P_values[Acol_to_Pcol[j]]);
678#endif
679 }
680 }//end for A_indices_k
681 }//end if second_denominator != 0
682 else {
683#ifdef CMS_DEBUG
684 printf("- - A(%d,%d) second denominator is zero\n",i,k);
685#endif
686 if(block_id.size() == 0 || block_id[i] == block_id[k])
687 first_denominator += a_ik;
688 }//end else second_denominator != 0
689 }// end if k < Nrows
690 else {
691 // Ghost row
692 LO kless = k-Nrows;
693 // Grab the diagonal a_kk
694 // NOTE: ColMap is not locally fitted to the RowMap
695 // so we need to check GIDs here
696 Aghost->getLocalRowView(kless, A_indices_k, A_vals_k);
697 GO k_g = Aghost->getRowMap()->getGlobalElement(kless);
698 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
699 GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
700 if(k_g == m_g) {
701 a_kk = A_vals_k[m0];
702 break;
703 }
704 }//end for A_indices_k
705
706 // Compute the second denominator term
707 sign_akk = Sign(a_kk);
708 for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
709 GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
710 LO mghost = A_indices_k[m0];//Aghost LCID
711 LO m = Aghostcol_to_Acol[mghost]; //A's LID (could be LO_INVALID)
712 if(m_g != k_g && m != LO_INVALID && Acol_to_Pcol[m] >= (LO)P_rowptr[i]) {
713 SC a_km = A_vals_k[m0];
714 second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
715 }
716 }//end for A_indices_k
717
718
719 // Now we have the second denominator, for this particular strong F point.
720 // So we can now add the sum to the w_ij components for the P values
721 if(second_denominator != SC_ZERO) {
722 for(LO j0=0; j0<(LO)A_indices_k.size(); j0++){
723 LO jghost = A_indices_k[j0];//Aghost LCID
724 LO j = Aghostcol_to_Acol[jghost]; //A's LID (could be LO_INVALID)
725 // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
726 if((j != LO_INVALID) && (Acol_to_Pcol[j] >= (LO)P_rowptr[i])) {
727 SC a_kj = A_vals_k[j0];
728 SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
729 P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
730#ifdef CMS_DEBUG
731 printf("- - Unscaled P(%d,A-%d) += %6.4e\n",i,j,a_ik * sign_akj_val / second_denominator);
732#endif
733 }
734
735 }//end for A_indices_k
736 }//end if second_denominator != 0
737 else {
738#ifdef CMS_DEBUG
739 printf("- - A(%d,%d) second denominator is zero\n",i,k);
740#endif
741 if(block_id.size() == 0 || block_id[i] == block_id[k])
742 first_denominator += a_ik;
743 }//end else second_denominator != 0
744 }//end else k < Nrows
745 }//end else Case A,...,E
746
747 }//end for A_indices_i
748
749 // Now, downscale by the first_denominator
750 if(first_denominator != SC_ZERO) {
751 for(LO j0=0; j0<(LO)P_indices_i.size(); j0++) {
752#ifdef CMS_DEBUG
753 SC old_pij = P_vals_i[j0];
754 P_vals_i[j0] /= -first_denominator;
755 printf("P(%d,%d) = %6.4e = %6.4e / (%6.4e + %6.4e)\n",i,P_indices_i[j0],P_vals_i[j0],old_pij,a_ii,first_denominator - a_ii);
756#else
757 P_vals_i[j0] /= -first_denominator;
758#endif
759 }//end for P_indices_i
760 }//end if first_denominator != 0
761
762 }//end else C-Point
763
764 }// end if i < Nrows
765
766 // Finish up
767 Pcrs->setAllValues(P_rowptr,P_colind,P_values);
768 Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
769 // Wrap from CrsMatrix to Matrix
770 P = rcp(new CrsMatrixWrap(Pcrs));
771
772}//end Coarsen_ClassicalModified
773
774
775/* ************************************************************************* */
776template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
778Coarsen_Direct(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
779 /* ============================================================= */
780 /* Phase 3 : Direct Interpolation */
781 /* We do not use De Sterck, Falgout, Nolting and Yang (2008) */
782 /* here. Instead we follow: */
783 /* Trottenberg, Oosterlee and Schueller, Multigrid, 2001. */
784 /* with some modifications inspirted by PyAMG */
785 /* ============================================================= */
786 /* Definitions: */
787 /* F = F-points */
788 /* C = C-points */
789 /* N_i = non-zero neighbors of node i */
790 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
791 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
792 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
793 /* P_i = Set of interpolatory variables for row i [here = C_i^s] */
794
795 /* (A.2.17) from p. 426 */
796 /* a_ij^- = { a_ij, if a_ij < 0 */
797 /* { 0, otherwise */
798 /* a_ij^+ = { a_ij, if a_ij > 0 */
799 /* { 0, otherwise */
800 /* P_i^- = P_i \cap {k | a_ij^- != 0 and a_ij^- = a_ij} */
801 /* [strong C-neighbors with negative edges] */
802 /* P_i^+ = P_i \cap {k | a_ij^+ != 0 and a_ij^+ = a_ij} */
803 /* [strong C-neighbors with positive edges] */
804
805
806 /* de Sterck et al., gives us this: */
807 /* Rewritten Equation (6) on p. 119 */
808 /* w_ij = - a_ji / a_ii \frac{\sum_{k\in N_i} a_ik} {\sum k\inC_i^s} a_ik}, j\in C_i^s */
809
810 /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
811 /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
812 /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
813 /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
814 /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
815 /* NOTE: The text says to modify, if P_i^+ is zero but it isn't entirely clear how that */
816 /* works. We'll follow the PyAMG implementation in a few important ways. */
817
820
821 // Initial (estimated) allocation
822 // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
823 // needs to be a copy below.
824 using STS = typename Teuchos::ScalarTraits<SC>;
825 using MT = typename STS::magnitudeType;
826 using MTS = typename Teuchos::ScalarTraits<MT>;
827 size_t Nrows = A.getLocalNumRows();
828 double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
829 double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
830 // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
831 double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
832
833 size_t nnz_est = std::max(Nrows,std::min((size_t)A.getLocalNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
834 SC SC_ZERO = STS::zero();
835 MT MT_ZERO = MTS::zero();
836 Array<size_t> tmp_rowptr(Nrows+1);
837 Array<LO> tmp_colind(nnz_est);
838
839 // Algorithm (count+realloc)
840 // For each row, i,
841 // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
842 size_t ct=0;
843 for(LO row=0; row < (LO) Nrows; row++) {
844 size_t row_start = eis_rowptr[row];
845 ArrayView<const LO> indices;
846 ArrayView<const SC> vals;
847 std::set<LO> C_hat;
848 if(myPointType[row] == DIRICHLET_PT) {
849 // Dirichlet points get ignored completely
850 }
851 else if(myPointType[row] == C_PT) {
852 // C-Points get a single 1 in their row
853 C_hat.insert(cpoint2pcol[row]);
854 }
855 else {
856 // F-Points have a more complicated interpolation
857
858 // C-neighbors of row
859 A.getLocalRowView(row, indices, vals);
860 for(LO j=0; j<indices.size(); j++)
861 if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
862 C_hat.insert(cpoint2pcol[indices[j]]);
863 }// end else
864
865 // Realloc if needed
866 if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
867 tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
868 }
869
870 // Copy
871 std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
872 ct+=C_hat.size();
873 tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
874 }
875 // Resize down
876 tmp_colind.resize(tmp_rowptr[Nrows]);
877
878 // Allocate memory & copy
879 P = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
880 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
881 ArrayRCP<size_t> P_rowptr;
882 ArrayRCP<LO> P_colind;
883 ArrayRCP<SC> P_values;
884
885#ifdef CMS_DEBUG
886printf("CMS: Allocating P w/ %d nonzeros\n",(int)tmp_rowptr[Nrows]);
887#endif
888 PCrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
889 TEUCHOS_TEST_FOR_EXCEPTION(tmp_rowptr.size() !=P_rowptr.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (rowptr)");
890 TEUCHOS_TEST_FOR_EXCEPTION(tmp_colind.size() !=P_colind.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (colind)");
891 // FIXME: This can be short-circuited for Tpetra, if we decide we care
892 for(LO i=0; i<(LO)Nrows+1; i++)
893 P_rowptr[i] = tmp_rowptr[i];
894 for(LO i=0; i<(LO)tmp_rowptr[Nrows]; i++)
895 P_colind[i] = tmp_colind[i];
896
897
898 // Algorithm (numeric)
899 for(LO i=0; i < (LO)Nrows; i++) {
900 if(myPointType[i] == DIRICHLET_PT) {
901 // Dirichlet points get ignored completely
902#ifdef CMS_DEBUG
903 // DEBUG
904 printf("** A(%d,:) is a Dirichlet-Point.\n",i);
905#endif
906 }
907 else if (myPointType[i] == C_PT) {
908 // C Points get a single 1 in their row
909 P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
910#ifdef CMS_DEBUG
911 // DEBUG
912 printf("** A(%d,:) is a C-Point.\n",i);
913#endif
914 }
915 else {
916 /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
917 /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
918 /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
919 /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
920 /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
921 ArrayView<const LO> A_indices_i, A_incides_k;
922 ArrayView<const SC> A_vals_i, A_indices_k;
923 A.getLocalRowView(i, A_indices_i, A_vals_i);
924 size_t row_start = eis_rowptr[i];
925
926 ArrayView<LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
927 ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
928
929#ifdef CMS_DEBUG
930 // DEBUG
931 {
932 char mylabel[5]="FUCD";
933 char sw[3]="ws";
934 printf("** A(%d,:) = ",i);
935 for(LO j=0; j<(LO)A_indices_i.size(); j++){
936 printf("%6.4e(%d-%c%c) ",A_vals_i[j],A_indices_i[j],mylabel[1+myPointType[A_indices_i[j]]],sw[(int)edgeIsStrong[row_start+j]]);
937 }
938 printf("\n");
939 }
940#endif
941
942 SC a_ii = SC_ZERO;
943 SC pos_numerator = SC_ZERO, neg_numerator = SC_ZERO;
944 SC pos_denominator = SC_ZERO, neg_denominator = SC_ZERO;
945 // Find the diagonal and compute the sum ratio
946 for(LO j=0; j<(LO)A_indices_i.size(); j++) {
947 SC a_ik = A_vals_i[j];
948 LO k = A_indices_i[j];
949
950 // Diagonal
951 if(i == k) {
952 a_ii = a_ik;
953 }
954 // Only strong C-neighbors are in the denomintor
955 if(myPointType[k] == C_PT && edgeIsStrong[row_start + j]) {
956 if(STS::real(a_ik) > MT_ZERO) pos_denominator += a_ik;
957 else neg_denominator += a_ik;
958 }
959
960 // All neighbors are in the numerator
961 // NOTE: As per PyAMG, this does not include the diagonal
962 if(i != k) {
963 if(STS::real(a_ik) > MT_ZERO) pos_numerator += a_ik;
964 else neg_numerator += a_ik;
965 }
966 }
967 SC alpha = (neg_denominator == MT_ZERO) ? SC_ZERO : (neg_numerator / neg_denominator);
968 SC beta = (pos_denominator == MT_ZERO) ? SC_ZERO : (pos_numerator / pos_denominator);
969 alpha /= -a_ii;
970 beta /= -a_ii;
971
972 // Loop over the entries
973 for(LO p_j=0; p_j<(LO)P_indices_i.size(); p_j++){
974 LO P_col = pcol2cpoint[P_indices_i[p_j]];
975 SC a_ij = SC_ZERO;
976
977 // Find A_ij (if it is there)
978 // FIXME: We can optimize this if we assume sorting
979 for(LO a_j =0; a_j<(LO)A_indices_i.size(); a_j++) {
980 if(A_indices_i[a_j] == P_col) {
981 a_ij = A_vals_i[a_j];
982 break;
983 }
984 }
985 SC w_ij = (STS::real(a_ij) < 0 ) ? (alpha * a_ij) : (beta * a_ij);
986#ifdef CMS_DEBUG
987 SC alpha_or_beta = (STS::real(a_ij) < 0 ) ? alpha : beta;
988 printf("P(%d,%d/%d) = - %6.4e * %6.4e = %6.4e\n",i,P_indices_i[p_j],pcol2cpoint[P_indices_i[p_j]],alpha_or_beta,a_ij,w_ij);
989#endif
990 P_vals_i[p_j] = w_ij;
991 }//end for A_indices_i
992 }//end else C_PT
993 }//end for Numrows
994
995 // Finish up
996 PCrs->setAllValues(P_rowptr, P_colind, P_values);
997 PCrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
998}
999
1000
1001/* ************************************************************************* */
1002template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1004Coarsen_Ext_Plus_I(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
1005
1006 /* ============================================================= */
1007 /* Phase 3 : Extended+i Interpolation */
1008 /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
1009 /* interpolation for parallel algebraic multigrid", NLAA 2008 */
1010 /* 15:115-139 */
1011 /* ============================================================= */
1012 /* Definitions: */
1013 /* F = F-points */
1014 /* C = C-points */
1015 /* N_i = non-zero neighbors of node i */
1016 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
1017 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
1018 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
1019 /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
1020 /* This guy has a typo. The paper had a \cap instead of \cup */
1021 /* I would note that this set can contain both F-points and */
1022 /* C-points. They're just weak neighbors of this guy. */
1023 /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
1024
1025 /* \hat{C}_i = C_i \cup (\bigcup_{j\inF_i^s} C_j) */
1026 /* [C-neighbors and C-neighbors of strong F-neighbors of i] */
1027 /* */
1028
1029 /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
1030 /* { a_ij, otherwise */
1031
1032
1033 /* Rewritten Equation (19) on p. 123 */
1034 /* f_ik = \frac{\bar{a}_kj}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1035 /* w_ij = -\tilde{a}_ii^{-1} (a_ij + \sum_{k\inF_i^s} a_ik f_ik */
1036 /* for j in \hat{C}_i */
1037
1038 /* Rewritten Equation (20) on p. 124 [for the lumped diagonal] */
1039 /* g_ik = \frac{\bar{a}_ki}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1040 /* \tilde{a}_ii = a_ii + \sum_{n\inN_i^w\setminus \hat{C}_i} a_in + \sum_{k\inF_i^s} a_ik g_ik */
1041 TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"ClassicalPFactory: Ext+i not implemented");
1042
1043}
1044
1045
1046
1047
1048/* ************************************************************************* */
1049template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1051GenerateStrengthFlags(const Matrix & A,const GraphBase & graph, Teuchos::Array<size_t> & eis_rowptr,Teuchos::Array<bool> & edgeIsStrong) const {
1052 // To make this easier, we'll create a bool array equal to the nnz in the matrix
1053 // so we know whether each edge is strong or not. This will save us a bunch of
1054 // trying to match the graph and matrix later
1055 size_t Nrows = A.getLocalNumRows();
1056 eis_rowptr.resize(Nrows+1);
1057
1058 if(edgeIsStrong.size() == 0) {
1059 // Preferred
1060 edgeIsStrong.resize(A.getLocalNumEntries(),false);
1061 }
1062 else {
1063 edgeIsStrong.resize(A.getLocalNumEntries(),false);
1064 edgeIsStrong.assign(A.getLocalNumEntries(),false);
1065 }
1066
1067 eis_rowptr[0] = 0;
1068 for (LO i=0; i<(LO)Nrows; i++) {
1069 LO rowstart = eis_rowptr[i];
1070 ArrayView<const LO> A_indices;
1071 ArrayView<const SC> A_values;
1072 A.getLocalRowView(i, A_indices, A_values);
1073 LO A_size = (LO) A_indices.size();
1074
1075 ArrayView<const LO> G_indices = graph.getNeighborVertices(i);
1076 LO G_size = (LO) G_indices.size();
1077
1078 // Both of these guys should be in the same (sorted) order, but let's check
1079 bool is_ok=true;
1080 for(LO j=0; j<A_size-1; j++)
1081 if (A_indices[j] >= A_indices[j+1]) { is_ok=false; break;}
1082 for(LO j=0; j<G_size-1; j++)
1083 if (G_indices[j] >= G_indices[j+1]) { is_ok=false; break;}
1084 TEUCHOS_TEST_FOR_EXCEPTION(!is_ok, Exceptions::RuntimeError,"ClassicalPFactory: Exected A and Graph to be sorted");
1085
1086 // Now cycle through and set the flags - if the edge is in G it is strong
1087 for(LO g_idx=0, a_idx=0; g_idx < G_size; g_idx++) {
1088 LO col = G_indices[g_idx];
1089 while (A_indices[a_idx] != col && a_idx < A_size) a_idx++;
1090 if(a_idx == A_size) {is_ok=false;break;}
1091 edgeIsStrong[rowstart+a_idx] = true;
1092 }
1093
1094 eis_rowptr[i+1] = eis_rowptr[i] + A_size;
1095 }
1096}
1097
1098
1099/* ************************************************************************* */
1100template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1102GhostCoarseMap(const Matrix &A, const Import & Importer, const ArrayRCP<const LO> myPointType, const RCP<const Map> & coarseMap, RCP<const Map> & coarseColMap) const {
1104 const GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
1105 RCP<GlobalOrdinalVector> d_coarseIds = GlobalOrdinalVectorFactory::Build(A.getRowMap());
1106 ArrayRCP<GO> d_data = d_coarseIds->getDataNonConst(0);
1107 LO ct=0;
1108
1109 for(LO i=0; i<(LO)d_data.size(); i++) {
1110 if(myPointType[i] == C_PT) {
1111 d_data[i] = coarseMap->getGlobalElement(ct);
1112 ct++;
1113 }
1114 else
1115 d_data[i] = GO_INVALID;
1116 }
1117
1118 // Ghost this guy
1119 RCP<GlobalOrdinalVector> c_coarseIds = GlobalOrdinalVectorFactory::Build(A.getColMap());
1120 c_coarseIds->doImport(*d_coarseIds,Importer,Xpetra::INSERT);
1121
1122 // If we assume that A is in Aztec ordering, then any subset of A's unknowns will
1123 // be in Aztec ordering as well, which means we can just condense these guys down
1124 // Overallocate, count and view
1125 ArrayRCP<GO> c_data = c_coarseIds->getDataNonConst(0);
1126
1127 Array<GO> c_gids(c_data.size());
1128 LO count=0;
1129
1130 for(LO i=0; i<(LO)c_data.size(); i++) {
1131 if(c_data[i] != GO_INVALID) {
1132 c_gids[count] = c_data[i];
1133 count++;
1134 }
1135 }
1136 // FIXME: Assumes scalar PDE
1137 std::vector<size_t> stridingInfo_(1);
1138 stridingInfo_[0]=1;
1139 GO domainGIDOffset = 0;
1140
1141 coarseColMap = StridedMapFactory::Build(coarseMap->lib(),
1142 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
1143 c_gids.view(0,count),
1144 coarseMap->getIndexBase(),
1145 stridingInfo_,
1146 coarseMap->getComm(),
1147 domainGIDOffset);
1148
1149}
1150
1151
1152} //namespace MueLu
1153
1154
1155
1156#define MUELU_CLASSICALPFACTORY_SHORT
1157#endif // MUELU_CLASSICALPFACTORY_DEF_HPP
1158
1159
#define SET_VALID_ENTRY(name)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void Coarsen_Direct(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void GenerateStrengthFlags(const Matrix &A, const GraphBase &graph, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Coarsen_Ext_Plus_I(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename ClassicalMapFactory::point_type point_type
void Coarsen_ClassicalModified(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< const Import > remoteOnlyImporter, RCP< Matrix > &P) const
void GhostCoarseMap(const Matrix &A, const Import &Importer, const ArrayRCP< const LO > myPointType, const RCP< const Map > &coarseMap, RCP< const Map > &coarseColMap) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
MueLu representation of a graph.
virtual size_t GetNodeNumEdges() const =0
Return number of edges owned by the calling node.
virtual Teuchos::ArrayView< const LocalOrdinal > getNeighborVertices(LocalOrdinal v) const =0
Return the list of vertices adjacent to the vertex 'v'.
virtual size_t GetNodeNumVertices() const =0
Return number of vertices owned by the calling node.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.
@ Debug
Print additional debugging information.
@ Statistics1
Print more statistics.