MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SaPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SAPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SAPFACTORY_KOKKOS_DEF_HPP
48
49#ifdef out
50#include "KokkosKernels_Handle.hpp"
51#include "KokkosSparse_spgemm.hpp"
52#include "KokkosSparse_spmv.hpp"
53#endif
55
56#include <Xpetra_Matrix.hpp>
57#include <Xpetra_IteratorOps.hpp>
58
60#include "MueLu_Level.hpp"
61#include "MueLu_MasterList.hpp"
62#include "MueLu_Monitor.hpp"
63#include "MueLu_PerfUtils.hpp"
65#include "MueLu_TentativePFactory.hpp"
66#include "MueLu_Utilities_kokkos.hpp"
67
68#include <sstream>
69
70namespace MueLu {
71
72 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
74 RCP<ParameterList> validParamList = rcp(new ParameterList());
75
76#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
77 SET_VALID_ENTRY("sa: damping factor");
78 SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
79 SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
80 SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
81 SET_VALID_ENTRY("sa: enforce constraints");
82 SET_VALID_ENTRY("tentative: calculate qr");
83 SET_VALID_ENTRY("sa: max eigenvalue");
84#undef SET_VALID_ENTRY
85
86 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
87 validParamList->set< RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
88
89 // Make sure we don't recursively validate options for the matrixmatrix kernels
90 ParameterList norecurse;
91 norecurse.disableRecursiveValidation();
92 validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
93
94
95 return validParamList;
96 }
97
98 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
100 Input(fineLevel, "A");
101
102 // Get default tentative prolongator factory
103 // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
104 RCP<const FactoryBase> initialPFact = GetFactory("P");
105 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
106 coarseLevel.DeclareInput("P", initialPFact.get(), this);
107 }
108
109 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
111 return BuildP(fineLevel, coarseLevel);
112 }
113
114 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
116 FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
117
118 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;
119
120 // Get default tentative prolongator factory
121 // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
122 // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
123 RCP<const FactoryBase> initialPFact = GetFactory("P");
124 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
125 const ParameterList& pL = GetParameterList();
126
127 // Level Get
128 RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
129 RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());
130 RCP<Matrix> finalP;
131 // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
132 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
133 if (Ptent == Teuchos::null) {
134 finalP = Teuchos::null;
135 Set(coarseLevel, "P", finalP);
136 return;
137 }
138
139 if(restrictionMode_) {
140 SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
141
142 A = Utilities_kokkos::Transpose(*A, true); // build transpose of A explicitly
143 }
144
145 //Build final prolongator
146
147 // Reuse pattern if available
148 RCP<ParameterList> APparams;
149 if(pL.isSublist("matrixmatrix: kernel params"))
150 APparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
151 else
152 APparams= rcp(new ParameterList);
153 if (coarseLevel.IsAvailable("AP reuse data", this)) {
154 GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
155
156 APparams = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", this);
157
158 if (APparams->isParameter("graph"))
159 finalP = APparams->get< RCP<Matrix> >("graph");
160 }
161 // By default, we don't need global constants for SaP
162 APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false));
163 APparams->set("compute global constants",APparams->get("compute global constants",false));
164
165 const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
166 const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
167 const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
168 const bool useAbsValueRowSum = pL.get<bool> ("sa: use rowsumabs diagonal scaling");
169 const bool doQRStep = pL.get<bool>("tentative: calculate qr");
170 const bool enforceConstraints= pL.get<bool>("sa: enforce constraints");
171 const SC userDefinedMaxEigen = as<SC>(pL.get<double>("sa: max eigenvalue"));
172
173 // Sanity checking
174 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints,Exceptions::RuntimeError,
175 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
176
177 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
178
179 SC lambdaMax;
180 RCP<Vector> invDiag;
181 if (Teuchos::ScalarTraits<SC>::real(userDefinedMaxEigen) == Teuchos::ScalarTraits<SC>::real(-1.0))
182 {
183 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
184 lambdaMax = A->GetMaxEigenvalueEstimate();
185 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
186 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations <<
187 ( (useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
188 Magnitude stopTol = 1e-4;
189 invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
190 if (useAbsValueRowSum)
191 lambdaMax = Utilities_kokkos::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
192 else
193 lambdaMax = Utilities_kokkos::PowerMethod(*A, true, maxEigenIterations, stopTol);
194 A->SetMaxEigenvalueEstimate(lambdaMax);
195 } else {
196 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
197 }
198 }
199 else {
200 lambdaMax = userDefinedMaxEigen;
201 A->SetMaxEigenvalueEstimate(lambdaMax);
202 }
203 GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
204
205 {
206 SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
207 {
208 SubFactoryMonitor m3(*this, "Diagonal Extraction", coarseLevel);
209 if (useAbsValueRowSum)
210 GetOStream(Runtime0) << "Using rowSumAbs diagonal" << std::endl;
211 if (invDiag == Teuchos::null)
212 invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
213 }
214 SC omega = dampingFactor / lambdaMax;
215 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
216
217 {
218 SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
219 // finalP = Ptent + (I - \omega D^{-1}A) Ptent
220 finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
221 if (enforceConstraints) SatisfyPConstraints( A, finalP);
222 }
223 }
224
225 } else {
226 finalP = Ptent;
227 }
228
229 // Level Set
230 if (!restrictionMode_) {
231 // prolongation factory is in prolongation mode
232 if(!finalP.is_null()) {std::ostringstream oss; oss << "P_" << coarseLevel.GetLevelID(); finalP->setObjectLabel(oss.str());}
233 Set(coarseLevel, "P", finalP);
234
235 // NOTE: EXPERIMENTAL
236 if (Ptent->IsView("stridedMaps"))
237 finalP->CreateView("stridedMaps", Ptent);
238
239 } else {
240 // prolongation factory is in restriction mode
241 RCP<Matrix> R = Utilities_kokkos::Transpose(*finalP, true);
242 Set(coarseLevel, "R", R);
243 if(!R.is_null()) {std::ostringstream oss; oss << "R_" << coarseLevel.GetLevelID(); R->setObjectLabel(oss.str());}
244
245 // NOTE: EXPERIMENTAL
246 if (Ptent->IsView("stridedMaps"))
247 R->CreateView("stridedMaps", Ptent, true);
248 }
249
250 if (IsPrint(Statistics2)) {
251 RCP<ParameterList> params = rcp(new ParameterList());
252 params->set("printLoadBalancingInfo", true);
253 params->set("printCommInfo", true);
254 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);
255 }
256
257 } //Build()
258
259 // Analyze the grid transfer produced by smoothed aggregation and make
260 // modifications if it does not look right. In particular, if there are
261 // negative entries or entries larger than 1, modify P's rows.
262 //
263 // Note: this kind of evaluation probably only makes sense if not doing QR
264 // when constructing tentative P.
265 //
266 // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
267 // these entries to the constraint value and modify the rest of the row
268 // so that the row sum remains the same as before by adding an equal
269 // amount to each remaining entry. However, if the original row sum value
270 // violates the constraints, we set the row sum back to 1 (the row sum of
271 // tentative P). After doing the modification to a row, we need to check
272 // again the entire row to make sure that the modified row does not violate
273 // the constraints.
274
275template<typename local_matrix_type>
277
278 using Scalar= typename local_matrix_type::non_const_value_type;
279 using SC=Scalar;
280 using LO=typename local_matrix_type::non_const_ordinal_type;
281 using Device= typename local_matrix_type::device_type;
282 const Scalar zero = Kokkos::ArithTraits<SC>::zero();
283 const Scalar one = Kokkos::ArithTraits<SC>::one();
285 local_matrix_type localP;
286 Kokkos::View<SC**, Device> ConstraintViolationSum;
287 Kokkos::View<SC**, Device> Rsum;
288 Kokkos::View<size_t**, Device> nPositive;
289
290 constraintKernel(LO nPDEs_,local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
291 {
292 ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
293 Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
294 nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
295 }
296
297 KOKKOS_INLINE_FUNCTION
298 void operator() (const size_t rowIdx) const {
299
300 auto rowPtr = localP.graph.row_map;
301 auto values = localP.values;
302
303 bool checkRow = true;
304
305 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
306
307
308 while (checkRow) {
309
310 // check constraints and compute the row sum
311
312 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
313 Rsum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
314 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) < Kokkos::ArithTraits<SC>::real(zero)) {
315
316 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
317 values(entryIdx) = zero;
318 }
319 else {
320 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) != Kokkos::ArithTraits<SC>::real(zero))
321 nPositive(rowIdx, entryIdx%nPDEs) = nPositive(rowIdx, entryIdx%nPDEs) + 1;
322
323 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(1.00001 )) {
324 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += (values(entryIdx) - one);
325 values(entryIdx) = one;
326 }
327 }
328 }
329
330 checkRow = false;
331
332 // take into account any row sum that violates the contraints
333
334 for (size_t k=0; k < (size_t) nPDEs; k++) {
335
336 if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) < Kokkos::ArithTraits<SC>::magnitude(zero)) {
337 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
338 }
339 else if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) > Kokkos::ArithTraits<SC>::magnitude(1.00001)) {
340 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k)+ (one - Rsum(rowIdx, k)); // rstumin
341 }
342 }
343
344 // check if row need modification
345 for (size_t k=0; k < (size_t) nPDEs; k++) {
346 if (Kokkos::ArithTraits<SC>::magnitude(ConstraintViolationSum(rowIdx, k)) != Kokkos::ArithTraits<SC>::magnitude(zero))
347 checkRow = true;
348 }
349 // modify row
350 if (checkRow) {
351 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
352 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(zero)) {
353 values(entryIdx) = values(entryIdx) +
354 (ConstraintViolationSum(rowIdx, entryIdx%nPDEs)/ (Scalar (nPositive(rowIdx, entryIdx%nPDEs)) != zero ? Scalar (nPositive(rowIdx, entryIdx%nPDEs)) : one));
355 }
356 }
357 for (size_t k=0; k < (size_t) nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
358 }
359 for (size_t k=0; k < (size_t) nPDEs; k++) Rsum(rowIdx, k) = zero;
360 for (size_t k=0; k < (size_t) nPDEs; k++) nPositive(rowIdx, k) = 0;
361 } // while (checkRow) ...
362
363 }
364};
365
366template<typename local_matrix_type>
368
369 using Scalar= typename local_matrix_type::non_const_value_type;
370 using SC=Scalar;
371 using LO=typename local_matrix_type::non_const_ordinal_type;
372 using Device= typename local_matrix_type::device_type;
373 using KAT = Kokkos::ArithTraits<SC>;
374 const Scalar zero = Kokkos::ArithTraits<SC>::zero();
375 const Scalar one = Kokkos::ArithTraits<SC>::one();
377 local_matrix_type localP;
378 Kokkos::View<SC**, Device> origSorted;
379 Kokkos::View<SC**, Device> fixedSorted;
380 Kokkos::View<LO**, Device> inds;
381
382 optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
383 {
384 origSorted = Kokkos::View<SC**, Device>("origSorted" , localP_.numRows(), maxRowEntries_);
385 fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
386 inds = Kokkos::View<LO**, Device>("inds" , localP_.numRows(), maxRowEntries_);
387 }
388
389 KOKKOS_INLINE_FUNCTION
390 void operator() (const size_t rowIdx) const {
391
392 auto rowPtr = localP.graph.row_map;
393 auto values = localP.values;
394
395 auto nnz = rowPtr(rowIdx+1) - rowPtr(rowIdx);
396
397 if (nnz != 0) {
398
399 Scalar rsumTarget = zero;
400 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
401
402 {
403 SC aBigNumber;
404 SC rowSumDeviation, temp, delta;
405 SC closestToLeftBoundDist, closestToRghtBoundDist;
406 LO closestToLeftBound, closestToRghtBound;
407 bool flipped;
408
409 SC leftBound = zero;
410 SC rghtBound = one;
411 if ((KAT::real(rsumTarget) >= KAT::real(leftBound*(static_cast<SC>(nnz)))) &&
412 (KAT::real(rsumTarget) <= KAT::real(rghtBound*(static_cast<SC>(nnz))))){ // has Feasible solution
413
414 flipped = false;
415 // compute aBigNumber to handle some corner cases where we need
416 // something large so that an if statement will be false
417 aBigNumber = KAT::zero();
418 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
419 if ( KAT::magnitude( values(entryIdx) ) > KAT::magnitude(aBigNumber))
420 aBigNumber = KAT::magnitude( values(entryIdx) );
421 }
422 aBigNumber = aBigNumber+ (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound))*(100*one);
423
424 LO ind = 0;
425 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
426 origSorted(rowIdx, ind) = values(entryIdx);
427 inds(rowIdx, ind) = ind;
428 ind++;
429 }
430
431 auto sortVals = Kokkos::subview( origSorted, rowIdx, Kokkos::ALL() );
432 auto sortInds = Kokkos::subview( inds, rowIdx, Kokkos::make_pair(0,ind));
433 // Need permutation indices to sort row values from smallest to largest,
434 // and unsort once row constraints are applied.
435 // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
436 for (LO i = 1; i < static_cast<LO>(nnz); ++i){
437 ind = sortInds(i);
438 LO j = i;
439
440 if ( KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0))) ){
441 for ( ; j > 0; --j) sortInds(j) = sortInds(j - 1);
442
443 sortInds(0) = ind;
444 } else {
445 for ( ; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j-1))); --j) sortInds(j) = sortInds(j-1);
446
447 sortInds(j) = ind;
448 }
449 }
450
451
452 for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); //values is no longer used
453 // find entry in origSorted just to the right of the leftBound
454 closestToLeftBound = 0;
455 while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
456
457 // find entry in origSorted just to the right of the rghtBound
458 closestToRghtBound = closestToLeftBound;
459 while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
460
461 // compute distance between closestToLeftBound and the left bound and the
462 // distance between closestToRghtBound and the right bound.
463
464 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
465 if (closestToRghtBound == static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
466 else closestToRghtBoundDist= origSorted(rowIdx, closestToRghtBound) - rghtBound;
467
468 // compute how far the rowSum is off from the target row sum taking into account
469 // numbers that have been shifted to satisfy bound constraint
470
471 rowSumDeviation = leftBound*(static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz-closestToRghtBound))*rghtBound - rsumTarget;
472 for (LO i=closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
473
474 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
475 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
476 // Later we will flip the data back to its original form.
477 if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
478 flipped = true;
479 temp = leftBound; leftBound = -rghtBound; rghtBound = temp;
480
481 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
482
483 //TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
484 if ((nnz%2) == 1) origSorted(rowIdx, (nnz/2) ) = -origSorted(rowIdx, (nnz/2) );
485 for (LO i=0; i < static_cast<LO>(nnz/2); i++) {
486 temp=origSorted(rowIdx, i);
487 origSorted(rowIdx, i) = -origSorted(rowIdx, nnz-1-i);
488 origSorted(rowIdx, nnz-i-1) = -temp;
489 }
490
491 /* reverse bounds */
492
493 LO itemp = closestToLeftBound;
494 closestToLeftBound = nnz-closestToRghtBound;
495 closestToRghtBound = nnz-itemp;
496 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
497 if (closestToRghtBound == static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
498 else closestToRghtBoundDist= origSorted(rowIdx, closestToRghtBound) - rghtBound;
499
500 rowSumDeviation = -rowSumDeviation;
501 }
502
503 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
504
505 for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
506 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
507 for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
508
509 while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one*1.e-10)*rsumTarget))){ // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
510 if (closestToRghtBound != closestToLeftBound)
511 delta = rowSumDeviation/ static_cast<SC>(closestToRghtBound - closestToLeftBound);
512 else delta = aBigNumber;
513
514 if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
515 if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
516 rowSumDeviation = zero;
517 for (LO i = closestToLeftBound; i < closestToRghtBound ; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
518 }
519 else {
520 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
521 fixedSorted(rowIdx, closestToLeftBound) = leftBound;
522 closestToLeftBound++;
523 if (closestToLeftBound < static_cast<LO>(nnz)) closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
524 else closestToLeftBoundDist = aBigNumber;
525 }
526 }
527 else {
528 if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
529 rowSumDeviation = 0;
530 for (LO i = closestToLeftBound; i < closestToRghtBound ; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
531 }
532 else {
533 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
534 // if (closestToRghtBound < nEntries) {
535 fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
536 closestToRghtBound++;
537 // }
538 if (closestToRghtBound >= static_cast<LO>(nnz)) closestToRghtBoundDist = aBigNumber;
539 else closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
540 }
541 }
542 }
543
544 auto rowStart = rowPtr(rowIdx);
545 if (flipped) {
546 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
547
548 if ((nnz%2) == 1) fixedSorted(rowIdx, (nnz/2) ) = -fixedSorted(rowIdx, (nnz/2) );
549 for (LO i=0; i < static_cast<LO>(nnz/2); i++) {
550 temp=fixedSorted(rowIdx, i);
551 fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz-1-i);
552 fixedSorted(rowIdx, nnz-i-1) = -temp;
553 }
554 }
555 // unsort and update row values with new values
556 for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
557
558 } else { // row does not have feasible solution to match constraint
559 // just set all entries to the same value giving a row sum of 1
560 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one/( static_cast<SC>(nnz) );
561 }
562 }
563
564 }
565
566 }
567};
568
569
570 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
571 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::SatisfyPConstraints(const RCP<Matrix> A, RCP<Matrix>& P) const {
572
573 using Device = typename Matrix::local_matrix_type::device_type;
574 LO nPDEs = A->GetFixedBlockSize();
575
576 using local_mat_type = typename Matrix::local_matrix_type;
577 constraintKernel<local_mat_type> myKernel(nPDEs,P->getLocalMatrixDevice() );
578 Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements() ),
579 myKernel );
580
581 } //SatsifyPConstraints()
582
583 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
584 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::optimalSatisfyPConstraintsForScalarPDEs( RCP<Matrix>& P) const {
585
586 using Device = typename Matrix::local_matrix_type::device_type;
587 LO nPDEs = 1;//A->GetFixedBlockSize();
588
589 using local_mat_type = typename Matrix::local_matrix_type;
590 optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs,P->getLocalMaxNumRowEntries(),P->getLocalMatrixDevice() );
591 Kokkos::parallel_for("enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows() ),
592 myKernel );
593
594 } //SatsifyPConstraints()
595
596} //namespace MueLu
597
598#endif // MUELU_SAPFACTORY_KOKKOS_DEF_HPP
599
600//TODO: restrictionMode_ should use the parameter list.
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Factory for building Smoothed Aggregation prolongators.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static SC PowerMethod(const Matrix &A, bool scaleByDiag=true, LO niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Simple transpose for Tpetra::CrsMatrix types.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=TST::eps() *100, const bool doLumped=false)
Extract Matrix Diagonal.
static RCP< Matrix > Transpose(Matrix &Op, bool optimizeTranspose=false, const std::string &label=std::string())
Transpose a Xpetra::Matrix.
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Statistics0
Print statistics that do not involve significant additional computation.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
typename local_matrix_type::device_type Device
typename local_matrix_type::non_const_value_type Scalar
Kokkos::View< SC **, Device > ConstraintViolationSum
Kokkos::View< size_t **, Device > nPositive
constraintKernel(LO nPDEs_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
Kokkos::View< SC **, Device > Rsum
typename local_matrix_type::non_const_value_type Scalar
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
typename local_matrix_type::non_const_ordinal_type LO
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)