Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Experimental_RBILUK_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44#define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45
46#include "Tpetra_BlockMultiVector.hpp"
47#include "Tpetra_BlockView.hpp"
48#include "Ifpack2_OverlappingRowMatrix.hpp"
49#include "Ifpack2_LocalFilter.hpp"
50#include "Ifpack2_Utilities.hpp"
51#include "Ifpack2_RILUK.hpp"
52
53//#define IFPACK2_RBILUK_INITIAL
54#define IFPACK2_RBILUK_INITIAL_NOKK
55
56#ifndef IFPACK2_RBILUK_INITIAL_NOKK
57#include "KokkosBatched_Gemm_Decl.hpp"
58#include "KokkosBatched_Gemm_Serial_Impl.hpp"
59#include "KokkosBatched_Util.hpp"
60#endif
61
62namespace Ifpack2 {
63
64namespace Experimental {
65
66template<class MatrixType>
67RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
68 : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
69 A_(Matrix_in),
70 A_block_(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(Matrix_in))
71{}
72
73template<class MatrixType>
74RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
75 : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
76 A_block_(Matrix_in)
77{}
78
79
80template<class MatrixType>
82
83
84template<class MatrixType>
85void
86RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
87{
88 // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
89
90 // It's legal for A to be null; in that case, you may not call
91 // initialize() until calling setMatrix() with a nonnull input.
92 // Regardless, setting the matrix invalidates any previous
93 // factorization.
94 if (A.getRawPtr () != A_block_.getRawPtr ())
95 {
96 this->isAllocated_ = false;
97 this->isInitialized_ = false;
98 this->isComputed_ = false;
99 this->Graph_ = Teuchos::null;
100 L_block_ = Teuchos::null;
101 U_block_ = Teuchos::null;
102 D_block_ = Teuchos::null;
103 A_block_ = A;
104 }
105}
106
107
108
109template<class MatrixType>
110const typename RBILUK<MatrixType>::block_crs_matrix_type&
112{
113 TEUCHOS_TEST_FOR_EXCEPTION(
114 L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
115 "is null. Please call initialize() (and preferably also compute()) "
116 "before calling this method. If the input matrix has not yet been set, "
117 "you must first call setMatrix() with a nonnull input matrix before you "
118 "may call initialize() or compute().");
119 return *L_block_;
120}
121
122
123template<class MatrixType>
124const typename RBILUK<MatrixType>::block_crs_matrix_type&
126{
127 TEUCHOS_TEST_FOR_EXCEPTION(
128 D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
129 "(of diagonal entries) is null. Please call initialize() (and "
130 "preferably also compute()) before calling this method. If the input "
131 "matrix has not yet been set, you must first call setMatrix() with a "
132 "nonnull input matrix before you may call initialize() or compute().");
133 return *D_block_;
134}
135
136
137template<class MatrixType>
138const typename RBILUK<MatrixType>::block_crs_matrix_type&
140{
141 TEUCHOS_TEST_FOR_EXCEPTION(
142 U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
143 "is null. Please call initialize() (and preferably also compute()) "
144 "before calling this method. If the input matrix has not yet been set, "
145 "you must first call setMatrix() with a nonnull input matrix before you "
146 "may call initialize() or compute().");
147 return *U_block_;
148}
149
150template<class MatrixType>
152{
153 using Teuchos::null;
154 using Teuchos::rcp;
155
156 if (! this->isAllocated_) {
157 // Deallocate any existing storage. This avoids storing 2x
158 // memory, since RCP op= does not deallocate until after the
159 // assignment.
160 this->L_ = null;
161 this->U_ = null;
162 this->D_ = null;
163 L_block_ = null;
164 U_block_ = null;
165 D_block_ = null;
166
167 // Allocate Matrix using ILUK graphs
168 L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
169 U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
170 D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
171 blockSize_) );
172 L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
173 U_block_->setAllToScalar (STM::zero ());
174 D_block_->setAllToScalar (STM::zero ());
175
176 }
177 this->isAllocated_ = true;
178}
179
180template<class MatrixType>
181Teuchos::RCP<const typename RBILUK<MatrixType>::block_crs_matrix_type>
183 return A_block_;
184}
185
186template<class MatrixType>
188{
189 using Teuchos::RCP;
190 using Teuchos::rcp;
191 using Teuchos::rcp_dynamic_cast;
192 const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
193
194 // FIXME (mfh 04 Nov 2015) Apparently it's OK for A_ to be null.
195 // That probably means that this preconditioner was created with a
196 // BlockCrsMatrix directly, so it doesn't need the LocalFilter.
197
198 // TEUCHOS_TEST_FOR_EXCEPTION
199 // (A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
200 // "RowMatrix) is null. Please call setMatrix() with a nonnull input "
201 // "before calling this method.");
202
203 if (A_block_.is_null ()) {
204 // FIXME (mfh 04 Nov 2015) Why does the input have to be a
205 // LocalFilter? Why can't we just take a regular matrix, and
206 // apply a LocalFilter only if necessary, like other "local"
207 // Ifpack2 preconditioners already do?
208 RCP<const LocalFilter<row_matrix_type> > filteredA =
209 rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_);
210 TEUCHOS_TEST_FOR_EXCEPTION
211 (filteredA.is_null (), std::runtime_error, prefix <<
212 "Cannot cast to filtered matrix.");
213 RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA =
214 rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
215 if (! overlappedA.is_null ()) {
216 A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
217 } else {
218 //If there is no overlap, filteredA could be the block CRS matrix
219 A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
220 }
221 }
222
223 TEUCHOS_TEST_FOR_EXCEPTION
224 (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
225 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
226 "input before calling this method.");
227 TEUCHOS_TEST_FOR_EXCEPTION
228 (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
229 "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
230 "initialize() or compute() with this matrix until the matrix is fill "
231 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
232 "underlying graph is fill complete.");
233
234 blockSize_ = A_block_->getBlockSize();
235
236 Teuchos::Time timer ("RBILUK::initialize");
237 double startTime = timer.wallTime();
238 { // Start timing
239 Teuchos::TimeMonitor timeMon (timer);
240
241 // Calling initialize() means that the user asserts that the graph
242 // of the sparse matrix may have changed. We must not just reuse
243 // the previous graph in that case.
244 //
245 // Regarding setting isAllocated_ to false: Eventually, we may want
246 // some kind of clever memory reuse strategy, but it's always
247 // correct just to blow everything away and start over.
248 this->isInitialized_ = false;
249 this->isAllocated_ = false;
250 this->isComputed_ = false;
251 this->Graph_ = Teuchos::null;
252
253 typedef Tpetra::CrsGraph<local_ordinal_type,
255 node_type> crs_graph_type;
256
257 RCP<const crs_graph_type> matrixCrsGraph = Teuchos::rcpFromRef(A_block_->getCrsGraph() );
258 this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (matrixCrsGraph,
259 this->LevelOfFill_, 0));
260
261 this->Graph_->initialize ();
262 allocate_L_and_U_blocks ();
263#ifdef IFPACK2_RBILUK_INITIAL
264 initAllValues (*A_block_);
265#endif
266 } // Stop timing
267
268 this->isInitialized_ = true;
269 this->numInitialize_ += 1;
270 this->initializeTime_ += (timer.wallTime() - startTime);
271}
272
273
274template<class MatrixType>
275void
277initAllValues (const block_crs_matrix_type& A)
278{
279 using Teuchos::RCP;
280 typedef Tpetra::Map<LO,GO,node_type> map_type;
281
282 LO NumIn = 0, NumL = 0, NumU = 0;
283 bool DiagFound = false;
284 size_t NumNonzeroDiags = 0;
285 size_t MaxNumEntries = A.getLocalMaxNumRowEntries();
286 LO blockMatSize = blockSize_*blockSize_;
287
288 // First check that the local row map ordering is the same as the local portion of the column map.
289 // The extraction of the strictly lower/upper parts of A, as well as the factorization,
290 // implicitly assume that this is the case.
291 Teuchos::ArrayView<const GO> rowGIDs = A.getRowMap()->getLocalElementList();
292 Teuchos::ArrayView<const GO> colGIDs = A.getColMap()->getLocalElementList();
293 bool gidsAreConsistentlyOrdered=true;
294 GO indexOfInconsistentGID=0;
295 for (GO i=0; i<rowGIDs.size(); ++i) {
296 if (rowGIDs[i] != colGIDs[i]) {
297 gidsAreConsistentlyOrdered=false;
298 indexOfInconsistentGID=i;
299 break;
300 }
301 }
302 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
303 "The ordering of the local GIDs in the row and column maps is not the same"
304 << std::endl << "at index " << indexOfInconsistentGID
305 << ". Consistency is required, as all calculations are done with"
306 << std::endl << "local indexing.");
307
308 // Allocate temporary space for extracting the strictly
309 // lower and upper parts of the matrix A.
310 Teuchos::Array<LO> LI(MaxNumEntries);
311 Teuchos::Array<LO> UI(MaxNumEntries);
312 Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
313 Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
314
315 Teuchos::Array<scalar_type> diagValues(blockMatSize);
316
317 L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
318 U_block_->setAllToScalar (STM::zero ());
319 D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
320
321 // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
322 // host, so sync to host first. The const_cast is unfortunate but
323 // is our only option to make this correct.
324
325 /*
326 const_cast<block_crs_matrix_type&> (A).sync_host ();
327 L_block_->sync_host ();
328 U_block_->sync_host ();
329 D_block_->sync_host ();
330 // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
331 L_block_->modify_host ();
332 U_block_->modify_host ();
333 D_block_->modify_host ();
334 */
335
336 RCP<const map_type> rowMap = L_block_->getRowMap ();
337
338 // First we copy the user's matrix into L and U, regardless of fill level.
339 // It is important to note that L and U are populated using local indices.
340 // This means that if the row map GIDs are not monotonically increasing
341 // (i.e., permuted or gappy), then the strictly lower (upper) part of the
342 // matrix is not the one that you would get if you based L (U) on GIDs.
343 // This is ok, as the *order* of the GIDs in the rowmap is a better
344 // expression of the user's intent than the GIDs themselves.
345
346 //TODO BMK: Revisit this fence when BlockCrsMatrix is refactored.
347 Kokkos::fence();
348 using inds_type = typename row_matrix_type::local_inds_host_view_type;
349 using vals_type = typename row_matrix_type::values_host_view_type;
350 for (size_t myRow=0; myRow<A.getLocalNumRows(); ++myRow) {
351 LO local_row = myRow;
352
353 //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
354 // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
355 inds_type InI;
356 vals_type InV;
357 A.getLocalRowView(local_row, InI, InV);
358 NumIn = (LO)InI.size();
359
360 // Split into L and U (we don't assume that indices are ordered).
361
362 NumL = 0;
363 NumU = 0;
364 DiagFound = false;
365
366 for (LO j = 0; j < NumIn; ++j) {
367 const LO k = InI[j];
368 const LO blockOffset = blockMatSize*j;
369
370 if (k == local_row) {
371 DiagFound = true;
372 // Store perturbed diagonal in Tpetra::Vector D_
373 for (LO jj = 0; jj < blockMatSize; ++jj)
374 diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
375 D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
376 }
377 else if (k < 0) { // Out of range
378 TEUCHOS_TEST_FOR_EXCEPTION(
379 true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
380 "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
381 "probably an artifact of the undocumented assumptions of the "
382 "original implementation (likely copied and pasted from Ifpack). "
383 "Nevertheless, the code I found here insisted on this being an error "
384 "state, so I will throw an exception here.");
385 }
386 else if (k < local_row) {
387 LI[NumL] = k;
388 const LO LBlockOffset = NumL*blockMatSize;
389 for (LO jj = 0; jj < blockMatSize; ++jj)
390 LV[LBlockOffset+jj] = InV[blockOffset+jj];
391 NumL++;
392 }
393 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
394 UI[NumU] = k;
395 const LO UBlockOffset = NumU*blockMatSize;
396 for (LO jj = 0; jj < blockMatSize; ++jj)
397 UV[UBlockOffset+jj] = InV[blockOffset+jj];
398 NumU++;
399 }
400 }
401
402 // Check in things for this row of L and U
403
404 if (DiagFound) {
405 ++NumNonzeroDiags;
406 } else
407 {
408 for (LO jj = 0; jj < blockSize_; ++jj)
409 diagValues[jj*(blockSize_+1)] = this->Athresh_;
410 D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
411 }
412
413 if (NumL) {
414 L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
415 }
416
417 if (NumU) {
418 U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
419 }
420 }
421
422 // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
423 // ever gets a device implementation.
424 /*
425 {
426 typedef typename block_crs_matrix_type::device_type device_type;
427 const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
428 L_block_->template sync<device_type> ();
429 U_block_->template sync<device_type> ();
430 D_block_->template sync<device_type> ();
431 }
432 */
433 this->isInitialized_ = true;
434}
435
436namespace { // (anonymous)
437
438// For a given Kokkos::View type, possibly unmanaged, get the
439// corresponding managed Kokkos::View type. This is handy for
440// translating from little_block_type or little_host_vec_type (both
441// possibly unmanaged) to their managed versions.
442template<class LittleBlockType>
443struct GetManagedView {
444 static_assert (Kokkos::is_view<LittleBlockType>::value,
445 "LittleBlockType must be a Kokkos::View.");
446 typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
447 typename LittleBlockType::array_layout,
448 typename LittleBlockType::device_type> managed_non_const_type;
449 static_assert (static_cast<int> (managed_non_const_type::rank) ==
450 static_cast<int> (LittleBlockType::rank),
451 "managed_non_const_type::rank != LittleBlock::rank. "
452 "Please report this bug to the Ifpack2 developers.");
453};
454
455} // namespace (anonymous)
456
457template<class MatrixType>
459{
460 typedef impl_scalar_type IST;
461 const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
462
463 // initialize() checks this too, but it's easier for users if the
464 // error shows them the name of the method that they actually
465 // called, rather than the name of some internally called method.
466 TEUCHOS_TEST_FOR_EXCEPTION
467 (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
468 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
469 "input before calling this method.");
470 TEUCHOS_TEST_FOR_EXCEPTION
471 (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
472 "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
473 "initialize() or compute() with this matrix until the matrix is fill "
474 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
475 "underlying graph is fill complete.");
476
477 if (! this->isInitialized ()) {
478 initialize (); // Don't count this in the compute() time
479 }
480
481 // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
482 // host, so sync to host first. The const_cast is unfortunate but
483 // is our only option to make this correct.
484 if (! A_block_.is_null ()) {
485 Teuchos::RCP<block_crs_matrix_type> A_nc =
486 Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
487 // A_nc->sync_host ();
488 }
489 /*
490 L_block_->sync_host ();
491 U_block_->sync_host ();
492 D_block_->sync_host ();
493 // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
494 L_block_->modify_host ();
495 U_block_->modify_host ();
496 D_block_->modify_host ();
497 */
498
499 Teuchos::Time timer ("RBILUK::compute");
500 double startTime = timer.wallTime();
501 { // Start timing
502 Teuchos::TimeMonitor timeMon (timer);
503 this->isComputed_ = false;
504
505 // MinMachNum should be officially defined, for now pick something a little
506 // bigger than IEEE underflow value
507
508// const scalar_type MinDiagonalValue = STS::rmin ();
509// const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
510 initAllValues (*A_block_);
511
512 size_t NumIn;
513 LO NumL, NumU, NumURead;
514
515 // Get Maximum Row length
516 const size_t MaxNumEntries =
517 L_block_->getLocalMaxNumRowEntries () + U_block_->getLocalMaxNumRowEntries () + 1;
518
519 const LO blockMatSize = blockSize_*blockSize_;
520
521 // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
522 // expressing these strides explicitly, in order to finish #177
523 // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
524 const LO rowStride = blockSize_;
525
526 Teuchos::Array<int> ipiv_teuchos(blockSize_);
527 Kokkos::View<int*, Kokkos::HostSpace,
528 Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
529 Teuchos::Array<IST> work_teuchos(blockSize_);
530 Kokkos::View<IST*, Kokkos::HostSpace,
531 Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
532
533 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
534 Teuchos::Array<int> colflag(num_cols);
535
536 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock ("diagModBlock", blockSize_, blockSize_);
537 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp ("matTmp", blockSize_, blockSize_);
538 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier ("multiplier", blockSize_, blockSize_);
539
540// Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
541
542 // Now start the factorization.
543
544 // Need some integer workspace and pointers
545 LO NumUU;
546 for (size_t j = 0; j < num_cols; ++j) {
547 colflag[j] = -1;
548 }
549 Teuchos::Array<LO> InI(MaxNumEntries, 0);
550 Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
551
552 const LO numLocalRows = L_block_->getLocalNumRows ();
553 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
554
555 // Fill InV, InI with current row of L, D and U combined
556
557 NumIn = MaxNumEntries;
558 local_inds_host_view_type colValsL;
559 values_host_view_type valsL;
560
561 L_block_->getLocalRowView(local_row, colValsL, valsL);
562 NumL = (LO) colValsL.size();
563 for (LO j = 0; j < NumL; ++j)
564 {
565 const LO matOffset = blockMatSize*j;
566 little_block_host_type lmat((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
567 little_block_host_type lmatV((typename little_block_host_type::value_type*) &InV[matOffset],blockSize_,rowStride);
568 //lmatV.assign(lmat);
569 Tpetra::COPY (lmat, lmatV);
570 InI[j] = colValsL[j];
571 }
572
573 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
574 little_block_host_type dmatV((typename little_block_host_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
575 //dmatV.assign(dmat);
576 Tpetra::COPY (dmat, dmatV);
577 InI[NumL] = local_row;
578
579 local_inds_host_view_type colValsU;
580 values_host_view_type valsU;
581 U_block_->getLocalRowView(local_row, colValsU, valsU);
582 NumURead = (LO) colValsU.size();
583 NumU = 0;
584 for (LO j = 0; j < NumURead; ++j)
585 {
586 if (!(colValsU[j] < numLocalRows)) continue;
587 InI[NumL+1+j] = colValsU[j];
588 const LO matOffset = blockMatSize*(NumL+1+j);
589 little_block_host_type umat((typename little_block_host_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
590 little_block_host_type umatV((typename little_block_host_type::value_type*) &InV[matOffset], blockSize_, rowStride);
591 //umatV.assign(umat);
592 Tpetra::COPY (umat, umatV);
593 NumU += 1;
594 }
595 NumIn = NumL+NumU+1;
596
597 // Set column flags
598 for (size_t j = 0; j < NumIn; ++j) {
599 colflag[InI[j]] = j;
600 }
601
602#ifndef IFPACK2_RBILUK_INITIAL
603 for (LO i = 0; i < blockSize_; ++i)
604 for (LO j = 0; j < blockSize_; ++j){
605 {
606 diagModBlock(i,j) = 0;
607 }
608 }
609#else
610 scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
611 Kokkos::deep_copy (diagModBlock, diagmod);
612#endif
613
614 for (LO jj = 0; jj < NumL; ++jj) {
615 LO j = InI[jj];
616 little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride); // current_mults++;
617 //multiplier.assign(currentVal);
618 Tpetra::COPY (currentVal, multiplier);
619
620 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j,j);
621 // alpha = 1, beta = 0
622#ifndef IFPACK2_RBILUK_INITIAL_NOKK
623 KokkosBatched::Experimental::SerialGemm
624 <KokkosBatched::Experimental::Trans::NoTranspose,
625 KokkosBatched::Experimental::Trans::NoTranspose,
626 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
627 (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
628#else
629 Tpetra::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
630 STS::zero (), matTmp);
631#endif
632 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
633 //currentVal.assign(matTmp);
634 Tpetra::COPY (matTmp, currentVal);
635 local_inds_host_view_type UUI;
636 values_host_view_type UUV;
637
638 U_block_->getLocalRowView(j, UUI, UUV);
639 NumUU = (LO) UUI.size();
640
641 if (this->RelaxValue_ == STM::zero ()) {
642 for (LO k = 0; k < NumUU; ++k) {
643 if (!(UUI[k] < numLocalRows)) continue;
644 const int kk = colflag[UUI[k]];
645 if (kk > -1) {
646 little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
647 little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
648#ifndef IFPACK2_RBILUK_INITIAL_NOKK
649 KokkosBatched::Experimental::SerialGemm
650 <KokkosBatched::Experimental::Trans::NoTranspose,
651 KokkosBatched::Experimental::Trans::NoTranspose,
652 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
653 ( magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
654#else
655 Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
656 STM::one (), kkval);
657#endif
658 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
659 }
660 }
661 }
662 else {
663 for (LO k = 0; k < NumUU; ++k) {
664 if (!(UUI[k] < numLocalRows)) continue;
665 const int kk = colflag[UUI[k]];
666 little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
667 if (kk > -1) {
668 little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
669#ifndef IFPACK2_RBILUK_INITIAL_NOKK
670 KokkosBatched::Experimental::SerialGemm
671 <KokkosBatched::Experimental::Trans::NoTranspose,
672 KokkosBatched::Experimental::Trans::NoTranspose,
673 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
674 (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
675#else
676 Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
677 STM::one (), kkval);
678#endif
679 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
680 }
681 else {
682#ifndef IFPACK2_RBILUK_INITIAL_NOKK
683 KokkosBatched::Experimental::SerialGemm
684 <KokkosBatched::Experimental::Trans::NoTranspose,
685 KokkosBatched::Experimental::Trans::NoTranspose,
686 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
687 (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
688#else
689 Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
690 STM::one (), diagModBlock);
691#endif
692 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
693 }
694 }
695 }
696 }
697 if (NumL) {
698 // Replace current row of L
699 L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
700 }
701
702 // dmat.assign(dmatV);
703 Tpetra::COPY (dmatV, dmat);
704
705 if (this->RelaxValue_ != STM::zero ()) {
706 //dmat.update(this->RelaxValue_, diagModBlock);
707 Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
708 }
709
710// if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
711// if (STS::real (DV[i]) < STM::zero ()) {
712// DV[i] = -MinDiagonalValue;
713// }
714// else {
715// DV[i] = MinDiagonalValue;
716// }
717// }
718// else
719 {
720 int lapackInfo = 0;
721 for (int k = 0; k < blockSize_; ++k) {
722 ipiv[k] = 0;
723 }
724
725 Tpetra::GETF2 (dmat, ipiv, lapackInfo);
726 //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
727 TEUCHOS_TEST_FOR_EXCEPTION(
728 lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
729 "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
730
731 Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
732 //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
733 TEUCHOS_TEST_FOR_EXCEPTION(
734 lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
735 "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
736 }
737
738 for (LO j = 0; j < NumU; ++j) {
739 little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride); // current_mults++;
740 // scale U by the diagonal inverse
741#ifndef IFPACK2_RBILUK_INITIAL_NOKK
742 KokkosBatched::Experimental::SerialGemm
743 <KokkosBatched::Experimental::Trans::NoTranspose,
744 KokkosBatched::Experimental::Trans::NoTranspose,
745 KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
746 (STS::one (), dmat, currentVal, STS::zero (), matTmp);
747#else
748 Tpetra::GEMM ("N", "N", STS::one (), dmat, currentVal,
749 STS::zero (), matTmp);
750#endif
751 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
752 //currentVal.assign(matTmp);
753 Tpetra::COPY (matTmp, currentVal);
754 }
755
756 if (NumU) {
757 // Replace current row of L and U
758 U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
759 }
760
761#ifndef IFPACK2_RBILUK_INITIAL
762 // Reset column flags
763 for (size_t j = 0; j < NumIn; ++j) {
764 colflag[InI[j]] = -1;
765 }
766#else
767 // Reset column flags
768 for (size_t j = 0; j < num_cols; ++j) {
769 colflag[j] = -1;
770 }
771#endif
772 }
773
774 } // Stop timing
775
776 // Sync everything back to device, for efficient solves.
777 /*
778 {
779 typedef typename block_crs_matrix_type::device_type device_type;
780 if (! A_block_.is_null ()) {
781 Teuchos::RCP<block_crs_matrix_type> A_nc =
782 Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
783 A_nc->template sync<device_type> ();
784 }
785 L_block_->template sync<device_type> ();
786 U_block_->template sync<device_type> ();
787 D_block_->template sync<device_type> ();
788 }
789 */
790
791 this->isComputed_ = true;
792 this->numCompute_ += 1;
793 this->computeTime_ += (timer.wallTime() - startTime);
794}
795
796
797template<class MatrixType>
798void
800apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
801 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
802 Teuchos::ETransp mode,
803 scalar_type alpha,
804 scalar_type beta) const
805{
806 using Teuchos::RCP;
807 typedef Tpetra::BlockMultiVector<scalar_type,
809
810 TEUCHOS_TEST_FOR_EXCEPTION(
811 A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
812 "null. Please call setMatrix() with a nonnull input, then initialize() "
813 "and compute(), before calling this method.");
814 TEUCHOS_TEST_FOR_EXCEPTION(
815 ! this->isComputed (), std::runtime_error,
816 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
817 "you must call compute() before calling this method.");
818 TEUCHOS_TEST_FOR_EXCEPTION(
819 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
820 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
821 "X.getNumVectors() = " << X.getNumVectors ()
822 << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
823 TEUCHOS_TEST_FOR_EXCEPTION(
824 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
825 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
826 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
827 "fixed. There is a FIXME in this file about this very issue.");
828
829 const LO blockMatSize = blockSize_*blockSize_;
830
831 const LO rowStride = blockSize_;
832
833 BMV yBlock (Y, * (A_block_->getGraph ()->getDomainMap ()), blockSize_);
834 const BMV xBlock (X, * (A_block_->getColMap ()), blockSize_);
835
836 Teuchos::Array<scalar_type> lclarray(blockSize_);
837 little_host_vec_type lclvec((typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
838 const scalar_type one = STM::one ();
839 const scalar_type zero = STM::zero ();
840
841 Teuchos::Time timer ("RBILUK::apply");
842 double startTime = timer.wallTime();
843 { // Start timing
844 Teuchos::TimeMonitor timeMon (timer);
845 if (alpha == one && beta == zero) {
846 if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
847 // Start by solving L C = X for C. C must have the same Map
848 // as D. We have to use a temp multivector, since our
849 // implementation of triangular solves does not allow its
850 // input and output to alias one another.
851 //
852 // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
853 const LO numVectors = xBlock.getNumVectors();
854 BMV cBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
855 BMV rBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
856 for (LO imv = 0; imv < numVectors; ++imv)
857 {
858 for (size_t i = 0; i < D_block_->getLocalNumRows(); ++i)
859 {
860 LO local_row = i;
861 const_host_little_vec_type xval =
862 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
863 little_host_vec_type cval =
864 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
865 //cval.assign(xval);
866 Tpetra::COPY (xval, cval);
867
868 local_inds_host_view_type colValsL;
869 values_host_view_type valsL;
870 L_block_->getLocalRowView(local_row, colValsL, valsL);
871 LO NumL = (LO) colValsL.size();
872
873 for (LO j = 0; j < NumL; ++j)
874 {
875 LO col = colValsL[j];
876 const_host_little_vec_type prevVal =
877 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
878
879 const LO matOffset = blockMatSize*j;
880 little_block_host_type lij((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
881
882 //cval.matvecUpdate(-one, lij, prevVal);
883 Tpetra::GEMV (-one, lij, prevVal, cval);
884 }
885 }
886 }
887
888 // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
889 D_block_->applyBlock(cBlock, rBlock);
890
891 // Solve U Y = R.
892 for (LO imv = 0; imv < numVectors; ++imv)
893 {
894 const LO numRows = D_block_->getLocalNumRows();
895 for (LO i = 0; i < numRows; ++i)
896 {
897 LO local_row = (numRows-1)-i;
898 const_host_little_vec_type rval =
899 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
900 little_host_vec_type yval =
901 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
902 //yval.assign(rval);
903 Tpetra::COPY (rval, yval);
904
905 local_inds_host_view_type colValsU;
906 values_host_view_type valsU;
907 U_block_->getLocalRowView(local_row, colValsU, valsU);
908 LO NumU = (LO) colValsU.size();
909
910 for (LO j = 0; j < NumU; ++j)
911 {
912 LO col = colValsU[NumU-1-j];
913 const_host_little_vec_type prevVal =
914 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
915
916 const LO matOffset = blockMatSize*(NumU-1-j);
917 little_block_host_type uij((typename little_block_host_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
918
919 //yval.matvecUpdate(-one, uij, prevVal);
920 Tpetra::GEMV (-one, uij, prevVal, yval);
921 }
922 }
923 }
924 }
925 else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
926 TEUCHOS_TEST_FOR_EXCEPTION(
927 true, std::runtime_error,
928 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
929 }
930 }
931 else { // alpha != 1 or beta != 0
932 if (alpha == zero) {
933 if (beta == zero) {
934 Y.putScalar (zero);
935 } else {
936 Y.scale (beta);
937 }
938 } else { // alpha != zero
939 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
940 apply (X, Y_tmp, mode);
941 Y.update (alpha, Y_tmp, beta);
942 }
943 }
944 } // Stop timing
945
946 this->numApply_ += 1;
947 this->applyTime_ += (timer.wallTime() - startTime);
948}
949
950
951template<class MatrixType>
953{
954 std::ostringstream os;
955
956 // Output is a valid YAML dictionary in flow style. If you don't
957 // like everything on a single line, you should call describe()
958 // instead.
959 os << "\"Ifpack2::Experimental::RBILUK\": {";
960 os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
961 << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
962
963 os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
964
965 if (A_block_.is_null ()) {
966 os << "Matrix: null";
967 }
968 else {
969 os << "Global matrix dimensions: ["
970 << A_block_->getGlobalNumRows () << ", " << A_block_->getGlobalNumCols () << "]"
971 << ", Global nnz: " << A_block_->getGlobalNumEntries();
972 }
973
974 os << "}";
975 return os.str ();
976}
977
978} // namespace Experimental
979
980} // namespace Ifpack2
981
982// FIXME (mfh 26 Aug 2015) We only need to do instantiation for
983// MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
984// handled internally via dynamic cast.
985
986#define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
987 template class Ifpack2::Experimental::RBILUK< Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
988 template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
989
990#endif
File for utility functions.
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:130
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:150
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:139
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:153
Teuchos::RCP< const block_crs_matrix_type > getBlockMatrix() const
Get the input matrix.
Definition Ifpack2_Experimental_RBILUK_def.hpp:182
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:142
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Experimental_RBILUK_def.hpp:86
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition Ifpack2_Experimental_RBILUK_def.hpp:800
void compute()
Compute the (numeric) incomplete factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:458
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:159
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:136
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:146
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition Ifpack2_Experimental_RBILUK_def.hpp:81
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:125
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:187
std::string description() const
A one-line description of this object.
Definition Ifpack2_Experimental_RBILUK_def.hpp:952
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:111
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition Ifpack2_IlukGraph.hpp:100
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition Ifpack2_RILUK_decl.hpp:254
Ifpack2 features that are experimental. Use at your own risk.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:74