Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_KLU2_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Amesos2: Templated Direct Sparse Solver Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ***********************************************************************
41//
42// @HEADER
43
52#ifndef AMESOS2_KLU2_DEF_HPP
53#define AMESOS2_KLU2_DEF_HPP
54
55#include <Teuchos_Tuple.hpp>
56#include <Teuchos_ParameterList.hpp>
57#include <Teuchos_StandardParameterEntryValidators.hpp>
58
60#include "Amesos2_KLU2_decl.hpp"
61
62namespace Amesos2 {
63
64
65template <class Matrix, class Vector>
67 Teuchos::RCP<const Matrix> A,
68 Teuchos::RCP<Vector> X,
69 Teuchos::RCP<const Vector> B )
70 : SolverCore<Amesos2::KLU2,Matrix,Vector>(A, X, B)
71 , transFlag_(0)
72 , is_contiguous_(true)
73{
74 ::KLU2::klu_defaults<klu2_dtype, local_ordinal_type> (&(data_.common_)) ;
75 data_.symbolic_ = NULL;
76 data_.numeric_ = NULL;
77
78 // Override some default options
79 // TODO: use data_ here to init
80}
81
82
83template <class Matrix, class Vector>
85{
86 /* Free KLU2 data_types
87 * - Matrices
88 * - Vectors
89 * - Other data
90 */
91 if (data_.symbolic_ != NULL)
92 ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
93 (&(data_.symbolic_), &(data_.common_)) ;
94 if (data_.numeric_ != NULL)
95 ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
96 (&(data_.numeric_), &(data_.common_)) ;
97
98 // Storage is initialized in numericFactorization_impl()
99 //if ( data_.A.Store != NULL ){
100 // destoy
101 //}
102
103 // only root allocated these SuperMatrices.
104 //if ( data_.L.Store != NULL ){ // will only be true for this->root_
105 // destroy ..
106 //}
107}
108
109template <class Matrix, class Vector>
110bool
112 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
113}
114
115template<class Matrix, class Vector>
116int
118{
119 /* TODO: Define what it means for KLU2
120 */
121#ifdef HAVE_AMESOS2_TIMERS
122 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
123#endif
124
125 return(0);
126}
127
128
129template <class Matrix, class Vector>
130int
132{
133 if (data_.symbolic_ != NULL) {
134 ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
135 (&(data_.symbolic_), &(data_.common_)) ;
136 }
137
138 if ( single_proc_optimization() ) {
139 host_ordinal_type_array host_row_ptr_view;
140 host_ordinal_type_array host_cols_view;
141 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
142 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
143 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
144 ((local_ordinal_type)this->globalNumCols_, host_row_ptr_view.data(),
145 host_cols_view.data(), &(data_.common_)) ;
146 }
147 else
148 {
149 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
150 ((local_ordinal_type)this->globalNumCols_, host_col_ptr_view_.data(),
151 host_rows_view_.data(), &(data_.common_)) ;
152
153 } //end single_process_optim_check = false
154
155 return(0);
156}
157
158
159template <class Matrix, class Vector>
160int
162{
163 using Teuchos::as;
164
165 // Cleanup old L and U matrices if we are not reusing a symbolic
166 // factorization. Stores and other data will be allocated in gstrf.
167 // Only rank 0 has valid pointers, TODO: for KLU2
168
169 int info = 0;
170 if ( this->root_ ) {
171
172 { // Do factorization
173#ifdef HAVE_AMESOS2_TIMERS
174 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
175#endif
176
177 if (data_.numeric_ != NULL) {
178 ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
179 (&(data_.numeric_), &(data_.common_));
180 }
181
182 if ( single_proc_optimization() ) {
183 host_ordinal_type_array host_row_ptr_view;
184 host_ordinal_type_array host_cols_view;
185 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
186 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
187 this->matrixA_->returnValues_kokkos_view(host_nzvals_view_);
188 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
189 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
190 (host_row_ptr_view.data(), host_cols_view.data(), pValues,
191 data_.symbolic_, &(data_.common_));
192 }
193 else {
194 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
195 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
196 (host_col_ptr_view_.data(), host_rows_view_.data(), pValues,
197 data_.symbolic_, &(data_.common_));
198 } //end single_process_optim_check = false
199
200 // To have a test which confirms a throw, we need MPI to throw on all the
201 // ranks. So we delay and broadcast first. Others throws in Amesos2 which
202 // happen on just the root rank would also have the same problem if we
203 // tested them but we decided to fix just this one for the present. This
204 // is the only error/throw we currently have a unit test for.
205 if(data_.numeric_ == nullptr) {
206 info = 1;
207 }
208
209 // This is set after numeric factorization complete as pivoting can be used;
210 // In this case, a discrepancy between symbolic and numeric nnz total can occur.
211 if(info == 0) { // skip if error code so we don't segfault - will throw
212 this->setNnzLU( as<size_t>((data_.numeric_)->lnz) + as<size_t>((data_.numeric_)->unz) );
213 }
214 } // end scope
215
216 } // end this->root_
217
218 /* All processes should have the same error code */
219 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
220
221 TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error,
222 "KLU2 numeric factorization failed");
223
224 return(info);
225}
226
227template <class Matrix, class Vector>
228int
230 const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
231 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
232{
233 using Teuchos::as;
234 int ierr = 0; // returned error code
235
236 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
237 const size_t nrhs = X->getGlobalNumVectors();
238
239 bool bDidAssignX;
240 bool bDidAssignB;
241 {
242#ifdef HAVE_AMESOS2_TIMERS
243 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
244 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
245#endif
246 const bool initialize_data = true;
247 const bool do_not_initialize_data = false;
248 if ( single_proc_optimization() && nrhs == 1 ) {
249 // no msp creation
250 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
251 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
252
253 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
254 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
255 }
256 else {
257 if ( is_contiguous_ == true ) {
258 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
259 host_solve_array_t>::do_get(initialize_data, B, bValues_,
260 as<size_t>(ld_rhs),
261 ROOTED, this->rowIndexBase_);
262 }
263 else {
264 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
265 host_solve_array_t>::do_get(initialize_data, B, bValues_,
266 as<size_t>(ld_rhs),
267 CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
268 }
269
270 // see Amesos2_Tacho_def.hpp for an explanation of why we 'get' X
271 if ( is_contiguous_ == true ) {
272 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
273 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
274 as<size_t>(ld_rhs),
275 ROOTED, this->rowIndexBase_);
276 }
277 else {
278 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
279 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
280 as<size_t>(ld_rhs),
281 CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
282 }
283
284 // klu_tsolve is going to put the solution x into the input b.
285 // Copy b to x then solve in x.
286 // We do not want to solve in b, then copy to x, because if b was assigned
287 // then the solve will change b permanently and mess up the next test cycle.
288 // However if b was actually a copy (bDidAssignB = false) then we can avoid
289 // this deep_copy and just assign xValues_ = bValues_.
290 if(bDidAssignB) {
291 Kokkos::deep_copy(xValues_, bValues_); // need deep_copy or solve will change adapter's b memory which should never happen
292 }
293 else {
294 xValues_ = bValues_; // safe because bValues_ does not point straight to adapter's memory space
295 }
296 }
297 }
298
299 klu2_dtype * pxValues = function_map::convert_scalar(xValues_.data());
300 klu2_dtype * pbValues = function_map::convert_scalar(bValues_.data());
301
302 // can be null for non root
303 if( this->root_) {
304 TEUCHOS_TEST_FOR_EXCEPTION(pbValues == nullptr,
305 std::runtime_error, "Amesos2 Runtime Error: b_vector returned null ");
306
307 TEUCHOS_TEST_FOR_EXCEPTION(pxValues == nullptr,
308 std::runtime_error, "Amesos2 Runtime Error: x_vector returned null ");
309 }
310
311 if ( single_proc_optimization() && nrhs == 1 ) {
312#ifdef HAVE_AMESOS2_TIMERS
313 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
314#endif
315
316 // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
317 // is actually the transpose solve as klu_solve expects Ccs matrix pointers
318 // Thus, if the transFlag_ is true, the non-transpose solve should be used
319 if (transFlag_ == 0)
320 {
321 ::KLU2::klu_tsolve2<klu2_dtype, local_ordinal_type>
322 (data_.symbolic_, data_.numeric_,
323 (local_ordinal_type)this->globalNumCols_,
324 (local_ordinal_type)nrhs,
325 pbValues, pxValues, &(data_.common_)) ;
326 }
327 else {
328 ::KLU2::klu_solve2<klu2_dtype, local_ordinal_type>
329 (data_.symbolic_, data_.numeric_,
330 (local_ordinal_type)this->globalNumCols_,
331 (local_ordinal_type)nrhs,
332 pbValues, pxValues, &(data_.common_)) ;
333 }
334
335 /* All processes should have the same error code */
336 // Teuchos::broadcast(*(this->getComm()), 0, &ierr);
337
338 } // end single_process_optim_check && nrhs == 1
339 else // single proc optimizations but nrhs > 1,
340 // or distributed over processes case
341 {
342 if ( this->root_ ) {
343#ifdef HAVE_AMESOS2_TIMERS
344 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
345#endif
346 if (transFlag_ == 0)
347 {
348 // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
349 // is actually the transpose solve as klu_solve expects Ccs matrix pointers
350 // Thus, if the transFlag_ is true, the non-transpose solve should be used
351 if ( single_proc_optimization() ) {
352 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
353 (data_.symbolic_, data_.numeric_,
354 (local_ordinal_type)this->globalNumCols_,
355 (local_ordinal_type)nrhs,
356 pxValues, &(data_.common_)) ;
357 }
358 else {
359 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
360 (data_.symbolic_, data_.numeric_,
361 (local_ordinal_type)this->globalNumCols_,
362 (local_ordinal_type)nrhs,
363 pxValues, &(data_.common_)) ;
364 }
365 }
366 else
367 {
368 // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
369 // is actually the transpose solve as klu_solve expects Ccs matrix pointers
370 // Thus, if the transFlag_ is true, the non-transpose solve should be used
371 if ( single_proc_optimization() ) {
372 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
373 (data_.symbolic_, data_.numeric_,
374 (local_ordinal_type)this->globalNumCols_,
375 (local_ordinal_type)nrhs,
376 pxValues, &(data_.common_)) ;
377 }
378 else {
379 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
380 (data_.symbolic_, data_.numeric_,
381 (local_ordinal_type)this->globalNumCols_,
382 (local_ordinal_type)nrhs,
383 pxValues, &(data_.common_)) ;
384 }
385 }
386 } // end root_
387 } //end else
388
389 // if bDidAssignX, then we solved straight to the adapter's X memory space without
390 // requiring additional memory allocation, so the x data is already in place.
391 if(!bDidAssignX) {
392#ifdef HAVE_AMESOS2_TIMERS
393 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
394#endif
395
396 if ( is_contiguous_ == true ) {
397 Util::put_1d_data_helper_kokkos_view<
398 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
399 as<size_t>(ld_rhs),
400 ROOTED, this->rowIndexBase_);
401 }
402 else {
403 Util::put_1d_data_helper_kokkos_view<
404 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
405 as<size_t>(ld_rhs),
406 CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
407 }
408 }
409
410 return(ierr);
411}
412
413
414template <class Matrix, class Vector>
415bool
417{
418 // The KLU2 factorization routines can handle square as well as
419 // rectangular matrices, but KLU2 can only apply the solve routines to
420 // square matrices, so we check the matrix for squareness.
421 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
422}
423
424
425template <class Matrix, class Vector>
426void
427KLU2<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
428{
429 using Teuchos::RCP;
430 using Teuchos::getIntegralValue;
431 using Teuchos::ParameterEntryValidator;
432
433 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
434
435 transFlag_ = this->control_.useTranspose_ ? 1: 0;
436 // The KLU2 transpose option can override the Amesos2 option
437 if( parameterList->isParameter("Trans") ){
438 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
439 parameterList->getEntry("Trans").setValidator(trans_validator);
440
441 transFlag_ = getIntegralValue<int>(*parameterList, "Trans");
442 }
443
444 if( parameterList->isParameter("IsContiguous") ){
445 is_contiguous_ = parameterList->get<bool>("IsContiguous");
446 }
447}
448
449
450template <class Matrix, class Vector>
451Teuchos::RCP<const Teuchos::ParameterList>
453{
454 using std::string;
455 using Teuchos::tuple;
456 using Teuchos::ParameterList;
457 using Teuchos::setStringToIntegralParameter;
458
459 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
460
461 if( is_null(valid_params) )
462 {
463 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
464
465 pl->set("Equil", true, "Whether to equilibrate the system before solve, does nothing now");
466 pl->set("IsContiguous", true, "Whether GIDs contiguous");
467
468 setStringToIntegralParameter<int>("Trans", "NOTRANS",
469 "Solve for the transpose system or not",
470 tuple<string>("NOTRANS","TRANS","CONJ"),
471 tuple<string>("Solve with transpose",
472 "Do not solve with transpose",
473 "Solve with the conjugate transpose"),
474 tuple<int>(0, 1, 2),
475 pl.getRawPtr());
476 valid_params = pl;
477 }
478
479 return valid_params;
480}
481
482
483template <class Matrix, class Vector>
484bool
486{
487 using Teuchos::as;
488
489 if(current_phase == SOLVE)return(false);
490
491 if ( single_proc_optimization() ) {
492 // Do nothing in this case - Crs raw pointers will be used
493 }
494 else
495 {
496
497#ifdef HAVE_AMESOS2_TIMERS
498 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
499#endif
500
501 // Only the root image needs storage allocated
502 if( this->root_ ) {
503 host_nzvals_view_ = host_value_type_array(
504 Kokkos::ViewAllocateWithoutInitializing("host_nzvals_view_"), this->globalNumNonZeros_);
505 host_rows_view_ = host_ordinal_type_array(
506 Kokkos::ViewAllocateWithoutInitializing("host_rows_view_"), this->globalNumNonZeros_);
507 host_col_ptr_view_ = host_ordinal_type_array(
508 Kokkos::ViewAllocateWithoutInitializing("host_col_ptr_view_"), this->globalNumRows_ + 1);
509 }
510
511 local_ordinal_type nnz_ret = 0;
512 {
513#ifdef HAVE_AMESOS2_TIMERS
514 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
515#endif
516
517 if ( is_contiguous_ == true ) {
519 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array>
520 ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_,
521 nnz_ret, ROOTED, ARBITRARY, this->rowIndexBase_);
522 }
523 else {
525 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array>
526 ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_,
527 nnz_ret, CONTIGUOUS_AND_ROOTED, ARBITRARY, this->rowIndexBase_);
528 }
529 }
530
531 if( this->root_ ) {
532 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
533 std::runtime_error,
534 "Did not get the expected number of non-zero vals");
535 }
536
537 } //end else single_process_optim_check = false
538
539 return true;
540}
541
542
543template<class Matrix, class Vector>
544const char* KLU2<Matrix,Vector>::name = "KLU2";
545
546
547} // end namespace Amesos2
548
549#endif // AMESOS2_KLU2_DEF_HPP
Amesos2 KLU2 declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:127
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:128
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:143
Amesos2 interface to the KLU2 package.
Definition Amesos2_KLU2_decl.hpp:73
KLU2(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_KLU2_def.hpp:66
~KLU2()
Destructor.
Definition Amesos2_KLU2_def.hpp:84
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:106
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:65
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:652