64#define __FUNC__ "Euclid_dhCreate"
88 ctx->isScaled =
false;
94 strcpy (ctx->algo_par,
"pilu");
95 strcpy (ctx->algo_ilu,
"iluk");
98 ctx->sparseTolA = 0.0;
99 ctx->sparseTolF = 0.0;
107 strcpy (ctx->krylovMethod,
"bicgstab");
120 ctx->timing[i] = 0.0;
124 ctx->timingsWereReduced =
false;
131#define __FUNC__ "Euclid_dhDestroy"
145 if (ctx->setupCount > 1 && ctx->printStats)
161 if (ctx->scale != NULL)
166 if (ctx->work != NULL)
171 if (ctx->work2 != NULL)
176 if (ctx->slist != NULL)
181 if (ctx->extRows != NULL)
197#define __FUNC__ "Euclid_dhSetup"
210 if (ctx->setupCount && ctx->printStats)
243 SET_V_ERROR (
"must set ctx->A before calling init");
253 (
"setting up linear system; global rows: %i local rows: %i (on P_0)\n",
257 sprintf (
msgBuf_dh,
"localRow= %i; globalRows= %i; beg_row= %i",
m,
n,
300 goto END_OF_FUNCTION;
312 if (!strcmp (ctx->algo_par,
"bj"))
319 if (ctx->scale == NULL)
326 for (i = 0; i <
m; ++i)
333 if (ctx->work == NULL)
338 if (ctx->work2 == NULL)
351 ctx->timing[
FACTOR_T] += (MPI_Wtime () - t1);
356 if (strcmp (ctx->algo_par,
"none"))
372 if (strcmp (ctx->algo_par,
"none"))
387 if (!strcmp (ctx->algo_par,
"pilu") &&
np_dh > 1)
401 ctx->setupCount += 1;
409#define __FUNC__ "get_runtime_params_private"
425 strcpy (ctx->algo_par, tmp);
429 strcpy (ctx->algo_par,
"bj");
442 ctx->isScaled =
true;
443 strcpy (ctx->algo_ilu,
"ilut");
449 if (!strcmp (ctx->algo_par,
"none"))
451 strcpy (ctx->algo_ilu,
"none");
453 else if (!strcmp (ctx->algo_ilu,
"none"))
455 strcpy (ctx->algo_par,
"none");
469 if (ctx->sparseTolA || !strcmp (ctx->algo_ilu,
"ilut"))
471 ctx->isScaled =
true;
479 strcpy (ctx->krylovMethod, tmp);
481 if (!strcmp (ctx->krylovMethod,
"bcgs"))
483 strcpy (ctx->krylovMethod,
"bicgstab");
489#define __FUNC__ "invert_diagonals_private"
494 int *diag = ctx->F->diag;
495 if (aval == NULL || diag == NULL)
497 SET_INFO (
"can't invert diags; either F->aval or F->diag is NULL");
501 int i,
m = ctx->F->m;
502 for (i = 0; i <
m; ++i)
504 aval[diag[i]] = 1.0 / aval[diag[i]];
512#define __FUNC__ "compute_rho_private"
518 double bufLocal[3], bufGlobal[3];
521 ctx->stats[
NZF_STATS] = (double) ctx->F->rp[
m];
528 bufGlobal[0] = bufLocal[0];
529 bufGlobal[1] = bufLocal[1];
530 bufGlobal[2] = bufLocal[2];
534 MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
542 if (bufGlobal[0] && bufGlobal[1])
544 ctx->rho_final = bufGlobal[1] / bufGlobal[0];
552 if (bufGlobal[0] && bufGlobal[2])
555 100.0 * bufGlobal[2] / bufGlobal[0];
566#define __FUNC__ "factor_private"
574 if (!strcmp (ctx->algo_par,
"none"))
587 br = ctx->sg->beg_rowP[
myid_dh];
588 id = ctx->sg->o2n_sub[
myid_dh];
590 Factor_dhInit (ctx->A,
true,
true, ctx->rho_init,
id, br, &(ctx->F));
592 ctx->F->bdry_count = ctx->sg->bdry_count[
myid_dh];
593 ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
594 if (!strcmp (ctx->algo_par,
"bj"))
595 ctx->F->blockJacobi =
true;
597 ctx->F->blockJacobi =
true;
607 if (!strcmp (ctx->algo_ilu,
"iluk"))
615 if (ctx->sg != NULL && ctx->sg->blocks > 1)
618 (
"only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
634 else if (!strcmp (ctx->algo_ilu,
"ilut"))
645 sprintf (
msgBuf_dh,
"factorization method: %s is not implemented",
657 if (!strcmp (ctx->algo_par,
"bj"))
666 else if (!strcmp (ctx->algo_ilu,
"iluk"))
668 bool bj = ctx->F->blockJacobi;
683 ctx->to = ctx->F->first_bdry;
708 ctx->from = ctx->F->first_bdry;
739 sprintf (
msgBuf_dh,
"factorization method: %s is not implemented",
752#define __FUNC__ "discard_indices_private"
758 int *rp = ctx->F->rp, *cval = ctx->F->cval;
759 double *aval = ctx->F->aval;
760 int m =
F->
m, *nabors = ctx->nabors, nc = ctx->naborCount;
761 int i, j, k, idx, count = 0, start_of_row;
762 int beg_row = ctx->beg_row, end_row = beg_row +
m;
763 int *diag = ctx->F->diag;
770 for (i = 0; i <
m; ++i)
772 for (j = rp[i]; j < rp[i + 1]; ++j)
775 if (col < beg_row || col >= end_row)
778 int owner = find_owner_private_mpi (ctx, col);
781 for (k = 0; k < nc; ++k)
783 if (nabors[k] == owner)
800 "deleting %i indices that would alter the subdomain graph", count);
806 for (i = 0; i <
m; ++i)
808 for (j = start_of_row; j < rp[i + 1]; ++j)
811 double val = aval[j];
819 start_of_row = rp[i + 1];
824 for (i = 0; i <
m; ++i)
826 for (j = rp[i]; j < rp[i + 1]; ++j)
828 if (cval[j] == i + beg_row)
840#define __FUNC__ "Euclid_dhSolve"
847 if (!strcmp (ctx->krylovMethod,
"cg"))
852 else if (!strcmp (ctx->krylovMethod,
"bicgstab"))
859 sprintf (
msgBuf_dh,
"unknown krylov solver: %s", ctx->krylovMethod);
866#define __FUNC__ "Euclid_dhPrintStats"
885 "\n==================== Euclid report (start) ====================\n");
888 fprintf_dh (fp,
" setups: %i\n", ctx->setupCount);
889 fprintf_dh (fp,
" tri solves: %i\n", ctx->itsTotal);
890 fprintf_dh (fp,
" parallelization method: %s\n", ctx->algo_par);
891 fprintf_dh (fp,
" factorization method: %s\n", ctx->algo_ilu);
892 fprintf_dh (fp,
" matrix was row scaled: %i\n", ctx->isScaled);
894 fprintf_dh (fp,
" matrix row count: %i\n", ctx->n);
896 fprintf_dh (fp,
" rho: %g\n", ctx->rho_final);
898 fprintf_dh (fp,
" sparseA: %g\n", ctx->sparseTolA);
902 fprintf_dh (fp,
" solves total: %0.2f (see docs)\n",
906 fprintf_dh (fp,
" subdomain graph setup: %0.2f\n",
913 fprintf_dh (fp,
" misc (should be small): %0.2f\n",
927 fprintf_dh (fp,
"\nApplicable if Euclid's internal solvers were used:\n");
928 fprintf_dh (fp,
"---------------------------------------------------\n");
929 fprintf_dh (fp,
" solve method: %s\n", ctx->krylovMethod);
930 fprintf_dh (fp,
" maxIts: %i\n", ctx->maxIts);
934 "\n==================== Euclid report (end) ======================\n");
943#define __FUNC__ "Euclid_dhPrintStatsShort"
959 blocks = ctx->sg->blocks;
967 apply_per_it = apply_total / (double) ctx->its;
969 perIt = solve / (double) ctx->its;
972 fprintf_dh (fp,
"%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
973 "method",
"subdms",
"level",
"its",
"setup",
"solve",
"total",
974 "perIt",
"perIt",
"rows");
976 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
977 fprintf_dh (fp,
"%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g XXX\n", ctx->algo_par,
994 fprintf_dh (fp,
"%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
995 "",
"",
"",
"",
"",
"setup",
"setup",
"",
"",
"",
"",
"",
"");
997 fprintf_dh (fp,
"%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
998 "method",
"subdms",
"level",
"its",
"total",
"factor",
999 "other",
"apply",
"perIt",
"rho",
"A_tol",
"A_%",
"rows");
1001 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
1004 fprintf_dh (fp,
"%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g XXX\n", ctx->algo_par,
1023 fprintf_dh (fp,
"\n%6s %6s %6s %6s %6s %6s WW\n",
"method",
"level",
1024 "subGph",
"factor",
"solveS",
"perIt");
1025 fprintf_dh (fp,
"------ ----- ----- ----- ----- ----- WW\n");
1026 fprintf_dh (fp,
"%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n",
1037#define __FUNC__ "Euclid_dhPrintStatsShorter"
1044 double rho = ctx->rho_final;
1048 fprintf_dh (fp,
"\nStats from last linear solve: YY\n");
1049 fprintf_dh (fp,
"%6s %6s %6s YY\n",
"its",
"rho",
"A_%");
1051 fprintf_dh (fp,
"%6i %6.2f %6.2f YYY\n",
its, rho, nzUsedRatio);
1056#define __FUNC__ "Euclid_dhPrintScaling"
1065 if (ctx->scale == NULL)
1067 SET_V_ERROR (
"ctx->scale is NULL; was Euclid_dhSetup() called?");
1070 fprintf (fp,
"\n---------- 1st %i row scaling values:\n",
m);
1071 for (i = 0; i <
m; ++i)
1073 fprintf (fp,
" %i %g \n", i + 1, ctx->scale[i]);
1079#define __FUNC__ "reduce_timings_private"
1087 memcpy (bufOUT, ctx->timing,
TIMING_BINS * sizeof (
double));
1088 MPI_Reduce (bufOUT, ctx->timing,
TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
1092 ctx->timingsWereReduced =
true;
1096#define __FUNC__ "Euclid_dhPrintHypreReport"
1118 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
1121 fprintf_dh (fp,
" setups: %i\n", ctx->setupCount);
1122 fprintf_dh (fp,
" tri solves: %i\n", ctx->itsTotal);
1123 fprintf_dh (fp,
" parallelization method: %s\n", ctx->algo_par);
1124 fprintf_dh (fp,
" factorization method: %s\n", ctx->algo_ilu);
1125 if (!strcmp (ctx->algo_ilu,
"iluk"))
1135 fprintf_dh (fp,
" global matrix row count: %i\n", ctx->n);
1137 fprintf_dh (fp,
" rho: %g\n", ctx->rho_final);
1138 fprintf_dh (fp,
" sparseA: %g\n", ctx->sparseTolA);
1142 fprintf_dh (fp,
" solves total: %0.2f (see docs)\n",
1146 fprintf_dh (fp,
" subdomain graph setup: %0.2f\n",
1154 fprintf_dh (fp,
" misc (should be small): %0.2f\n",
1159 if (ctx->sg != NULL)
1168 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");
1175#define __FUNC__ "Euclid_dhPrintTestData"
1186 fprintf (fp,
" setups: %i\n", ctx->setupCount);
1187 fprintf (fp,
" tri solves: %i\n", ctx->its);
1188 fprintf (fp,
" parallelization method: %s\n", ctx->algo_par);
1189 fprintf (fp,
" factorization method: %s\n", ctx->algo_ilu);
1190 fprintf (fp,
" level: %i\n", ctx->level);
1191 fprintf (fp,
" row scaling: %i\n", ctx->isScaled);
void Euclid_dhPrintStats(Euclid_dh ctx, FILE *fp)
void Euclid_dhPrintTestData(Euclid_dh ctx, FILE *fp)
static void reduce_timings_private(Euclid_dh ctx)
void Euclid_dhSolve(Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
void Euclid_dhDestroy(Euclid_dh ctx)
static void factor_private(Euclid_dh ctx)
void Euclid_dhSetup(Euclid_dh ctx)
void Euclid_dhCreate(Euclid_dh *ctxOUT)
void Euclid_dhPrintStatsShort(Euclid_dh ctx, double setup, double solve, FILE *fp)
void Euclid_dhPrintHypreReport(Euclid_dh ctx, FILE *fp)
static void invert_diagonals_private(Euclid_dh ctx)
void Euclid_dhPrintScaling(Euclid_dh ctx, FILE *fp)
void Euclid_dhPrintStatsShorter(Euclid_dh ctx, FILE *fp)
static void compute_rho_private(Euclid_dh ctx)
static void get_runtime_params_private(Euclid_dh ctx)
void ExternalRows_dhInit(ExternalRows_dh er, Euclid_dh ctx)
void ExternalRows_dhCreate(ExternalRows_dh *er)
void ExternalRows_dhSendRows(ExternalRows_dh er)
void ExternalRows_dhDestroy(ExternalRows_dh er)
void ExternalRows_dhRecvRows(ExternalRows_dh er)
void Factor_dhInit(void *A, bool fillFlag, bool avalFlag, double rho, int id, int beg_rowP, Factor_dh *Fout)
void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg)
void Factor_dhDestroy(Factor_dh mat)
int Factor_dhReadNz(Factor_dh mat)
void Parser_dhInsert(Parser_dh p, char *option, char *value)
bool Parser_dhReadInt(Parser_dh p, char *in, int *out)
bool Parser_dhHasSwitch(Parser_dh p, char *s)
bool Parser_dhReadString(Parser_dh p, char *in, char **out)
bool Parser_dhReadDouble(Parser_dh p, char *in, double *out)
void SortedList_dhInit(SortedList_dh sList, SubdomainGraph_dh sg)
void SortedList_dhCreate(SortedList_dh *sList)
void SortedList_dhDestroy(SortedList_dh sList)
void SubdomainGraph_dhDestroy(SubdomainGraph_dh s)
void SubdomainGraph_dhInit(SubdomainGraph_dh s, int blocks, bool bj, void *A)
void SubdomainGraph_dhCreate(SubdomainGraph_dh *s)
void SubdomainGraph_dhPrintRatios(SubdomainGraph_dh s, FILE *fp)
void SubdomainGraph_dhPrintStats(SubdomainGraph_dh sg, FILE *fp)
void printf_dh(char *fmt,...)
void fprintf_dh(FILE *fp, char *fmt,...)
char msgBuf_dh[MSG_BUF_SIZE_DH]
#define PIVOT_FIX_DEFAULT
void EuclidGetDimensions(void *A, int *beg_row, int *rowsLocal, int *rowsGlobal)
void iluk_seq(Euclid_dh ctx)
void iluk_seq_block(Euclid_dh ctx)
void iluk_mpi_bj(Euclid_dh ctx)
void ilut_seq(Euclid_dh ctx)
void iluk_mpi_pilu(Euclid_dh ctx)
void bicgstab_euclid(Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
void cg_euclid(Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
double timing[TIMING_BINS]