59 ParameterList &trlist = list.sublist(
"Step").sublist(
"Trust Region");
61 state_->searchSize = trlist.get(
"Initial Radius", -1.0);
62 delMax_ = trlist.get(
"Maximum Radius", ROL_INF<Real>());
63 eta0_ = trlist.get(
"Step Acceptance Threshold", 0.05);
64 eta1_ = trlist.get(
"Radius Shrinking Threshold", 0.05);
65 eta2_ = trlist.get(
"Radius Growing Threshold", 0.9);
66 gamma0_ = trlist.get(
"Radius Shrinking Rate (Negative rho)", 0.0625);
67 gamma1_ = trlist.get(
"Radius Shrinking Rate (Positive rho)", 0.25);
68 gamma2_ = trlist.get(
"Radius Growing Rate", 2.5);
69 TRsafe_ = trlist.get(
"Safeguard Size", 100.0);
70 eps_ = TRsafe_*ROL_EPSILON<Real>();
71 interpRad_ = trlist.get(
"Use Radius Interpolation",
false);
72 verbosity_ = trlist.sublist(
"General").get(
"Output Level", 0);
74 storageNM_ = trlist.get(
"Nonmonotone Storage Size", 0);
75 useNM_ = (storageNM_ <= 0 ? false :
true);
77 ROL::ParameterList &lmlist = trlist.sublist(
"SPG");
78 mu0_ = lmlist.get(
"Sufficient Decrease Parameter", 1e-2);
79 spexp_ = lmlist.get(
"Relative Tolerance Exponent", 1.0);
80 spexp_ = std::max(
static_cast<Real
>(1),std::min(spexp_,
static_cast<Real
>(2)));
81 redlim_ = lmlist.sublist(
"Cauchy Point").get(
"Maximum Number of Reduction Steps", 10);
82 explim_ = lmlist.sublist(
"Cauchy Point").get(
"Maximum Number of Expansion Steps", 10);
83 alpha_ = lmlist.sublist(
"Cauchy Point").get(
"Initial Step Size", 1.0);
84 normAlpha_ = lmlist.sublist(
"Cauchy Point").get(
"Normalize Initial Step Size",
false);
85 interpf_ = lmlist.sublist(
"Cauchy Point").get(
"Reduction Rate", 0.1);
86 extrapf_ = lmlist.sublist(
"Cauchy Point").get(
"Expansion Rate", 10.0);
87 qtol_ = lmlist.sublist(
"Cauchy Point").get(
"Decrease Tolerance", 1e-8);
89 lambdaMin_ = lmlist.sublist(
"Solver").get(
"Minimum Spectral Step Size", 1e-8);
90 lambdaMax_ = lmlist.sublist(
"Solver").get(
"Maximum Spectral Step Size", 1e8);
91 gamma_ = lmlist.sublist(
"Solver").get(
"Sufficient Decrease Tolerance", 1e-4);
92 maxSize_ = lmlist.sublist(
"Solver").get(
"Maximum Storage Size", 10);
93 maxit_ = lmlist.sublist(
"Solver").get(
"Iteration Limit", 25);
94 tol1_ = lmlist.sublist(
"Solver").get(
"Absolute Tolerance", 1e-4);
95 tol2_ = lmlist.sublist(
"Solver").get(
"Relative Tolerance", 1e-2);
96 useMin_ = lmlist.sublist(
"Solver").get(
"Use Smallest Model Iterate",
true);
97 useNMSP_ = lmlist.sublist(
"Solver").get(
"Use Nonmonotone Search",
false);
98 useSimpleSPG_ = !lmlist.sublist(
"Solver").get(
"Compute Cauchy Point",
true);
100 ParameterList &glist = list.sublist(
"General");
102 useInexact_.push_back(glist.get(
"Inexact Objective Function",
false));
103 useInexact_.push_back(glist.get(
"Inexact Gradient",
false));
104 useInexact_.push_back(glist.get(
"Inexact Hessian-Times-A-Vector",
false));
106 ParameterList &ilist = trlist.sublist(
"Inexact").sublist(
"Gradient");
107 scale0_ = ilist.get(
"Tolerance Scaling",
static_cast<Real
>(0.1));
108 scale1_ = ilist.get(
"Relative Tolerance",
static_cast<Real
>(2));
110 ParameterList &vlist = trlist.sublist(
"Inexact").sublist(
"Value");
111 scale_ = vlist.get(
"Tolerance Scaling",
static_cast<Real
>(1.e-1));
112 omega_ = vlist.get(
"Exponent",
static_cast<Real
>(0.9));
113 force_ = vlist.get(
"Forcing Sequence Initial Value",
static_cast<Real
>(1.0));
114 updateIter_ = vlist.get(
"Forcing Sequence Update Frequency",
static_cast<int>(10));
115 forceFactor_ = vlist.get(
"Forcing Sequence Reduction Factor",
static_cast<Real
>(0.1));
117 verbosity_ = list.sublist(
"General").get(
"Output Level",0);
118 writeHeader_ = verbosity_ > 2;
120 useSecantPrecond_ = list.sublist(
"General").sublist(
"Secant").get(
"Use as Preconditioner",
false);
121 useSecantHessVec_ = list.sublist(
"General").sublist(
"Secant").get(
"Use as Hessian",
false);
126 model_ = makePtr<TrustRegionModel_U<Real>>(list,secant,mode);
127 if (secant == nullPtr) {
128 esec_ =
StringToESecant(list.sublist(
"General").sublist(
"Secant").get(
"Type",
"Limited-Memory BFGS"));
222 std::ostream &outStream ) {
223 const Real
zero(0), one(1);
225 Real inTol =
static_cast<Real
>(0.1)*ROL_OVERFLOW<Real>(), outTol(inTol);
226 Real ftrial(0), pRed(0), rho(1), q(0);
228 std::vector<std::string> output;
229 initialize(x,g,inTol,obj,bnd,outStream);
230 Ptr<Vector<Real>> gmod = g.
clone();
231 Ptr<Vector<Real>> pwa1 = x.
clone(), pwa2 = x.
clone();
232 Ptr<Vector<Real>> pwa3 = x.
clone(), pwa4 = x.
clone();
233 Ptr<Vector<Real>> pwa5 = x.
clone(), pwa6 = x.
clone();
234 Ptr<Vector<Real>> pwa7 = x.
clone();
235 Ptr<Vector<Real>> dwa1 = g.
clone(), dwa2 = g.
clone();
237 Real rhoNM(0), sigmac(0), sigmar(0);
238 Real fr(state_->value), fc(state_->value), fmin(state_->value);
243 if (verbosity_ > 0) writeOutput(outStream,
true);
245 while (status_->check(*state_)) {
247 model_->setData(obj,*state_->iterateVec,*state_->gradientVec);
251 gmod->set(*state_->gradientVec);
253 dpsg_simple(x,q,*gmod,*state_->iterateVec,state_->searchSize,*model_,
254 *pwa1,*pwa2,*dwa1,outStream);
257 dcauchy(*state_->stepVec,alpha_,q,*state_->iterateVec,
258 state_->gradientVec->dual(),state_->searchSize,
259 *model_,*dwa1,*dwa2,outStream);
260 x.
plus(*state_->stepVec);
266 dpsg(x,q,*gmod,*state_->iterateVec,state_->searchSize,*model_,
267 *pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*pwa7,*dwa1,outStream);
272 state_->stepVec->set(x); state_->stepVec->axpy(-one,*state_->iterateVec);
273 state_->snorm = state_->stepVec->norm();
276 ftrial = computeValue(inTol,outTol,pRed,state_->value,state_->iter,x,*state_->iterateVec,obj);
281 TRUtils::analyzeRatio<Real>(rho,TRflag_,state_->value,ftrial,pRed,eps_,outStream,verbosity_>1);
283 TRUtils::analyzeRatio<Real>(rhoNM,TRflagNM,fr,ftrial,pRed+sigmar,eps_,outStream,verbosity_>1);
284 TRflag_ = (rho < rhoNM ? TRflagNM : TRflag_);
285 rho = (rho < rhoNM ? rhoNM : rho );
292 x.
set(*state_->iterateVec);
296 state_->searchSize = TRUtils::interpolateRadius<Real>(*state_->gradientVec,*state_->stepVec,
297 state_->snorm,pRed,state_->value,ftrial,state_->searchSize,gamma0_,gamma1_,eta2_,
298 outStream,verbosity_>1);
301 state_->searchSize = gamma1_*std::min(state_->snorm,state_->searchSize);
306 state_->value = ftrial;
310 sigmac += pRed; sigmar += pRed;
311 if (ftrial < fmin) { fmin = ftrial; fc = fmin; sigmac =
zero; L = 0; }
314 if (ftrial > fc) { fc = ftrial; sigmac =
zero; }
315 if (L == storageNM_) { fr = fc; sigmar = sigmac; }
319 if (rho >= eta2_) state_->searchSize = std::min(gamma2_*state_->searchSize, delMax_);
321 dwa1->set(*state_->gradientVec);
322 state_->gnorm = computeGradient(x,*state_->gradientVec,*pwa1,state_->searchSize,obj,outStream);
324 state_->iterateVec->set(x);
326 model_->update(x,*state_->stepVec,*dwa1,*state_->gradientVec,
327 state_->snorm,state_->iter);
331 if (verbosity_ > 0) writeOutput(outStream,writeHeader_);
355 std::ostream &outStream) {
356 const Real half(0.5);
358 Real tol = std::sqrt(ROL_EPSILON<Real>());
360 Real gs(0), snorm(0);
362 snorm = dgpstep(s,g,x,-alpha,outStream);
367 model.
hessVec(dwa,s,x,tol); nhess_++;
370 q = half * s.
apply(dwa) + gs;
371 interp = (q > mu0_*gs);
379 snorm = dgpstep(s,g,x,-alpha,outStream);
381 model.
hessVec(dwa,s,x,tol); nhess_++;
384 q = half * s.
apply(dwa) + gs;
385 search = (q > mu0_*gs) && (cnt < redlim_);
397 snorm = dgpstep(s,g,x,-alpha,outStream);
398 if (snorm <= del && cnt < explim_) {
399 model.
hessVec(dwa,s,x,tol); nhess_++;
402 q = half * s.
apply(dwa) + gs;
403 if (q <= mu0_*gs && std::abs(q-qs) > qtol_*std::abs(qs)) {
421 snorm = dgpstep(s,g,x,-alpha,outStream);
423 if (verbosity_ > 1) {
424 outStream <<
" Cauchy point" << std::endl;
425 outStream <<
" Step length (alpha): " << alpha << std::endl;
426 outStream <<
" Step length (alpha*g): " << snorm << std::endl;
427 outStream <<
" Model decrease (pRed): " << -q << std::endl;
429 outStream <<
" Number of extrapolation steps: " << cnt << std::endl;
445 std::ostream &outStream) {
453 const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
454 Real tol(std::sqrt(ROL_EPSILON<Real>()));
455 Real alpha(1), alphaMax(1), s0s0(0), ss0(0), sHs(0), lambdaTmp(1), snorm(0);
462 Real coeff = one/gmod.
norm();
463 Real lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
465 proj_->project(pwa,outStream); state_->nproj++;
467 Real gs = gmod.
apply(pwa);
468 Real ss = pwa.
dot(pwa);
469 Real gnorm = std::sqrt(ss);
472 const Real gtol = std::min(tol1_,tol2_*gnorm);
475 outStream <<
" Spectral Projected Gradient" << std::endl;
478 while (SPiter_ < maxit_) {
482 model.
hessVec(dwa,pwa,x,tol); nhess_++;
483 sHs = dwa.
apply(pwa);
487 if (gnorm >= del-safeguard) {
489 alphaMax = std::min(one, (-ss0 + std::sqrt(ss0*ss0 - ss*(s0s0-del*del)))/ss);
491 if (sHs <= safeguard)
494 alpha = std::min(alphaMax, -gs/sHs);
497 q += alpha * (gs + half * alpha * sHs);
498 gmod.
axpy(alpha,dwa);
502 pwa1.
set(y); pwa1.
axpy(-one,x);
503 s0s0 = pwa1.
dot(pwa1);
504 snorm = std::sqrt(s0s0);
506 if (verbosity_ > 1) {
507 outStream << std::endl;
508 outStream <<
" Iterate: " << SPiter_ << std::endl;
509 outStream <<
" Spectral step length (lambda): " << lambda << std::endl;
510 outStream <<
" Step length (alpha): " << alpha << std::endl;
511 outStream <<
" Model decrease (pRed): " << -q << std::endl;
512 outStream <<
" Optimality criterion: " << gnorm << std::endl;
513 outStream <<
" Step norm: " << snorm << std::endl;
514 outStream << std::endl;
517 if (snorm >= del - safeguard) { SPflag_ = 2;
break; }
520 lambdaTmp = (sHs <= safeguard ? one/gmod.
norm() : ss/sHs);
521 lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
523 proj_->project(pwa,outStream); state_->nproj++;
525 gs = gmod.
apply(pwa);
527 gnorm = std::sqrt(ss);
529 if (gnorm <= gtol) { SPflag_ = 0;
break; }
531 SPflag_ = (SPiter_==maxit_) ? 1 : SPflag_;
549 std::ostream &outStream) {
557 const Real
zero(0), half(0.5), one(1), two(2);
558 Real tol(std::sqrt(ROL_EPSILON<Real>()));
559 Real alpha(1), sHs(0), alphaTmp(1), mmax(0), qmin(0), lambdaTmp(1);
560 std::deque<Real> mqueue; mqueue.push_back(q);
562 if (useNMSP_ && useMin_) { qmin = q; ymin.
set(y); }
566 pwa.
set(y); pwa.
axpy(-one,pwa1);
567 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
569 Real gnorm = pwa.
norm();
570 const Real gtol = std::min(tol1_,tol2_*gnorm);
573 Real coeff = one/gmod.
norm();
574 Real lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
575 pwa.
set(y); pwa.
axpy(-lambda,pwa1);
576 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
578 Real gs = gmod.
apply(pwa);
579 Real ss = pwa.
dot(pwa);
582 outStream <<
" Spectral Projected Gradient" << std::endl;
585 while (SPiter_ < maxit_) {
589 model.
hessVec(dwa,pwa,x,tol); nhess_++;
590 sHs = dwa.
apply(pwa);
594 mmax = *std::max_element(mqueue.begin(),mqueue.end());
595 alphaTmp = (-(one-gamma_)*gs + std::sqrt(std::pow((one-gamma_)*gs,two)-two*sHs*(q-mmax)))/sHs;
600 alpha = (sHs >
zero ? std::min(one,std::max(
zero,alphaTmp)) : one);
603 q += alpha * (gs + half * alpha * sHs);
604 gmod.
axpy(alpha,dwa);
609 if (
static_cast<int>(mqueue.size())==maxSize_) mqueue.pop_front();
611 if (useMin_ && q <= qmin) { qmin = q; ymin.
set(y); }
616 pwa.
set(y); pwa.
axpy(-one,pwa1);
617 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
621 if (verbosity_ > 1) {
622 outStream << std::endl;
623 outStream <<
" Iterate: " << SPiter_ << std::endl;
624 outStream <<
" Spectral step length (lambda): " << lambda << std::endl;
625 outStream <<
" Step length (alpha): " << alpha << std::endl;
626 outStream <<
" Model decrease (pRed): " << -q << std::endl;
627 outStream <<
" Optimality criterion: " << gnorm << std::endl;
628 outStream << std::endl;
630 if (gnorm < gtol)
break;
634 lambdaTmp = (sHs == 0 ? coeff : ss/sHs);
635 lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
636 pwa.
set(y); pwa.
axpy(-lambda,pwa1);
637 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
639 gs = gmod.
apply(pwa);
642 if (useNMSP_ && useMin_) { q = qmin; y.
set(ymin); }
643 SPflag_ = (SPiter_==maxit_) ? 1 : 0;
654 std::ostream &outStream)
const {
656 const Real
zero(0), half(0.5), one(1), two(2), three(3);
657 const Real eps(ROL_EPSILON<Real>()), tol0(1e1*eps), fudge(1.0-1e-2*sqrt(eps));
658 Real f0(0), f1(0), fc(0), t0(0), t1(1), tc(0), d1(1), d2(1), tol(1);
659 Real p(0), q(0), r(0), s(0), m(0);
660 int cnt(state_->nproj);
662 proj_->project(y1,outStream); state_->nproj++;
663 pwa.
set(y1); pwa.
axpy(-one,x0);
670 tc = t0; fc = f0; yc.
set(y0);
674 if (std::abs(fc-del) < std::abs(f1-del)) {
675 t0 = t1; t1 = tc; tc = t0;
676 f0 = f1; f1 = fc; fc = f0;
679 tol = two*eps*std::abs(t1) + half*tol0;
681 if (std::abs(m) <= tol) { code = 1;
break; }
682 if ((f1 >= fudge*del && f1 <= del))
break;
683 if (std::abs(d1) < tol || std::abs(f0-del) <= std::abs(f1-del)) {
687 s = (f1-del)/(f0-del);
693 q = (f0-del)/(fc-del);
694 r = (f1-del)/(fc-del);
695 p = s*(two*m*q*(q-r)-(t1-t0)*(r-one));
696 q = (q-one)*(r-one)*(s-one);
698 if (p >
zero) q = -q;
702 if (two*p < three*m*q-std::abs(tol*q) && p < std::abs(half*s*q)) {
709 t0 = t1; f0 = f1; y0.
set(y1);
710 if (std::abs(d2) > tol) t1 += d2;
711 else if (m >
zero) t1 += tol;
714 proj_->project(y1,outStream); state_->nproj++;
715 pwa.
set(y1); pwa.
axpy(-one,x0);
717 if ((f1 > del && fc > del) || (f1 <= del && fc <= del)) {
718 tc = t0; fc = f0; yc.
set(y0);
722 if (code==1 && f1>del) x.
set(yc);
724 if (verbosity_ > 1) {
725 outStream << std::endl;
726 outStream <<
" Trust-Region Subproblem Projection" << std::endl;
727 outStream <<
" Number of polyhedral projections: " << state_->nproj-cnt << std::endl;
728 if (code == 1 && f1 > del) {
729 outStream <<
" Transformed Multiplier: " << tc << std::endl;
730 outStream <<
" Dual Residual: " << fc-del << std::endl;
733 outStream <<
" Transformed Multiplier: " << t1 << std::endl;
734 outStream <<
" Dual Residual: " << f1-del << std::endl;
736 outStream <<
" Exit Code: " << code << std::endl;
737 outStream << std::endl;
917 std::stringstream hist;
918 if (verbosity_ > 1) {
919 hist << std::string(114,
'-') << std::endl;
920 hist <<
" SPG trust-region method status output definitions" << std::endl << std::endl;
921 hist <<
" iter - Number of iterates (steps taken)" << std::endl;
922 hist <<
" value - Objective function value" << std::endl;
923 hist <<
" gnorm - Norm of the gradient" << std::endl;
924 hist <<
" snorm - Norm of the step (update to optimization vector)" << std::endl;
925 hist <<
" delta - Trust-Region radius" << std::endl;
926 hist <<
" #fval - Number of times the objective function was evaluated" << std::endl;
927 hist <<
" #grad - Number of times the gradient was computed" << std::endl;
928 hist <<
" #hess - Number of times the Hessian was applied" << std::endl;
929 hist <<
" #proj - Number of times the projection was computed" << std::endl;
931 hist <<
" tr_flag - Trust-Region flag" << std::endl;
937 hist <<
" iterSPG - Number of Spectral Projected Gradient iterations" << std::endl << std::endl;
938 hist <<
" flagSPG - Trust-Region Truncated CG flag" << std::endl;
939 hist <<
" 0 - Converged" << std::endl;
940 hist <<
" 1 - Iteration Limit Exceeded" << std::endl;
941 hist << std::string(114,
'-') << std::endl;
944 hist << std::setw(6) << std::left <<
"iter";
945 hist << std::setw(15) << std::left <<
"value";
946 hist << std::setw(15) << std::left <<
"gnorm";
947 hist << std::setw(15) << std::left <<
"snorm";
948 hist << std::setw(15) << std::left <<
"delta";
949 hist << std::setw(10) << std::left <<
"#fval";
950 hist << std::setw(10) << std::left <<
"#grad";
951 hist << std::setw(10) << std::left <<
"#hess";
952 hist << std::setw(10) << std::left <<
"#proj";
953 hist << std::setw(10) << std::left <<
"tr_flag";
954 hist << std::setw(10) << std::left <<
"iterSPG";
955 hist << std::setw(10) << std::left <<
"flagSPG";
969 std::stringstream hist;
970 hist << std::scientific << std::setprecision(6);
971 if ( state_->iter == 0 ) writeName(os);
972 if ( write_header ) writeHeader(os);
973 if ( state_->iter == 0 ) {
975 hist << std::setw(6) << std::left << state_->iter;
976 hist << std::setw(15) << std::left << state_->value;
977 hist << std::setw(15) << std::left << state_->gnorm;
978 hist << std::setw(15) << std::left <<
"---";
979 hist << std::setw(15) << std::left << state_->searchSize;
980 hist << std::setw(10) << std::left << state_->nfval;
981 hist << std::setw(10) << std::left << state_->ngrad;
982 hist << std::setw(10) << std::left << nhess_;
983 hist << std::setw(10) << std::left << state_->nproj;
984 hist << std::setw(10) << std::left <<
"---";
985 hist << std::setw(10) << std::left <<
"---";
986 hist << std::setw(10) << std::left <<
"---";
991 hist << std::setw(6) << std::left << state_->iter;
992 hist << std::setw(15) << std::left << state_->value;
993 hist << std::setw(15) << std::left << state_->gnorm;
994 hist << std::setw(15) << std::left << state_->snorm;
995 hist << std::setw(15) << std::left << state_->searchSize;
996 hist << std::setw(10) << std::left << state_->nfval;
997 hist << std::setw(10) << std::left << state_->ngrad;
998 hist << std::setw(10) << std::left << nhess_;
999 hist << std::setw(10) << std::left << state_->nproj;
1000 hist << std::setw(10) << std::left << TRflag_;
1001 hist << std::setw(10) << std::left << SPiter_;
1002 hist << std::setw(10) << std::left << SPflag_;