cMHN 1.1
C++ library for learning MHNs with pRC
Loading...
Searching...
No Matches
lbfgsb.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: BSD-2-Clause
2
3// References:
4// Authors: Richard H. Byrd, Peihuang Lu, Jorge Nocedal, Ciyou Zhu
5// Title: A Limited Memory Algorithm for Bound Constrained Optimization
6// Year: 1995
7// URL: https://doi.org/10.1137/0916069
8
9#ifndef pRC_ALGORITHMS_OPTIMIZER_LBFGSB_H
10#define pRC_ALGORITHMS_OPTIMIZER_LBFGSB_H
11
12#include <prc/config.hpp>
28
29namespace pRC::Optimizer
30{
31 template<class LS = LineSearch::MoreThuente, Size M = 5>
32 class LBFGSB
33 {
34 private:
35 static constexpr Size defaultMaxIterations()
36 {
37 return 1000;
38 }
39
40 template<class X, class G, class L, class U, class T>
41 static constexpr auto projectedGradientConverged(X const &x, G const &g,
42 L const &lowerBound, U const &upperBound, T const &tolerance)
43 {
44 auto const projG = where(g < zero(), max(x - upperBound, g),
45 min(x - lowerBound, g));
46
47 auto const infNorm = norm<2, 0>(projG)();
48
50 }
51
52 template<class F, class T>
53 static constexpr auto valueConverged(F const &f0, F const &f,
54 T const &tolerance)
55 {
56 auto const scale = max(abs(f0), abs(f), identity<T>());
57
58 return delta(f0, f) <= tolerance * scale;
59 }
60
61 template<class F>
62 static constexpr auto valueDiverged(F const &f)
63 {
64 return !isFinite(f);
65 }
66
67 template<class F>
68 static constexpr auto valueIncreased(F const &f0, F const &f)
69 {
70 return f > f0;
71 }
72
73 template<class S, class Y, class STY, class J, class T>
74 static constexpr auto resetHistory(S &s, Y &y, STY &sTy, J &j, T &theta)
75 {
76 s.clear();
77 y.clear();
78 sTy = zero();
79 j = zero();
80 theta = identity();
81
82 return;
83 }
84
85 template<class T>
86 static constexpr auto applyMiddleMatrix(Tensor<T, M, M> const &sTy,
87 Tensor<T, M, M> const &j, Tensor<T, 2 * M> const &v,
88 Size const size)
89 {
90 auto v1 = block<2>(v, 0);
91 auto v2 = block<2>(v, 1);
92
94 auto p1 = block<2>(p, 0);
95 auto p2 = block<2>(p, 1);
96
98 p2 = v2;
99 for(Index k = 0; k < size; ++k)
100 {
101 for(Index i = k + 1; i < size; ++i)
102 {
103 p2(i) -= sTy(i, k) * p1(k);
104 }
105 }
106
109
110 for(Index i = 0; i < size; ++i)
111 {
112 auto const scale = rcp(sTy(i, i));
113 for(Index k = i + 1; k < size; ++k)
114 {
115 p1(i) += sTy(k, i) * p2(k) * scale;
116 }
117 }
118
119 return p;
120 }
121
122 template<class X, class G, class LB, class UB, class T, class S,
123 class Y, class STY, class J, class C>
124 static constexpr auto computeGeneralizedCauchyPoint(X const &x,
125 G const &g, LB const &lowerBound, UB const &upperBound,
126 T const &theta, S const &s, Y const &y, STY const &sTy, J const &j,
127 C &c)
128 {
129 using TX = typename X::Type;
130
131 Tensor const t = where(g == zero(),
133 where(g < zero(), (x - upperBound) / g, (x - lowerBound) / g));
134
135 Tensor d = where(t == zero(), zero<G>(), -g);
136
137 Array<Index, X::size()> sortedIndices;
138 for(Index i = 0; i < X::size(); ++i)
139 {
140 sortedIndices[i] = i;
141 }
142
143 sort(
144 [&t](auto const i, auto const j)
145 {
146 return t[i] < t[j];
147 },
149
151 auto p0 = block<2>(p, 0);
152 auto p1 = block<2>(p, 1);
153 for(Index m = 0; m < s.size(); ++m)
154 {
155 p0(m) = scalarProduct(y.front(m), d)();
156 p1(m) = theta * scalarProduct(s.front(m), d)();
157 }
158
159 auto f1 = -norm<2, 1>(d)();
160
161 auto const mp = applyMiddleMatrix(sTy, j, p, s.size());
162 auto f2 = -theta * f1 - scalarProduct(p, mp)();
163
164 auto deltaTMin = -f1 / f2;
165
166 Index sorted = 0;
167 auto b = sortedIndices[sorted];
168 while(t[b] <= zero())
169 {
170 d[b] = zero();
171 ++sorted;
173 }
174
175 auto deltaT = t[b];
176
177 Tensor xCP = x;
178 c = zero();
179 TX t0 = zero();
180 while(deltaTMin >= deltaT)
181 {
182 if(d[b] > zero())
183 {
184 xCP[b] = upperBound[b];
185 }
186
187 if(d[b] < zero())
188 {
189 xCP[b] = lowerBound[b];
190 }
191
192 auto const z = xCP[b] - x[b];
193
194 c += deltaT * p;
195
196 f1 += deltaT * f2 + square(d[b]) - theta * d[b] * z;
197 f2 -= theta * square(d[b]);
198
200 auto w0 = block<2>(w, 0);
201 auto w1 = block<2>(w, 1);
202 for(Index m = 0; m < s.size(); ++m)
203 {
204 w0(m) = y.front(m)[b];
205 w1(m) = theta * s.front(m)[b];
206 }
207
208 auto wM = applyMiddleMatrix(sTy, j, w, s.size());
209
210 f1 += d[b] * scalarProduct(wM, c)();
211 f2 += identity<TX>(2) * d[b] * scalarProduct(wM, p)() -
212 square(d[b]) * scalarProduct(wM, w)();
213
214 p -= d[b] * wM;
215
216 deltaTMin = -f1 / f2;
217 d[b] = zero();
218 t0 = t[b];
219 ++sorted;
220 if(sorted < X::size())
221 {
223 deltaT = t[b] - t0;
224 }
225 else
226 {
227 break;
228 }
229 }
230
232 c += deltaTMin * p;
233
234 auto const scale = t0 + deltaTMin;
235
236 return xCP += scale * d;
237 }
238
239 template<class X, class CP, class G, class LB, class UB, class T,
240 class S, class Y, class STY, class J, class C>
241 static constexpr auto minimizeSubspace(X const &x, CP &xCP, G const &g,
242 LB const &lowerBound, UB const &upperBound, T const &theta,
243 S const &s, Y const &y, STY const &sTy, J const &j, C const &c)
244 {
245 using TX = typename X::Type;
246
247 Tensor r = -theta * (xCP - x) - g;
248 Tensor const mc = applyMiddleMatrix(sTy, j, c, s.size());
249 for(Index m = 0; m < s.size(); ++m)
250 {
251 r += y.front(m) * mc(m);
252 r += s.front(m) * theta * mc(M + m);
253 }
254
255 Size nFree = 0;
256 Array<Index, X::size()> freeIndices;
257 for(Index i = 0; i < X::size(); ++i)
258 {
259 if((xCP != upperBound && xCP != lowerBound)[i])
260 {
261 freeIndices[nFree++] = i;
262 }
263 }
264
265 decltype(r) zTr = zero();
266 for(Index f = 0; f < nFree; ++f)
267 {
268 zTr[f] = r[freeIndices[f]];
269 }
270
272 for(Index m = 0; m < s.size(); ++m)
273 {
275 for(Index f = 0; f < nFree; ++f)
276 {
277 zTs.back()[f] = s.front(m)[freeIndices[f]];
278 }
279 }
280
282 for(Index m = 0; m < y.size(); ++m)
283 {
285 for(Index f = 0; f < nFree; ++f)
286 {
287 zTy.back()[f] = y.front(m)[freeIndices[f]];
288 }
289 }
290
292 auto l00 = block<2, 2>(l, 0, 0);
293 auto l10 = block<2, 2>(l, 1, 0);
294 auto l01 = block<2, 2>(l, 0, 1);
295 auto l11 = block<2, 2>(l, 1, 1);
296
297 {
299 for(Index m = 0; m < zTy.size(); ++m)
300 {
301 for(Index n = 0; n < zTy.size(); ++n)
302 {
303 yTzzTy(m, n) =
304 scalarProduct(zTy.front(m), zTy.front(n))();
305 }
306 }
307
309 }
310
311 l10 = zero();
312
313 {
315 for(Index m = 0; m < zTy.size(); ++m)
316 {
317 for(Index n = 0; n < zTs.size(); ++n)
318 {
319 yTzzTs(m, n) =
320 scalarProduct(zTy.front(m), zTs.front(n))();
321 }
322 }
323
324 auto const rhs =
327 }
328
329 {
331 for(Index m = 0; m < s.size(); ++m)
332 {
333 for(Index n = 0; n < s.size(); ++n)
334 {
335 sTaaTs(m, n) = (scalarProduct(s.front(m), s.front(n)) -
336 scalarProduct(zTs.front(m), zTs.front(n)))();
337 }
338 }
339
341 }
342
344 auto rTw0 = block<2>(rTw, 0);
345 auto rTw1 = block<2>(rTw, 1);
346
347 for(Index m = 0; m < zTy.size(); ++m)
348 {
349 rTw0(m) = scalarProduct(zTr, zTy.front(m))();
350 }
351 for(Index m = 0; m < zTs.size(); ++m)
352 {
353 rTw1(m) = theta * scalarProduct(zTr, zTs.front(m))();
354 }
355
357 rTw0 = -rTw0;
359
360 Tensor d = zero<X>();
361 for(Index m = 0; m < zTy.size(); ++m)
362 {
363 d += zTy.front(m) * rTw0(m);
364 d += zTs.front(m) * rTw1(m) * theta;
365 }
366
367 auto const rTheta = rcp(theta);
368 d = (zTr + d * rTheta) * rTheta;
369
370 {
372
373 for(Index f = 0; f < nFree; ++f)
374 {
375 auto const i = freeIndices[f];
376 xCP[i] =
377 min(upperBound[i], max(lowerBound[i], xCP[i] + d[f]));
378 }
379
380 xCP -= x;
381 auto const dd = scalarProduct(xCP, g)();
382
383 if(dd < zero())
384 {
385 return dd;
386 }
387
388 xCP = xCPcopy;
389 }
390
391 TX alpha = identity();
392 for(Index f = 0; f < nFree; ++f)
393 {
394 auto const i = freeIndices[f];
395 if(d[f] > zero())
396 {
397 alpha = min(alpha, (upperBound[i] - xCP[i]) / d[f]);
398 }
399 if(d[f] < zero())
400 {
401 alpha = min(alpha, (lowerBound[i] - xCP[i]) / d[f]);
402 }
403 }
404
405 for(Index f = 0; f < nFree; ++f)
406 {
407 xCP[freeIndices[f]] += alpha * d[f];
408 }
409
410 xCP -= x;
411 return scalarProduct(xCP, g)();
412 }
413
414 template<class X, class P, class LB, class UB>
415 static constexpr auto getLineSearchParameters(X const &x, P const &p,
416 LB const &lowerBound, UB const &upperBound)
417 {
418 using TX = typename X::Type;
419
421
422 for(Index i = 0; i < X::size(); ++i)
423 {
424 if(p[i] < zero())
425 {
426 auto const d = lowerBound[i] - x[i];
427 if(d > zero())
428 {
429 alphaMax = zero();
430 }
431 else
432 {
433 if(p[i] * alphaMax < d)
434 {
435 alphaMax = d / p[i];
436 }
437 }
438 }
439 if(p[i] > zero())
440 {
441 auto const d = upperBound[i] - x[i];
442 if(d < zero())
443 {
444 alphaMax = zero();
445 }
446 else
447 {
448 if(p[i] * alphaMax > d)
449 {
450 alphaMax = d / p[i];
451 }
452 }
453 }
454 }
455
456 auto const alpha = min(rcp(norm<2>(p))(), alphaMax);
457
459 }
460
461 public:
462 constexpr LBFGSB(LS const &lineSearch,
463 Size const maxIterations = defaultMaxIterations())
464 : mLineSearch(lineSearch)
465 , mMaxIterations(maxIterations)
466 {
467 }
468
469 constexpr LBFGSB(Size const maxIterations = defaultMaxIterations())
470 : mMaxIterations(maxIterations)
471 {
472 }
473
474 constexpr auto &lineSearch() const
475 {
476 return mLineSearch;
477 }
478
479 constexpr auto maxIterations() const
480 {
481 return mMaxIterations;
482 }
483
484 template<class XX, class RX = RemoveReference<XX>,
485 class TX = typename RX::Type, class VX = typename TX::Value,
486 If<IsTensorish<RX>> = 0,
487 class RXE = RemoveConstReference<ResultOf<Eval, XX>>, class FF,
488 If<IsInvocable<FF, RXE const &, RXE &>> = 0,
489 If<IsFloat<ResultOf<FF, RXE const &, RXE &>>> = 0, class FC,
490 If<IsInvocable<FC, RXE>> = 0,
491 class XL = decltype(unit<RX>(NumericLimits<VX>::lowest())),
492 class RL = RemoveReference<XL>, class TL = typename RL::Type,
493 class VL = typename TL::Value, If<IsTensorish<RL>> = 0,
494 class XU = decltype(unit<RX>(NumericLimits<VX>::max())),
495 class RU = RemoveReference<XU>, class TU = typename RU::Type,
496 class VU = typename TU::Value, If<IsTensorish<RU>> = 0,
497 class VT = Common<VX, VL, VU>,
498 If<All<IsFloat<VX>, IsFloat<VT>>> = 0,
499 If<IsSame<typename RX::Dimension, typename RL::Dimension,
500 typename RU::Dimension>> = 0,
501 If<IsSame<typename RX::Sizes, typename RL::Sizes,
502 typename RU::Sizes>> = 0,
503 If<IsInvocable<LS, RXE &, ResultOf<FF, RXE const &, RXE &> &, RXE &,
504 VX &, FF, RXE(RXE const &), RXE const &, VX, VX, VX>> = 0>
505 inline constexpr auto operator()(XX &&x0, FF &&function, FC &&callback,
506 VT const &tolerance = NumericLimits<VT>::tolerance(),
509 {
510 if constexpr(cDebugLevel >= DebugLevel::High)
511 {
513 {
514 Logging::error("L-BFGS-B: Lower bound > upper bound.");
515 }
516 }
517
518 if constexpr(cDebugLevel >= DebugLevel::Low)
519 {
521 {
523 "L-BFGS-B: Initial parameters not in range "
524 "[lowerBound, "
525 "upperBound]");
526 }
527 }
528
529 decltype(auto) x =
530 copy<!(!IsReference<XX>() && !IsConst<RX>())>(eval(x0));
531
532 RXE g;
533 auto f = function(x, g);
534
535 Logging::info("L-BFGS-B initial f(x) =", f);
536
537 if(projectedGradientConverged(x, g, lowerBound, upperBound,
538 tolerance))
539 {
540 return x;
541 }
542
547 TX theta = identity();
548
549 for(Index iteration = 0;;)
550 {
552
553 Tensor xCP = computeGeneralizedCauchyPoint(x, g, lowerBound,
554 upperBound, theta, s, y, sTy, j, c);
555
556 auto d = minimizeSubspace(x, xCP, g, lowerBound, upperBound,
557 theta, s, y, sTy, j, c);
558
559 if(d >= zero())
560 {
561 resetHistory(s, y, sTy, j, theta);
562 continue;
563 }
564
565 auto const &p = xCP;
566 auto [alphaInit, alphaMin, alphaMax] =
567 getLineSearchParameters(x, p, lowerBound, upperBound);
568
569 auto const f0 = f;
570 auto const g0 = g;
571 auto const d0 = d;
572 auto const x0 = x;
573
574 auto alpha = lineSearch()(
575 x, f, g, d, function,
576 [&lowerBound, &upperBound](auto &&x)
577 {
578 return min(upperBound,
579 max(lowerBound, forward<decltype(x)>(x)));
580 },
582
583 callback(x);
584
585 if(valueDiverged(f))
586 {
587 Logging::info("L-BFGS-B diverged at f(x) =", f);
588 x = x0;
589 break;
590 }
591
592 if(valueIncreased(f))
593 {
594 Logging::info("L-BFGS-B made no further progress at f(x) =",
595 f);
596 x = x0;
597 break;
598 }
599
600 if(++iteration; !(iteration < maxIterations()))
601 {
602 Logging::info("L-BFGS-B max iterations reached at f(x) =",
603 f);
604 break;
605 }
606
607 if(valueConverged(f0, f, tolerance))
608 {
609 Logging::info("L-BFGS-B converged at f(x) =", f);
610 break;
611 }
612
613 if(projectedGradientConverged(x, g, lowerBound, upperBound,
614 tolerance))
615 {
616 Logging::info("L-BFGS-B converged at f(x) =", f);
617 break;
618 }
619
620 Logging::info("L-BFGS-B current f(x) =", f);
621
622 auto const skTyk = (d - d0) * alpha;
623
625 {
626 continue;
627 }
628
629 s.pushBack(alpha * p);
630 y.pushBack(g - g0);
631
632 theta = norm<2, 1>(y.back())() / skTyk;
633
634 for(Index n = 0; n < y.size(); ++n)
635 {
636 for(Index m = 0; m < s.size(); ++m)
637 {
638 sTy(m, n) = scalarProduct(s.front(m), y.front(n))();
639 }
640 }
641
642 j = zero();
643 for(Index m = 0; m < s.size(); ++m)
644 {
645 for(Index n = m; n < s.size(); ++n)
646 {
647 j(m, n) =
648 theta * scalarProduct(s.front(m), s.front(n))();
649 for(Index k = 0; k < m; ++k)
650 {
651 j(m, n) += sTy(m, k) * sTy(n, k) / sTy(k, k);
652 }
653 }
654 }
655
657 }
658
659 callback(x);
660
661 if constexpr(IsReference<decltype(x)>())
662 {
663 return forward<XX>(x0);
664 }
665 else
666 {
667 return x;
668 }
669 }
670
671 private:
672 LS const mLineSearch;
673 Size const mMaxIterations;
674 };
675}
676#endif // pRC_ALGORITHMS_OPTIMIZER_LBFGSB_H
Definition deque.hpp:15
constexpr auto size() const
Definition deque.hpp:28
constexpr decltype(auto) front(Index const position=0) &&
Definition deque.hpp:33
constexpr auto pushBack(R const &element) &&
Definition deque.hpp:133
constexpr auto emplaceBack(Args &&...args) &&
Definition deque.hpp:163
Definition lbfgsb.hpp:33
constexpr LBFGSB(Size const maxIterations=defaultMaxIterations())
Definition lbfgsb.hpp:469
constexpr LBFGSB(LS const &lineSearch, Size const maxIterations=defaultMaxIterations())
Definition lbfgsb.hpp:462
constexpr auto & lineSearch() const
Definition lbfgsb.hpp:474
constexpr auto maxIterations() const
Definition lbfgsb.hpp:479
constexpr auto operator()(XX &&x0, FF &&function, FC &&callback, VT const &tolerance=NumericLimits< VT >::tolerance(), XL &&lowerBound=unit< RL >(NumericLimits< VL >::lowest()), XU &&upperBound=unit< RU >(NumericLimits< VU >::max())) const
Definition lbfgsb.hpp:505
Definition tensor.hpp:28
TN::Subscripts S
Definition externs_nonTT.hpp:9
pRC::Float<> T
Definition externs_nonTT.hpp:1
static void info(Xs &&...args)
Definition log.hpp:27
static void error(Xs &&...args)
Definition log.hpp:14
Definition bfgs.hpp:17
static constexpr auto extractDiagonal(X &&a)
Definition extract_diagonal.hpp:17
static constexpr auto isFinite(T const &a)
Definition is_finite.hpp:13
static constexpr X eval(X &&a)
Definition eval.hpp:11
static constexpr X min(X &&a)
Definition min.hpp:13
static constexpr auto makeConstantSequence()
Definition sequence.hpp:402
static constexpr auto diagonal(X &&a)
Definition diagonal.hpp:16
Size Index
Definition type_traits.hpp:21
static constexpr auto rcp(Complex< T > const &b)
Definition rcp.hpp:13
std::size_t Size
Definition type_traits.hpp:20
static constexpr auto cholesky(X &&a)
Definition cholesky.hpp:24
static constexpr auto zero()
Definition zero.hpp:12
static constexpr auto transpose(JacobiRotation< T > const &a)
Definition jacobi_rotation.hpp:319
static constexpr T where(TE const e, T &&a, T &&b)
Definition where.hpp:12
static constexpr void sort(C const &compare, T &a, Size const k=T::size(), Size const d=0)
Definition sort.hpp:15
static constexpr auto square(Complex< T > const &a)
Definition square.hpp:14
std::is_reference< T > IsReference
Definition type_traits.hpp:47
static constexpr auto strictlyLowerTriangular(X &&a)
Definition strictly_lower_triangular.hpp:15
static constexpr auto delta(Complex< TA > const &a, Complex< TB > const &b)
Definition delta.hpp:12
static constexpr auto abs(Complex< T > const &a)
Definition abs.hpp:12
static constexpr Conditional< IsSatisfied< C >, RemoveConstReference< X >, X > copy(X &&a)
Definition copy.hpp:13
constexpr auto cDebugLevel
Definition config.hpp:46
Conditional< IsSatisfied<((Ns *... *1) *sizeof(T) > cHugepageSizeByte)>, HeapArray< T, Ns... >, StackArray< T, Ns... > > Array
Definition type_traits.hpp:58
static constexpr auto scalarProduct(Complex< TA > const &a, Complex< TB > const &b)
Definition scalar_product.hpp:13
static constexpr auto identity()
Definition identity.hpp:12
static constexpr X max(X &&a)
Definition max.hpp:13
Definition limits.hpp:13