9#ifndef pRC_ALGORITHMS_OPTIMIZER_LBFGSB_H
10#define pRC_ALGORITHMS_OPTIMIZER_LBFGSB_H
23 template<
class LS = LineSearch::MoreThuente, Size M = 5>
27 static constexpr Size defaultMaxIterations()
32 template<
class X,
class G,
class L,
class U,
class T>
33 static constexpr auto projectedGradientConverged(X
const &
x, G
const &g,
34 L
const &lowerBound, U
const &upperBound,
T const &tolerance)
37 min(
x - lowerBound, g));
41 return infNorm <= tolerance * identity<T>(1e-3);
44 template<
class F,
class T>
45 static constexpr auto valueConverged(F
const &f0, F
const &f,
50 return delta(f0, f) <= tolerance * scale;
54 static constexpr auto valueDiverged(F
const &f)
60 static constexpr auto valueIncreased(F
const &f0, F
const &f)
65 template<
class S,
class Y,
class STY,
class J,
class T>
66 static constexpr auto resetHistory(
S &s, Y &
y, STY &sTy, J &j,
T &theta)
91 for(
Index k = 0; k < size; ++k)
95 p2(
i) -= sTy(
i, k) * p1(k);
104 auto const scale =
rcp(sTy(
i,
i));
105 for(
Index k =
i + 1; k < size; ++k)
107 p1(
i) += sTy(k,
i) * p2(k) * scale;
114 template<
class X,
class G,
class LB,
class UB,
class T,
class S,
115 class Y,
class STY,
class J,
class C>
116 static constexpr auto computeGeneralizedCauchyPoint(X
const &
x,
117 G
const &g, LB
const &lowerBound, UB
const &upperBound,
118 T const &theta,
S const &s, Y
const &
y, STY
const &sTy, J
const &j,
121 using TX =
typename X::Type;
125 where(g <
zero(), (
x - upperBound) / g, (
x - lowerBound) / g));
130 for(
Index i = 0;
i < X::size(); ++
i)
132 sortedIndices[
i] =
i;
136 [&t](
auto const i,
auto const j)
145 for(
Index m = 0; m < s.size(); ++m)
153 auto const mp = applyMiddleMatrix(sTy, j,
p, s.size());
156 auto deltaTMin = -f1 / f2;
159 auto b = sortedIndices[sorted];
160 while(t[b] <=
zero())
164 b = sortedIndices[sorted];
172 while(deltaTMin >= deltaT)
176 xCP[b] = upperBound[b];
181 xCP[b] = lowerBound[b];
184 auto const z = xCP[b] -
x[b];
188 f1 += deltaT * f2 +
square(d[b]) - theta * d[b] *
z;
189 f2 -= theta *
square(d[b]);
194 for(
Index m = 0; m < s.size(); ++m)
196 w0(m) =
y.front(m)[b];
197 w1(m) = theta * s.front(m)[b];
200 auto wM = applyMiddleMatrix(sTy, j, w, s.
size());
208 deltaTMin = -f1 / f2;
212 if(sorted < X::size())
214 b = sortedIndices[sorted];
226 auto const scale = t0 + deltaTMin;
228 return xCP += scale * d;
231 template<
class X,
class CP,
class G,
class LB,
class UB,
class T,
232 class S,
class Y,
class STY,
class J,
class C>
233 static constexpr auto minimizeSubspace(X
const &
x, CP &xCP, G
const &g,
234 LB
const &lowerBound, UB
const &upperBound,
T const &theta,
235 S const &s, Y
const &
y, STY
const &sTy, J
const &j, C
const &c)
237 using TX =
typename X::Type;
239 Tensor r = -theta * (xCP -
x) - g;
240 Tensor const mc = applyMiddleMatrix(sTy, j, c, s.size());
241 for(
Index m = 0; m < s.size(); ++m)
243 r +=
y.front(m) * mc(m);
244 r += s.front(m) * theta * mc(M + m);
249 for(
Index i = 0;
i < X::size(); ++
i)
251 if((xCP != upperBound && xCP != lowerBound)[
i])
253 freeIndices[nFree++] =
i;
257 decltype(r) zTr =
zero();
258 for(
Index f = 0; f < nFree; ++f)
260 zTr[f] = r[freeIndices[f]];
264 for(
Index m = 0; m < s.size(); ++m)
267 for(
Index f = 0; f < nFree; ++f)
269 zTs.
back()[f] = s.front(m)[freeIndices[f]];
274 for(
Index m = 0; m <
y.size(); ++m)
277 for(
Index f = 0; f < nFree; ++f)
279 zTy.
back()[f] =
y.front(m)[freeIndices[f]];
323 for(
Index m = 0; m < s.size(); ++m)
325 for(
Index n = 0; n < s.size(); ++n)
355 d += zTy.
front(m) * rTw0(m);
356 d += zTs.
front(m) * rTw1(m) * theta;
359 auto const rTheta =
rcp(theta);
360 d = (zTr + d * rTheta) * rTheta;
365 for(
Index f = 0; f < nFree; ++f)
367 auto const i = freeIndices[f];
369 min(upperBound[
i],
max(lowerBound[
i], xCP[
i] + d[f]));
384 for(
Index f = 0; f < nFree; ++f)
386 auto const i = freeIndices[f];
389 alpha =
min(alpha, (upperBound[
i] - xCP[
i]) / d[f]);
393 alpha =
min(alpha, (lowerBound[
i] - xCP[
i]) / d[f]);
397 for(
Index f = 0; f < nFree; ++f)
399 xCP[freeIndices[f]] += alpha * d[f];
406 template<
class X,
class P,
class LB,
class UB>
407 static constexpr auto getLineSearchParameters(X
const &
x, P
const &
p,
408 LB
const &lowerBound, UB
const &upperBound)
410 using TX =
typename X::Type;
414 for(
Index i = 0;
i < X::size(); ++
i)
418 auto const d = lowerBound[
i] -
x[
i];
425 if(
p[
i] * alphaMax < d)
433 auto const d = upperBound[
i] -
x[
i];
440 if(
p[
i] * alphaMax > d)
473 return mMaxIterations;
476 template<
class XX,
class FF,
class FC,
488 (RX::Dimension == RU::Dimension) &&
493 RXE(RXE
const &), RXE
const &, VX, VX, VX>
494 inline constexpr auto operator()(XX &&x0, FF &&function, FC &&callback,
499 using TX =
typename RX::Type;
514 "L-BFGS-B: Initial parameters not in range "
524 auto f = function(
x, g);
528 if(projectedGradientConverged(
x, g, lowerBound, upperBound,
540 for(
Index iteration = 0;;)
544 Tensor xCP = computeGeneralizedCauchyPoint(
x, g, lowerBound,
545 upperBound, theta, s,
y, sTy, j, c);
547 auto d = minimizeSubspace(
x, xCP, g, lowerBound, upperBound,
548 theta, s,
y, sTy, j, c);
552 resetHistory(s,
y, sTy, j, theta);
557 auto [alphaInit, alphaMin, alphaMax] =
558 getLineSearchParameters(
x,
p, lowerBound, upperBound);
566 x, f, g, d, function,
567 [&lowerBound, &upperBound](
auto &&
x)
569 return min(upperBound,
570 max(lowerBound, forward<
decltype(
x)>(
x)));
572 p, alphaInit, alphaMin, alphaMax);
583 if(valueIncreased(f))
598 if(valueConverged(f0, f, tolerance))
604 if(projectedGradientConverged(
x, g, lowerBound, upperBound,
613 auto const skTyk = (d - d0) * alpha;
625 for(
Index n = 0; n <
y.size(); ++n)
640 for(
Index k = 0; k < m; ++k)
642 j(m, n) += sTy(m, k) * sTy(n, k) / sTy(k, k);
654 return forward<XX>(x0);
663 LS
const mLineSearch;
664 Size const mMaxIterations;
constexpr auto emplaceBack(Args &&...args) &&
Definition deque.hpp:166
constexpr auto size() const
Definition deque.hpp:28
constexpr decltype(auto) front(Index const position=0) &&
Definition deque.hpp:33
constexpr decltype(auto) back(Index const position=0) &&
Definition deque.hpp:53
constexpr auto pushBack(R const &element) &&
Definition deque.hpp:135
constexpr LBFGSB(Size const maxIterations=defaultMaxIterations())
Definition lbfgsb.hpp:461
constexpr LBFGSB(LS const &lineSearch, Size const maxIterations=defaultMaxIterations())
Definition lbfgsb.hpp:454
constexpr auto & lineSearch() const
Definition lbfgsb.hpp:466
constexpr auto operator()(XX &&x0, FF &&function, FC &&callback, VT const &tolerance=NumericLimits< VT >::tolerance(), XL &&lowerBound=unit< RL >(NumericLimits< VL >::lowest()), XU &&upperBound=unit< RU >(NumericLimits< VU >::max())) const
Definition lbfgsb.hpp:494
constexpr auto maxIterations() const
Definition lbfgsb.hpp:471
static constexpr auto size()
Definition tensor.hpp:39
Definition concepts.hpp:25
Definition concepts.hpp:31
Definition concepts.hpp:19
Definition concepts.hpp:28
Definition declarations.hpp:27
Definition declarations.hpp:45
TN::Subscripts S
Definition externs_nonTT.hpp:9
pRC::Float<> T
Definition externs_nonTT.hpp:1
int i
Definition gmock-matchers-comparisons_test.cc:603
Uncopyable z
Definition gmock-matchers-containers_test.cc:378
const double y
Definition gmock-matchers-containers_test.cc:377
int x
Definition gmock-matchers-containers_test.cc:376
const char * p
Definition gmock-matchers-containers_test.cc:379
static void info(Xs &&...args)
Definition log.hpp:27
static void error(Xs &&...args)
Definition log.hpp:14
static constexpr auto isFinite(T const &a)
Definition is_finite.hpp:13
static constexpr decltype(auto) solve(Solver &&solver, XA &&A, XB &&b)
Definition solve.hpp:18
static constexpr auto unit()
Definition unit.hpp:13
static constexpr Conditional< C, RemoveConstReference< X >, RemoveConst< X > > copy(X &&a)
Definition copy.hpp:13
static constexpr decltype(auto) where(TE const e, XA &&a, XB &&b)
Definition where.hpp:11
static constexpr auto rcp(T const &b)
Definition rcp.hpp:12
static constexpr auto diagonal(X &&a)
Definition diagonal.hpp:15
Size Index
Definition basics.hpp:32
std::size_t Size
Definition basics.hpp:31
std::tuple< Ts... > Tuple
Definition basics.hpp:23
std::invoke_result_t< F, Args... > ResultOf
Definition basics.hpp:59
std::remove_reference_t< T > RemoveReference
Definition basics.hpp:41
static constexpr auto strictlyLowerTriangular(X &&a)
Definition strictly_lower_triangular.hpp:14
std::common_type_t< Ts... > Common
Definition basics.hpp:53
static constexpr auto cholesky(X &&a)
Definition cholesky.hpp:14
static constexpr auto block(X &&a, Os const ... offsets)
Definition block.hpp:17
static constexpr auto transpose(JacobiRotation< T > const &a)
Definition jacobi_rotation.hpp:306
static constexpr auto abs(T const &a)
Definition abs.hpp:11
typename ValueType< T >::Type Value
Definition value.hpp:72
static constexpr decltype(auto) min(X &&a)
Definition min.hpp:13
Conditional<((Ns *... *1) *sizeof(T) > cHugepageSizeByte), HeapArray< T, Ns... >, StackArray< T, Ns... > > Array
Definition declarations.hpp:21
static constexpr auto scalarProduct(TA const &a, TB const &b)
Definition scalar_product.hpp:11
static constexpr auto reduce(Sequence< T, I1, I2, Is... > const)
Definition sequence.hpp:458
static constexpr auto delta(TA const &a, TB const &b)
Definition delta.hpp:11
static constexpr auto extractDiagonal(X &&a)
Definition extract_diagonal.hpp:16
constexpr auto cDebugLevel
Definition config.hpp:48
RemoveConst< RemoveReference< T > > RemoveConstReference
Definition basics.hpp:47
static constexpr auto square(T const &a)
Definition square.hpp:11
static constexpr auto identity()
Definition identity.hpp:13
static constexpr auto zero()
Definition zero.hpp:12
static constexpr void sort(C const &compare, T &a, Size const k=T::size(), Size const d=0)
Definition sort.hpp:15
static constexpr decltype(auto) eval(X &&a)
Definition eval.hpp:12
static constexpr auto norm(T const &a)
Definition norm.hpp:12
static constexpr decltype(auto) max(X &&a)
Definition max.hpp:13