# License: BSD 3 clause
import numpy as np
from scipy.optimize import fmin_bfgs
from tick.base_model import Model
from tick.prox import ProxZero, ProxL2Sq
from tick.prox.base import Prox
from .base import SolverFirstOrder
from .base.utils import relative_distance
class BFGS(SolverFirstOrder):
    """Broyden, Fletcher, Goldfarb, and Shanno algorithm

    This solver is actually a simple wrapping of the
    `scipy.optimize.fmin_bfgs` BFGS (Broyden, Fletcher, Goldfarb, and
    Shanno) algorithm. This is a quasi-Newton algorithm that builds
    iteratively approximations of the inverse Hessian. This solver can be
    used to minimize objectives of the form

    .. math::
        f(w) + g(w),

    for :math:`f` with a smooth gradient and only :math:`g` corresponding to
    the zero penalization (namely :class:`ProxZero <tick.prox.ProxZero>`)
    or ridge penalization (namely :class:`ProxL2sq <tick.prox.ProxL2sq>`).
    Function :math:`f` corresponds to the ``model.loss`` method of the model
    (passed with ``set_model`` to the solver) and :math:`g` corresponds to
    the ``prox.value`` method of the prox (passed with the ``set_prox``
    method). The iterations stop whenever tolerance ``tol`` is achieved, or
    after ``max_iter`` iterations. The obtained solution :math:`w` is
    returned by the ``solve`` method, and is also stored in the ``solution``
    attribute of the solver.

    Parameters
    ----------
    tol : `float`, default=1e-10
        The tolerance of the solver (iterations stop when the stopping
        criterion is below it)

    max_iter : `int`, default=10
        Maximum number of iterations of the solver

    verbose : `bool`, default=True
        If `True`, solver verboses history, otherwise nothing is displayed,
        but history is recorded anyway

    print_every : `int`, default=10
        Print history information every time the iteration number is a
        multiple of ``print_every``. Used only if ``verbose`` is True

    record_every : `int`, default=1
        Save history information every time the iteration number is a
        multiple of ``record_every``

    Attributes
    ----------
    model : `Model`
        The model used by the solver, passed with the ``set_model`` method

    prox : `Prox`
        Proximal operator used by the solver, passed with the ``set_prox``
        method

    solution : `numpy.array`, shape=(n_coeffs,)
        Minimizer found by the solver

    history : `dict`-like
        A dict-type of object that contains history of the solver along
        iterations. It should be accessed using the ``get_history`` method

    time_start : `str`
        Start date of the call to ``solve()``

    time_elapsed : `float`
        Duration of the call to ``solve()``, in seconds

    time_end : `str`
        End date of the call to ``solve()``

    dtype : `{'float64', 'float32'}`, default='float64'
        Type of the arrays used. This value is set from model and prox dtypes.

    References
    ----------
    * Quasi-Newton method of Broyden, Fletcher, Goldfarb and Shanno (BFGS),
      see Wright, and Nocedal 'Numerical Optimization', 1999, pg. 198.
    """

    # _prox_grad holds the gradient of the penalization term; it is set
    # through self._set since it is declared non-writable here.
    _attrinfos = {"_prox_grad": {"writable": False}}

    def __init__(self, tol: float = 1e-10, max_iter: int = 10,
                 verbose: bool = True, print_every: int = 1,
                 record_every: int = 1):
        SolverFirstOrder.__init__(self, step=None, tol=tol, max_iter=max_iter,
                                  verbose=verbose, print_every=print_every,
                                  record_every=record_every)
        self._prox_grad = None

    def set_prox(self, prox: Prox):
        """Set proximal operator in the solver.

        Parameters
        ----------
        prox : `Prox`
            The proximal operator of the penalization function

        Returns
        -------
        output : `Solver`
            The solver with given prox

        Notes
        -----
        In some solvers, ``set_model`` must be called before
        ``set_prox``, otherwise an error might be raised.
        """
        if type(prox) is ProxZero:
            SolverFirstOrder.set_prox(self, prox)
            # The zero penalization contributes nothing to the gradient.
            # Previously this was ``lambda x: x``, which silently added a
            # unit-strength ridge term to the gradient only, making the
            # gradient inconsistent with the objective passed to BFGS.
            self._set("_prox_grad", lambda x: 0.)
        elif type(prox) is ProxL2Sq:
            SolverFirstOrder.set_prox(self, prox)
            # Gradient of the ridge penalization strength/2 * ||x||^2
            self._set("_prox_grad", lambda x: prox.strength * x)
        else:
            raise ValueError("BFGS only accepts ProxZero and ProxL2sq "
                             "for now")
        return self

    def solve(self, x0=None):
        """
        Launch the solver

        Parameters
        ----------
        x0 : `np.array`, shape=(n_coeffs,), default=`None`
            Starting point of the solver

        Returns
        -------
        output : `np.array`, shape=(n_coeffs,)
            Obtained minimizer for the problem
        """
        self._start_solve()
        coeffs = self._solve(x0)
        self._set("solution", coeffs)
        self._end_solve()
        return self.solution

    def _solve(self, x0: np.ndarray = None):
        # Run scipy's BFGS on loss + penalization, recording history through
        # the callback. Returns the minimizer found by fmin_bfgs.
        if x0 is None:
            x0 = np.zeros(self.model.n_coeffs, dtype=self.dtype)

        # A closure to maintain history along internal BFGS's iterations
        n_iter = [0]
        prev_x = x0.copy()

        def insp(xk):
            if self._should_record_iter(n_iter[0]):
                prev_obj = self.objective(prev_x)
                x = xk
                rel_delta = relative_distance(x, prev_x)
                obj = self.objective(x)
                # Guard against ZeroDivisionError when the previous
                # objective is exactly zero
                prev_obj_abs = abs(prev_obj)
                if prev_obj_abs > 0:
                    rel_obj = abs(obj - prev_obj) / prev_obj_abs
                else:
                    rel_obj = abs(obj - prev_obj)
                self._handle_history(n_iter[0], force=False, obj=obj,
                                     x=xk.copy(), rel_delta=rel_delta,
                                     rel_obj=rel_obj)
            prev_x[:] = xk
            n_iter[0] += 1

        insp.n_iter = n_iter
        insp.self = self
        insp.prev_x = prev_x

        # We simply call the scipy.optimize.fmin_bfgs routine
        x_min, f_min, _, _, _, _, _ = \
            fmin_bfgs(lambda x: self.model.loss(x) + self.prox.value(x),
                      x0,
                      lambda x: self.model.grad(x) + self._prox_grad(x),
                      maxiter=self.max_iter, gtol=self.tol,
                      callback=insp, full_output=True,
                      disp=False, retall=False)
        return x_min

    def set_model(self, model: Model):
        """Set model in the solver

        Parameters
        ----------
        model : `Model`
            Sets the model in the solver. The model gives the first
            order information about the model (loss, gradient, among
            other things)

        Returns
        -------
        output : `Solver`
            The `Solver` with given model
        """
        self.dtype = model.dtype
        if np.dtype(self.dtype) != np.dtype("float64"):
            raise ValueError(
                "Solver BFGS currently only accepts float64 array types")
        return SolverFirstOrder.set_model(self, model)