Source code for tick.solver.scpg

# License: BSD 3 clause

import numpy as np
from numpy.linalg import norm

from tick.base_model import ModelSecondOrder
from tick.prox.base import Prox
from .base import SolverFirstOrder
from .base.utils import relative_distance


class SCPG(SolverFirstOrder):
    """Self-Concordant Proximal Gradient descent

    For the minimization of objectives of the form

    .. math::
        f(w) + g(w),

    where :math:`f` is self-concordant and :math:`g` is prox-capable.
    Function :math:`f` corresponds to the ``model.loss`` method of the model
    (passed with ``set_model`` to the solver) and :math:`g` corresponds to
    the ``prox.value`` method of the prox (passed with the ``set_prox``
    method). One iteration of :class:`SCPG <tick.solver.SCPG>` is as follows:

    .. math::
        y^k &\\gets \\mathrm{prox}_{g / L_k} \\big( w^k - \\tfrac{1}{L_k}
        \\nabla f(w^k) \\big) \\\\
        d^k &\\gets y^k - w^k \\\\
        \\beta_k &\\gets \\sqrt{L_k} \\| d^k \\|_2 \\\\
        \\lambda_k &\\gets \\sqrt{{d^k}^\\top \\nabla^2 f(w^k) d^k} \\\\
        \\alpha_k &\\gets \\frac{\\beta_k^2}{\\lambda_k (\\lambda_k +
        \\beta_k^2)} \\\\
        w^{k + 1} &\\gets w^{k} + \\alpha_k d^k \\\\

    where :math:`\\nabla f(w)` is the gradient of :math:`f` given by the
    ``model.grad`` method and :math:`\\mathrm{prox}_{g / L_k}` is given by
    the ``prox.call`` method. The first step size :math:`1 / L_k` can be
    tuned with ``step``; it is then updated with Barzilai-Borwein steps and
    line search. The iterations stop whenever tolerance ``tol`` is achieved,
    or after ``max_iter`` iterations. The obtained solution :math:`w` is
    returned by the ``solve`` method, and is also stored in the ``solution``
    attribute of the solver.

    Parameters
    ----------
    step : `float`, default=None
        Step-size parameter that will be used at the first iteration.
        The line-search algorithm then computes the next steps.

    tol : `float`, default=1e-10
        The tolerance of the solver (iterations stop when the stopping
        criterion is below it)

    max_iter : `int`, default=100
        Maximum number of iterations of the solver.

    verbose : `bool`, default=True
        If `True`, solver verboses history, otherwise nothing is displayed,
        but history is recorded anyway

    print_every : `int`, default=10
        Print history information every time the iteration number is a
        multiple of ``print_every``. Used only if ``verbose`` is True

    record_every : `int`, default=1
        Save history information every time the iteration number is a
        multiple of ``record_every``

    linesearch_step_increase : `float`, default=2.
        Factor of step increase when using line search

    linesearch_step_decrease : `float`, default=0.5
        Factor of step decrease when using line search

    modified : `bool`, default=False
        Enables the modified version. The modified version of this algorithm
        evaluates the objective at both :math:`y^k` and :math:`w^{k+1}` and
        keeps the iterate that achieves the smallest value.

    dtype : `{'float64', 'float32'}`, default='float64'
        Type of the arrays used. This value is set from model and prox
        dtypes.

    Attributes
    ----------
    model : `Model`
        The model used by the solver, passed with the ``set_model`` method

    prox : `Prox`
        Proximal operator used by the solver, passed with the ``set_prox``
        method

    solution : `numpy.array`, shape=(n_coeffs,)
        Minimizer found by the solver

    history : `dict`-like
        A dict-type of object that contains history of the solver along
        iterations. It should be accessed using the ``get_history`` method

    time_start : `str`
        Start date of the call to ``solve()``

    time_elapsed : `float`
        Duration of the call to ``solve()``, in seconds

    time_end : `str`
        End date of the call to ``solve()``

    Notes
    -----
    This algorithm is designed to work properly with self-concordant losses
    such as Poisson regression with linear link
    `ModelPoisReg <tick.optim.model.ModelPoisReg>` or Hawkes processes
    likelihood `ModelHawkesSumExpKernLogLik
    <tick.optim.model.ModelHawkesSumExpKernLogLik>`

    References
    ----------
    Tran-Dinh Q, Kyrillidis A, Cevher V. Composite self-concordant
    minimization. *The Journal of Machine Learning Research* (2015)
    """

    _attrinfos = {
        'model_ssc': {
            'writable': False
        },
        'prox_ssc': {
            'writable': False
        },
        '_th_gain': {},
        "_initial_n_hessiannorm_calls": {
            "writable": False
        },
    }

    class _ModelStandardSC:
        """The standard self-concordant version of the model
        """

        def __init__(self, model: ModelSecondOrder):
            self.original_model = model
            self.sc_constant = model._sc_constant
            self.sc_corr = self.sc_constant ** 2 / 4
            self.sc_corr_sqrt = self.sc_constant / 2
            self._initial_n_hessiannorm_calls = 0

        def loss(self, coeffs: np.ndarray):
            return self.original_model.loss(coeffs) * self.sc_corr

        def grad(self, coeffs: np.ndarray, out: np.ndarray = None):
            out = self.original_model.grad(coeffs, out=out)
            out[:] = out * self.sc_corr
            return out

        def loss_and_grad(self, coeffs: np.ndarray, out: np.ndarray = None):
            loss, out = self.original_model.loss_and_grad(coeffs, out=out)
            out[:] = out * self.sc_corr
            return loss * self.sc_corr, out

        def hessian_norm(self, coeffs: np.ndarray, point: np.ndarray):
            return self.original_model.hessian_norm(coeffs, point) * \
                self.sc_corr_sqrt

        def original_loss(self, loss_ssc):
            """Returns the loss of the original model from the loss of the
            standard self-concordant model
            """
            return loss_ssc / self.sc_corr

        def original_objective(self, obj_ssc):
            """Returns the objective of the original model from the
            objective of the standard self-concordant model
            """
            return obj_ssc / self.sc_corr

    class _ProxStandardSC:
        """The standard self-concordant version of the prox, according to
        the model
        """

        def __init__(self):
            self.original_prox = None
            self.sc_constant = None
            self.sc_corr = None

        def set_original_prox(self, prox: Prox):
            self.original_prox = prox

        def set_self_conc_constant(self, self_conc_constant: float):
            self.sc_constant = self_conc_constant
            self.sc_corr = self.sc_constant ** 2 / 4

        def call(self, coeffs: np.ndarray, t: float = 1.,
                 out: np.ndarray = None):
            out = self.original_prox.call(coeffs, step=t * self.sc_corr,
                                          out=out)
            return out

        def value(self, coeffs: np.ndarray):
            return self.original_prox.value(coeffs) * self.sc_corr
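    # The two helper classes above rescale the problem into its "standard"
    # self-concordant form: if ``f`` is self-concordant with constant
    # ``M_f`` (exposed as ``model._sc_constant``), then ``(M_f ** 2 / 4) * f``
    # is self-concordant with constant 2, which is the normalization used in
    # Tran-Dinh et al. (2015).  The loss, gradient and prox are therefore
    # multiplied by ``sc_corr = M_f ** 2 / 4``, while the Hessian-based norm
    # is multiplied by its square root ``sc_corr_sqrt = M_f / 2``.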
    def __init__(self, step: float = None, tol: float = 0.,
                 max_iter: int = 100, verbose: bool = True,
                 print_every: int = 10, record_every: int = 1,
                 linesearch_step_increase: float = 2.,
                 linesearch_step_decrease: float = 0.5, modified=False):
        SolverFirstOrder.__init__(self, step=step, tol=tol,
                                  max_iter=max_iter, verbose=verbose,
                                  print_every=print_every,
                                  record_every=record_every)
        self.linesearch_step_increase = linesearch_step_increase
        self.linesearch_step_decrease = linesearch_step_decrease
        self.modified = modified
        self.model_ssc = None
        self.prox_ssc = self._ProxStandardSC()
        self._th_gain = 0
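    # If ``step`` is left to ``None``, ``_solve`` below starts from a large
    # default value (``1e5``) and adjusts it with ``_perform_line_search``
    # before the main loop, so providing an explicit value is optional.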
    def set_model(self, model: ModelSecondOrder):
        """Set model in the solver

        Parameters
        ----------
        model : `ModelSecondOrder` and `ModelSelfConcordant`
            Sets the model in the solver. The model gives first and second
            order information about the loss (loss value, gradient, Hessian
            norm, among other things)

        Returns
        -------
        output : `Solver`
            The same instance with given model
        """
        self._set('model', model)
        self._set('model_ssc', self._ModelStandardSC(model=model))
        self.prox_ssc.set_self_conc_constant(model._sc_constant)
        self.dtype = np.dtype("float64")
        return self
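    # Note that ``set_model`` above sets the solver's working dtype to
    # ``float64``, regardless of the dtype of the model and prox that are
    # passed in.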
    def set_prox(self, prox: Prox):
        """Set proximal operator in the solver.

        Parameters
        ----------
        prox : `Prox`
            The proximal operator of the penalization function

        Returns
        -------
        output : `Solver`
            The solver with given prox
        """
        SolverFirstOrder.set_prox(self, prox)
        self._set('prox', prox)
        self.prox_ssc.set_original_prox(prox)
        return self
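    # Both ``set_model`` and ``set_prox`` return ``self``, so a solver can
    # be configured fluently, e.g.
    # ``SCPG(max_iter=50).set_model(model).set_prox(prox)`` before calling
    # ``solve``, which is inherited from the solver base classes.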
    def _objective_ssc(self, x, loss_ssc: float = None):
        """Compute the objective at x for the standard self-concordant model

        .. math::
            \\frac{M_f^2}{4} \\big( f(x) + g(x) \\big)

        Parameters
        ----------
        x : `numpy.ndarray`
            The point at which we compute the objective

        loss_ssc : `float`
            The value of f(x) if it is already known
        """
        if loss_ssc is None:
            return self.model_ssc.loss(x) + self.prox_ssc.value(x)
        else:
            return loss_ssc + self.prox_ssc.value(x)

    def _initialize_values(self, x0, step):
        step, obj, x, prev_x, prev_grad_x_ssc, grad_x_ssc = \
            SolverFirstOrder._initialize_values(self, x0, step,
                                                n_empty_vectors=3)
        grad_x_ssc = self.model_ssc.grad(x, out=grad_x_ssc)
        return step, obj, x, prev_x, prev_grad_x_ssc, grad_x_ssc

    def _perform_line_search(self, x, y, step):
        step *= self.linesearch_step_increase
        llh_y_ssc, grad_y_ssc = self.model_ssc.loss_and_grad(y)
        obj_y_ssc = self._objective_ssc(y, loss_ssc=llh_y_ssc)
        while True:
            x[:] = self.prox_ssc.call(y - step * grad_y_ssc, step)
            if norm(x) == 0:
                test = False
            else:
                obj_x_ssc = self._objective_ssc(x)
                envelope = obj_y_ssc + \
                    np.sum(grad_y_ssc * (x - y), axis=None) + \
                    1. / (2 * step) * norm(x - y) ** 2
                test = (obj_x_ssc <= envelope)
            if test:
                break
            step *= self.linesearch_step_decrease
        return step

    def _gradient_step(self, x, prev_x, grad_x_ssc, prev_grad_x_ssc, n_iter,
                       l_k):
        # Testing if our value of l_k fits the condition for the stepsize
        # alpha_k

        # Barzilai-Borwein step
        if n_iter % 10 == 1:
            tmp = x - prev_x
            if np.linalg.norm(tmp, 2) ** 2 > 0:
                l_k = (grad_x_ssc - prev_grad_x_ssc).dot(tmp) / \
                    (np.linalg.norm(tmp, 2) ** 2)
                l_k *= 2

        prev_grad_x_ssc[:] = grad_x_ssc
        prev_x[:] = x

        # Compute x_new, next step vector
        condition = False
        while not condition:
            y_k = self.prox_ssc.call(x - grad_x_ssc / l_k, 1. / l_k)
            d_k = y_k - x
            beta_k = np.sqrt(l_k) * np.linalg.norm(d_k, 2)
            lambda_k = np.sqrt(self.model_ssc.hessian_norm(x, d_k))
            alpha_k = beta_k * beta_k / \
                (lambda_k * (lambda_k + beta_k * beta_k))
            # condition = lambda_k >= 1 or lambda_k >= beta_k
            condition = 0 <= alpha_k < 1
            if not condition:
                l_k /= 2
                if np.isnan(l_k):
                    raise ValueError('l_k is nan')

        self._th_gain = beta_k * beta_k / lambda_k - np.log(
            1 + beta_k * beta_k / lambda_k)
        x_new = x + alpha_k * d_k
        # we also "return" grad_x_ssc and prev_grad_x_ssc which are filled
        # during the function's run
        return x_new, y_k, alpha_k, beta_k, lambda_k, l_k
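    # In ``_gradient_step`` above, ``l_k`` plays the role of a local
    # Lipschitz estimate: it is refreshed every 10 iterations with a
    # Barzilai-Borwein ratio, and halved until the damped step size
    # ``alpha_k = beta_k ** 2 / (lambda_k * (lambda_k + beta_k ** 2))``
    # falls in [0, 1).  The stored ``_th_gain`` equals
    # ``omega(beta_k ** 2 / lambda_k)`` with ``omega(t) = t - log(1 + t)``,
    # the kind of per-iteration decrease bound used for self-concordant
    # objectives in Tran-Dinh et al. (2015); it is only recorded in the
    # history (as ``th_gain``) and does not affect the iterates.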
    def _solve(self, x0: np.ndarray = None, step: float = None):
        step, obj, x, prev_x, prev_grad_x_ssc, grad_x_ssc = \
            self._initialize_values(x0, step=step)

        if self.modified:
            grad_y_ssc = np.empty_like(x)

        if self.step is None:
            self.step = 1e5

        step = self._perform_line_search(x, x.copy(), self.step)
        l_k = 1. / step

        for n_iter in range(self.max_iter):
            prev_obj = obj
            x, y, alpha_k, beta_k, lambda_k, l_k = \
                self._gradient_step(x, prev_x, grad_x_ssc, prev_grad_x_ssc,
                                    n_iter, l_k)

            if self.modified:
                llh_y_ssc, _ = self.model_ssc.loss_and_grad(y, out=grad_y_ssc)
                llh_x_ssc, _ = self.model_ssc.loss_and_grad(x, out=grad_x_ssc)
                if self._objective_ssc(y, loss_ssc=llh_y_ssc) < \
                        self._objective_ssc(x, loss_ssc=llh_x_ssc):
                    x[:] = y
                    grad_x_ssc[:] = grad_y_ssc
                    llh_x_ssc = llh_y_ssc
            else:
                llh_x_ssc, _ = self.model_ssc.loss_and_grad(x, out=grad_x_ssc)

            rel_delta = relative_distance(x, prev_x)
            llh_x = self.model_ssc.original_loss(llh_x_ssc)
            obj = self.objective(x, loss=llh_x)
            rel_obj = abs(obj - prev_obj) / abs(prev_obj)
            obj_gain = prev_obj - obj

            converged = rel_obj < self.tol
            # if converged, we stop the loop and record the last step
            # in history
            self._handle_history(n_iter + 1, force=converged, obj=obj,
                                 x=x.copy(), rel_delta=rel_delta,
                                 step=alpha_k, rel_obj=rel_obj, l_k=l_k,
                                 beta_k=beta_k, lambda_k=lambda_k,
                                 th_gain=self._th_gain, obj_gain=obj_gain)
            if converged:
                break

        self._set("solution", x)
        return x

    def _handle_history(self, n_iter: int, force: bool = False, **kwargs):
        """Updates the history of the solver.
        """
        if n_iter == 1:
            self._set('_initial_n_hessiannorm_calls',
                      self.model.n_calls_hessian_norm)

        hessiannorm_calls = self.model.n_calls_hessian_norm - \
            self._initial_n_hessiannorm_calls

        SolverFirstOrder._handle_history(
            self, n_iter, force=force,
            n_calls_hessiannorm=hessiannorm_calls, **kwargs)
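# Minimal usage sketch (not part of the solver).  It assumes a
# self-concordant model such as Poisson regression with identity link and a
# prox from ``tick.prox``; exact import paths, simulation helpers and
# parameter names may differ between tick versions.
#
#     import numpy as np
#     from tick.linear_model import ModelPoisReg, SimuPoisReg
#     from tick.prox import ProxL2Sq
#     from tick.solver import SCPG
#
#     weights = np.abs(np.random.randn(5))
#     X, y = SimuPoisReg(weights, n_samples=1000, link='identity',
#                        seed=42).simulate()
#     model = ModelPoisReg(fit_intercept=False, link='identity').fit(X, y)
#     prox = ProxL2Sq(strength=1e-3, positive=True)
#
#     solver = SCPG(max_iter=100, print_every=10, tol=1e-8)
#     solver.set_model(model).set_prox(prox)
#     coeffs = solver.solve(0.1 * np.ones(model.n_coeffs))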