# License: BSD 3 clause
import numpy as np
from numpy.linalg import norm
from tick.base_model import ModelSecondOrder
from tick.prox.base import Prox
from .base import SolverFirstOrder
from .base.utils import relative_distance
class SCPG(SolverFirstOrder):
"""Self-Concordant Proximal Gradient descent
For the minimization of objectives of the form
.. math::
f(w) + g(w),
where :math:`f` is self-concordant and :math:`g` is prox-capable.
Function :math:`f` corresponds to the ``model.loss`` method of the model
(passed with ``set_model`` to the solver) and :math:`g` corresponds to
the ``prox.value`` method of the prox (passed with the ``set_prox`` method).
One iteration of :class:`SCPG <tick.solver.SCPG>` is as follows:
.. math::
y^k &\\gets \\mathrm{prox}_{g / L_k} \\big( w^k - \\tfrac{1}{L_k}
\\nabla f(w^k) \\big) \\\\
d^k &\\gets y^k - w^k \\\\
\\beta_k &\\gets \\sqrt{L_k} \| d^k \|_2 \\\\
\\lambda_k &\\gets \\sqrt{{d^k}^\\top \\nabla^2 f(w^k) d^k} \\\\
\\alpha_k &\\gets \\frac{\\beta_k^2}{\\lambda_k (\\lambda_k +
\\beta_k^2)} \\\\
w^{k + 1} &\\gets w^{k} + \\alpha_k d^k \\\\
where :math:`\\nabla f(w)` is the gradient of :math:`f` given by the
``model.grad`` method and :math:`\\mathrm{prox}_{g / L_k}` is given by the
``prox.call`` method. The first step size :math:`1 / L_k` can be tuned with
``step``; it is then updated with Barzilai-Borwein steps and linesearch.
The iterations stop whenever tolerance ``tol`` is achieved, or after
``max_iter`` iterations. The obtained solution :math:`w` is returned
by the ``solve`` method, and is also stored in the ``solution`` attribute
of the solver.
Parameters
----------
step : `float`, default=None
Step-size parameter that will be used at the first iteration. Then
the linesearch algorithm will compute the next steps.
tol : `float`, default=1e-10
The tolerance of the solver (iterations stop when the stopping
criterion is below it)
max_iter : `int`, default=100
Maximum number of iterations of the solver.
verbose : `bool`, default=True
If `True`, the solver prints information about its progress; otherwise
nothing is displayed, but history is recorded anyway
print_every : `int`, default=10
Print history information every time the iteration number is a
multiple of ``print_every``. Used only if ``verbose`` is True
record_every : `int`, default=1
Save history information every time the iteration number is a
multiple of ``record_every``
linesearch_step_increase : `float`, default=2.
Factor of step increase when using linesearch
linesearch_step_decrease : `float`, default=0.5
Factor of step decrease when using linesearch
modified : `bool`, default=False
Enables the modified version. The modified version of this algorithm
evaluates :math:`f` at both :math:`y^k` and :math:`w^{k+1}` and keeps
the iterate that gives the smallest value of :math:`f`.
dtype : `{'float64', 'float32'}`, default='float64'
Type of the arrays used. This value is set from model and prox dtypes.
Attributes
----------
model : `Model`
The model used by the solver, passed with the ``set_model`` method
prox : `Prox`
Proximal operator used by the solver, passed with the ``set_prox``
method
solution : `numpy.array`, shape=(n_coeffs,)
Minimizer found by the solver
history : `dict`-like
A dict-type of object that contains history of the solver along
iterations. It should be accessed using the ``get_history`` method
time_start : `str`
Start date of the call to ``solve()``
time_elapsed : `float`
Duration of the call to ``solve()``, in seconds
time_end : `str`
End date of the call to ``solve()``
Notes
-----
This algorithm is designed to work with self-concordant losses, such as
Poisson regression with linear link
`ModelPoisReg <tick.optim.model.ModelPoisReg>` or the Hawkes process
log-likelihood
`ModelHawkesSumExpKernLogLik <tick.optim.model.ModelHawkesSumExpKernLogLik>`
References
----------
Tran-Dinh Q, Kyrillidis A, Cevher V. Composite self-concordant
minimization. *The Journal of Machine Learning Research* (2015)
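Examples
--------
A minimal usage sketch. The import paths and the toy data below are
assumptions (they follow recent versions of tick) and may need to be
adapted to your installation::

    import numpy as np
    from tick.linear_model import ModelPoisReg
    from tick.prox import ProxL2Sq
    from tick.solver import SCPG

    # Toy Poisson regression data with identity link: nonnegative
    # features and positive weights keep all intensities positive
    np.random.seed(123)
    n_samples, n_features = 1000, 5
    features = np.random.rand(n_samples, n_features)
    weights = 0.5 + np.random.rand(n_features)
    labels = np.random.poisson(features.dot(weights)).astype(float)

    # Poisson regression with linear link is self-concordant
    model = ModelPoisReg(fit_intercept=False, link='identity')
    model.fit(features, labels)
    prox = ProxL2Sq(strength=1e-2)

    solver = SCPG(max_iter=100, tol=1e-10, verbose=False)
    solver.set_model(model).set_prox(prox)
    # with the identity link, the starting point must yield positive
    # intensities so that the loss is finite
    coeffs = solver.solve(0.5 * np.ones(model.n_coeffs))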
"""
_attrinfos = {
'model_ssc': {
'writable': False
},
'prox_ssc': {
'writable': False
},
'_th_gain': {},
"_initial_n_hessiannorm_calls": {
"writable": False
},
}
class _ModelStandardSC:
"""The standard self concordant version of the model
"""
def __init__(self, model: ModelSecondOrder):
self.original_model = model
self.sc_constant = model._sc_constant
self.sc_corr = self.sc_constant ** 2 / 4
self.sc_corr_sqrt = self.sc_constant / 2
self._initial_n_hessiannorm_calls = 0
def loss(self, coeffs: np.ndarray):
return self.original_model.loss(coeffs) * self.sc_corr
def grad(self, coeffs: np.ndarray, out: np.ndarray = None):
out = self.original_model.grad(coeffs, out=out)
out[:] = out * self.sc_corr
return out
def loss_and_grad(self, coeffs: np.ndarray, out: np.ndarray = None):
loss, out = self.original_model.loss_and_grad(coeffs, out=out)
out[:] = out * self.sc_corr
return loss * self.sc_corr, out
def hessian_norm(self, coeffs: np.ndarray, point: np.ndarray):
return self.original_model.hessian_norm(coeffs, point) * \
self.sc_corr_sqrt
def original_loss(self, loss_ssc):
"""Returns the loss of the original model form the loss of the
standard self-concordant model
"""
return loss_ssc / self.sc_corr
def original_objective(self, obj_ssc):
"""Returns the loss of the original model form the loss of the
standard self-concordant model
"""
return obj_ssc / self.sc_corr
class _ProxStandardSC:
"""The standard self concordant version of the prox according to the
model
"""
def __init__(self):
self.original_prox = None
self.sc_constant = None
self.sc_corr = None
def set_original_prox(self, prox: Prox):
self.original_prox = prox
def set_self_conc_constant(self, self_conc_constant: float):
self.sc_constant = self_conc_constant
self.sc_corr = self.sc_constant ** 2 / 4
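# Since prox_{t * (c * g)}(x) = prox_{(t * c) * g}(x), taking the prox of
# the scaled penalty (M_f ** 2 / 4) * g with step t amounts to calling the
# original prox with step t * sc_corr, which is what ``call`` does below.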
def call(self, coeffs: np.ndarray, t: float = 1.,
out: np.ndarray = None):
out = self.original_prox.call(coeffs, step=t * self.sc_corr,
out=out)
return out
def value(self, coeffs: np.ndarray):
return self.original_prox.value(coeffs) * self.sc_corr
def __init__(self, step: float = None, tol: float = 0.,
max_iter: int = 100, verbose: bool = True,
print_every: int = 10, record_every: int = 1,
linesearch_step_increase: float = 2.,
linesearch_step_decrease: float = 0.5, modified=False):
SolverFirstOrder.__init__(self, step=step, tol=tol, max_iter=max_iter,
verbose=verbose, print_every=print_every,
record_every=record_every)
self.linesearch_step_increase = linesearch_step_increase
self.linesearch_step_decrease = linesearch_step_decrease
self.modified = modified
self.model_ssc = None
self.prox_ssc = self._ProxStandardSC()
self._th_gain = 0
def set_model(self, model: ModelSecondOrder):
"""Set model in the solver
Parameters
----------
model : `ModelSecondOrder` and `ModelSelfConcordant`
Sets the model in the solver. The model provides the loss, its
gradient and the Hessian-weighted norm, among other things
Returns
-------
output : `Solver`
The same instance with given model
"""
self._set('model', model)
self._set('model_ssc', self._ModelStandardSC(model=model))
self.prox_ssc.set_self_conc_constant(model._sc_constant)
self.dtype = np.dtype("float64")
return self
def set_prox(self, prox: Prox):
"""Set proximal operator in the solver.
Parameters
----------
prox : `Prox`
The proximal operator of the penalization function
Returns
-------
output : `Solver`
The solver with given prox
"""
SolverFirstOrder.set_prox(self, prox)
self._set('prox', prox)
self.prox_ssc.set_original_prox(prox)
return self
def _objective_ssc(self, x, loss_ssc: float = None):
"""Compute objective at x for the standard self concordant model
.. math::`\frac{M_f^2}{4} * (f(x) + g(x))`
Parameters
----------
x : `float`
The value at which we compute the objective
loss_ssc: `float`
The value of the f(x) if it is already known
"""
if loss_ssc is None:
return self.model_ssc.loss(x) + self.prox_ssc.value(x)
else:
return loss_ssc + self.prox_ssc.value(x)
def _initialize_values(self, x0, step):
step, obj, x, prev_x, prev_grad_x_ssc, grad_x_ssc = \
SolverFirstOrder._initialize_values(self, x0, step,
n_empty_vectors=3)
grad_x_ssc = self.model_ssc.grad(x, out=grad_x_ssc)
return step, obj, x, prev_x, prev_grad_x_ssc, grad_x_ssc
def _perform_line_search(self, x, y, step):
step *= self.linesearch_step_increase
llh_y_ssc, grad_y_ssc = self.model_ssc.loss_and_grad(y)
obj_y_ssc = self._objective_ssc(y, loss_ssc=llh_y_ssc)
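# Backtracking: starting from an increased step, shrink it until the
# proximal gradient point x satisfies
#   f(x) + g(x) <= f(y) + g(y) + <grad f(y), x - y> + ||x - y||^2 / (2 step)
# so that 1 / step is a valid local Lipschitz estimate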
while True:
x[:] = self.prox_ssc.call(y - step * grad_y_ssc, step)
if norm(x) == 0:
test = False
else:
obj_x_ssc = self._objective_ssc(x)
envelope = obj_y_ssc + \
np.sum(grad_y_ssc * (x - y), axis=None) + \
1. / (2 * step) * norm(x - y) ** 2
test = (obj_x_ssc <= envelope)
if test:
break
step *= self.linesearch_step_decrease
return step
def _gradient_step(self, x, prev_x, grad_x_ssc, prev_grad_x_ssc, n_iter,
l_k):
# Testing if our value of l_k fits the condition for the stepsize
# alpha_k
# Barzilai-Borwein step
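# Every 10 iterations, re-estimate the local Lipschitz constant with the
# Barzilai-Borwein ratio
#   l_k = <grad f(x) - grad f(x_prev), x - x_prev> / ||x - x_prev||^2
# (doubled for safety); in between, the previous l_k is kept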
if n_iter % 10 == 1:
tmp = x - prev_x
if np.linalg.norm(tmp, 2) ** 2 > 0:
l_k = (grad_x_ssc - prev_grad_x_ssc).dot(tmp) / \
(np.linalg.norm(tmp, 2) ** 2)
l_k *= 2
prev_grad_x_ssc[:] = grad_x_ssc
prev_x[:] = x
# Compute x_new, next step vector
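# Decrease l_k (i.e. increase the tentative step 1 / l_k) until the
# damped step size alpha_k falls in [0, 1)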
condition = False
while not condition:
y_k = self.prox_ssc.call(x - grad_x_ssc / l_k, 1. / l_k)
d_k = y_k - x
beta_k = np.sqrt(l_k) * np.linalg.norm(d_k, 2)
lambda_k = np.sqrt(self.model_ssc.hessian_norm(x, d_k))
alpha_k = beta_k * beta_k / \
(lambda_k * (lambda_k + beta_k * beta_k))
# condition = lambda_k >= 1 or lambda_k >= beta_k
condition = 0 <= alpha_k < 1
if not condition:
l_k /= 2
if np.isnan(l_k):
raise ValueError('l_k is nan')
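# Theoretical gain recorded in the history: omega(t) = t - log(1 + t)
# evaluated at t = beta_k ** 2 / lambda_k, the decrease guaranteed by the
# self-concordant analysis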
self._th_gain = beta_k * beta_k / lambda_k - np.log(
1 + beta_k * beta_k / lambda_k)
x_new = x + alpha_k * d_k
# we also "return" grad_x_ssc and prev_grad_x_ssc which are filled
# during function's run
return x_new, y_k, alpha_k, beta_k, lambda_k, l_k
def _solve(self, x0: np.ndarray = None, step: float = None):
step, obj, x, prev_x, prev_grad_x_ssc, grad_x_ssc = \
self._initialize_values(x0, step=step)
if self.modified:
grad_y_ssc = np.empty_like(x)
if self.step is None:
self.step = 1e5
step = self._perform_line_search(x, x.copy(), self.step)
l_k = 1. / step
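# l_k is the running estimate of the local Lipschitz constant: the initial
# line search provides its first value and _gradient_step updates it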
for n_iter in range(self.max_iter):
prev_obj = obj
x, y, alpha_k, beta_k, lambda_k, l_k = \
self._gradient_step(x, prev_x, grad_x_ssc,
prev_grad_x_ssc, n_iter, l_k)
if self.modified:
llh_y_ssc, _ = self.model_ssc.loss_and_grad(y, out=grad_y_ssc)
llh_x_ssc, _ = self.model_ssc.loss_and_grad(x, out=grad_x_ssc)
if self._objective_ssc(y, loss_ssc=llh_y_ssc) < \
self._objective_ssc(x, loss_ssc=llh_x_ssc):
x[:] = y
grad_x_ssc[:] = grad_y_ssc
llh_x_ssc = llh_y_ssc
else:
llh_x_ssc, _ = self.model_ssc.loss_and_grad(x, out=grad_x_ssc)
rel_delta = relative_distance(x, prev_x)
llh_x = self.model_ssc.original_loss(llh_x_ssc)
obj = self.objective(x, loss=llh_x)
rel_obj = abs(obj - prev_obj) / abs(prev_obj)
obj_gain = prev_obj - obj
converged = rel_obj < self.tol
# if converged, we stop the loop and record the last step in history
self._handle_history(n_iter + 1, force=converged, obj=obj,
x=x.copy(),
rel_delta=rel_delta, step=alpha_k,
rel_obj=rel_obj, l_k=l_k, beta_k=beta_k,
lambda_k=lambda_k, th_gain=self._th_gain,
obj_gain=obj_gain)
if converged:
break
self._set("solution", x)
return x
def _handle_history(self, n_iter: int, force: bool = False, **kwargs):
"""Updates the history of the solver.
"""
if n_iter == 1:
self._set('_initial_n_hessiannorm_calls',
self.model.n_calls_hessian_norm)
hessiannorm_calls = self.model.n_calls_hessian_norm - \
self._initial_n_hessiannorm_calls
SolverFirstOrder._handle_history(self, n_iter, force=force,
n_calls_hessiannorm=hessiannorm_calls,
**kwargs)