tick.robust.robust_linear_regression — tick 0.6.0 documentation

# License: BSD 3 clause

from tick.base import actual_kwargs
from .base import LearnerRobustGLM
from .model_linreg_with_intercepts import ModelLinRegWithIntercepts


[docs]class RobustLinearRegression(LearnerRobustGLM):
    """Robust linear regression learner.
    This is linear regression with sample intercepts (one for each sample).
    An ordered-L1 penalization is used on the individual intercepts, in order to
    obtain a sparse vector of individual intercepts, with a (theoretically)
    guaranteed False Discovery Rate control (see ``fdr`` below).

    The features matrix should contain only continuous features, and columns
    should be normalized.
    Note that C_sample_intercepts is a sensitive parameter, that should be
    tuned in theory as n_samples / noise_level, where noise_level can be chosen
    as a robust estimation of the standard deviation, using for instance
    `tick.hawkes.inference.std_mad` and `tick.hawkes.inference.std_iqr` on the array of
    labels.

    Parameters
    ----------
    C_sample_intercepts : `float`
        Level of penalization of the ProxSlope penalization used for detection
        of outliers. Ideally, this should be equal to n_samples / noise_level,
        where noise_level is an estimated noise level

    C : `float`, default=1e3
        Level of penalization of the model weights

    fdr : `float`, default=0.05
        Target false discovery rate for the detection of outliers, namely for
        the detection of non-zero entries in ``sample_intercepts``

    penalty : {'none', 'l1', 'l2', 'elasticnet', 'slope'}, default='l2'
        The penalization to use. Default 'l2', namely ridge penalization.

    fit_intercept : `bool`, default=True
        If `True`, include an intercept in the model, namely a global intercept

    refit : 'bool', default=False
        Not implemented yet

    solver : 'gd', 'agd'
        The name of the solver to use. For now, only gradient descent and
        accelerated gradient descent are available

    warm_start : `bool`, default=False
        If true, learning will start from the last reached solution

    step : `float`, default=None
        Initial step size used for learning.

    tol : `float`, default=1e-7
        The tolerance of the solver (iterations stop when the stopping
        criterion is below it).

    max_iter : `int`, default=100
        Maximum number of iterations of the solver

    verbose : `bool`, default=True
        If `True`, we verbose things, otherwise the solver does not
        print anything (but records information in history anyway)

    print_every : `int`, default=10
        Print history information when ``n_iter`` (iteration number) is
        a multiple of ``print_every``

    record_every : `int`, default=10
        Record history information when ``n_iter`` (iteration number) is
        a multiple of ``record_every``

    Other Parameters
    ----------------
    elastic_net_ratio : `float`, default=0.95
        Ratio of elastic net mixing parameter with 0 <= ratio <= 1.
        For ratio = 0 this is ridge (L2 squared) regularization
        For ratio = 1 this is lasso (L1) regularization
        For 0 < ratio < 1, the regularization is a linear combination
        of L1 and L2.
        Used in 'elasticnet' penalty only.

    slope_fdr : `float`, default=0.05
        Target false discovery rate for the detection of detection of non-zero
        entries in the model weights.
        Used in the 'slope' penalty only.

    Attributes
    ----------
    weights : `numpy.array`, shape=(n_features,)
        The learned weights of the model (not including the intercept)

    sample_intercepts : `numpy.array`, shape=(n_samples,)
        Sample intercepts. This should be a sparse vector, since a non-zero
        entry means that the sample is an outlier.

    intercept : `float` or `None`
        The intercept, if ``fit_intercept=True``, otherwise `None`

    coeffs : `numpy.array`, shape=(n_features + n_samples + 1,)
        The full array of coefficients of the model. Namely, this is simply
        the concatenation of ``weights``, ``sample_intercepts``
        and ``intercept``
    """

    _attrinfos = {"_actual_kwargs": {"writable": False}}

    @actual_kwargs
    def __init__(self, C_sample_intercepts, C=1e3, fdr=0.05, penalty='l2',
                 fit_intercept=True, refit=False, solver='agd',
                 warm_start=False, step=None, tol=1e-7, max_iter=200,
                 verbose=True, print_every=10, record_every=10,
                 elastic_net_ratio=0.95, slope_fdr=0.05):
        self._actual_kwargs = RobustLinearRegression.__init__.actual_kwargs
        LearnerRobustGLM.__init__(
            self, C_sample_intercepts=C_sample_intercepts, C=C, fdr=fdr,
            penalty=penalty, fit_intercept=fit_intercept, refit=refit,
            solver=solver, warm_start=warm_start, step=step, tol=tol,
            max_iter=max_iter, verbose=verbose, print_every=print_every,
            record_every=record_every, elastic_net_ratio=elastic_net_ratio,
            slope_fdr=slope_fdr)

    def _construct_model_obj(self, fit_intercept=True):
        return ModelLinRegWithIntercepts(fit_intercept)
Source code for tick.robust.robust_linear_regression