# License: BSD 3 clause
import numpy as np
from tick.base import actual_kwargs
from tick.preprocessing.utils import safe_array
from .model_coxreg_partial_lik import ModelCoxRegPartialLik
from tick.base.learner import LearnerOptim
[docs]class CoxRegression(LearnerOptim):
"""Cox regression learner, using the partial Cox likelihood for
proportional risks, with many choices of penalization.
Note that this learner does not have predict functions
Parameters
----------
C : `float`, default=1e3
Level of penalization
penalty : {'none', 'l1', 'l2', 'elasticnet', 'tv', 'binarsity'}, default='l2'
The penalization to use. Default is 'l2', namely Ridge penalization
solver : {'gd', 'agd'}, default='agd'
The name of the solver to use.
warm_start : `bool`, default=False
If true, learning will start from the last reached solution
step : `float`, default=None
Initial step size used for learning. Used when solver is 'gd' or
'agd'.
tol : `float`, default=1e-5
The tolerance of the solver (iterations stop when the stopping
criterion is below it). By default the solver does ``max_iter``
iterations
max_iter : `int`, default=100
Maximum number of iterations of the solver
verbose : `bool`, default=True
If `True`, we verbose things, otherwise the solver does not
print anything (but records information in history anyway)
print_every : `int`, default=10
Print history information when ``n_iter`` (iteration number) is
a multiple of ``print_every``
record_every : `int`, default=10
Record history information when ``n_iter`` (iteration number) is
a multiple of ``record_every``
Other Parameters
----------------
elastic_net_ratio : `float`, default=0.95
Ratio of elastic net mixing parameter with 0 <= ratio <= 1.
For ratio = 0 this is ridge (L2 squared) regularization
For ratio = 1 this is lasso (L1) regularization
For 0 < ratio < 1, the regularization is a linear combination
of L1 and L2.
Used in 'elasticnet' penalty
random_state : int seed, RandomState instance, or None (default)
The seed that will be used by stochastic solvers. Used in 'sgd',
'svrg', and 'sdca' solvers
blocks_start : `numpy.array`, shape=(n_features,), default=None
The indices of the first column of each binarized feature blocks. It
corresponds to the ``feature_indices`` property of the
``FeaturesBinarizer`` preprocessing.
Used in 'binarsity' penalty
blocks_length : `numpy.array`, shape=(n_features,), default=None
The length of each binarized feature blocks. It corresponds to the
``n_values`` property of the ``FeaturesBinarizer`` preprocessing.
Used in 'binarsity' penalty
Attributes
----------
coeffs : np.array, shape=(n_features,)
The learned coefficients of the model
"""
_solvers = {'gd': 'GD', 'agd': 'AGD'}
_attrinfos = {"_actual_kwargs": {"writable": False}}
@actual_kwargs
def __init__(self, penalty='l2', C=1e3, solver='agd', step=None, tol=1e-5,
max_iter=100, verbose=False, warm_start=False, print_every=10,
record_every=10, elastic_net_ratio=0.95, random_state=None,
blocks_start=None, blocks_length=None):
self._actual_kwargs = CoxRegression.__init__.actual_kwargs
LearnerOptim.__init__(
self, penalty=penalty, C=C, solver=solver, step=step, tol=tol,
max_iter=max_iter, verbose=verbose, warm_start=warm_start,
print_every=print_every, record_every=record_every,
sdca_ridge_strength=0, elastic_net_ratio=elastic_net_ratio,
random_state=random_state, blocks_start=blocks_start,
blocks_length=blocks_length)
self.coeffs = None
def _construct_model_obj(self):
return ModelCoxRegPartialLik()
def _all_safe(self, features: np.ndarray, times: np.array,
censoring: np.array):
if not set(np.unique(censoring)).issubset({0, 1}):
raise ValueError('``censoring`` must only have values in {0, 1}')
# All times must be positive
if not np.all(times >= 0):
raise ValueError('``times`` array must contain only non-negative '
'entries')
features = safe_array(features)
times = safe_array(times)
censoring = safe_array(censoring, np.ushort)
return features, times, censoring
def fit(self, features: np.ndarray, times: np.array, censoring: np.array):
"""Fit the model according to the given training data.
Parameters
----------
features : `numpy.ndarray`, shape=(n_samples, n_features)
The features matrix
times : `numpy.array`, shape = (n_samples,)
Observed times
censoring : `numpy.array`, shape = (n_samples,)
Indicator of censoring of each sample.
``True`` means true failure, namely non-censored time.
dtype must be unsigned short
Returns
-------
output : `CoxRegression`
The current instance with given data
"""
# The fit from Model calls the _set_data below
solver_obj = self._solver_obj
model_obj = self._model_obj
prox_obj = self._prox_obj
features, times, censoring = self._all_safe(features, times, censoring)
# Pass the data to the model
model_obj.fit(features, times, censoring)
if self.step is None and self.solver in self._solvers_with_step:
if self.solver in self._solvers_with_linesearch:
self._solver_obj.linesearch = True
# No intercept in this model
prox_obj.range = (0, model_obj.n_coeffs)
# Now, we can pass the model and prox objects to the solver
solver_obj.set_model(model_obj).set_prox(prox_obj)
coeffs_start = None
if self.warm_start and self.coeffs is not None:
coeffs = self.coeffs
# ensure starting point has the right format
if coeffs.shape == (model_obj.n_coeffs,):
coeffs_start = coeffs
# Launch the solver
coeffs = solver_obj.solve(coeffs_start)
# Get the learned coefficients
self._set("coeffs", coeffs)
self._set("_fitted", True)
return self
def score(self, features=None, times=None, censoring=None):
"""Returns the negative log-likelihood of the model, using the current
fitted coefficients on the passed data.
If no data is passed, the negative log-likelihood is computed using the
data used for training.
Parameters
----------
features : `None` or `numpy.ndarray`, shape=(n_samples, n_features)
The features matrix
times : `None` or `numpy.array`, shape = (n_samples,)
Observed times
censoring : `None` or `numpy.array`, shape = (n_samples,)
Indicator of censoring of each sample.
``True`` means true failure, namely non-censored time.
dtype must be unsigned short
Returns
-------
output : `float`
The value of the negative log-likelihood
"""
if self._fitted:
all_none = all(e is None for e in [features, times, censoring])
if all_none:
return self._model_obj.loss(self.coeffs)
else:
if features is None:
raise ValueError('Passed ``features`` is None')
elif times is None:
raise ValueError('Passed ``times`` is None')
elif censoring is None:
raise ValueError('Passed ``censoring`` is None')
else:
features, times, censoring = self._all_safe(
features, times, censoring)
model = ModelCoxRegPartialLik().fit(
features, times, censoring)
return model.loss(self.coeffs)
else:
raise RuntimeError('You must fit the model first')