Source code for tick.linear_model.simu_linreg

# License: BSD 3 clause

import numpy as np

from tick.base.simulation import SimuWithFeatures


class SimuLinReg(SimuWithFeatures):
    """Simulation of a Linear regression model

    Labels are generated as ``features.dot(weights) + intercept + noise``,
    where the noise is Gaussian with standard deviation ``std``.

    Parameters
    ----------
    weights : `numpy.ndarray`, shape=(n_features,)
        The array of weights of the model

    intercept : `float`, default=`None`
        The intercept. If None, then no intercept is used

    features : `numpy.ndarray`, shape=(n_samples, n_features), default=`None`
        The features matrix to use. If None, it is simulated

    n_samples : `int`, default=200
        Number of samples

    std : `float`, default=1.
        Standard deviation of the noise (Gaussian)

    features_type : `str`, default="cov_toeplitz"
        The type of features matrix to simulate

        * If ``"cov_toeplitz"`` : a Gaussian distribution with
          Toeplitz correlation matrix

        * If ``"cov_uniform"`` : a Gaussian distribution with
          correlation matrix given by .5 * (U + U.T), where U is
          uniform on [0, 1] and diagonal filled with ones.

    cov_corr : `float`, default=.5
        Correlation to use in the Toeplitz correlation matrix

    features_scaling : `str`, default="none"
        The way the features matrix is scaled after simulation

        * If ``"standard"`` : the columns are centered and
          normalized

        * If ``"min-max"`` : remove the minimum and divide by
          max-min

        * If ``"norm"`` : the columns are normalized but not centered

        * If ``"none"`` : nothing is done to the features

    seed : `int`, default=None
        The seed of the random number generator. If `None` it is not
        seeded

    verbose : `bool`, default=True
        If `True`, print things

    dtype : `{'float64', 'float32'}`, default='float64'
        Type of the generated arrays.
        Used in the case features is None

    Attributes
    ----------
    features : `numpy.ndarray`, shape=(n_samples, n_features)
        The simulated (or given) features matrix

    labels : `numpy.ndarray`, shape=(n_samples,)
        The simulated labels

    time_start : `str`
        Start date of the simulation

    time_elapsed : `int`
        Duration of the simulation, in seconds

    time_end : `str`
        End date of the simulation
    """

    # "labels" is declared non-writable here; the class itself only ever
    # assigns it through ``self._set``.
    _attrinfos = {"labels": {"writable": False}}

    def __init__(self, weights: np.ndarray, intercept: float = None,
                 features: np.ndarray = None, n_samples: int = 200,
                 std: float = 1., features_type: str = "cov_toeplitz",
                 cov_corr: float = 0.5, features_scaling: str = "none",
                 seed: int = None, verbose: bool = True, dtype="float64"):
        # The number of features is dictated by the length of the weights
        n_features = weights.shape[0]
        SimuWithFeatures.__init__(self, intercept, features, n_samples,
                                  n_features, features_type, cov_corr,
                                  features_scaling, seed, verbose,
                                  dtype=dtype)
        self.weights = weights
        self.std = std
        # No labels are available until the simulation has been run
        self._set("labels", None)

    def simulate(self):
        """
        Launch simulation of the data

        Returns
        -------
        features: `numpy.ndarray`, shape=(n_samples, n_features)
            The features matrix

        labels: `numpy.ndarray`, shape=(n_samples,)
            The labels vector
        """
        return SimuWithFeatures.simulate(self)

    def _simulate(self):
        """Draw the labels from the features and store them in ``labels``

        Returns
        -------
        features: `numpy.ndarray`, shape=(n_samples, n_features)
            The features matrix, read from ``self.features``

        labels: `numpy.ndarray`, shape=(n_samples,)
            The labels vector, cast to ``self.dtype`` when needed
        """
        # The features matrix already exists, and is created by the
        # super class
        features = self.features
        # Unpacking still enforces a 2-dimensional features matrix; the
        # number of columns itself is not needed here.
        n_samples, _ = features.shape
        u = features.dot(self.weights)
        # Add the intercept if necessary. The addition is done out of place
        # because an in-place ``+=`` raises a casting error when the product
        # has an integer dtype and the intercept is a float.
        if self.intercept is not None:
            u = u + self.intercept
        labels = u + self.std * np.random.randn(n_samples)
        # "astype" must be used for labels as the Gaussian noise returned
        # by np.random.randn is always float64
        if labels.dtype != self.dtype:
            labels = labels.astype(self.dtype)
        self._set("labels", labels)
        return features, labels