Source code for tick.hawkes.inference.hawkes_em

# License: BSD 3 clause

import numpy as np

from tick.hawkes.inference.base import LearnerHawkesNoParam
from tick.hawkes.inference.build.hawkes_inference import (HawkesEM as
                                                          _HawkesEM)
from tick.solver.base.utils import relative_distance


[docs]class HawkesEM(LearnerHawkesNoParam): """This class is used for performing non parametric estimation of multi-dimensional Hawkes processes based on expectation maximization algorithm. Hawkes processes are point processes defined by the intensity: .. math:: \\forall i \\in [1 \\dots D], \\quad \\lambda_i = \\mu_i + \\sum_{j=1}^D \\int \\phi_{ij} dN_j where * :math:`D` is the number of nodes * :math:`\mu_i` are the baseline intensities * :math:`\phi_{ij}` are the kernels Parameters ---------- kernel_support : `float`, default=`None` The support size common to all the kernels. Might be `None` if `kernel_discretization` is set kernel_size : `int`, default=10 Number of discretizations of the kernel kernel_discretization : `np.ndarray`, default=None Explicit discretization of the kernel. If set, it will override `kernel_support` and `kernel_size` values. tol : `float`, default=1e-5 The tolerance of the solver (iterations stop when the stopping criterion is below it). If not reached the solver does ``max_iter`` iterations max_iter : `int`, default=100 Maximum number of iterations of the solver verbose : `bool`, default=False If `True`, we verbose things, otherwise the solver does not print anything (but records information in history anyway) print_every : `int`, default=10 Print history information when ``n_iter`` (iteration number) is a multiple of ``print_every`` record_every : `int`, default=10 Record history information when ``n_iter`` (iteration number) is a multiple of ``record_every`` n_threads : `int`, default=1 Number of threads used for parallel computation. * if `int <= 0`: the number of physical cores available on the CPU * otherwise the desired number of threads Attributes ---------- n_nodes : `int` Number of nodes of the estimated Hawkes process n_realizations : `int` Number of given realizations kernel : `np.array` shape=(n_nodes, n_nodes, kernel_size) The estimated kernels baseline : `np.array` shape=(n_nodes) The estimated baseline References ---------- Lewis, E., & Mohler, G. (2011). A nonparametric EM algorithm for multiscale Hawkes processes. `preprint, 1-16`_. The n-dimensional extension of this algorithm can be found in the latex documentation. .. _preprint, 1-16: http://paleo.sscnet.ucla.edu/Lewis-Molher-EM_Preprint.pdf """
[docs] def __init__(self, kernel_support=None, kernel_size=10, kernel_discretization=None, tol=1e-5, max_iter=100, print_every=10, record_every=10, verbose=False, n_threads=1): LearnerHawkesNoParam.__init__( self, n_threads=n_threads, verbose=verbose, tol=tol, max_iter=max_iter, print_every=print_every, record_every=record_every) if kernel_discretization is not None: self._learner = _HawkesEM(kernel_discretization, n_threads) elif kernel_support is not None: self._learner = _HawkesEM(kernel_support, kernel_size, n_threads) else: raise ValueError('Either kernel support or kernel discretization ' 'must be provided') self.baseline = None self.kernel = None self.history.print_order = ["n_iter", "rel_baseline", "rel_kernel"]
[docs] def fit(self, events, end_times=None, baseline_start=None, kernel_start=None): """Fit the model according to the given training data. Parameters ---------- events : `list` of `list` of `np.ndarray` List of Hawkes processes realizations. Each realization of the Hawkes process is a list of n_node for each component of the Hawkes. Namely `events[i][j]` contains a one-dimensional `numpy.array` of the events' timestamps of component j of realization i. If only one realization is given, it will be wrapped into a list end_times : `np.ndarray` or `float`, default = None List of end time of all hawkes processes that will be given to the model. If None, it will be set to each realization's latest time. If only one realization is provided, then a float can be given. baseline_start : `None` or `np.ndarray`, shape=(n_nodes), default=None Used to force start values for baseline parameter If `None` starts with uniform 1 values kernel_start : `None` or `np.ndarray`, shape=(n_nodes, n_nodes, kernel_size), default=None Used to force start values for kernel parameter If `None` starts with random values """ LearnerHawkesNoParam.fit(self, events, end_times=end_times) self.solve(baseline_start=baseline_start, kernel_start=kernel_start) return self
def _solve(self, baseline_start=None, kernel_start=None): """ Performs nonparametric estimation and stores the result in the attributes `kernel` and `baseline` Parameters ---------- baseline_start : `None` or `np.ndarray`, shape=(n_nodes), default=None Used to force start values for mu parameter If `None` starts with uniform 1 values kernel_start : `None` or `np.ndarray', shape=(n_nodes, n_nodes, kernel_size), default=None Used to force start values for kernel parameter If `None` starts with random values """ if kernel_start is None: self.kernel = 0.1 * np.random.uniform( size=(self.n_nodes, self.n_nodes, self.kernel_size)) else: if kernel_start.shape != (self.n_nodes, self.n_nodes, self.kernel_size): raise ValueError( 'kernel_start has shape {} but should have ' 'shape {}'.format( kernel_start.shape, (self.n_nodes, self.n_nodes, self.kernel_size))) self.kernel = kernel_start.copy() if baseline_start is None: self.baseline = np.zeros(self.n_nodes) + 1 else: self.baseline = baseline_start.copy() for i in range(self.max_iter): if self._should_record_iter(i): prev_baseline = self.baseline.copy() prev_kernel = self.kernel.copy() self._learner.solve(self.baseline, self._flat_kernels) if self._should_record_iter(i): rel_baseline = relative_distance(self.baseline, prev_baseline) rel_kernel = relative_distance(self.kernel, prev_kernel) converged = max(rel_baseline, rel_kernel) <= self.tol force_print = (i + 1 == self.max_iter) or converged self._handle_history(i, rel_baseline=rel_baseline, rel_kernel=rel_kernel, force=force_print) if converged: break
[docs] def get_kernel_supports(self): """Computes kernel support. This makes our learner compliant with `tick.plot.plot_hawkes_kernels` API Returns ------- output : `np.ndarray`, shape=(n_nodes, n_nodes) 2d array in which each entry i, j corresponds to the support of kernel i, j """ return np.zeros((self.n_nodes, self.n_nodes)) + self.kernel_support
[docs] def get_kernel_values(self, i, j, abscissa_array): """Computes value of the specified kernel on given time values. This makes our learner compliant with `tick.plot.plot_hawkes_kernels` API Parameters ---------- i : `int` First index of the kernel j : `int` Second index of the kernel abscissa_array : `np.ndarray`, shape=(n_points, ) 1d array containing all the times at which this kernel will computes it value Returns ------- output : `np.ndarray`, shape=(n_points, ) 1d array containing the values of the specified kernels at the given times. """ indices_in_support = (abscissa_array > 0) & \ (abscissa_array < self.kernel_support) index = np.searchsorted(self.kernel_discretization, abscissa_array[indices_in_support]) - 1 kernel_values = np.empty_like(abscissa_array) kernel_values[np.invert(indices_in_support)] = 0 kernel_values[indices_in_support] = self.kernel[i, j, index] return kernel_values
[docs] def get_kernel_norms(self): """Computes kernel norms. This makes our learner compliant with `tick.plot.plot_hawkes_kernel_norms` API Returns ------- norms : `np.ndarray`, shape=(n_nodes, n_nodes) 2d array in which each entry i, j corresponds to the norm of kernel i, j """ return self._learner.get_kernel_norms(self._flat_kernels)
[docs] def objective(self, coeffs, loss: float = None): raise NotImplementedError()
[docs] def score(self, events=None, end_times=None, baseline=None, kernel=None): """Compute score metric Score metric is log likelihood (the higher the better) Parameters ---------- events : `list` of `list` of `np.ndarray`, default = None List of Hawkes processes realizations used to measure score. Each realization of the Hawkes process is a list of n_node for each component of the Hawkes. Namely `events[i][j]` contains a one-dimensional `numpy.array` of the events' timestamps of component j of realization i. If only one realization is given, it will be wrapped into a list If None, events given while fitting model will be used end_times : `np.ndarray` or `float`, default = None List of end time of all hawkes processes used to measure score. If None, it will be set to each realization's latest time. If only one realization is provided, then a float can be given. baseline : `np.ndarray`, shape=(n_nodes, ), default = None Baseline vector for which the score is measured If `None` baseline obtained during fitting is used kernel : `None` or `np.ndarray`, shape=(n_nodes, n_nodes, kernel_size), default=None Used to force start values for kernel parameter If `None` kernel obtained during fitting is used Returns ------- likelihood : `double` Computed log likelihood value """ if events is None and not self._fitted: raise ValueError('You must either call `fit` before `score` or ' 'provide events') if events is None and end_times is None: learner = self else: learner = HawkesEM(**self.get_params()) learner._set('_end_times', end_times) learner._set_data(events) n_nodes = learner.n_nodes kernel_size = learner.kernel_size if baseline is None: baseline = self.baseline if kernel is None: kernel = self.kernel flat_kernels = kernel.reshape((n_nodes, n_nodes * kernel_size)) return learner._learner.loglikelihood(baseline, flat_kernels)
def get_params(self): return { 'kernel_support': self.kernel_support, 'kernel_size': self.kernel_size, 'kernel_discretization': self.kernel_discretization, 'tol': self.tol, 'max_iter': self.max_iter, 'print_every': self.print_every, 'record_every': self.record_every, 'verbose': self.verbose, 'n_threads': self.n_threads } @property def _flat_kernels(self): return self.kernel.reshape((self.n_nodes, self.n_nodes * self.kernel_size)) @property def kernel_support(self): return self._learner.get_kernel_support() @kernel_support.setter def kernel_support(self, val): self._learner.set_kernel_support(val) @property def kernel_size(self): return self._learner.get_kernel_size() @kernel_size.setter def kernel_size(self, val): self._learner.set_kernel_size(val) @property def kernel_dt(self): return self._learner.get_kernel_fixed_dt() @kernel_dt.setter def kernel_dt(self, val): self._learner.set_kernel_dt(val) @property def kernel_discretization(self): return self._learner.get_kernel_discretization()