forked from hmmlearn/hmmlearn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstats.py
98 lines (80 loc) · 3.9 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import numpy as np
from scipy import linalg
def log_multivariate_normal_density(X, means, covars, covariance_type='diag'):
"""
Compute the log probability under a multivariate Gaussian distribution.
Parameters
----------
X : array_like, shape (n_samples, n_features)
List of n_features-dimensional data points. Each row corresponds to a
single data point.
means : array_like, shape (n_components, n_features)
List of n_features-dimensional mean vectors for n_components Gaussians.
Each row corresponds to a single mean vector.
covars : array_like
List of n_components covariance parameters for each Gaussian. The shape
depends on `covariance_type`:
* (n_components, ) if "spherical",
* (n_components, n_features) if "diag",
* (n_components, n_features, n_features) if "full",
* (n_features, n_features) if "tied".
covariance_type : {"sperical", "diag", "full", "tied"}, optional
The type of the covariance parameters. Defaults to 'diag'.
Returns
-------
lpr : array_like, shape (n_samples, n_components)
Array containing the log probabilities of each data point in
X under each of the n_components multivariate Gaussian distributions.
"""
log_multivariate_normal_density_dict = {
'spherical': _log_multivariate_normal_density_spherical,
'tied': _log_multivariate_normal_density_tied,
'diag': _log_multivariate_normal_density_diag,
'full': _log_multivariate_normal_density_full}
return log_multivariate_normal_density_dict[covariance_type](
X, means, covars
)
def _log_multivariate_normal_density_diag(X, means, covars):
"""Compute Gaussian log-density at X for a diagonal model."""
# X: (ns, nf); means: (nc, nf); covars: (nc, nf) -> (ns, nc)
n_samples, n_dim = X.shape
# Avoid 0 log 0 = nan in degenerate covariance case.
covars = np.maximum(covars, np.finfo(float).tiny)
with np.errstate(over="ignore"):
return -0.5 * (n_dim * np.log(2 * np.pi)
+ np.log(covars).sum(axis=-1)
+ ((X[:, None, :] - means) ** 2 / covars).sum(axis=-1))
def _log_multivariate_normal_density_spherical(X, means, covars):
"""Compute Gaussian log-density at X for a spherical model."""
cv = covars.copy()
if covars.ndim == 1:
cv = cv[:, np.newaxis]
if cv.shape[1] == 1:
cv = np.tile(cv, (1, X.shape[-1]))
return _log_multivariate_normal_density_diag(X, means, cv)
def _log_multivariate_normal_density_tied(X, means, covars):
"""Compute Gaussian log-density at X for a tied model."""
cv = np.tile(covars, (means.shape[0], 1, 1))
return _log_multivariate_normal_density_full(X, means, cv)
def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7):
"""Log probability for full covariance matrices."""
n_samples, n_dim = X.shape
nmix = len(means)
log_prob = np.empty((n_samples, nmix))
for c, (mu, cv) in enumerate(zip(means, covars)):
try:
cv_chol = linalg.cholesky(cv, lower=True)
except linalg.LinAlgError:
# The model is most probably stuck in a component with too
# few observations, we need to reinitialize this components
try:
cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
lower=True)
except linalg.LinAlgError:
raise ValueError("'covars' must be symmetric, "
"positive-definite")
cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
n_dim * np.log(2 * np.pi) + cv_log_det)
return log_prob