Begin introducing the Variational GMM-HMM
blckmaxima committed Apr 2, 2023
1 parent 033ceb8 commit 61e3b37
Showing 5 changed files with 1,134 additions and 75 deletions.
82 changes: 81 additions & 1 deletion lib/hmmlearn/stats.py
@@ -1,5 +1,7 @@
import numpy as np
from scipy import linalg
from scipy import linalg, special
from ._utils import logdet
from .utils import fill_covars


def log_multivariate_normal_density(X, means, covars, covariance_type='diag'):
@@ -96,3 +98,81 @@ def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7):
+ cv_log_det))

return np.transpose(log_prob)


def _variational_log_multivariate_normal_density(X, means, beta, scale, dof,
                                                 covariance_type):
    """
    Compute the expected log probability of each sample under a multivariate
    Gaussian, with the expectation taken over the variational posterior of
    the Gaussian parameters (up to an additive constant).

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row corresponds to a
        single data point.
    means : array_like, shape (n_components, n_features)
        List of n_features-dimensional mean vectors for n_components
        Gaussians. Each row corresponds to a single mean vector.
    beta : array_like, shape (n_components, )
        Scaling of the precision of the posterior over each component's mean;
        larger values correspond to a more concentrated posterior.
    scale : array_like
        Scale parameters of the posterior over each component's covariance.
        The shape depends on `covariance_type`:
        * (n_components, ) if "spherical",
        * (n_components, n_features) if "diag",
        * (n_components, n_features, n_features) if "full",
        * (n_features, n_features) if "tied".
    dof : array_like, shape (n_components, )
        Degrees of freedom of the Wishart posterior over each component's
        precision matrix.
    covariance_type : {"spherical", "diag", "full", "tied"}
        The type of the covariance parameters.

    Returns
    -------
    lpr : array_like, shape (n_samples, n_components)
        Array containing the expected log probabilities (up to an additive
        constant) of each data point in X under each of the n_components
        multivariate Gaussian distributions.
    """
    # Refer to the Gruhl/Sick paper for the notation.
    # In general, things are neater if we pretend the covariance is
    # full / tied. Or, we could treat each case separately, and reduce
    # the number of operations. That's left for the future :-)
    nc, nf = means.shape
    term1 = special.digamma(
        .5 * (dof - np.arange(0, nf)[:, None])
    ).sum(axis=0)
    if covariance_type in ("diag", "spherical"):
        scale = fill_covars(scale, covariance_type, nc, nf)

    W_k = np.linalg.inv(scale)

    term1 += nf * np.log(2) + logdet(W_k)
    term1 /= 2

    # We ignore the constant term that is typically dropped in the literature
    term2 = 0
    term3 = nf / beta

    # (X - means) @ W_k @ (X - means)^T, scaled by dof
    # delta has shape (n_components, n_samples, n_features)
    delta = (X - means[:, None])
    # m indexes the mixture components
    # i indexes the samples of the sequence X
    # j, k index the n_features
    if covariance_type in ("full", "diag", "spherical"):
        dots = np.einsum("mij,mjk,mik,m->im",
                         delta, W_k, delta, dof)
    else:
        # "tied": a single scale matrix is shared by all components.
        dots = np.einsum("mij,jk,mik,->im",
                         delta, W_k, delta, dof)
    last_term = .5 * (dots + term3)

    return term1 - term2 - last_term
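
For reference, the quantity this helper evaluates corresponds to the standard variational-Bayes expectation of the Gaussian log density (cf. Bishop, PRML, ch. 10), with W_k = inverse of the per-component scale and the trailing constant dropped (term2 = 0 above):

E_q[ln N(x_n | mu_k, Lambda_k^-1)]
    = 1/2 * (sum_{d=1..D} psi((nu_k + 1 - d) / 2) + D*ln 2 + ln|W_k|)
      - D / (2*beta_k)
      - nu_k/2 * (x_n - m_k)^T W_k (x_n - m_k)
      - D/2 * ln(2*pi)

A minimal sketch (not part of the commit) exercising the new helper on toy data, assuming a build of hmmlearn that includes this change; the shapes and values (seed, beta = 2, dof = D + 2) are arbitrary illustrations only:

import numpy as np
from hmmlearn.stats import _variational_log_multivariate_normal_density

rng = np.random.default_rng(0)
n_samples, n_components, n_features = 5, 2, 3
X = rng.normal(size=(n_samples, n_features))
means = rng.normal(size=(n_components, n_features))
beta = np.full(n_components, 2.0)                      # mean-precision scaling
scale = np.stack([np.eye(n_features)] * n_components)  # "full" scale matrices
dof = np.full(n_components, n_features + 2.0)          # Wishart degrees of freedom

lpr = _variational_log_multivariate_normal_density(
    X, means, beta, scale, dof, covariance_type="full")
print(lpr.shape)  # (5, 2): one value per (sample, component) pair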
98 changes: 70 additions & 28 deletions lib/hmmlearn/tests/test_gmm_hmm_multisequence.py
@@ -4,7 +4,7 @@
from numpy.testing import assert_allclose
import pytest

from hmmlearn import hmm
from hmmlearn import hmm, vhmm
from hmmlearn.base import ConvergenceMonitor


@@ -221,11 +221,67 @@ def make_permutations(items):
return [list(p) for p in itertools.permutations(sequence_indices)]


def setup_em(covariance_type, implementation, init_params, verbose):
    model = hmm.GMMHMM(
        n_components=2,
        n_mix=2,
        n_iter=100,
        covariance_type=covariance_type,
        verbose=verbose,
        init_params=init_params,
        random_state=1234,
        implementation=implementation
    )

    # don't use random parameters for testing
    init = 1. / model.n_components
    model.startprob_ = np.full(model.n_components, init)
    model.transmat_ = \
        np.full((model.n_components, model.n_components), init)

    model.monitor_ = StrictMonitor(
        model.monitor_.tol,
        model.monitor_.n_iter,
        model.monitor_.verbose,
    )
    return model

def setup_vi(covariance_type, implementation, init_params, verbose, lengths):
    model = vhmm.VariationalGMMHMM(
        n_components=2,
        n_mix=2,
        n_iter=100,
        covariance_type=covariance_type,
        verbose=verbose,
        init_params=init_params,
        random_state=1234,
        implementation=implementation
    )
    # don't use random parameters for testing
    prior_init = 1. / model.n_components
    model.startprob_prior_ = np.full(model.n_components, prior_init)
    model.transmat_prior_ = \
        np.full((model.n_components, model.n_components), prior_init)
    model.startprob_posterior_ = np.full(
        model.n_components, len(lengths) * prior_init)
    model.transmat_posterior_ = np.full(
        (model.n_components, model.n_components), sum(lengths) * prior_init)
    model.monitor_ = StrictMonitor(
        model.monitor_.tol,
        model.monitor_.n_iter,
        model.monitor_.verbose,
    )
    return model

@pytest.mark.parametrize("hmm_type",
["em", "vi"])
@pytest.mark.parametrize("covariance_type",
["diag", "spherical", "tied", "full"])
@pytest.mark.parametrize("implementation", ["scaling", "log"])
def test_gmmhmm_multi_sequence_fit_invariant_to_sequence_ordering(
covariance_type, implementation, init_params='mcw', verbose=False
hmm_type, covariance_type, implementation, init_params='mcw', verbose=False
):
"""
Sanity check GMM-HMM fit behaviour when run on multiple sequences
@@ -253,34 +309,20 @@ def test_gmmhmm_multi_sequence_fit_invariant_to_sequence_ordering(
sequences = sequence_data[p]
X = np.concatenate(sequences)
lengths = [len(seq) for seq in sequences]

model = hmm.GMMHMM(
n_components=2,
n_mix=2,
n_iter=100,
covariance_type=covariance_type,
verbose=verbose,
init_params=init_params,
random_state=1234,
implementation=implementation
)

# don't use random parameters for testing
init = 1. / model.n_components
model.startprob_ = np.full(model.n_components, init)
model.transmat_ = \
np.full((model.n_components, model.n_components), init)

model.monitor_ = StrictMonitor(
model.monitor_.tol,
model.monitor_.n_iter,
model.monitor_.verbose,
)
if hmm_type == "em":
model = setup_em(covariance_type, implementation, init_params,
verbose)
# Choice of rtol value is ad-hoc, no theoretical motivation.
rtol = 5e-3
else:
model = setup_vi(covariance_type, implementation, init_params,
verbose, lengths)
# In general, the EM solution can use a smaller rtol, while the VI
# solution needs a somewhat larger one.
rtol = 5e-2

model.fit(X, lengths)

assert model.monitor_.converged
scores.append(model.score(X, lengths))

# Choice of rtol value is ad-hoc, no theoretical motivation.
assert_allclose(scores, np.mean(scores), rtol=5e-03)
assert_allclose(scores, np.mean(scores), rtol=rtol)
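
For orientation, a minimal sketch (not part of the commit) of how the variational model added on this branch would be fit directly, mirroring the test above. It assumes the VariationalGMMHMM class introduced by this change is importable as hmmlearn.vhmm.VariationalGMMHMM; the toy data and settings are arbitrary:

import numpy as np
from hmmlearn import vhmm

rng = np.random.default_rng(42)
# Two toy sequences, concatenated together with their lengths, as in the test.
sequences = [rng.normal(size=(100, 2)), rng.normal(size=(80, 2)) + 3.0]
X = np.concatenate(sequences)
lengths = [len(seq) for seq in sequences]

model = vhmm.VariationalGMMHMM(
    n_components=2,
    n_mix=2,
    n_iter=100,
    covariance_type="full",
    random_state=1234,
    implementation="log",
)
model.fit(X, lengths)
print(model.monitor_.converged, model.score(X, lengths))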
1 change: 0 additions & 1 deletion lib/hmmlearn/tests/test_gmm_hmm_new.py
@@ -131,7 +131,6 @@ def test_fit(self, implementation):
lengths = None
h = self.new_hmm(implementation)
X, _state_sequence = h.sample(n_samples)

# Mess up the parameters and see if we can re-learn them.
covs0, means0, priors0, trans0, weights0 = prep_params(
self.n_components, self.n_mix, self.n_features,
