diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 43c50b867cba8d..132005ee7878c9 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -798,6 +798,13 @@ Miscellaneous :mod:`utils` have been removed or deprecated accordingly. :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai ` +- The ``store_covariances`` and ``covariances_`` parameters of + :class:`discriminant_analysis.QuadraticDiscriminantAnalysis` + has been renamed to ``store_covariance`` and ``covariance_`` to be + consistent with the corresponding parameter names of the + :class:`discriminant_analysis.LinearDiscriminantAnalysis`. They will be + removed in version 0.21. :issue:`7998` by :user:`Jiacheng ` + Removed in 0.19: - ``utils.fixes.argpartition`` diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 8506d35a76c9a6..e26ca771eb5128 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -11,8 +11,8 @@ from __future__ import print_function import warnings - import numpy as np +from .utils import deprecated from scipy import linalg from .externals.six import string_types from .externals.six.moves import xrange @@ -170,7 +170,8 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, Number of components (< n_classes - 1) for dimensionality reduction. store_covariance : bool, optional - Additionally compute class covariance matrix (default False). + Additionally compute class covariance matrix (default False), used + only in 'svd' solver. .. 
versionadded:: 0.17 @@ -245,6 +246,7 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, >>> print(clf.predict([[-0.8, -1]])) [1] """ + def __init__(self, solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=1e-4): self.solver = solver @@ -554,7 +556,7 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Regularizes the covariance estimate as ``(1-reg_param)*Sigma + reg_param*np.eye(n_features)`` - store_covariances : boolean + store_covariance : boolean If True the covariance matrices are computed and stored in the `self.covariances_` attribute. @@ -567,7 +569,7 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Attributes ---------- - covariances_ : list of array-like, shape = [n_features, n_features] + covariance_ : list of array-like, shape = [n_features, n_features] Covariance matrices of each class. means_ : array-like, shape = [n_classes, n_features] @@ -597,7 +599,8 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): >>> clf.fit(X, y) ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0, - store_covariances=False, tol=0.0001) + store_covariance=False, + store_covariances=None, tol=0.0001) >>> print(clf.predict([[-0.8, -1]])) [1] @@ -607,21 +610,30 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Discriminant Analysis """ - def __init__(self, priors=None, reg_param=0., store_covariances=False, - tol=1.0e-4): + def __init__(self, priors=None, reg_param=0., store_covariance=False, + tol=1.0e-4, store_covariances=None): self.priors = np.asarray(priors) if priors is not None else None self.reg_param = reg_param self.store_covariances = store_covariances + self.store_covariance = store_covariance self.tol = tol + @property + @deprecated("Attribute covariances_ was deprecated in version" + " 0.19 and will be removed in 0.21. 
Use " + "covariance_ instead") + def covariances_(self): + return self.covariance_ + def fit(self, X, y): """Fit the model according to the given training data and parameters. .. versionchanged:: 0.19 - *store_covariance* has been moved to main constructor. + ``store_covariances`` has been moved to main constructor as + ``store_covariance`` .. versionchanged:: 0.19 - *tol* has been moved to main constructor. + ``tol`` has been moved to main constructor. Parameters ---------- @@ -645,7 +657,12 @@ def fit(self, X, y): self.priors_ = self.priors cov = None + store_covariance = self.store_covariance or self.store_covariances if self.store_covariances: + warnings.warn("'store_covariances' was renamed to store_covariance" + " in version 0.19 and will be removed in 0.21.", + DeprecationWarning) + if store_covariance: cov = [] means = [] scalings = [] @@ -665,13 +682,13 @@ def fit(self, X, y): warnings.warn("Variables are collinear") S2 = (S ** 2) / (len(Xg) - 1) S2 = ((1 - self.reg_param) * S2) + self.reg_param - if self.store_covariances: + if self.store_covariance or store_covariance: # cov = V * (S^2 / (n-1)) * V.T cov.append(np.dot(S2 * Vt.T, Vt)) scalings.append(S2) rotations.append(Vt.T) - if self.store_covariances: - self.covariances_ = cov + if self.store_covariance or store_covariance: + self.covariance_ = cov self.means_ = np.asarray(means) self.scalings_ = scalings self.rotations_ = rotations diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index a7a878a73160ed..8eb5da1908ba70 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -5,9 +5,11 @@ from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_true +from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from 
sklearn.utils.testing import assert_warns +from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings @@ -223,6 +225,38 @@ def test_lda_scaling(): 'using covariance: %s' % solver) +def test_lda_store_covariance(): + # Test for solver 'lsqr' and 'eigen' + # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers + for solver in ('lsqr', 'eigen'): + clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + # Test the actual attribute: + clf = LinearDiscriminantAnalysis(solver=solver, + store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + assert_array_almost_equal( + clf.covariance_, + np.array([[0.422222, 0.088889], [0.088889, 0.533333]]) + ) + + # Test for SVD solver, the default is to not set the covariance_ attribute + clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6) + assert_false(hasattr(clf, 'covariance_')) + + # Test the actual attribute: + clf = LinearDiscriminantAnalysis(solver=solver, + store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + assert_array_almost_equal( + clf.covariance_, + np.array([[0.422222, 0.088889], [0.088889, 0.533333]]) + ) + + def test_qda(): # QDA classification. 
# This checks that QDA implements fit and predict and returns @@ -262,26 +296,40 @@ def test_qda_priors(): assert_greater(n_pos2, n_pos) -def test_qda_store_covariances(): +def test_qda_store_covariance(): # The default is to not set the covariances_ attribute clf = QuadraticDiscriminantAnalysis().fit(X6, y6) - assert_true(not hasattr(clf, 'covariances_')) + assert_false(hasattr(clf, 'covariance_')) # Test the actual attribute: - clf = QuadraticDiscriminantAnalysis(store_covariances=True).fit(X6, y6) - assert_true(hasattr(clf, 'covariances_')) + clf = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) assert_array_almost_equal( - clf.covariances_[0], + clf.covariance_[0], np.array([[0.7, 0.45], [0.45, 0.7]]) ) assert_array_almost_equal( - clf.covariances_[1], + clf.covariance_[1], np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]]) ) +def test_qda_deprecation(): + # Test the deprecation + clf = QuadraticDiscriminantAnalysis(store_covariances=True) + assert_warns_message(DeprecationWarning, "'store_covariances' was renamed" + " to store_covariance in version 0.19 and will be " + "removed in 0.21.", clf.fit, X, y) + + # check that covariance_ (and covariances_ with warning) is stored + assert_warns_message(DeprecationWarning, "Attribute covariances_ was " + "deprecated in version 0.19 and will be removed " + "in 0.21. Use covariance_ instead", getattr, clf, + 'covariances_') + + def test_qda_regularization(): # the default is reg_param=0. and will cause issues # when there is a constant variable