Skip to content

Commit

Permalink
[MRG + 1] Raising an error when batch_size < n_components in Incremen…
Browse files Browse the repository at this point in the history
…talPCA (scikit-learn#9303)
  • Loading branch information
wallygauze authored and jnothman committed Aug 14, 2017
1 parent 86d8f18 commit baa2048
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 6 deletions.
6 changes: 5 additions & 1 deletion doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,16 @@ Bug fixes

Decomposition, manifold learning and clustering

- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
now an error is raised if the number of components is larger than the
chosen batch size. The ``n_components=None`` case was adapted accordingly.
:issue:`6452`. By :user:`Wally Gauze <wallygauze>`.

- Fixed a bug where the ``partial_fit`` method of
:class:`decomposition.IncrementalPCA` used integer division instead of float
division on Python 2 versions. :issue:`9492` by
:user:`James Bourbeau <jrbourbeau>`.


Version 0.19
============

Expand Down
9 changes: 8 additions & 1 deletion sklearn/decomposition/incremental_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,18 @@ def partial_fit(self, X, y=None, check_input=True):
self.components_ = None

if self.n_components is None:
self.n_components_ = n_features
if self.components_ is None:
self.n_components_ = min(n_samples, n_features)
else:
self.n_components_ = self.components_.shape[0]
elif not 1 <= self.n_components <= n_features:
raise ValueError("n_components=%r invalid for n_features=%d, need "
"more rows than columns for IncrementalPCA "
"processing" % (self.n_components, n_features))
elif not self.n_components <= n_samples:
raise ValueError("n_components=%r must be less or equal to "
"the batch number of samples "
"%d." % (self.n_components, n_samples))
else:
self.n_components_ = self.n_components

Expand Down
40 changes: 36 additions & 4 deletions sklearn/decomposition/tests/test_incremental_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sklearn.utils.testing import assert_almost_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raises_regex

from sklearn import datasets
from sklearn.decomposition import PCA, IncrementalPCA
Expand Down Expand Up @@ -73,10 +74,41 @@ def test_incremental_pca_inverse():

def test_incremental_pca_validation():
    # Test that n_components is >=1 and <= n_features.
    # NOTE(review): this listing is a flattened diff view, so the first
    # stanza below (2x2 list input checked with assert_raises) is presumably
    # the pre-commit version that the array-based stanza replaces -- confirm
    # against the repository before relying on both being present.
    X = [[0, 1], [1, 0]]
    for n_components in [-1, 0, .99, 3]:
        assert_raises(ValueError, IncrementalPCA(n_components,
                                                 batch_size=10).fit, X)
    # 2 samples x 3 features: with fewer rows than columns, a value can
    # satisfy the n_features bound and still exceed n_samples.
    X = np.array([[0, 1, 0], [1, 0, 0]])
    n_samples, n_features = X.shape
    # Every candidate is either below 1 or above n_features (3); the
    # formatted message is what assert_raises_regex is asked to match.
    for n_components in [-1, 0, .99, 4]:
        assert_raises_regex(ValueError,
                            "n_components={} invalid for n_features={}, need"
                            " more rows than columns for IncrementalPCA "
                            "processing".format(n_components, n_features),
                            IncrementalPCA(n_components, batch_size=10).fit, X)

    # Tests that n_components is also <= n_samples.
    # n_components=3 equals n_features here but is greater than n_samples (2).
    n_components = 3
    assert_raises_regex(ValueError,
                        "n_components={} must be less or equal to "
                        "the batch number of samples {}".format(
                            n_components, n_samples),
                        IncrementalPCA(
                            n_components=n_components).partial_fit, X)


def test_n_components_none():
    # Checks how n_components_ is resolved when n_components is None,
    # for both a tall (50x10) and a wide (10x50) batch.
    rng = np.random.RandomState(1999)
    shapes = ((50, 10), (10, 50))
    for shape in shapes:
        X = rng.rand(*shape)
        ipca = IncrementalPCA(n_components=None)

        # Initial batch: the fitted value is expected to equal the smaller
        # of the two dimensions of X.
        ipca.partial_fit(X)
        smallest_dim = min(X.shape)
        assert ipca.n_components_ == smallest_dim

        # Follow-up batch: the fitted value is expected to agree with the
        # number of rows of components_ on the fitted estimator.
        ipca.partial_fit(X)
        n_component_rows = ipca.components_.shape[0]
        assert ipca.n_components_ == n_component_rows


def test_incremental_pca_set_params():
Expand Down

0 comments on commit baa2048

Please sign in to comment.