From 9e50115917c70ea35bdde87749b38f78668e6a75 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 03:20:58 +0000 Subject: [PATCH 01/31] Added basic functions to test two-way center and scaling --- inverse_covariance/tests/clean_test.py | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 inverse_covariance/tests/clean_test.py diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py new file mode 100644 index 0000000..ffed484 --- /dev/null +++ b/inverse_covariance/tests/clean_test.py @@ -0,0 +1,36 @@ +import numpy as np +from scipy import sparse +import pytest + +from sklearn.utils.testing import assert_raises +from sklearn.utils.testing import assert_allclose +from sklearn import datasets + +from clean import ( + twoway_standardize +) + +def custom_init(n_rows, n_cols, with_mean=False): + prng = np.random.RandomState(1) + X = prng.normal(0, np.ones(shape=(n_rows,n_cols))) + if with_mean: + mu = np.ones(shape=(n_rows, 1)) * \ + prng.randint(1, 5, size=(1, n_cols)) + else: + mu = np.zeros(shape=(n_rows,n_cols)) + var_rows = prng.lognormal(2, 1, size=(n_rows, 1)) + sqcov_rows = np.diag(np.sqrt(var_rows)) + var_cols = prng.lognormal(2, 1, size=(1, n_cols)) + sqcov_cols = np.diag(np.sqrt(var_cols)) + return mu + sqcov_rows * X * sqcov_cols + +def test_invalid_argument(): + ''' + Test behavior of invalid sparse inputs. + ''' + X = np.zeros(shape=(10,10)) + X_csc = sparse.csc_matrix(X) + assert_raises(TypeError, twoway_standardize(X_csc)) + + X_csr = sparse.csr_matrix(X) + assert_raises(TypeError, twoway_standardize(X_csr)) From 2f74e1bd4a4e83f884bab7366828d1fd526404c6 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 03:22:56 +0000 Subject: [PATCH 02/31] Added basic twoway standardization algorithm. Relevant to issue #93 --- inverse_covariance/clean.py | 258 ++++++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 inverse_covariance/clean.py diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py new file mode 100644 index 0000000..2cc01f4 --- /dev/null +++ b/inverse_covariance/clean.py @@ -0,0 +1,258 @@ +import numpy as np +from scipy import sparse +from scipy import stats + +from sklearn.preprocessing.data import scale +from sklearn.base import BaseEstimator, TransformerMixin + +from sklearn.utils import check_array +from sklearn.utils.extmath import row_norms +from sklearn.utils.extmath import _incremental_mean_and_var +from sklearn.utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1, + inplace_csr_row_normalize_l2) +from sklearn.utils.sparsefuncs import (inplace_column_scale, + mean_variance_axis, incr_mean_variance_axis, + min_max_axis) +from sklearn.utils.validation import (check_is_fitted, check_random_state, + FLOAT_DTYPES) + + +def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=10): + """Standardize a two-dimensional data matrix along both axes. + Center to the mean and component wise scale to unit variance. + Read more in the :ref:`User Guide `. + Parameters + ---------- + X : {array-like, sparse matrix} + The data to center and scale. + axis : int (0 by default) + axis used to compute the means and standard deviations along. If 0, + independently standardize each feature, otherwise (if 1) standardize + each sample. 
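[review note] Looking back at PATCH 01: `custom_init` does not actually build the
row/column covariance structure it aims for. `np.diag` applied to the 2-D arrays
`var_rows` (shape (n_rows, 1)) and `var_cols` (shape (1, n_cols)) extracts a
length-1 diagonal instead of constructing a diagonal matrix, and `*` performs
elementwise multiplication rather than a matrix product. A corrected sketch,
using plain numpy (the `custom_init_fixed` name is ours, not the patch's):

import numpy as np

def custom_init_fixed(n_rows, n_cols, with_mean=False):
    prng = np.random.RandomState(1)
    X = prng.normal(0, 1.0, size=(n_rows, n_cols))
    if with_mean:
        mu = np.ones((n_rows, 1)) * prng.randint(1, 5, size=(1, n_cols))
    else:
        mu = np.zeros((n_rows, n_cols))
    # Pass 1-D arrays so np.diag builds (n x n) diagonal matrices, then
    # use matrix products so every row/column gets its own scale factor.
    sqcov_rows = np.diag(np.sqrt(prng.lognormal(2, 1, size=n_rows)))
    sqcov_cols = np.diag(np.sqrt(prng.lognormal(2, 1, size=n_cols)))
    return mu + sqcov_rows.dot(X).dot(sqcov_cols)
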
+ with_mean : boolean, True by default + Is always true for two-way standardize + with_std : boolean, True by default + If True, scale the data to unit variance (or equivalently, + unit standard deviation). + copy : boolean, optional, default True + set to False to perform inplace row normalization and avoid a + copy (if the input is already a numpy array or a scipy.sparse + CSC matrix and if axis is 1). + Notes + ----- + This function invokes sklearn's scale function. Thus, the same restrictions + for scale, apply here as well. + This implementation will refuse to center scipy.sparse matrices + since it would make them non-sparse and would potentially crash the + program with memory exhaustion problems. + Instead the caller is expected to either set explicitly + `with_mean=False` (in that case, only variance scaling will be + performed on the features of the CSC matrix) or to call `X.toarray()` + if he/she expects the materialized dense array to fit in memory. + To avoid memory copy the caller should pass a CSC matrix. + For a comparison of the different scalers, transformers, and normalizers, + see sklearn documentation `examples/preprocessing/plot_all_scaling.py + See also + -------- + StandardScaler: Performs scaling to unit variance using the``Transformer`` API + (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). + """ # noqa + + X = check_array(X, accept_sparse=None, warn_on_dtype=True, + dtype=FLOAT_DTYPES + ) + Xrow_polish = np.copy(X) + Xcol_polish = np.copy(X) + + if sparse.issparse(X): + print('Input is sparse') + raise NotImplemented( + "Algorithm for sparse matrices currently not supported.") + + else: + n_iter = 0 + while n_iter <= max_iter: + Xcol_polish = scale(Xrow_polish, axis=0, + with_mean=True, + with_std=with_std + ) + Xrow_polish = scale(Xcol_polish, axis=1, + with_mean=True, + with_std=with_std + ) + n_iter += 1 + X = Xrow_polisy + + return X + + +class TwoWayStandardScaler(BaseEstimator, TransformerMixin): + """Standardize features by removing the mean and scaling to unit variance + in both row and column dimensions. + This is modeled after StandardScaler in scikit-learn. + Read more in the :ref:`User Guide `. + Parameters + ---------- + copy : boolean, optional, default True + If False, try to avoid a copy and do inplace scaling instead. + This is not guaranteed to always work inplace; e.g. if the data is + not a NumPy array or scipy.sparse CSR matrix, a copy may still be + returned. + with_mean : boolean, True by default + If True, center the data before scaling. + This does not work (and will raise an exception) when attempted on + sparse matrices, because centering them entails building a dense + matrix which in common use cases is likely to be too large to fit in + memory. + with_std : boolean, True by default + If True, scale the data to unit variance (or equivalently, + unit standard deviation). + Attributes + ---------- + scale_ : ndarray, shape (n_features,) + Per feature relative scaling of the data. + .. versionadded:: 0.17 + *scale_* + mean_ : array of floats with shape [n_features] + The mean value for each feature in the training set. + var_ : array of floats with shape [n_features] + The variance for each feature in the training set. Used to compute + `scale_` + n_samples_seen_ : int + The number of samples processed by the estimator. Will be reset on + new calls to fit, but increments across ``partial_fit`` calls. 
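[review note] Two problems in the PATCH 02 loop above that later commits address:
the final assignment reads `X = Xrow_polisy` (a NameError at runtime), and the
loop always runs the full `max_iter` sweeps with no convergence test. PATCH 07
below fixes both. For readers who want the idea in isolation, a minimal
standalone sketch of the alternating ("successive") normalization, assuming only
numpy and sklearn's public `scale` (the function name is ours):

import numpy as np
from sklearn.preprocessing import scale

def alternating_standardize(X, max_iter=50, tol=1e-6):
    X = np.asarray(X, dtype=float)
    for _ in range(max_iter):
        X_new = scale(X, axis=0)      # center/scale each column
        X_new = scale(X_new, axis=1)  # then each row
        # stop once the average per-entry change becomes negligible
        if np.linalg.norm(X_new - X, 'fro') / X.size < tol:
            return X_new
        X = X_new
    return X
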
+ Examples + -------- + >>> from sklearn.preprocessing import StandardScaler + >>> + >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]] + >>> scaler = StandardScaler() + >>> print(scaler.fit(data)) + StandardScaler(copy=True, with_mean=True, with_std=True) + >>> print(scaler.mean_) + [ 0.5 0.5] + >>> print(scaler.transform(data)) + [[-1. -1.] + [-1. -1.] + [ 1. 1.] + [ 1. 1.]] + >>> print(scaler.transform([[2, 2]])) + [[ 3. 3.]] + See also + -------- + scale: Equivalent function without the estimator API. + :class:`sklearn.preprocessing.StandardScaler` + :class:`sklearn.decomposition.PCA` + Further removes the linear correlation across features with 'whiten=True'. + Notes + ----- + See the implications of one-way vs. two-way standardization in here. TBD + + """ # noqa + + def __init__(self, copy=True, with_mean=True, with_std=True): + self.with_mean = with_mean + self.with_std = with_std + self.copy = copy + + def _reset(self): + """Reset internal data-dependent state of the scaler, if necessary. + __init__ parameters are not touched. + """ + + # Checking one attribute is enough, becase they are all set together + # in partial_fit + if hasattr(self, 'scale_'): + del self.scale_ + del self.n_samples_seen_ + del self.mean_ + del self.var_ + + def fit(self, X, y=None): + """Compute the mean and std to be used for later scaling. + Parameters + ---------- + X : {array-like, sparse matrix}, shape [n_samples, n_features] + The data used to compute the mean and standard deviation + used for later scaling along the features axis. + y : Passthrough for ``Pipeline`` compatibility. + """ + + # Reset internal state before fitting + self._reset() + return self.partial_fit(X, y) + + def transform(self, X, y='deprecated', copy=None): + """Perform standardization by centering and scaling + Parameters + ---------- + X : array-like, shape [n_samples, n_features] + The data used to scale along the features axis. + y : (ignored) + .. deprecated:: 0.19 + This parameter will be removed in 0.21. + copy : bool, optional (default: None) + Copy the input X or not. + """ + if not isinstance(y, string_types) or y != 'deprecated': + warnings.warn("The parameter y on transform() is " + "deprecated since 0.19 and will be removed in 0.21", + DeprecationWarning) + + check_is_fitted(self, 'scale_') + + copy = copy if copy is not None else self.copy + X = check_array(X, accept_sparse='csr', copy=copy, warn_on_dtype=True, + estimator=self, dtype=FLOAT_DTYPES) + + if sparse.issparse(X): + if self.with_mean: + raise ValueError( + "Cannot center sparse matrices: pass `with_mean=False` " + "instead. See docstring for motivation and alternatives.") + if self.scale_ is not None: + inplace_column_scale(X, 1 / self.scale_) + else: + if self.with_mean: + X -= self.mean_ + if self.with_std: + X /= self.scale_ + return X + + def inverse_transform(self, X, copy=None): + """Scale back the data to the original representation + Parameters + ---------- + X : array-like, shape [n_samples, n_features] + The data used to scale along the features axis. + copy : bool, optional (default: None) + Copy the input X or not. + Returns + ------- + X_tr : array-like, shape [n_samples, n_features] + Transformed array. 
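[review note] As of PATCH 02, `transform()` references `string_types` and
`warnings`, but neither is imported, so the deprecation guard would itself raise
a NameError if reached; PATCH 10 adds the `six.string_types` import and PATCH 20
adds `import warnings`. The guard pattern, shown standalone for reference
(0.19-era scikit-learn, where `sklearn.externals.six` still exists):

import warnings
from sklearn.externals.six import string_types

def _warn_if_y_passed(y='deprecated'):
    # Warn only when the caller actually supplied a y argument.
    if not isinstance(y, string_types) or y != 'deprecated':
        warnings.warn("The parameter y on transform() is deprecated "
                      "since 0.19 and will be removed in 0.21",
                      DeprecationWarning)
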
+ """ + check_is_fitted(self, 'scale_') + + copy = copy if copy is not None else self.copy + if sparse.issparse(X): + if self.with_mean: + raise ValueError( + "Cannot uncenter sparse matrices: pass `with_mean=False` " + "instead See docstring for motivation and alternatives.") + if not sparse.isspmatrix_csr(X): + X = X.tocsr() + copy = False + if copy: + X = X.copy() + if self.scale_ is not None: + inplace_column_scale(X, self.scale_) + else: + X = np.asarray(X) + if copy: + X = X.copy() + if self.with_std: + X *= self.scale_ + if self.with_mean: + X += self.mean_ + return X \ No newline at end of file From f2a1b2059d4cb4dd2069d7a754dbf4be785515fe Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 05:30:40 +0000 Subject: [PATCH 03/31] Cleaned up TwoWayStandardScaler API. partial_fit not supported --- inverse_covariance/clean.py | 45 +++++++++++++------------------------ 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 2cc01f4..ac27cfd 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -88,8 +88,8 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """Standardize features by removing the mean and scaling to unit variance - in both row and column dimensions. - This is modeled after StandardScaler in scikit-learn. + in both row and column dimensions. + This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. Parameters ---------- @@ -123,24 +123,22 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): new calls to fit, but increments across ``partial_fit`` calls. Examples -------- - >>> from sklearn.preprocessing import StandardScaler + >>> from inverse_covariance.clean import TwoWayStandardScaler >>> - >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]] + >>> data = [[1, 0], [1, 0], [2, 1], [2, 1]] >>> scaler = StandardScaler() >>> print(scaler.fit(data)) StandardScaler(copy=True, with_mean=True, with_std=True) >>> print(scaler.mean_) - [ 0.5 0.5] + [ 3.0 0.5] >>> print(scaler.transform(data)) [[-1. -1.] [-1. -1.] [ 1. 1.] [ 1. 1.]] - >>> print(scaler.transform([[2, 2]])) - [[ 3. 3.]] See also -------- - scale: Equivalent function without the estimator API. + twoway_standardize: Equivalent function without the estimator API. :class:`sklearn.preprocessing.StandardScaler` :class:`sklearn.decomposition.PCA` Further removes the linear correlation across features with 'whiten=True'. @@ -151,42 +149,31 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """ # noqa def __init__(self, copy=True, with_mean=True, with_std=True): - self.with_mean = with_mean + """Unlike StandardScaler, with_mean is always set to True, to ensure + that two-way standardization is always performed with centering. The + argument `with_mean` is retained for the sake of model API compatibility. + """ + self.with_mean = True self.with_std = with_std self.copy = copy - def _reset(self): - """Reset internal data-dependent state of the scaler, if necessary. - __init__ parameters are not touched. - """ - - # Checking one attribute is enough, becase they are all set together - # in partial_fit - if hasattr(self, 'scale_'): - del self.scale_ - del self.n_samples_seen_ - del self.mean_ - del self.var_ - def fit(self, X, y=None): - """Compute the mean and std to be used for later scaling. + """Compute the mean and std for both row and column dimensions. 
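[review note] A quick check on the doctest values PATCH 03 introduces above: for
`data = [[1, 0], [1, 0], [2, 1], [2, 1]]` the column means are [1.5, 0.5], not
the [3.0, 0.5] shown, and the example still constructs `StandardScaler()` even
though it now imports `TwoWayStandardScaler`. Verifying the means:

import numpy as np

data = np.array([[1, 0], [1, 0], [2, 1], [2, 1]], dtype=float)
print(data.mean(axis=0))  # prints [1.5 0.5]
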
Parameters ---------- - X : {array-like, sparse matrix}, shape [n_samples, n_features] + X : {array-like}, shape [n_rows, n_cols] The data used to compute the mean and standard deviation - used for later scaling along the features axis. - y : Passthrough for ``Pipeline`` compatibility. + along both row and column axes + y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - # Reset internal state before fitting - self._reset() return self.partial_fit(X, y) def transform(self, X, y='deprecated', copy=None): """Perform standardization by centering and scaling Parameters ---------- - X : array-like, shape [n_samples, n_features] + X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. y : (ignored) .. deprecated:: 0.19 From a30bc8e623c7bf3a9f9ad0b112fca516774b26b5 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 05:43:21 +0000 Subject: [PATCH 04/31] Reset internal row,col attributes --- inverse_covariance/clean.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index ac27cfd..c364102 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -157,6 +157,21 @@ def __init__(self, copy=True, with_mean=True, with_std=True): self.with_std = with_std self.copy = copy + def _reset(self): + """Reset internal data-dependent state of the scaler, if necessary. + __init__ parameters are not touched. + """ + + # Checking one attribute is enough, becase they are all set together + # in partial_fit + if hasattr(self, 'col_scale_'): + del self.row_scale_ + del self.row_mean_ + del self.row_var_ + del self.col_scale_ + del self.col_mean_ + del self.col_var_ + def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. Parameters From ed6fd34066b5128abde9149b222111e5cb27c662 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 17:46:05 +0000 Subject: [PATCH 05/31] Added basic structure for partial_fit --- inverse_covariance/clean.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index c364102..4210547 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -184,6 +184,42 @@ def fit(self, X, y=None): return self.partial_fit(X, y) + def partial_fit(self, X, y=None): + """Compute the mean and std for both row and column dimensions. + Equivalent to fit. Online algorithm not supported at this time. + Parameters + ---------- + X : {array-like}, shape [n_rows, n_cols] + The data used to compute the mean and standard deviation + used for later scaling along the features axis. + y : Passthrough for ``Pipeline`` compatibility. 
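[review note] Heads-up on the `partial_fit` skeleton that follows: its
`if sparse.issparse(X):` branch is left empty, which is a syntax error until
PATCH 06 fills it in. The surrounding first-pass initialization idiom is
sklearn's incremental-statistics pattern; a self-contained sketch built on the
same private helper the patch imports (`_incremental_mean_and_var`, 0.19-era
signature; the class name is ours):

import numpy as np
from sklearn.utils.extmath import _incremental_mean_and_var

class RunningColumnStats(object):
    def partial_fit(self, X):
        X = np.asarray(X, dtype=float)
        if not hasattr(self, 'n_seen_'):  # first batch: start from zero
            self.mean_, self.var_, self.n_seen_ = .0, .0, 0
        self.mean_, self.var_, self.n_seen_ = _incremental_mean_and_var(
            X, self.mean_, self.var_, self.n_seen_)
        return self
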
+ """ + X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, + warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) + + if sparse.issparse(X): + + else: + # First pass + if not hasattr(self, 'n_samples_seen_'): + self.mean_ = .0 + self.n_samples_seen_ = 0 + if self.with_std: + self.var_ = .0 + else: + self.var_ = None + + self.mean_, self.var_, self.n_samples_seen_ = \ + _incremental_mean_and_var(X, self.mean_, self.var_, + self.n_samples_seen_) + + if self.with_std: + self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_)) + else: + self.scale_ = None + + return self + def transform(self, X, y='deprecated', copy=None): """Perform standardization by centering and scaling Parameters From 47c87cb05f393f2aa43c9719350bb44fb62f45f2 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 18:41:04 +0000 Subject: [PATCH 06/31] partial_fit now calculates row, col statistics --- inverse_covariance/clean.py | 42 +++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 4210547..0855ac6 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -171,6 +171,8 @@ def _reset(self): del self.col_scale_ del self.col_mean_ del self.col_var_ + del self.n_rows_seen_ + del self.n_cols_seen_ def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. @@ -194,29 +196,43 @@ def partial_fit(self, X, y=None): used for later scaling along the features axis. y : Passthrough for ``Pipeline`` compatibility. """ - X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, - warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) + X = check_array(X, accept_sparse=None, copy=self.copy, + warn_on_dtype=True, dtype=FLOAT_DTYPES) if sparse.issparse(X): - + print('Input is sparse') + raise NotImplemented( + "Algorithm for sparse matrices currently not supported.") else: # First pass - if not hasattr(self, 'n_samples_seen_'): - self.mean_ = .0 - self.n_samples_seen_ = 0 + if not hasattr(self, 'n_rows_seen_'): + self.col_mean_ = .0 + self.n_rows_seen_ = 0 if self.with_std: - self.var_ = .0 + self.col_var_ = .0 else: - self.var_ = None + self.col_var_ = None - self.mean_, self.var_, self.n_samples_seen_ = \ - _incremental_mean_and_var(X, self.mean_, self.var_, - self.n_samples_seen_) + self.col_mean_, self.col_var_, self.n_rows_seen_ = \ + _incremental_mean_and_var(X, self.col_mean_, self.col_var_, + self.n_rows_seen_) + if not hasattr(self, 'n_cols_seen_'): + self.row_mean_ = .0 + self.n_cols_seen_ = 0 + if self.with_std: + self.row_var_ = .0 + else: + self.row_var_ = None + self.row_mean_, self.row_var_, self.n_cols_seen_ = \ + _incremental_mean_and_var(X, self.row_mean_, self.row_var_, + self.n_cols_seen_) if self.with_std: - self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_)) + self.row_scale_ = _handle_zeros_in_scale(np.sqrt(self.row_var_)) + self.col_scale_ = _handle_zeros_in_scale(np.sqrt(self.col_var_)) else: - self.scale_ = None + self.row_scale_ = None + self.col_scale_ = None return self From 2dcde0adf471d81219087f424bf04eab3cab1b6b Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 19:19:45 +0000 Subject: [PATCH 07/31] Added convergence checks. 
Algorithm completed --- inverse_covariance/clean.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 0855ac6..8127d29 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -17,7 +17,8 @@ FLOAT_DTYPES) -def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=10): +def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, + max_iter=50, tol=1e-6, verbose=False): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. Read more in the :ref:`User Guide `. @@ -38,6 +39,10 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSC matrix and if axis is 1). + max_iter : int, optional (50 by default) + Set the maximum number of iterations of successive normalization algorithm + tol : float, optional (1e-6 by default) + Set the convergence threshold for successive normalization Notes ----- This function invokes sklearn's scale function. Thus, the same restrictions @@ -58,11 +63,11 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). """ # noqa - X = check_array(X, accept_sparse=None, warn_on_dtype=True, - dtype=FLOAT_DTYPES - ) - Xrow_polish = np.copy(X) + X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, + dtype=FLOAT_DTYPES) + Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) + [n_rows,n_cols] = np.shape(X) if sparse.issparse(X): print('Input is sparse') @@ -71,18 +76,28 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ else: n_iter = 0 - while n_iter <= max_iter: - Xcol_polish = scale(Xrow_polish, axis=0, + err_norm = np.inf + oldXrow = np.copy(Xrow_polish) + oldXcol = np.copy(Xcol_polish) + while n_iter <= max_iter and err_norm > tol : + Xcol_polish = scale(Xrow_polish.T, axis=1, with_mean=True, with_std=with_std ) - Xrow_polish = scale(Xcol_polish, axis=1, + Xrow_polish = scale(Xcol_polish.T, axis=1, with_mean=True, with_std=with_std ) n_iter += 1 - X = Xrow_polisy - + err_norm_row = np.linalg.norm(oldXrow-Xrow_polish,'fro') + err_norm_col = np.linalg.norm(oldXcol-Xcol_polish,'fro') + err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) + if verbose: + print('Iteration: {}, Convergence Err: {}'.format(n_iter,err_norm)) + oldXrow = np.copy(Xrow_polish) + oldXcol = np.copy(Xcol_polish) + + X = Xrow_polish return X From dd92d808c2b321e454453979ff6ae464de407f99 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 19:20:27 +0000 Subject: [PATCH 08/31] Transform now calls twoway_standardize --- inverse_covariance/clean.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 8127d29..784907d 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -268,24 +268,18 @@ def transform(self, X, y='deprecated', copy=None): "deprecated since 0.19 and will be removed in 0.21", DeprecationWarning) - check_is_fitted(self, 'scale_') + check_is_fitted(self, 'row_scale_') copy = copy if copy is not None else self.copy - X = check_array(X, accept_sparse='csr', copy=copy, warn_on_dtype=True, + X 
= check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) if sparse.issparse(X): - if self.with_mean: - raise ValueError( - "Cannot center sparse matrices: pass `with_mean=False` " - "instead. See docstring for motivation and alternatives.") - if self.scale_ is not None: - inplace_column_scale(X, 1 / self.scale_) + print('Input is sparse') + raise NotImplemented( + "Algorithm for sparse matrices currently not supported.") else: - if self.with_mean: - X -= self.mean_ - if self.with_std: - X /= self.scale_ + X = twoway_standardize(X) return X def inverse_transform(self, X, copy=None): From a754f44687b22a7b8611304818e8e3f953950cbc Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 21:50:32 +0000 Subject: [PATCH 09/31] Updated algorithm. Test passes --- inverse_covariance/tests/clean_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py index ffed484..eb22291 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/clean_test.py @@ -30,7 +30,7 @@ def test_invalid_argument(): ''' X = np.zeros(shape=(10,10)) X_csc = sparse.csc_matrix(X) - assert_raises(TypeError, twoway_standardize(X_csc)) + assert_raises(TypeError, twoway_standardize, X_csc) X_csr = sparse.csr_matrix(X) - assert_raises(TypeError, twoway_standardize(X_csr)) + assert_raises(TypeError, twoway_standardize, X_csr) From 4a3c038c33ca262b2a44cb2c98d4804f72b239ce Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 21:52:23 +0000 Subject: [PATCH 10/31] Fixed bug in transform() --- inverse_covariance/clean.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 784907d..fde96a9 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -2,10 +2,15 @@ from scipy import sparse from scipy import stats -from sklearn.preprocessing.data import scale +from sklearn.preprocessing.data import ( + scale, + _handle_zeros_in_scale + ) from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array +from sklearn.externals import six +from sklearn.externals.six import string_types from sklearn.utils.extmath import row_norms from sklearn.utils.extmath import _incremental_mean_and_var from sklearn.utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1, @@ -68,12 +73,12 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) [n_rows,n_cols] = np.shape(X) - + if sparse.issparse(X): print('Input is sparse') raise NotImplemented( "Algorithm for sparse matrices currently not supported.") - + else: n_iter = 0 err_norm = np.inf @@ -102,7 +107,7 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, class TwoWayStandardScaler(BaseEstimator, TransformerMixin): - """Standardize features by removing the mean and scaling to unit variance + """Standardize features by removing the mean and scaling to unit variance in both row and column dimensions. This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. 
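[review note] On PATCH 09 above: the one-line test change matters because the
PATCH 01 form, `assert_raises(TypeError, twoway_standardize(X_csc))`, calls the
function before `assert_raises` can trap anything. Passing the callable and its
arguments separately lets the harness do the invocation (the toy `boom` function
is ours):

from sklearn.utils.testing import assert_raises

def boom(x):
    raise TypeError("no sparse input")

assert_raises(TypeError, boom, 0)    # correct: the harness calls boom(0)
# assert_raises(TypeError, boom(0))  # broken: boom(0) raises before the check
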
@@ -165,7 +170,7 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): def __init__(self, copy=True, with_mean=True, with_std=True): """Unlike StandardScaler, with_mean is always set to True, to ensure - that two-way standardization is always performed with centering. The + that two-way standardization is always performed with centering. The argument `with_mean` is retained for the sake of model API compatibility. """ self.with_mean = True @@ -202,7 +207,7 @@ def fit(self, X, y=None): return self.partial_fit(X, y) def partial_fit(self, X, y=None): - """Compute the mean and std for both row and column dimensions. + """Compute the mean and std for both row and column dimensions. Equivalent to fit. Online algorithm not supported at this time. Parameters ---------- @@ -211,7 +216,7 @@ def partial_fit(self, X, y=None): used for later scaling along the features axis. y : Passthrough for ``Pipeline`` compatibility. """ - X = check_array(X, accept_sparse=None, copy=self.copy, + X = check_array(X, accept_sparse=None, copy=self.copy, warn_on_dtype=True, dtype=FLOAT_DTYPES) if sparse.issparse(X): @@ -239,7 +244,7 @@ def partial_fit(self, X, y=None): else: self.row_var_ = None self.row_mean_, self.row_var_, self.n_cols_seen_ = \ - _incremental_mean_and_var(X, self.row_mean_, self.row_var_, + _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, self.n_cols_seen_) if self.with_std: From cc1d8d3390a220801c4d1417f1d55fc7e31a1198 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:01:09 +0000 Subject: [PATCH 11/31] Return original dimensions --- inverse_covariance/clean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index fde96a9..d94c025 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -102,7 +102,7 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - X = Xrow_polish + X = Xrow_polish.T return X From 16b11164ad0bc8c5b5f1d7d32ae7221cb9ea9582 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:21:45 +0000 Subject: [PATCH 12/31] inverse_transform completed, raises not implemented error --- inverse_covariance/clean.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index d94c025..766b655 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -300,27 +300,27 @@ def inverse_transform(self, X, copy=None): X_tr : array-like, shape [n_samples, n_features] Transformed array. 
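[review note] Why PATCH 12 (whose hunk begins here) stops promising an exact
inverse: the stored row/column means and scales describe the original data, but
the forward transform is an iteration of alternating affine maps, so undoing the
two stored maps in reverse order does not reproduce the input. A small numerical
illustration (toy data, ours):

import numpy as np
from sklearn.preprocessing import scale

X = np.random.RandomState(0).lognormal(size=(5, 4))
Z = scale(scale(X, axis=0), axis=1)  # one column pass, then one row pass
# The row pass disturbs the column centering achieved just before it:
print(np.abs(Z.mean(axis=0)).max())  # nonzero in general
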
""" - check_is_fitted(self, 'scale_') + check_is_fitted(self, 'row_scale_') copy = copy if copy is not None else self.copy if sparse.issparse(X): - if self.with_mean: - raise ValueError( - "Cannot uncenter sparse matrices: pass `with_mean=False` " - "instead See docstring for motivation and alternatives.") - if not sparse.isspmatrix_csr(X): - X = X.tocsr() - copy = False - if copy: - X = X.copy() - if self.scale_ is not None: - inplace_column_scale(X, self.scale_) + print('Input is sparse') + raise NotImplementedError( + 'Algorithm for sparse matrices currently not supported.') else: + raise NotImplementedError( + 'Two Way standardization cannot currently be reversed with accuracy') X = np.asarray(X) if copy: X = X.copy() + X = X.T + if self.with_std: + X *= self.row_scale_ + if self.with_mean: + X += self.row_mean_ + X = X.T if self.with_std: - X *= self.scale_ + X *= self.col_scale_ if self.with_mean: - X += self.mean_ + X += self.col_mean_ return X \ No newline at end of file From a58383a677647e3a645c2ff5d92bec33c9bb47bd Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:43:21 +0000 Subject: [PATCH 13/31] Delinting --- inverse_covariance/clean.py | 63 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 766b655..014b105 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -1,6 +1,5 @@ import numpy as np from scipy import sparse -from scipy import stats from sklearn.preprocessing.data import ( scale, @@ -9,20 +8,15 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array -from sklearn.externals import six from sklearn.externals.six import string_types -from sklearn.utils.extmath import row_norms from sklearn.utils.extmath import _incremental_mean_and_var -from sklearn.utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1, - inplace_csr_row_normalize_l2) -from sklearn.utils.sparsefuncs import (inplace_column_scale, - mean_variance_axis, incr_mean_variance_axis, - min_max_axis) -from sklearn.utils.validation import (check_is_fitted, check_random_state, - FLOAT_DTYPES) +from sklearn.utils.validation import ( + check_is_fitted, + FLOAT_DTYPES + ) -def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, +def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=50, tol=1e-6, verbose=False): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. @@ -68,11 +62,11 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). 
""" # noqa - X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, - dtype=FLOAT_DTYPES) + X = check_array(X, accept_sparse=None, copy=copy, + warn_on_dtype=True, dtype=FLOAT_DTYPES) Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) - [n_rows,n_cols] = np.shape(X) + [n_rows, n_cols] = np.shape(X) if sparse.issparse(X): print('Input is sparse') @@ -84,21 +78,19 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, err_norm = np.inf oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - while n_iter <= max_iter and err_norm > tol : + while n_iter <= max_iter and err_norm > tol: Xcol_polish = scale(Xrow_polish.T, axis=1, - with_mean=True, - with_std=with_std - ) + with_mean=True, with_std=with_std) Xrow_polish = scale(Xcol_polish.T, axis=1, - with_mean=True, - with_std=with_std - ) + with_mean=True, with_std=with_std) n_iter += 1 - err_norm_row = np.linalg.norm(oldXrow-Xrow_polish,'fro') - err_norm_col = np.linalg.norm(oldXcol-Xcol_polish,'fro') - err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) + err_norm_row = np.linalg.norm(oldXrow-Xrow_polish, 'fro') + err_norm_col = np.linalg.norm(oldXcol-Xcol_polish, 'fro') + err_norm = .5 * err_norm_row/(n_rows*n_cols) + \ + .5 * err_norm_col/(n_rows*n_cols) if verbose: - print('Iteration: {}, Convergence Err: {}'.format(n_iter,err_norm)) + print('Iteration: {}, Convergence Err: {}'.format( + n_iter, err_norm)) oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) @@ -171,7 +163,8 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): def __init__(self, copy=True, with_mean=True, with_std=True): """Unlike StandardScaler, with_mean is always set to True, to ensure that two-way standardization is always performed with centering. The - argument `with_mean` is retained for the sake of model API compatibility. + argument `with_mean` is retained for the sake of sklearn + API compatibility. """ self.with_mean = True self.with_std = with_std @@ -234,8 +227,10 @@ def partial_fit(self, X, y=None): self.col_var_ = None self.col_mean_, self.col_var_, self.n_rows_seen_ = \ - _incremental_mean_and_var(X, self.col_mean_, self.col_var_, - self.n_rows_seen_) + _incremental_mean_and_var(X, self.col_mean_, + self.col_var_, + self.n_rows_seen_ + ) if not hasattr(self, 'n_cols_seen_'): self.row_mean_ = .0 self.n_cols_seen_ = 0 @@ -244,8 +239,10 @@ def partial_fit(self, X, y=None): else: self.row_var_ = None self.row_mean_, self.row_var_, self.n_cols_seen_ = \ - _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, - self.n_cols_seen_) + _incremental_mean_and_var(X.T, self.row_mean_, + self.row_var_, + self.n_cols_seen_ + ) if self.with_std: self.row_scale_ = _handle_zeros_in_scale(np.sqrt(self.row_var_)) @@ -269,7 +266,7 @@ def transform(self, X, y='deprecated', copy=None): Copy the input X or not. 
""" if not isinstance(y, string_types) or y != 'deprecated': - warnings.warn("The parameter y on transform() is " + warnings.warn("The parameter y on transform() is " # noqa "deprecated since 0.19 and will be removed in 0.21", DeprecationWarning) @@ -309,7 +306,7 @@ def inverse_transform(self, X, copy=None): 'Algorithm for sparse matrices currently not supported.') else: raise NotImplementedError( - 'Two Way standardization cannot currently be reversed with accuracy') + 'Two Way standardization not reversible with accuracy') X = np.asarray(X) if copy: X = X.copy() @@ -323,4 +320,4 @@ def inverse_transform(self, X, copy=None): X *= self.col_scale_ if self.with_mean: X += self.col_mean_ - return X \ No newline at end of file + return X From 5904f60c6214b8f0aa7ed799f724cf1b11b3aa03 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:46:01 +0000 Subject: [PATCH 14/31] More delinting --- inverse_covariance/tests/clean_test.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py index eb22291..0a7e893 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/clean_test.py @@ -1,36 +1,35 @@ import numpy as np from scipy import sparse -import pytest from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_allclose -from sklearn import datasets from clean import ( twoway_standardize ) + def custom_init(n_rows, n_cols, with_mean=False): prng = np.random.RandomState(1) - X = prng.normal(0, np.ones(shape=(n_rows,n_cols))) + X = prng.normal(0, np.ones(shape=(n_rows, n_cols))) if with_mean: mu = np.ones(shape=(n_rows, 1)) * \ prng.randint(1, 5, size=(1, n_cols)) else: - mu = np.zeros(shape=(n_rows,n_cols)) + mu = np.zeros(shape=(n_rows, n_cols)) var_rows = prng.lognormal(2, 1, size=(n_rows, 1)) sqcov_rows = np.diag(np.sqrt(var_rows)) var_cols = prng.lognormal(2, 1, size=(1, n_cols)) sqcov_cols = np.diag(np.sqrt(var_cols)) return mu + sqcov_rows * X * sqcov_cols + def test_invalid_argument(): ''' - Test behavior of invalid sparse inputs. + Test behavior of invalid sparse data matrix inputs. 
''' - X = np.zeros(shape=(10,10)) + X = np.zeros(shape=(10, 10)) X_csc = sparse.csc_matrix(X) assert_raises(TypeError, twoway_standardize, X_csc) - + X_csr = sparse.csr_matrix(X) assert_raises(TypeError, twoway_standardize, X_csr) From a17a5309b202129df46406514ed3e1a2bcc51ebd Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Thu, 12 Oct 2017 17:57:25 +0000 Subject: [PATCH 15/31] Fixed import error --- inverse_covariance/tests/clean_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py index 0a7e893..409979d 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/clean_test.py @@ -3,7 +3,7 @@ from sklearn.utils.testing import assert_raises -from clean import ( +from inverse_covariance.clean import ( twoway_standardize ) From e5395bdfd6e97a87474fae4068dfce7838ee3937 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Thu, 12 Oct 2017 17:57:47 +0000 Subject: [PATCH 16/31] Added clean.py --- inverse_covariance/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/inverse_covariance/__init__.py b/inverse_covariance/__init__.py index c9d58a7..1398af5 100644 --- a/inverse_covariance/__init__.py +++ b/inverse_covariance/__init__.py @@ -8,6 +8,10 @@ QuicGraphLassoCV, QuicGraphLassoEBIC, ) +from .clean import ( + twoway_standardize, + TwoWayStandardScaler, +) from .metrics import ( log_likelihood, kl_loss, @@ -22,12 +26,15 @@ from .adaptive_graph_lasso import AdaptiveGraphLasso from .cross_validation import RepeatedKFold + __all__ = [ 'InverseCovarianceEstimator', 'quic', 'QuicGraphLasso', 'QuicGraphLassoCV', 'QuicGraphLassoEBIC', + 'twoway_standardize', + 'TwoWayStandardScaler', 'log_likelihood', 'kl_loss', 'quadratic_loss', From a2940dff6bdf91515d85f01491b653fdaf030ce0 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:27:49 -0700 Subject: [PATCH 17/31] Rename files from `clean` to `two_way_standard_scaler` --- inverse_covariance/__init__.py | 6 +++--- .../{clean_test.py => two_way_standard_scaler_test.py} | 8 ++++---- .../{clean.py => two_way_standard_scaler.py} | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) rename inverse_covariance/tests/{clean_test.py => two_way_standard_scaler_test.py} (81%) rename inverse_covariance/{clean.py => two_way_standard_scaler.py} (98%) diff --git a/inverse_covariance/__init__.py b/inverse_covariance/__init__.py index c20068d..6f5ce1d 100644 --- a/inverse_covariance/__init__.py +++ b/inverse_covariance/__init__.py @@ -9,8 +9,8 @@ QuicGraphicalLassoCV, QuicGraphicalLassoEBIC, ) -from .clean import ( - twoway_standardize, +from .two_way_standard_scaler import ( + two_way_standardize, TwoWayStandardScaler, ) from .metrics import ( @@ -37,7 +37,7 @@ "QuicGraphicalLasso", "QuicGraphicalLassoCV", "QuicGraphicalLassoEBIC", - 'twoway_standardize', + 'two_way_standardize', 'TwoWayStandardScaler', "log_likelihood", "kl_loss", diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/two_way_standard_scaler_test.py similarity index 81% rename from inverse_covariance/tests/clean_test.py rename to inverse_covariance/tests/two_way_standard_scaler_test.py index 409979d..2ca23b4 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/two_way_standard_scaler_test.py @@ -3,8 +3,8 @@ from sklearn.utils.testing import assert_raises -from inverse_covariance.clean import ( - twoway_standardize +from inverse_covariance.two_way_standard_scaler import ( + 
two_way_standardize ) @@ -29,7 +29,7 @@ def test_invalid_argument(): ''' X = np.zeros(shape=(10, 10)) X_csc = sparse.csc_matrix(X) - assert_raises(TypeError, twoway_standardize, X_csc) + assert_raises(TypeError, two_way_standardize, X_csc) X_csr = sparse.csr_matrix(X) - assert_raises(TypeError, twoway_standardize, X_csr) + assert_raises(TypeError, two_way_standardize, X_csr) diff --git a/inverse_covariance/clean.py b/inverse_covariance/two_way_standard_scaler.py similarity index 98% rename from inverse_covariance/clean.py rename to inverse_covariance/two_way_standard_scaler.py index 014b105..f00c88f 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -16,7 +16,7 @@ ) -def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, +def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=50, tol=1e-6, verbose=False): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. @@ -100,7 +100,7 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """Standardize features by removing the mean and scaling to unit variance - in both row and column dimensions. + in both row and column dimensions. This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. Parameters @@ -150,14 +150,14 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): [ 1. 1.]] See also -------- - twoway_standardize: Equivalent function without the estimator API. + two_way_standardize: Equivalent function without the estimator API. :class:`sklearn.preprocessing.StandardScaler` :class:`sklearn.decomposition.PCA` Further removes the linear correlation across features with 'whiten=True'. Notes ----- See the implications of one-way vs. two-way standardization in here. TBD - + """ # noqa def __init__(self, copy=True, with_mean=True, with_std=True): @@ -281,7 +281,7 @@ def transform(self, X, y='deprecated', copy=None): raise NotImplemented( "Algorithm for sparse matrices currently not supported.") else: - X = twoway_standardize(X) + X = two_way_standardize(X) return X def inverse_transform(self, X, copy=None): From 238f393c0b1b577ec74a222fc66dde2a1e66a93c Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:29:58 -0700 Subject: [PATCH 18/31] Add estimator check --- inverse_covariance/tests/common_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inverse_covariance/tests/common_test.py b/inverse_covariance/tests/common_test.py index aadfd96..f7d1b4b 100644 --- a/inverse_covariance/tests/common_test.py +++ b/inverse_covariance/tests/common_test.py @@ -5,6 +5,7 @@ QuicGraphicalLassoEBIC, AdaptiveGraphicalLasso, ModelAverage, + TwoWayStandardScaler, ) @@ -26,3 +27,7 @@ def test_adaptive_graphical_lasso(): def test_model_average(): return check_estimator(ModelAverage) + + +def test_two_way_standard_scaler(): + return check_estimator(TwoWayStandardScaler) From 34dc9368b5ea9d57044fa7fccf015952944648e2 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:31:39 -0700 Subject: [PATCH 19/31] Rename commont_test to sklearn_test as is more descriptive of this test. 
--- inverse_covariance/tests/{common_test.py => sklearn_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename inverse_covariance/tests/{common_test.py => sklearn_test.py} (100%) diff --git a/inverse_covariance/tests/common_test.py b/inverse_covariance/tests/sklearn_test.py similarity index 100% rename from inverse_covariance/tests/common_test.py rename to inverse_covariance/tests/sklearn_test.py From 78696598b31746a3e2631034a963705ee3f56c25 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:53:08 -0700 Subject: [PATCH 20/31] Address initial comments and some cleanup. --- inverse_covariance/two_way_standard_scaler.py | 162 +++++++----------- 1 file changed, 58 insertions(+), 104 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index f00c88f..15e65eb 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -1,3 +1,4 @@ +import warnings import numpy as np from scipy import sparse @@ -60,7 +61,7 @@ def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, -------- StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). - """ # noqa + """ X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, dtype=FLOAT_DTYPES) @@ -86,16 +87,16 @@ def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, n_iter += 1 err_norm_row = np.linalg.norm(oldXrow-Xrow_polish, 'fro') err_norm_col = np.linalg.norm(oldXcol-Xcol_polish, 'fro') - err_norm = .5 * err_norm_row/(n_rows*n_cols) + \ - .5 * err_norm_col/(n_rows*n_cols) + err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) + if verbose: print('Iteration: {}, Convergence Err: {}'.format( n_iter, err_norm)) + oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - X = Xrow_polish.T - return X + return Xrow_polish.T class TwoWayStandardScaler(BaseEstimator, TransformerMixin): @@ -132,7 +133,7 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): `scale_` n_samples_seen_ : int The number of samples processed by the estimator. Will be reset on - new calls to fit, but increments across ``partial_fit`` calls. + new calls to fit, but increments across ``fit`` calls. Examples -------- >>> from inverse_covariance.clean import TwoWayStandardScaler @@ -158,7 +159,7 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): ----- See the implications of one-way vs. two-way standardization in here. TBD - """ # noqa + """ def __init__(self, copy=True, with_mean=True, with_std=True): """Unlike StandardScaler, with_mean is always set to True, to ensure @@ -166,27 +167,10 @@ def __init__(self, copy=True, with_mean=True, with_std=True): argument `with_mean` is retained for the sake of sklearn API compatibility. """ - self.with_mean = True + self.with_mean = with_mean self.with_std = with_std self.copy = copy - def _reset(self): - """Reset internal data-dependent state of the scaler, if necessary. - __init__ parameters are not touched. - """ - - # Checking one attribute is enough, becase they are all set together - # in partial_fit - if hasattr(self, 'col_scale_'): - del self.row_scale_ - del self.row_mean_ - del self.row_var_ - del self.col_scale_ - del self.col_mean_ - del self.col_var_ - del self.n_rows_seen_ - del self.n_cols_seen_ - def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. 
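[review note] In PATCH 20's consolidated `fit()` here, column statistics are
computed from X and row statistics from X.T in a single pass. For one batch this
is equivalent to the plain-numpy computation sketched below (function name ours;
the real code additionally routes the scales through `_handle_zeros_in_scale` to
guard zero-variance rows/columns):

import numpy as np

def row_col_stats(X):
    X = np.asarray(X, dtype=float)
    return {
        'col_mean_': X.mean(axis=0), 'col_scale_': X.std(axis=0),  # per column
        'row_mean_': X.mean(axis=1), 'row_scale_': X.std(axis=1),  # per row
    }
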
Parameters @@ -196,19 +180,6 @@ def fit(self, X, y=None): along both row and column axes y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - - return self.partial_fit(X, y) - - def partial_fit(self, X, y=None): - """Compute the mean and std for both row and column dimensions. - Equivalent to fit. Online algorithm not supported at this time. - Parameters - ---------- - X : {array-like}, shape [n_rows, n_cols] - The data used to compute the mean and standard deviation - used for later scaling along the features axis. - y : Passthrough for ``Pipeline`` compatibility. - """ X = check_array(X, accept_sparse=None, copy=self.copy, warn_on_dtype=True, dtype=FLOAT_DTYPES) @@ -217,80 +188,59 @@ def partial_fit(self, X, y=None): raise NotImplemented( "Algorithm for sparse matrices currently not supported.") else: - # First pass - if not hasattr(self, 'n_rows_seen_'): - self.col_mean_ = .0 - self.n_rows_seen_ = 0 - if self.with_std: - self.col_var_ = .0 - else: - self.col_var_ = None - - self.col_mean_, self.col_var_, self.n_rows_seen_ = \ - _incremental_mean_and_var(X, self.col_mean_, - self.col_var_, - self.n_rows_seen_ - ) - if not hasattr(self, 'n_cols_seen_'): - self.row_mean_ = .0 - self.n_cols_seen_ = 0 - if self.with_std: - self.row_var_ = .0 - else: - self.row_var_ = None - self.row_mean_, self.row_var_, self.n_cols_seen_ = \ - _incremental_mean_and_var(X.T, self.row_mean_, - self.row_var_, - self.n_cols_seen_ - ) + self.col_mean_ = 0. + self.n_rows_seen_ = 0 + + self.col_var_ = None + if self.with_std: + self.col_var_ = 0. + + self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var(X, self.col_mean_, self.col_var_, self.n_rows_seen_) + + self.row_mean_ = 0. + self.n_cols_seen_ = 0 + + self.row_var_ = None + if self.with_std: + self.row_var_ = 0. + + self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, self.n_cols_seen_) + self.row_scale_ = None + self.col_scale_ = None if self.with_std: self.row_scale_ = _handle_zeros_in_scale(np.sqrt(self.row_var_)) self.col_scale_ = _handle_zeros_in_scale(np.sqrt(self.col_var_)) - else: - self.row_scale_ = None - self.col_scale_ = None return self - def transform(self, X, y='deprecated', copy=None): + def transform(self, X, copy=None): """Perform standardization by centering and scaling Parameters ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. - y : (ignored) - .. deprecated:: 0.19 - This parameter will be removed in 0.21. copy : bool, optional (default: None) Copy the input X or not. 
""" - if not isinstance(y, string_types) or y != 'deprecated': - warnings.warn("The parameter y on transform() is " # noqa - "deprecated since 0.19 and will be removed in 0.21", - DeprecationWarning) - check_is_fitted(self, 'row_scale_') - copy = copy if copy is not None else self.copy X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) if sparse.issparse(X): - print('Input is sparse') raise NotImplemented( - "Algorithm for sparse matrices currently not supported.") - else: - X = two_way_standardize(X) - return X + "Input is sparse: Algorithm for sparse matrices currently not supported.") - def inverse_transform(self, X, copy=None): + return two_way_standardize(X) + + def inverse_transform(self, X, copy=False): """Scale back the data to the original representation Parameters ---------- X : array-like, shape [n_samples, n_features] The data used to scale along the features axis. - copy : bool, optional (default: None) + copy : bool, optional (default: False) Copy the input X or not. Returns ------- @@ -298,26 +248,30 @@ def inverse_transform(self, X, copy=None): Transformed array. """ check_is_fitted(self, 'row_scale_') - - copy = copy if copy is not None else self.copy if sparse.issparse(X): - print('Input is sparse') - raise NotImplementedError( - 'Algorithm for sparse matrices currently not supported.') - else: raise NotImplementedError( - 'Two Way standardization not reversible with accuracy') - X = np.asarray(X) - if copy: - X = X.copy() - X = X.T - if self.with_std: - X *= self.row_scale_ - if self.with_mean: - X += self.row_mean_ - X = X.T - if self.with_std: - X *= self.col_scale_ - if self.with_mean: - X += self.col_mean_ + 'Input is sparse: Algorithm for sparse matrices currently not supported.') + + warnings.warn('Two Way standardization not reversible with accuracy') + + X = np.asarray(X) + if copy: + X = X.copy() + + X = X.T + + if self.with_std: + X *= self.row_scale_ + + if self.with_mean: + X += self.row_mean_ + + X = X.T + + if self.with_std: + X *= self.col_scale_ + + if self.with_mean: + X += self.col_mean_ + return X From 9cbb212e6c13fc43b56fd390340a881725e926d9 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:00:41 -0700 Subject: [PATCH 21/31] Black formatting and more simplification and cleanup. 
--- inverse_covariance/__init__.py | 21 +-- .../tests/two_way_standard_scaler_test.py | 13 +- inverse_covariance/two_way_standard_scaler.py | 145 ++++++++++-------- 3 files changed, 90 insertions(+), 89 deletions(-) diff --git a/inverse_covariance/__init__.py b/inverse_covariance/__init__.py index 6f5ce1d..817ea30 100644 --- a/inverse_covariance/__init__.py +++ b/inverse_covariance/__init__.py @@ -9,20 +9,9 @@ QuicGraphicalLassoCV, QuicGraphicalLassoEBIC, ) -from .two_way_standard_scaler import ( - two_way_standardize, - TwoWayStandardScaler, -) -from .metrics import ( - log_likelihood, - kl_loss, - quadratic_loss, - ebic, -) -from .rank_correlation import ( - spearman_correlation, - kendalltau_correlation, -) +from .two_way_standard_scaler import two_way_standardize, TwoWayStandardScaler +from .metrics import log_likelihood, kl_loss, quadratic_loss, ebic +from .rank_correlation import spearman_correlation, kendalltau_correlation from .model_average import ModelAverage from .adaptive_graph_lasso import AdaptiveGraphLasso, AdaptiveGraphicalLasso from .cross_validation import RepeatedKFold @@ -37,8 +26,8 @@ "QuicGraphicalLasso", "QuicGraphicalLassoCV", "QuicGraphicalLassoEBIC", - 'two_way_standardize', - 'TwoWayStandardScaler', + "two_way_standardize", + "TwoWayStandardScaler", "log_likelihood", "kl_loss", "quadratic_loss", diff --git a/inverse_covariance/tests/two_way_standard_scaler_test.py b/inverse_covariance/tests/two_way_standard_scaler_test.py index 2ca23b4..93bcd19 100644 --- a/inverse_covariance/tests/two_way_standard_scaler_test.py +++ b/inverse_covariance/tests/two_way_standard_scaler_test.py @@ -3,30 +3,29 @@ from sklearn.utils.testing import assert_raises -from inverse_covariance.two_way_standard_scaler import ( - two_way_standardize -) +from inverse_covariance.two_way_standard_scaler import two_way_standardize def custom_init(n_rows, n_cols, with_mean=False): prng = np.random.RandomState(1) X = prng.normal(0, np.ones(shape=(n_rows, n_cols))) if with_mean: - mu = np.ones(shape=(n_rows, 1)) * \ - prng.randint(1, 5, size=(1, n_cols)) + mu = np.ones(shape=(n_rows, 1)) * prng.randint(1, 5, size=(1, n_cols)) else: mu = np.zeros(shape=(n_rows, n_cols)) + var_rows = prng.lognormal(2, 1, size=(n_rows, 1)) sqcov_rows = np.diag(np.sqrt(var_rows)) var_cols = prng.lognormal(2, 1, size=(1, n_cols)) sqcov_cols = np.diag(np.sqrt(var_cols)) + return mu + sqcov_rows * X * sqcov_cols def test_invalid_argument(): - ''' + """ Test behavior of invalid sparse data matrix inputs. 
- ''' + """ X = np.zeros(shape=(10, 10)) X_csc = sparse.csc_matrix(X) assert_raises(TypeError, two_way_standardize, X_csc) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 15e65eb..3f033ec 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -2,23 +2,25 @@ import numpy as np from scipy import sparse -from sklearn.preprocessing.data import ( - scale, - _handle_zeros_in_scale - ) +from sklearn.preprocessing.data import scale, _handle_zeros_in_scale from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array from sklearn.externals.six import string_types from sklearn.utils.extmath import _incremental_mean_and_var -from sklearn.utils.validation import ( - check_is_fitted, - FLOAT_DTYPES - ) - - -def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, - max_iter=50, tol=1e-6, verbose=False): +from sklearn.utils.validation import check_is_fitted, FLOAT_DTYPES + + +def two_way_standardize( + X, + axis=0, + with_mean=True, + with_std=True, + copy=True, + max_iter=50, + tol=1e-6, + verbose=False, +): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. Read more in the :ref:`User Guide `. @@ -62,39 +64,37 @@ def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). """ + X = check_array( + X, accept_sparse=None, copy=copy, warn_on_dtype=True, dtype=FLOAT_DTYPES + ) + if sparse.issparse(X): + raise NotImplemented( + "Input is sparse: Algorithm for sparse matrices currently not supported." 
+ ) - X = check_array(X, accept_sparse=None, copy=copy, - warn_on_dtype=True, dtype=FLOAT_DTYPES) Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) [n_rows, n_cols] = np.shape(X) - if sparse.issparse(X): - print('Input is sparse') - raise NotImplemented( - "Algorithm for sparse matrices currently not supported.") + n_iter = 0 + err_norm = np.inf + oldXrow = np.copy(Xrow_polish) + oldXcol = np.copy(Xcol_polish) + while n_iter <= max_iter and err_norm > tol: + Xcol_polish = scale(Xrow_polish.T, axis=1, with_mean=True, with_std=with_std) + Xrow_polish = scale(Xcol_polish.T, axis=1, with_mean=True, with_std=with_std) + n_iter += 1 + err_norm_row = np.linalg.norm(oldXrow - Xrow_polish, "fro") + err_norm_col = np.linalg.norm(oldXcol - Xcol_polish, "fro") + err_norm = .5 * err_norm_row / (n_rows * n_cols) + .5 * err_norm_col / ( + n_rows * n_cols + ) + + if verbose: + print("Iteration: {}, Convergence Err: {}".format(n_iter, err_norm)) - else: - n_iter = 0 - err_norm = np.inf oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - while n_iter <= max_iter and err_norm > tol: - Xcol_polish = scale(Xrow_polish.T, axis=1, - with_mean=True, with_std=with_std) - Xrow_polish = scale(Xcol_polish.T, axis=1, - with_mean=True, with_std=with_std) - n_iter += 1 - err_norm_row = np.linalg.norm(oldXrow-Xrow_polish, 'fro') - err_norm_col = np.linalg.norm(oldXcol-Xcol_polish, 'fro') - err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) - - if verbose: - print('Iteration: {}, Convergence Err: {}'.format( - n_iter, err_norm)) - - oldXrow = np.copy(Xrow_polish) - oldXcol = np.copy(Xcol_polish) return Xrow_polish.T @@ -180,31 +180,39 @@ def fit(self, X, y=None): along both row and column axes y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - X = check_array(X, accept_sparse=None, copy=self.copy, - warn_on_dtype=True, dtype=FLOAT_DTYPES) - + X = check_array( + X, + accept_sparse=None, + copy=self.copy, + warn_on_dtype=True, + dtype=FLOAT_DTYPES, + ) if sparse.issparse(X): - print('Input is sparse') raise NotImplemented( - "Algorithm for sparse matrices currently not supported.") - else: - self.col_mean_ = 0. - self.n_rows_seen_ = 0 + "Input is sparse: Algorithm for sparse matrices currently not supported." + ) - self.col_var_ = None - if self.with_std: - self.col_var_ = 0. + self.col_mean_ = 0. + self.n_rows_seen_ = 0 - self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var(X, self.col_mean_, self.col_var_, self.n_rows_seen_) + self.col_var_ = None + if self.with_std: + self.col_var_ = 0. - self.row_mean_ = 0. - self.n_cols_seen_ = 0 + self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var( + X, self.col_mean_, self.col_var_, self.n_rows_seen_ + ) - self.row_var_ = None - if self.with_std: - self.row_var_ = 0. + self.row_mean_ = 0. + self.n_cols_seen_ = 0 - self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, self.n_cols_seen_) + self.row_var_ = None + if self.with_std: + self.row_var_ = 0. 
+ + self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var( + X.T, self.row_mean_, self.row_var_, self.n_cols_seen_ + ) self.row_scale_ = None self.col_scale_ = None @@ -214,23 +222,27 @@ def fit(self, X, y=None): return self - def transform(self, X, copy=None): + def transform(self, X, y=None, copy=False): """Perform standardization by centering and scaling Parameters ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. - copy : bool, optional (default: None) - Copy the input X or not. """ - check_is_fitted(self, 'row_scale_') - copy = copy if copy is not None else self.copy - X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, - estimator=self, dtype=FLOAT_DTYPES) + check_is_fitted(self, "row_scale_") + X = check_array( + X, + accept_sparse=None, + copy=copy, + warn_on_dtype=True, + estimator=self, + dtype=FLOAT_DTYPES, + ) if sparse.issparse(X): raise NotImplemented( - "Input is sparse: Algorithm for sparse matrices currently not supported.") + "Input is sparse: Algorithm for sparse matrices currently not supported." + ) return two_way_standardize(X) @@ -247,12 +259,13 @@ def inverse_transform(self, X, copy=False): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - check_is_fitted(self, 'row_scale_') + check_is_fitted(self, "row_scale_") if sparse.issparse(X): raise NotImplementedError( - 'Input is sparse: Algorithm for sparse matrices currently not supported.') + "Input is sparse: Algorithm for sparse matrices currently not supported." + ) - warnings.warn('Two Way standardization not reversible with accuracy') + warnings.warn("Two Way standardization not reversible with accuracy") X = np.asarray(X) if copy: From a8e980ffe9114fd5e567a4c54c959308ba29e514 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:02:32 -0700 Subject: [PATCH 22/31] Black formatting and more simplification and cleanup. --- inverse_covariance/two_way_standard_scaler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 3f033ec..d27d23f 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -240,7 +240,7 @@ def transform(self, X, y=None, copy=False): ) if sparse.issparse(X): - raise NotImplemented( + raise NotImplementedError( "Input is sparse: Algorithm for sparse matrices currently not supported." ) @@ -265,7 +265,7 @@ def inverse_transform(self, X, copy=False): "Input is sparse: Algorithm for sparse matrices currently not supported." ) - warnings.warn("Two Way standardization not reversible with accuracy") + warnings.warn("Reversing two way transformation is not accurate.") X = np.asarray(X) if copy: From e864e7288728479fd483b6b2fc503cd2d31d6d1f Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:21:43 -0700 Subject: [PATCH 23/31] Ensure interface can be validated. --- inverse_covariance/two_way_standard_scaler.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index d27d23f..9db570f 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -64,17 +64,9 @@ def two_way_standardize( StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). 
""" - X = check_array( - X, accept_sparse=None, copy=copy, warn_on_dtype=True, dtype=FLOAT_DTYPES - ) - if sparse.issparse(X): - raise NotImplemented( - "Input is sparse: Algorithm for sparse matrices currently not supported." - ) - Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) - [n_rows, n_cols] = np.shape(X) + n_rows, n_cols = np.shape(X) n_iter = 0 err_norm = np.inf @@ -182,16 +174,20 @@ def fit(self, X, y=None): """ X = check_array( X, - accept_sparse=None, + accept_sparse=False, copy=self.copy, warn_on_dtype=True, dtype=FLOAT_DTYPES, + estimator=self, + ensure_min_features=2, ) if sparse.issparse(X): raise NotImplemented( "Input is sparse: Algorithm for sparse matrices currently not supported." ) + self.n_rows_, self.n_cols_ = np.shape(X) + self.col_mean_ = 0. self.n_rows_seen_ = 0 @@ -222,7 +218,7 @@ def fit(self, X, y=None): return self - def transform(self, X, y=None, copy=False): + def transform(self, X, copy=False): """Perform standardization by centering and scaling Parameters ---------- @@ -232,12 +228,15 @@ def transform(self, X, y=None, copy=False): check_is_fitted(self, "row_scale_") X = check_array( X, - accept_sparse=None, + accept_sparse=False, copy=copy, warn_on_dtype=True, - estimator=self, dtype=FLOAT_DTYPES, + estimator=self, ) + n_rows, n_cols = np.shape(X) + if self.n_cols_ != n_cols: + raise ValueError("Number of features must be same as for fit().") if sparse.issparse(X): raise NotImplementedError( From 7f86bb3f0f46b33cbb6ebf9a154da0e2eecb7832 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:30:20 -0700 Subject: [PATCH 24/31] More simplification. --- inverse_covariance/two_way_standard_scaler.py | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 9db570f..5d85a56 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -11,6 +11,7 @@ from sklearn.utils.validation import check_is_fitted, FLOAT_DTYPES + def two_way_standardize( X, axis=0, @@ -64,31 +65,30 @@ def two_way_standardize( StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). 
""" - Xrow_polish = np.copy(X.T) - Xcol_polish = np.copy(X) n_rows, n_cols = np.shape(X) - n_iter = 0 err_norm = np.inf - oldXrow = np.copy(Xrow_polish) - oldXcol = np.copy(Xcol_polish) + rows_X = np.copy(X.T) + cols_X = np.copy(X) + n_iter = 0 while n_iter <= max_iter and err_norm > tol: - Xcol_polish = scale(Xrow_polish.T, axis=1, with_mean=True, with_std=with_std) - Xrow_polish = scale(Xcol_polish.T, axis=1, with_mean=True, with_std=with_std) - n_iter += 1 - err_norm_row = np.linalg.norm(oldXrow - Xrow_polish, "fro") - err_norm_col = np.linalg.norm(oldXcol - Xcol_polish, "fro") + col_polish = scale(row_polish.T, axis=1, with_mean=True, with_std=with_std) + row_polish = scale(col_polish.T, axis=1, with_mean=True, with_std=with_std) + + err_norm_row = np.linalg.norm(rows_X - row_polish, "fro") + err_norm_col = np.linalg.norm(cols_X - col_polish, "fro") err_norm = .5 * err_norm_row / (n_rows * n_cols) + .5 * err_norm_col / ( n_rows * n_cols ) + n_iter += 1 if verbose: print("Iteration: {}, Convergence Err: {}".format(n_iter, err_norm)) - oldXrow = np.copy(Xrow_polish) - oldXcol = np.copy(Xcol_polish) + rows_X = np.copy(row_polish) + cols_X = np.copy(col_polish) - return Xrow_polish.T + return row_polish.T class TwoWayStandardScaler(BaseEstimator, TransformerMixin): @@ -188,26 +188,14 @@ def fit(self, X, y=None): self.n_rows_, self.n_cols_ = np.shape(X) - self.col_mean_ = 0. - self.n_rows_seen_ = 0 - - self.col_var_ = None - if self.with_std: - self.col_var_ = 0. - + self.col_var_ = 0. if self.with_std else None self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var( - X, self.col_mean_, self.col_var_, self.n_rows_seen_ + X, 0.0, self.col_var_, 0 ) - self.row_mean_ = 0. - self.n_cols_seen_ = 0 - - self.row_var_ = None - if self.with_std: - self.row_var_ = 0. - + self.row_var_ = 0. if self.with_std else None self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var( - X.T, self.row_mean_, self.row_var_, self.n_cols_seen_ + X.T, 0.0, self.row_var_, 0 ) self.row_scale_ = None @@ -266,7 +254,6 @@ def inverse_transform(self, X, copy=False): warnings.warn("Reversing two way transformation is not accurate.") - X = np.asarray(X) if copy: X = X.copy() From 748fe33d031b830e98116e568e3b20b8527fb344 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:32:02 -0700 Subject: [PATCH 25/31] Autoformat. --- inverse_covariance/two_way_standard_scaler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 5d85a56..eb7ca30 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -11,7 +11,6 @@ from sklearn.utils.validation import check_is_fitted, FLOAT_DTYPES - def two_way_standardize( X, axis=0, From d2800fc78a0f384bd64f7096ed5cfd0067215ee0 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 15:34:01 -0700 Subject: [PATCH 26/31] Bring back partial_fit capability, add tests, ask questions. 
---
 .../tests/quic_graph_lasso_test.py            |   3 +-
 .../tests/two_way_standard_scaler_test.py     | 189 +++++++++++++++++-
 inverse_covariance/two_way_standard_scaler.py | 144 ++++++++-----
 3 files changed, 281 insertions(+), 55 deletions(-)

diff --git a/inverse_covariance/tests/quic_graph_lasso_test.py b/inverse_covariance/tests/quic_graph_lasso_test.py
index dec6b63..a45bb98 100644
--- a/inverse_covariance/tests/quic_graph_lasso_test.py
+++ b/inverse_covariance/tests/quic_graph_lasso_test.py
@@ -1,8 +1,7 @@
 import numpy as np
 import pytest
 
-from sklearn.utils.testing import assert_raises
-from sklearn.utils.testing import assert_allclose
+from sklearn.utils.testing import assert_raises, assert_allclose
 from sklearn import datasets
 
 from inverse_covariance import (
diff --git a/inverse_covariance/tests/two_way_standard_scaler_test.py b/inverse_covariance/tests/two_way_standard_scaler_test.py
index 93bcd19..e677a29 100644
--- a/inverse_covariance/tests/two_way_standard_scaler_test.py
+++ b/inverse_covariance/tests/two_way_standard_scaler_test.py
@@ -1,9 +1,13 @@
+import pytest
 import numpy as np
 from scipy import sparse
+from sklearn.utils.testing import assert_raises, assert_allclose
+from sklearn.exceptions import NotFittedError
 
-from sklearn.utils.testing import assert_raises
-
-from inverse_covariance.two_way_standard_scaler import two_way_standardize
+from inverse_covariance.two_way_standard_scaler import (
+    two_way_standardize,
+    TwoWayStandardScaler,
+)
 
 
 def custom_init(n_rows, n_cols, with_mean=False):
@@ -22,13 +26,182 @@ def custom_init(n_rows, n_cols, with_mean=False):
     return mu + sqcov_rows * X * sqcov_cols
 
 
-def test_invalid_argument():
+def test_fit_exception_on_sparse_input():
+    """
+    Test behavior of invalid sparse data matrix inputs.
+    """
+    X = np.zeros(shape=(10, 10))
+    sparse_Xs = [sparse.csc_matrix(X), sparse.csr_matrix(X)]
+    for sparse_X in sparse_Xs:
+        scaler = TwoWayStandardScaler()
+        assert_raises(TypeError, scaler.fit, sparse_X)
+
+
+def test_transform_exception_not_fitted():
+    """
+    Test that transform raises NotFittedError when called before fit.
+    """
+    scaler = TwoWayStandardScaler()
+    assert_raises(NotFittedError, scaler.transform, np.zeros(shape=(10, 10)))
+
+
+def test_transform_exception_on_sparse_input():
     """
     Test behavior of invalid sparse data matrix inputs.
""" X = np.zeros(shape=(10, 10)) - X_csc = sparse.csc_matrix(X) - assert_raises(TypeError, two_way_standardize, X_csc) + sparse_Xs = [sparse.csc_matrix(X), sparse.csr_matrix(X)] + for sparse_X in sparse_Xs: + scaler = TwoWayStandardScaler() + scaler.fit(X) + assert_raises(TypeError, scaler.transform, sparse_X) + + +@pytest.mark.parametrize( + "data, with_std, expected", + [ + ( + [[1, 0], [1, 0], [2, 1], [2, 1]], + True, # with_std=True + [ + [0.5, 0.5, 1.5, 1.5], # row_mean_ + [0.25, 0.25, 0.25, 0.25], # row_var_ + [1.5, 0.5], # col_mean_ + [0.25, 0.25], # col_var_ + [0.5, 0.5, 0.5, 0.5], # row_scale_ + [0.5, 0.5], # col_scale_ + [4], # [n_rows_seen_] + [2], # [n_cols_seen_] + ], + ), + ( + [[1, 0], [1, 0], [2, 1], [2, 1]], + False, # with_std=False + [ + [0.5, 0.5, 1.5, 1.5], # row_mean_ + None, # row_var_ + [1.5, 0.5], # col_mean_ + None, # col_var_ + None, # row_scale_ + None, # col_scale_ + [4], # [n_rows_seen_] + [2], # [n_cols_seen_] + ], + ), + ], +) +def test_fit(data, with_std, expected): + scaler = TwoWayStandardScaler(with_std=with_std) + scaler.fit(data) + result = [ + scaler.row_mean_, + scaler.row_var_, + scaler.col_mean_, + scaler.col_var_, + scaler.row_scale_, + scaler.col_scale_, + [scaler.n_rows_seen_], + [scaler.n_cols_seen_], + ] + assert_allclose( + [i for e in expected if e is not None for i in e], + [j for r in result if r is not None for j in r], + ) + + +@pytest.mark.parametrize( + "data, with_std, expected", + [ + ( + [ + [[1, 0], [1, 0], [2, 1], [2, 1]], + [[1, 0], [1, 0], [2, 1], [2, 1]], + [[1, 0], [1, 0], [2, 1], [2, 1]], + ], # multiple data examples for "online" estimation + True, # with_std=True + [ + [0.5, 0.5, 1.5, 1.5], # row_mean_ + [0.25, 0.25, 0.25, 0.25], # row_var_ + [1.5, 0.5], # col_mean_ + [0.25, 0.25], # col_var_ + [0.5, 0.5, 0.5, 0.5], # row_scale_ + [0.5, 0.5], # col_scale_ + [12], # [n_rows_seen_] + [6], # [n_cols_seen_] + ], + ) + ], +) +def test_partial_fit(data, with_std, expected): + scaler = TwoWayStandardScaler(with_std=with_std) + for d in data: + scaler.partial_fit(d) + + result = [ + scaler.row_mean_, + scaler.row_var_, + scaler.col_mean_, + scaler.col_var_, + scaler.row_scale_, + scaler.col_scale_, + [scaler.n_rows_seen_], + [scaler.n_cols_seen_], + ] + print(result) + assert_allclose( + [i for e in expected if e is not None for i in e], + [j for r in result if r is not None for j in r], + ) + - X_csr = sparse.csr_matrix(X) - assert_raises(TypeError, two_way_standardize, X_csr) +@pytest.mark.parametrize( + "n_rows, n_cols, with_mean, with_std, expected", + [ + ( + 6, # n_rows + 2, # n_cols + False, # with_mean + True, # with_std + [ + [1.24852525, -0.47021609], + [-1.66629192, -3.38503326], + [0.46966753, -1.24907381], + [1.1966711, -0.52207024], + [0.96470187, -0.75403946], + [0.71346052, -1.00528082], + ], + ), + ( + 6, # n_rows + 2, # n_cols + True, # with_mean + False, # with_std + [ + [1.85393809, -1.85393809], + [-19.71891691, 19.71891691], + [13.72700391, -13.72700391], + [5.29676963, -5.29676963], + [-19.41773454, 19.41773454], + [18.25893982, -18.25893982], + ], + ), + ( + 6, # n_rows + 2, # n_cols + True, # with_mean + True, # with_std + [ + [-1.41421356, 1.41421356], + [-1.41421356, 1.41421356], + [0.70710678, -0.70710678], + [0.70710678, -0.70710678], + [0.70710678, -0.70710678], + [0.70710678, -0.70710678], + ], + ), + ], +) +def test_two_way_standardize(n_rows, n_cols, with_mean, with_std, expected): + X = custom_init(n_rows, n_cols, with_mean=with_mean) + result = two_way_standardize(X, with_mean=with_mean, 
with_std=with_std) + assert_allclose(result, expected) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index eb7ca30..176eb4a 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -24,61 +24,69 @@ def two_way_standardize( """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. Read more in the :ref:`User Guide `. + Parameters ---------- X : {array-like, sparse matrix} The data to center and scale. + axis : int (0 by default) axis used to compute the means and standard deviations along. If 0, independently standardize each feature, otherwise (if 1) standardize each sample. + with_mean : boolean, True by default Is always true for two-way standardize + with_std : boolean, True by default If True, scale the data to unit variance (or equivalently, unit standard deviation). + copy : boolean, optional, default True set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSC matrix and if axis is 1). + max_iter : int, optional (50 by default) Set the maximum number of iterations of successive normalization algorithm + tol : float, optional (1e-6 by default) Set the convergence threshold for successive normalization + Notes ----- - This function invokes sklearn's scale function. Thus, the same restrictions + This function invokes sklearn's scale function, thus the same restrictions for scale, apply here as well. - This implementation will refuse to center scipy.sparse matrices - since it would make them non-sparse and would potentially crash the - program with memory exhaustion problems. - Instead the caller is expected to either set explicitly - `with_mean=False` (in that case, only variance scaling will be - performed on the features of the CSC matrix) or to call `X.toarray()` - if he/she expects the materialized dense array to fit in memory. - To avoid memory copy the caller should pass a CSC matrix. + + The caller should pass a CSC matrix. The caller is expected to either set + explicitly `with_mean=False`(in that case, only variance scaling will be + performed on the features of the CSC matrix) or to call `X.toarray()` if + the array fits in memory. + For a comparison of the different scalers, transformers, and normalizers, see sklearn documentation `examples/preprocessing/plot_all_scaling.py + See also -------- - StandardScaler: Performs scaling to unit variance using the``Transformer`` API - (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). + StandardScaler: Performs scaling to unit variance using the``Transformer`` API, + e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`. 
""" n_rows, n_cols = np.shape(X) - err_norm = np.inf + row_polish = np.copy(X.T) rows_X = np.copy(X.T) cols_X = np.copy(X) + + err_norm = np.inf n_iter = 0 + while n_iter <= max_iter and err_norm > tol: - col_polish = scale(row_polish.T, axis=1, with_mean=True, with_std=with_std) - row_polish = scale(col_polish.T, axis=1, with_mean=True, with_std=with_std) + col_polish = scale(row_polish.T, axis=1, with_mean=with_mean, with_std=with_std) + row_polish = scale(col_polish.T, axis=1, with_mean=with_mean, with_std=with_std) - err_norm_row = np.linalg.norm(rows_X - row_polish, "fro") - err_norm_col = np.linalg.norm(cols_X - col_polish, "fro") - err_norm = .5 * err_norm_row / (n_rows * n_cols) + .5 * err_norm_col / ( - n_rows * n_cols - ) + err_row = np.linalg.norm(rows_X - row_polish, "fro") + err_col = np.linalg.norm(cols_X - col_polish, "fro") + err_norm = .5 * err_row / (n_rows * n_cols) + .5 * err_col / (n_rows * n_cols) n_iter += 1 if verbose: @@ -93,8 +101,10 @@ def two_way_standardize( class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """Standardize features by removing the mean and scaling to unit variance in both row and column dimensions. + This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. + Parameters ---------- copy : boolean, optional, default True @@ -102,54 +112,51 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned. + with_mean : boolean, True by default If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory. + with_std : boolean, True by default If True, scale the data to unit variance (or equivalently, unit standard deviation). + Attributes ---------- scale_ : ndarray, shape (n_features,) Per feature relative scaling of the data. .. versionadded:: 0.17 *scale_* - mean_ : array of floats with shape [n_features] + + row_mean_ : array of floats with shape [n_examples] + The mean value for each feature in the training set. + + col_mean_ : array of floats with shape [n_features] The mean value for each feature in the training set. - var_ : array of floats with shape [n_features] + + row_var_ : array of floats with shape [n_examples] + The variance for each feature in the training set. Used to compute + `scale_` + + col_var_ : array of floats with shape [n_features] The variance for each feature in the training set. Used to compute `scale_` + n_samples_seen_ : int The number of samples processed by the estimator. Will be reset on new calls to fit, but increments across ``fit`` calls. - Examples - -------- - >>> from inverse_covariance.clean import TwoWayStandardScaler - >>> - >>> data = [[1, 0], [1, 0], [2, 1], [2, 1]] - >>> scaler = StandardScaler() - >>> print(scaler.fit(data)) - StandardScaler(copy=True, with_mean=True, with_std=True) - >>> print(scaler.mean_) - [ 3.0 0.5] - >>> print(scaler.transform(data)) - [[-1. -1.] - [-1. -1.] - [ 1. 1.] - [ 1. 1.]] + See also -------- - two_way_standardize: Equivalent function without the estimator API. :class:`sklearn.preprocessing.StandardScaler` :class:`sklearn.decomposition.PCA` - Further removes the linear correlation across features with 'whiten=True'. + Notes ----- See the implications of one-way vs. two-way standardization in here. 
TBD - """ def __init__(self, copy=True, with_mean=True, with_std=True): @@ -162,15 +169,43 @@ def __init__(self, copy=True, with_mean=True, with_std=True): self.with_std = with_std self.copy = copy + def _reset(self): + fit_attrs = [ + "n_rows_", + "n_cols_", + "col_var_", + "col_mean_", + "row_var_", + "row_mean_", + "row_scale_", + "col_scale_", + "n_rows_seen_", + "n_cols_seen_", + ] + for attr in fit_attrs: + if hasattr(self, attr): + delattr(self, attr) + + def _initial_state(self): + return not hasattr(self, "n_rows_seen_") + def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. + Parameters ---------- X : {array-like}, shape [n_rows, n_cols] The data used to compute the mean and standard deviation along both row and column axes + y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ + # Reset internal state before fitting + self._reset() + return self.partial_fit(X, y) + + def partial_fit(self, X, y=None): + """Online computation of mean and std on X for later scaling.""" X = check_array( X, accept_sparse=False, @@ -187,14 +222,23 @@ def fit(self, X, y=None): self.n_rows_, self.n_cols_ = np.shape(X) - self.col_var_ = 0. if self.with_std else None + # Q: This doesnt seem to actually get used in the transform, only in + # the inverse transform which it sounds like we should not support. + + # initialize variables on first pass + if self._initial_state(): + self.col_mean_ = 0. + self.col_var_ = 0. if self.with_std else None + self.n_cols_seen_ = 0 + self.row_mean_ = 0. + self.row_var_ = 0. if self.with_std else None + self.n_rows_seen_ = 0 + self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var( - X, 0.0, self.col_var_, 0 + X, self.col_mean_, self.col_var_, self.n_rows_seen_ ) - - self.row_var_ = 0. if self.with_std else None self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var( - X.T, 0.0, self.row_var_, 0 + X.T, self.row_mean_, self.row_var_, self.n_cols_seen_ ) self.row_scale_ = None @@ -211,6 +255,8 @@ def transform(self, X, copy=False): ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. + copy : bool, optional (default: False) + Copy the input X or not. """ check_is_fitted(self, "row_scale_") X = check_array( @@ -230,16 +276,21 @@ def transform(self, X, copy=False): "Input is sparse: Algorithm for sparse matrices currently not supported." ) - return two_way_standardize(X) + return two_way_standardize( + X, with_mean=self.with_mean, with_std=self.with_std, copy=self.copy + ) def inverse_transform(self, X, copy=False): """Scale back the data to the original representation + Parameters ---------- X : array-like, shape [n_samples, n_features] The data used to scale along the features axis. + copy : bool, optional (default: False) Copy the input X or not. + Returns ------- X_tr : array-like, shape [n_samples, n_features] @@ -253,6 +304,9 @@ def inverse_transform(self, X, copy=False): warnings.warn("Reversing two way transformation is not accurate.") + # Q: Should ^ be a warning or should we just rais here and delete the + # rest of the code? + if copy: X = X.copy() From 7c370307252f9a333e9848f076735376a1d2662a Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 15:39:23 -0700 Subject: [PATCH 27/31] Minor cleanup. 
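
For context while touching these docstrings: the successive normalization
that two_way_standardize performs (reworked in the previous patch) can be
written out in a few lines of plain numpy. This is an illustrative sketch
of the iteration only, not the library code, and it assumes generic dense
data with no constant rows or columns (the library routes through
sklearn's scale, which also guards against zero variance):

    import numpy as np

    def successive_normalize(X, max_iter=50, tol=1e-6):
        """Alternately standardize rows then columns until the update is small."""
        Z = np.asarray(X, dtype=float).copy()
        for _ in range(max_iter):
            prev = Z.copy()
            # center/scale each row, then each column
            Z = (Z - Z.mean(axis=1, keepdims=True)) / Z.std(axis=1, keepdims=True)
            Z = (Z - Z.mean(axis=0, keepdims=True)) / Z.std(axis=0, keepdims=True)
            if np.linalg.norm(Z - prev, "fro") / Z.size < tol:
                return Z
        return Z

Each pass re-centers and re-scales one axis, which slightly perturbs the
other; iterating drives both row and column means toward 0 and standard
deviations toward 1 at the fixed point.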
--- inverse_covariance/two_way_standard_scaler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 176eb4a..0d2779f 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -200,7 +200,6 @@ def fit(self, X, y=None): y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - # Reset internal state before fitting self._reset() return self.partial_fit(X, y) @@ -251,10 +250,12 @@ def partial_fit(self, X, y=None): def transform(self, X, copy=False): """Perform standardization by centering and scaling + Parameters ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. + copy : bool, optional (default: False) Copy the input X or not. """ From 1757216631ad8f7b6eba009ca7e36f910c51fa6a Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 08:00:28 -0700 Subject: [PATCH 28/31] Raise on inverse transform, remove code. --- inverse_covariance/two_way_standard_scaler.py | 33 +++---------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 0d2779f..0efadb8 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -126,10 +126,11 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): Attributes ---------- - scale_ : ndarray, shape (n_features,) + row_scale_ : ndarray, shape (n_examples,) + Per feature relative scaling of the data. + + col_scale_ : ndarray, shape (n_features,) Per feature relative scaling of the data. - .. versionadded:: 0.17 - *scale_* row_mean_ : array of floats with shape [n_examples] The mean value for each feature in the training set. @@ -303,28 +304,4 @@ def inverse_transform(self, X, copy=False): "Input is sparse: Algorithm for sparse matrices currently not supported." ) - warnings.warn("Reversing two way transformation is not accurate.") - - # Q: Should ^ be a warning or should we just rais here and delete the - # rest of the code? - - if copy: - X = X.copy() - - X = X.T - - if self.with_std: - X *= self.row_scale_ - - if self.with_mean: - X += self.row_mean_ - - X = X.T - - if self.with_std: - X *= self.col_scale_ - - if self.with_mean: - X += self.col_mean_ - - return X + raise NotImplementedError("Reversing two way transformation is not accurate.") From 17806e85f69ee23048c2608f49f263cef3ca100c Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 13:14:58 -0700 Subject: [PATCH 29/31] Remove unneeded check. --- inverse_covariance/two_way_standard_scaler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 0efadb8..7321df0 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -298,7 +298,6 @@ def inverse_transform(self, X, copy=False): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - check_is_fitted(self, "row_scale_") if sparse.issparse(X): raise NotImplementedError( "Input is sparse: Algorithm for sparse matrices currently not supported." From f1f682e7842fcab4d73dc21cda4eb11d2b65bdee Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 13:28:34 -0700 Subject: [PATCH 30/31] Remove redundant raise. 
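
The sparse check is redundant because inverse_transform now raises
NotImplementedError unconditionally (two patches back). For the record,
the inverse really is ill-posed: the polish is many-to-one, so the stored
means and scales cannot recover the input. A quick demonstration on
made-up data (hypothetical snippet, not part of the patch):

    import numpy as np
    from inverse_covariance.two_way_standard_scaler import two_way_standardize

    prng = np.random.RandomState(0)
    X = prng.normal(size=(5, 3))

    # An affine change to a row is erased by the very first row polish,
    # so distinct inputs map to the same standardized output.
    Y = X.copy()
    Y[0, :] = 3.0 * Y[0, :] + 7.0
    assert np.allclose(two_way_standardize(X), two_way_standardize(Y))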
--- inverse_covariance/two_way_standard_scaler.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 7321df0..7969d85 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -298,9 +298,4 @@ def inverse_transform(self, X, copy=False): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - if sparse.issparse(X): - raise NotImplementedError( - "Input is sparse: Algorithm for sparse matrices currently not supported." - ) - raise NotImplementedError("Reversing two way transformation is not accurate.") From eb8c54b7fc6e66aeeac0f61caedb2f36be390881 Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 13:29:13 -0700 Subject: [PATCH 31/31] Remove unneeded comments. --- inverse_covariance/two_way_standard_scaler.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 7969d85..93412ff 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -283,19 +283,5 @@ def transform(self, X, copy=False): ) def inverse_transform(self, X, copy=False): - """Scale back the data to the original representation - - Parameters - ---------- - X : array-like, shape [n_samples, n_features] - The data used to scale along the features axis. - - copy : bool, optional (default: False) - Copy the input X or not. - - Returns - ------- - X_tr : array-like, shape [n_samples, n_features] - Transformed array. - """ + """Scale back the data to the original representation.""" raise NotImplementedError("Reversing two way transformation is not accurate.")
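
End-to-end usage of the API as it stands at the end of this series (an
illustrative sketch assuming the patches are applied on top of the
inverse_covariance package; the data is made up):

    import numpy as np
    from inverse_covariance.two_way_standard_scaler import (
        TwoWayStandardScaler,
        two_way_standardize,
    )

    prng = np.random.RandomState(1)
    X = prng.lognormal(mean=2, sigma=1, size=(20, 5))

    # Function form: returns the doubly standardized matrix directly.
    Z = two_way_standardize(X, verbose=True)

    # Estimator form: fit() records row/column moments for inspection,
    # transform() re-runs the polish on the data it is given.
    scaler = TwoWayStandardScaler()
    Z2 = scaler.fit(X).transform(X)

    # At convergence both axes are approximately centered and unit scale.
    print(np.abs(Z.mean(axis=0)).max(), np.abs(Z.mean(axis=1)).max())

    # inverse_transform is intentionally unsupported:
    # scaler.inverse_transform(Z2)  # raises NotImplementedError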