From 9e50115917c70ea35bdde87749b38f78668e6a75 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 03:20:58 +0000 Subject: [PATCH 01/31] Added basic functions to test two-way center and scaling --- inverse_covariance/tests/clean_test.py | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 inverse_covariance/tests/clean_test.py diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py new file mode 100644 index 0000000..ffed484 --- /dev/null +++ b/inverse_covariance/tests/clean_test.py @@ -0,0 +1,36 @@ +import numpy as np +from scipy import sparse +import pytest + +from sklearn.utils.testing import assert_raises +from sklearn.utils.testing import assert_allclose +from sklearn import datasets + +from clean import ( + twoway_standardize +) + +def custom_init(n_rows, n_cols, with_mean=False): + prng = np.random.RandomState(1) + X = prng.normal(0, np.ones(shape=(n_rows,n_cols))) + if with_mean: + mu = np.ones(shape=(n_rows, 1)) * \ + prng.randint(1, 5, size=(1, n_cols)) + else: + mu = np.zeros(shape=(n_rows,n_cols)) + var_rows = prng.lognormal(2, 1, size=(n_rows, 1)) + sqcov_rows = np.diag(np.sqrt(var_rows)) + var_cols = prng.lognormal(2, 1, size=(1, n_cols)) + sqcov_cols = np.diag(np.sqrt(var_cols)) + return mu + sqcov_rows * X * sqcov_cols + +def test_invalid_argument(): + ''' + Test behavior of invalid sparse inputs. + ''' + X = np.zeros(shape=(10,10)) + X_csc = sparse.csc_matrix(X) + assert_raises(TypeError, twoway_standardize(X_csc)) + + X_csr = sparse.csr_matrix(X) + assert_raises(TypeError, twoway_standardize(X_csr)) From 2f74e1bd4a4e83f884bab7366828d1fd526404c6 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 03:22:56 +0000 Subject: [PATCH 02/31] Added basic twoway standardization algorithm. Relevant to issue #93 --- inverse_covariance/clean.py | 258 ++++++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 inverse_covariance/clean.py diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py new file mode 100644 index 0000000..2cc01f4 --- /dev/null +++ b/inverse_covariance/clean.py @@ -0,0 +1,258 @@ +import numpy as np +from scipy import sparse +from scipy import stats + +from sklearn.preprocessing.data import scale +from sklearn.base import BaseEstimator, TransformerMixin + +from sklearn.utils import check_array +from sklearn.utils.extmath import row_norms +from sklearn.utils.extmath import _incremental_mean_and_var +from sklearn.utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1, + inplace_csr_row_normalize_l2) +from sklearn.utils.sparsefuncs import (inplace_column_scale, + mean_variance_axis, incr_mean_variance_axis, + min_max_axis) +from sklearn.utils.validation import (check_is_fitted, check_random_state, + FLOAT_DTYPES) + + +def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=10): + """Standardize a two-dimensional data matrix along both axes. + Center to the mean and component wise scale to unit variance. + Read more in the :ref:`User Guide `. + Parameters + ---------- + X : {array-like, sparse matrix} + The data to center and scale. + axis : int (0 by default) + axis used to compute the means and standard deviations along. If 0, + independently standardize each feature, otherwise (if 1) standardize + each sample. 
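[review note] Looking back at PATCH 01: `custom_init` does not actually build the
row/column covariance structure it aims for. `np.diag` applied to the 2-D arrays
`var_rows` (shape (n_rows, 1)) and `var_cols` (shape (1, n_cols)) extracts a
length-1 diagonal instead of constructing a diagonal matrix, and `*` performs
elementwise multiplication rather than a matrix product. A corrected sketch,
using plain numpy (the `custom_init_fixed` name is ours, not the patch's):

import numpy as np

def custom_init_fixed(n_rows, n_cols, with_mean=False):
    prng = np.random.RandomState(1)
    X = prng.normal(0, 1.0, size=(n_rows, n_cols))
    if with_mean:
        mu = np.ones((n_rows, 1)) * prng.randint(1, 5, size=(1, n_cols))
    else:
        mu = np.zeros((n_rows, n_cols))
    # Pass 1-D arrays so np.diag builds (n x n) diagonal matrices, then
    # use matrix products so every row/column gets its own scale factor.
    sqcov_rows = np.diag(np.sqrt(prng.lognormal(2, 1, size=n_rows)))
    sqcov_cols = np.diag(np.sqrt(prng.lognormal(2, 1, size=n_cols)))
    return mu + sqcov_rows.dot(X).dot(sqcov_cols)
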
+ with_mean : boolean, True by default + Is always true for two-way standardize + with_std : boolean, True by default + If True, scale the data to unit variance (or equivalently, + unit standard deviation). + copy : boolean, optional, default True + set to False to perform inplace row normalization and avoid a + copy (if the input is already a numpy array or a scipy.sparse + CSC matrix and if axis is 1). + Notes + ----- + This function invokes sklearn's scale function. Thus, the same restrictions + for scale, apply here as well. + This implementation will refuse to center scipy.sparse matrices + since it would make them non-sparse and would potentially crash the + program with memory exhaustion problems. + Instead the caller is expected to either set explicitly + `with_mean=False` (in that case, only variance scaling will be + performed on the features of the CSC matrix) or to call `X.toarray()` + if he/she expects the materialized dense array to fit in memory. + To avoid memory copy the caller should pass a CSC matrix. + For a comparison of the different scalers, transformers, and normalizers, + see sklearn documentation `examples/preprocessing/plot_all_scaling.py + See also + -------- + StandardScaler: Performs scaling to unit variance using the``Transformer`` API + (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). + """ # noqa + + X = check_array(X, accept_sparse=None, warn_on_dtype=True, + dtype=FLOAT_DTYPES + ) + Xrow_polish = np.copy(X) + Xcol_polish = np.copy(X) + + if sparse.issparse(X): + print('Input is sparse') + raise NotImplemented( + "Algorithm for sparse matrices currently not supported.") + + else: + n_iter = 0 + while n_iter <= max_iter: + Xcol_polish = scale(Xrow_polish, axis=0, + with_mean=True, + with_std=with_std + ) + Xrow_polish = scale(Xcol_polish, axis=1, + with_mean=True, + with_std=with_std + ) + n_iter += 1 + X = Xrow_polisy + + return X + + +class TwoWayStandardScaler(BaseEstimator, TransformerMixin): + """Standardize features by removing the mean and scaling to unit variance + in both row and column dimensions. + This is modeled after StandardScaler in scikit-learn. + Read more in the :ref:`User Guide `. + Parameters + ---------- + copy : boolean, optional, default True + If False, try to avoid a copy and do inplace scaling instead. + This is not guaranteed to always work inplace; e.g. if the data is + not a NumPy array or scipy.sparse CSR matrix, a copy may still be + returned. + with_mean : boolean, True by default + If True, center the data before scaling. + This does not work (and will raise an exception) when attempted on + sparse matrices, because centering them entails building a dense + matrix which in common use cases is likely to be too large to fit in + memory. + with_std : boolean, True by default + If True, scale the data to unit variance (or equivalently, + unit standard deviation). + Attributes + ---------- + scale_ : ndarray, shape (n_features,) + Per feature relative scaling of the data. + .. versionadded:: 0.17 + *scale_* + mean_ : array of floats with shape [n_features] + The mean value for each feature in the training set. + var_ : array of floats with shape [n_features] + The variance for each feature in the training set. Used to compute + `scale_` + n_samples_seen_ : int + The number of samples processed by the estimator. Will be reset on + new calls to fit, but increments across ``partial_fit`` calls. 
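[review note] Two problems in the PATCH 02 loop above that later commits address:
the final assignment reads `X = Xrow_polisy` (a NameError at runtime), and the
loop always runs the full `max_iter` sweeps with no convergence test. PATCH 07
below fixes both. For readers who want the idea in isolation, a minimal
standalone sketch of the alternating ("successive") normalization, assuming only
numpy and sklearn's public `scale` (the function name is ours):

import numpy as np
from sklearn.preprocessing import scale

def alternating_standardize(X, max_iter=50, tol=1e-6):
    X = np.asarray(X, dtype=float)
    for _ in range(max_iter):
        X_new = scale(X, axis=0)      # center/scale each column
        X_new = scale(X_new, axis=1)  # then each row
        # stop once the average per-entry change becomes negligible
        if np.linalg.norm(X_new - X, 'fro') / X.size < tol:
            return X_new
        X = X_new
    return X
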
+ Examples + -------- + >>> from sklearn.preprocessing import StandardScaler + >>> + >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]] + >>> scaler = StandardScaler() + >>> print(scaler.fit(data)) + StandardScaler(copy=True, with_mean=True, with_std=True) + >>> print(scaler.mean_) + [ 0.5 0.5] + >>> print(scaler.transform(data)) + [[-1. -1.] + [-1. -1.] + [ 1. 1.] + [ 1. 1.]] + >>> print(scaler.transform([[2, 2]])) + [[ 3. 3.]] + See also + -------- + scale: Equivalent function without the estimator API. + :class:`sklearn.preprocessing.StandardScaler` + :class:`sklearn.decomposition.PCA` + Further removes the linear correlation across features with 'whiten=True'. + Notes + ----- + See the implications of one-way vs. two-way standardization in here. TBD + + """ # noqa + + def __init__(self, copy=True, with_mean=True, with_std=True): + self.with_mean = with_mean + self.with_std = with_std + self.copy = copy + + def _reset(self): + """Reset internal data-dependent state of the scaler, if necessary. + __init__ parameters are not touched. + """ + + # Checking one attribute is enough, becase they are all set together + # in partial_fit + if hasattr(self, 'scale_'): + del self.scale_ + del self.n_samples_seen_ + del self.mean_ + del self.var_ + + def fit(self, X, y=None): + """Compute the mean and std to be used for later scaling. + Parameters + ---------- + X : {array-like, sparse matrix}, shape [n_samples, n_features] + The data used to compute the mean and standard deviation + used for later scaling along the features axis. + y : Passthrough for ``Pipeline`` compatibility. + """ + + # Reset internal state before fitting + self._reset() + return self.partial_fit(X, y) + + def transform(self, X, y='deprecated', copy=None): + """Perform standardization by centering and scaling + Parameters + ---------- + X : array-like, shape [n_samples, n_features] + The data used to scale along the features axis. + y : (ignored) + .. deprecated:: 0.19 + This parameter will be removed in 0.21. + copy : bool, optional (default: None) + Copy the input X or not. + """ + if not isinstance(y, string_types) or y != 'deprecated': + warnings.warn("The parameter y on transform() is " + "deprecated since 0.19 and will be removed in 0.21", + DeprecationWarning) + + check_is_fitted(self, 'scale_') + + copy = copy if copy is not None else self.copy + X = check_array(X, accept_sparse='csr', copy=copy, warn_on_dtype=True, + estimator=self, dtype=FLOAT_DTYPES) + + if sparse.issparse(X): + if self.with_mean: + raise ValueError( + "Cannot center sparse matrices: pass `with_mean=False` " + "instead. See docstring for motivation and alternatives.") + if self.scale_ is not None: + inplace_column_scale(X, 1 / self.scale_) + else: + if self.with_mean: + X -= self.mean_ + if self.with_std: + X /= self.scale_ + return X + + def inverse_transform(self, X, copy=None): + """Scale back the data to the original representation + Parameters + ---------- + X : array-like, shape [n_samples, n_features] + The data used to scale along the features axis. + copy : bool, optional (default: None) + Copy the input X or not. + Returns + ------- + X_tr : array-like, shape [n_samples, n_features] + Transformed array. 
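[review note] As of PATCH 02, `transform()` references `string_types` and
`warnings`, but neither is imported, so the deprecation guard would itself raise
a NameError if reached; PATCH 10 adds the `six.string_types` import and PATCH 20
adds `import warnings`. The guard pattern, shown standalone for reference
(0.19-era scikit-learn, where `sklearn.externals.six` still exists):

import warnings
from sklearn.externals.six import string_types

def _warn_if_y_passed(y='deprecated'):
    # Warn only when the caller actually supplied a y argument.
    if not isinstance(y, string_types) or y != 'deprecated':
        warnings.warn("The parameter y on transform() is deprecated "
                      "since 0.19 and will be removed in 0.21",
                      DeprecationWarning)
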
+ """ + check_is_fitted(self, 'scale_') + + copy = copy if copy is not None else self.copy + if sparse.issparse(X): + if self.with_mean: + raise ValueError( + "Cannot uncenter sparse matrices: pass `with_mean=False` " + "instead See docstring for motivation and alternatives.") + if not sparse.isspmatrix_csr(X): + X = X.tocsr() + copy = False + if copy: + X = X.copy() + if self.scale_ is not None: + inplace_column_scale(X, self.scale_) + else: + X = np.asarray(X) + if copy: + X = X.copy() + if self.with_std: + X *= self.scale_ + if self.with_mean: + X += self.mean_ + return X \ No newline at end of file From f2a1b2059d4cb4dd2069d7a754dbf4be785515fe Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 05:30:40 +0000 Subject: [PATCH 03/31] Cleaned up TwoWayStandardScaler API. partial_fit not supported --- inverse_covariance/clean.py | 45 +++++++++++++------------------------ 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 2cc01f4..ac27cfd 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -88,8 +88,8 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """Standardize features by removing the mean and scaling to unit variance - in both row and column dimensions. - This is modeled after StandardScaler in scikit-learn. + in both row and column dimensions. + This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. Parameters ---------- @@ -123,24 +123,22 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): new calls to fit, but increments across ``partial_fit`` calls. Examples -------- - >>> from sklearn.preprocessing import StandardScaler + >>> from inverse_covariance.clean import TwoWayStandardScaler >>> - >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]] + >>> data = [[1, 0], [1, 0], [2, 1], [2, 1]] >>> scaler = StandardScaler() >>> print(scaler.fit(data)) StandardScaler(copy=True, with_mean=True, with_std=True) >>> print(scaler.mean_) - [ 0.5 0.5] + [ 3.0 0.5] >>> print(scaler.transform(data)) [[-1. -1.] [-1. -1.] [ 1. 1.] [ 1. 1.]] - >>> print(scaler.transform([[2, 2]])) - [[ 3. 3.]] See also -------- - scale: Equivalent function without the estimator API. + twoway_standardize: Equivalent function without the estimator API. :class:`sklearn.preprocessing.StandardScaler` :class:`sklearn.decomposition.PCA` Further removes the linear correlation across features with 'whiten=True'. @@ -151,42 +149,31 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """ # noqa def __init__(self, copy=True, with_mean=True, with_std=True): - self.with_mean = with_mean + """Unlike StandardScaler, with_mean is always set to True, to ensure + that two-way standardization is always performed with centering. The + argument `with_mean` is retained for the sake of model API compatibility. + """ + self.with_mean = True self.with_std = with_std self.copy = copy - def _reset(self): - """Reset internal data-dependent state of the scaler, if necessary. - __init__ parameters are not touched. - """ - - # Checking one attribute is enough, becase they are all set together - # in partial_fit - if hasattr(self, 'scale_'): - del self.scale_ - del self.n_samples_seen_ - del self.mean_ - del self.var_ - def fit(self, X, y=None): - """Compute the mean and std to be used for later scaling. + """Compute the mean and std for both row and column dimensions. 
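[review note] A quick check on the doctest values PATCH 03 introduces above: for
`data = [[1, 0], [1, 0], [2, 1], [2, 1]]` the column means are [1.5, 0.5], not
the [3.0, 0.5] shown, and the example still constructs `StandardScaler()` even
though it now imports `TwoWayStandardScaler`. Verifying the means:

import numpy as np

data = np.array([[1, 0], [1, 0], [2, 1], [2, 1]], dtype=float)
print(data.mean(axis=0))  # prints [1.5 0.5]
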
Parameters ---------- - X : {array-like, sparse matrix}, shape [n_samples, n_features] + X : {array-like}, shape [n_rows, n_cols] The data used to compute the mean and standard deviation - used for later scaling along the features axis. - y : Passthrough for ``Pipeline`` compatibility. + along both row and column axes + y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - # Reset internal state before fitting - self._reset() return self.partial_fit(X, y) def transform(self, X, y='deprecated', copy=None): """Perform standardization by centering and scaling Parameters ---------- - X : array-like, shape [n_samples, n_features] + X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. y : (ignored) .. deprecated:: 0.19 From a30bc8e623c7bf3a9f9ad0b112fca516774b26b5 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 05:43:21 +0000 Subject: [PATCH 04/31] Reset internal row,col attributes --- inverse_covariance/clean.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index ac27cfd..c364102 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -157,6 +157,21 @@ def __init__(self, copy=True, with_mean=True, with_std=True): self.with_std = with_std self.copy = copy + def _reset(self): + """Reset internal data-dependent state of the scaler, if necessary. + __init__ parameters are not touched. + """ + + # Checking one attribute is enough, becase they are all set together + # in partial_fit + if hasattr(self, 'col_scale_'): + del self.row_scale_ + del self.row_mean_ + del self.row_var_ + del self.col_scale_ + del self.col_mean_ + del self.col_var_ + def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. Parameters From ed6fd34066b5128abde9149b222111e5cb27c662 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Mon, 9 Oct 2017 17:46:05 +0000 Subject: [PATCH 05/31] Added basic structure for partial_fit --- inverse_covariance/clean.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index c364102..4210547 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -184,6 +184,42 @@ def fit(self, X, y=None): return self.partial_fit(X, y) + def partial_fit(self, X, y=None): + """Compute the mean and std for both row and column dimensions. + Equivalent to fit. Online algorithm not supported at this time. + Parameters + ---------- + X : {array-like}, shape [n_rows, n_cols] + The data used to compute the mean and standard deviation + used for later scaling along the features axis. + y : Passthrough for ``Pipeline`` compatibility. 
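[review note] Heads-up on the `partial_fit` skeleton that follows: its
`if sparse.issparse(X):` branch is left empty, which is a syntax error until
PATCH 06 fills it in. The surrounding first-pass initialization idiom is
sklearn's incremental-statistics pattern; a self-contained sketch built on the
same private helper the patch imports (`_incremental_mean_and_var`, 0.19-era
signature; the class name is ours):

import numpy as np
from sklearn.utils.extmath import _incremental_mean_and_var

class RunningColumnStats(object):
    def partial_fit(self, X):
        X = np.asarray(X, dtype=float)
        if not hasattr(self, 'n_seen_'):  # first batch: start from zero
            self.mean_, self.var_, self.n_seen_ = .0, .0, 0
        self.mean_, self.var_, self.n_seen_ = _incremental_mean_and_var(
            X, self.mean_, self.var_, self.n_seen_)
        return self
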
+ """ + X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, + warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) + + if sparse.issparse(X): + + else: + # First pass + if not hasattr(self, 'n_samples_seen_'): + self.mean_ = .0 + self.n_samples_seen_ = 0 + if self.with_std: + self.var_ = .0 + else: + self.var_ = None + + self.mean_, self.var_, self.n_samples_seen_ = \ + _incremental_mean_and_var(X, self.mean_, self.var_, + self.n_samples_seen_) + + if self.with_std: + self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_)) + else: + self.scale_ = None + + return self + def transform(self, X, y='deprecated', copy=None): """Perform standardization by centering and scaling Parameters From 47c87cb05f393f2aa43c9719350bb44fb62f45f2 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 18:41:04 +0000 Subject: [PATCH 06/31] partial_fit now calculates row, col statistics --- inverse_covariance/clean.py | 42 +++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 4210547..0855ac6 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -171,6 +171,8 @@ def _reset(self): del self.col_scale_ del self.col_mean_ del self.col_var_ + del self.n_rows_seen_ + del self.n_cols_seen_ def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. @@ -194,29 +196,43 @@ def partial_fit(self, X, y=None): used for later scaling along the features axis. y : Passthrough for ``Pipeline`` compatibility. """ - X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, - warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) + X = check_array(X, accept_sparse=None, copy=self.copy, + warn_on_dtype=True, dtype=FLOAT_DTYPES) if sparse.issparse(X): - + print('Input is sparse') + raise NotImplemented( + "Algorithm for sparse matrices currently not supported.") else: # First pass - if not hasattr(self, 'n_samples_seen_'): - self.mean_ = .0 - self.n_samples_seen_ = 0 + if not hasattr(self, 'n_rows_seen_'): + self.col_mean_ = .0 + self.n_rows_seen_ = 0 if self.with_std: - self.var_ = .0 + self.col_var_ = .0 else: - self.var_ = None + self.col_var_ = None - self.mean_, self.var_, self.n_samples_seen_ = \ - _incremental_mean_and_var(X, self.mean_, self.var_, - self.n_samples_seen_) + self.col_mean_, self.col_var_, self.n_rows_seen_ = \ + _incremental_mean_and_var(X, self.col_mean_, self.col_var_, + self.n_rows_seen_) + if not hasattr(self, 'n_cols_seen_'): + self.row_mean_ = .0 + self.n_cols_seen_ = 0 + if self.with_std: + self.row_var_ = .0 + else: + self.row_var_ = None + self.row_mean_, self.row_var_, self.n_cols_seen_ = \ + _incremental_mean_and_var(X, self.row_mean_, self.row_var_, + self.n_cols_seen_) if self.with_std: - self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_)) + self.row_scale_ = _handle_zeros_in_scale(np.sqrt(self.row_var_)) + self.col_scale_ = _handle_zeros_in_scale(np.sqrt(self.col_var_)) else: - self.scale_ = None + self.row_scale_ = None + self.col_scale_ = None return self From 2dcde0adf471d81219087f424bf04eab3cab1b6b Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 19:19:45 +0000 Subject: [PATCH 07/31] Added convergence checks. 
Algorithm completed --- inverse_covariance/clean.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 0855ac6..8127d29 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -17,7 +17,8 @@ FLOAT_DTYPES) -def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=10): +def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, + max_iter=50, tol=1e-6, verbose=False): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. Read more in the :ref:`User Guide `. @@ -38,6 +39,10 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSC matrix and if axis is 1). + max_iter : int, optional (50 by default) + Set the maximum number of iterations of successive normalization algorithm + tol : float, optional (1e-6 by default) + Set the convergence threshold for successive normalization Notes ----- This function invokes sklearn's scale function. Thus, the same restrictions @@ -58,11 +63,11 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). """ # noqa - X = check_array(X, accept_sparse=None, warn_on_dtype=True, - dtype=FLOAT_DTYPES - ) - Xrow_polish = np.copy(X) + X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, + dtype=FLOAT_DTYPES) + Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) + [n_rows,n_cols] = np.shape(X) if sparse.issparse(X): print('Input is sparse') @@ -71,18 +76,28 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_ else: n_iter = 0 - while n_iter <= max_iter: - Xcol_polish = scale(Xrow_polish, axis=0, + err_norm = np.inf + oldXrow = np.copy(Xrow_polish) + oldXcol = np.copy(Xcol_polish) + while n_iter <= max_iter and err_norm > tol : + Xcol_polish = scale(Xrow_polish.T, axis=1, with_mean=True, with_std=with_std ) - Xrow_polish = scale(Xcol_polish, axis=1, + Xrow_polish = scale(Xcol_polish.T, axis=1, with_mean=True, with_std=with_std ) n_iter += 1 - X = Xrow_polisy - + err_norm_row = np.linalg.norm(oldXrow-Xrow_polish,'fro') + err_norm_col = np.linalg.norm(oldXcol-Xcol_polish,'fro') + err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) + if verbose: + print('Iteration: {}, Convergence Err: {}'.format(n_iter,err_norm)) + oldXrow = np.copy(Xrow_polish) + oldXcol = np.copy(Xcol_polish) + + X = Xrow_polish return X From dd92d808c2b321e454453979ff6ae464de407f99 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 19:20:27 +0000 Subject: [PATCH 08/31] Transform now calls twoway_standardize --- inverse_covariance/clean.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 8127d29..784907d 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -268,24 +268,18 @@ def transform(self, X, y='deprecated', copy=None): "deprecated since 0.19 and will be removed in 0.21", DeprecationWarning) - check_is_fitted(self, 'scale_') + check_is_fitted(self, 'row_scale_') copy = copy if copy is not None else self.copy - X = check_array(X, accept_sparse='csr', copy=copy, warn_on_dtype=True, + X 
= check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) if sparse.issparse(X): - if self.with_mean: - raise ValueError( - "Cannot center sparse matrices: pass `with_mean=False` " - "instead. See docstring for motivation and alternatives.") - if self.scale_ is not None: - inplace_column_scale(X, 1 / self.scale_) + print('Input is sparse') + raise NotImplemented( + "Algorithm for sparse matrices currently not supported.") else: - if self.with_mean: - X -= self.mean_ - if self.with_std: - X /= self.scale_ + X = twoway_standardize(X) return X def inverse_transform(self, X, copy=None): From a754f44687b22a7b8611304818e8e3f953950cbc Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 21:50:32 +0000 Subject: [PATCH 09/31] Updated algorithm. Test passes --- inverse_covariance/tests/clean_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py index ffed484..eb22291 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/clean_test.py @@ -30,7 +30,7 @@ def test_invalid_argument(): ''' X = np.zeros(shape=(10,10)) X_csc = sparse.csc_matrix(X) - assert_raises(TypeError, twoway_standardize(X_csc)) + assert_raises(TypeError, twoway_standardize, X_csc) X_csr = sparse.csr_matrix(X) - assert_raises(TypeError, twoway_standardize(X_csr)) + assert_raises(TypeError, twoway_standardize, X_csr) From 4a3c038c33ca262b2a44cb2c98d4804f72b239ce Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 21:52:23 +0000 Subject: [PATCH 10/31] Fixed bug in transform() --- inverse_covariance/clean.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 784907d..fde96a9 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -2,10 +2,15 @@ from scipy import sparse from scipy import stats -from sklearn.preprocessing.data import scale +from sklearn.preprocessing.data import ( + scale, + _handle_zeros_in_scale + ) from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array +from sklearn.externals import six +from sklearn.externals.six import string_types from sklearn.utils.extmath import row_norms from sklearn.utils.extmath import _incremental_mean_and_var from sklearn.utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1, @@ -68,12 +73,12 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) [n_rows,n_cols] = np.shape(X) - + if sparse.issparse(X): print('Input is sparse') raise NotImplemented( "Algorithm for sparse matrices currently not supported.") - + else: n_iter = 0 err_norm = np.inf @@ -102,7 +107,7 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, class TwoWayStandardScaler(BaseEstimator, TransformerMixin): - """Standardize features by removing the mean and scaling to unit variance + """Standardize features by removing the mean and scaling to unit variance in both row and column dimensions. This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. 
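[review note] On PATCH 09 above: the one-line test change matters because the
PATCH 01 form, `assert_raises(TypeError, twoway_standardize(X_csc))`, calls the
function before `assert_raises` can trap anything. Passing the callable and its
arguments separately lets the harness do the invocation (the toy `boom` function
is ours):

from sklearn.utils.testing import assert_raises

def boom(x):
    raise TypeError("no sparse input")

assert_raises(TypeError, boom, 0)    # correct: the harness calls boom(0)
# assert_raises(TypeError, boom(0))  # broken: boom(0) raises before the check
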
@@ -165,7 +170,7 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): def __init__(self, copy=True, with_mean=True, with_std=True): """Unlike StandardScaler, with_mean is always set to True, to ensure - that two-way standardization is always performed with centering. The + that two-way standardization is always performed with centering. The argument `with_mean` is retained for the sake of model API compatibility. """ self.with_mean = True @@ -202,7 +207,7 @@ def fit(self, X, y=None): return self.partial_fit(X, y) def partial_fit(self, X, y=None): - """Compute the mean and std for both row and column dimensions. + """Compute the mean and std for both row and column dimensions. Equivalent to fit. Online algorithm not supported at this time. Parameters ---------- @@ -211,7 +216,7 @@ def partial_fit(self, X, y=None): used for later scaling along the features axis. y : Passthrough for ``Pipeline`` compatibility. """ - X = check_array(X, accept_sparse=None, copy=self.copy, + X = check_array(X, accept_sparse=None, copy=self.copy, warn_on_dtype=True, dtype=FLOAT_DTYPES) if sparse.issparse(X): @@ -239,7 +244,7 @@ def partial_fit(self, X, y=None): else: self.row_var_ = None self.row_mean_, self.row_var_, self.n_cols_seen_ = \ - _incremental_mean_and_var(X, self.row_mean_, self.row_var_, + _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, self.n_cols_seen_) if self.with_std: From cc1d8d3390a220801c4d1417f1d55fc7e31a1198 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:01:09 +0000 Subject: [PATCH 11/31] Return original dimensions --- inverse_covariance/clean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index fde96a9..d94c025 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -102,7 +102,7 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - X = Xrow_polish + X = Xrow_polish.T return X From 16b11164ad0bc8c5b5f1d7d32ae7221cb9ea9582 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:21:45 +0000 Subject: [PATCH 12/31] inverse_transform completed, raises not implemented error --- inverse_covariance/clean.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index d94c025..766b655 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -300,27 +300,27 @@ def inverse_transform(self, X, copy=None): X_tr : array-like, shape [n_samples, n_features] Transformed array. 
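[review note] Why PATCH 12 (whose hunk begins here) stops promising an exact
inverse: the stored row/column means and scales describe the original data, but
the forward transform is an iteration of alternating affine maps, so undoing the
two stored maps in reverse order does not reproduce the input. A small numerical
illustration (toy data, ours):

import numpy as np
from sklearn.preprocessing import scale

X = np.random.RandomState(0).lognormal(size=(5, 4))
Z = scale(scale(X, axis=0), axis=1)  # one column pass, then one row pass
# The row pass disturbs the column centering achieved just before it:
print(np.abs(Z.mean(axis=0)).max())  # nonzero in general
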
""" - check_is_fitted(self, 'scale_') + check_is_fitted(self, 'row_scale_') copy = copy if copy is not None else self.copy if sparse.issparse(X): - if self.with_mean: - raise ValueError( - "Cannot uncenter sparse matrices: pass `with_mean=False` " - "instead See docstring for motivation and alternatives.") - if not sparse.isspmatrix_csr(X): - X = X.tocsr() - copy = False - if copy: - X = X.copy() - if self.scale_ is not None: - inplace_column_scale(X, self.scale_) + print('Input is sparse') + raise NotImplementedError( + 'Algorithm for sparse matrices currently not supported.') else: + raise NotImplementedError( + 'Two Way standardization cannot currently be reversed with accuracy') X = np.asarray(X) if copy: X = X.copy() + X = X.T + if self.with_std: + X *= self.row_scale_ + if self.with_mean: + X += self.row_mean_ + X = X.T if self.with_std: - X *= self.scale_ + X *= self.col_scale_ if self.with_mean: - X += self.mean_ + X += self.col_mean_ return X \ No newline at end of file From a58383a677647e3a645c2ff5d92bec33c9bb47bd Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:43:21 +0000 Subject: [PATCH 13/31] Delinting --- inverse_covariance/clean.py | 63 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/inverse_covariance/clean.py b/inverse_covariance/clean.py index 766b655..014b105 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/clean.py @@ -1,6 +1,5 @@ import numpy as np from scipy import sparse -from scipy import stats from sklearn.preprocessing.data import ( scale, @@ -9,20 +8,15 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array -from sklearn.externals import six from sklearn.externals.six import string_types -from sklearn.utils.extmath import row_norms from sklearn.utils.extmath import _incremental_mean_and_var -from sklearn.utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1, - inplace_csr_row_normalize_l2) -from sklearn.utils.sparsefuncs import (inplace_column_scale, - mean_variance_axis, incr_mean_variance_axis, - min_max_axis) -from sklearn.utils.validation import (check_is_fitted, check_random_state, - FLOAT_DTYPES) +from sklearn.utils.validation import ( + check_is_fitted, + FLOAT_DTYPES + ) -def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, +def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=50, tol=1e-6, verbose=False): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. @@ -68,11 +62,11 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). 
""" # noqa - X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, - dtype=FLOAT_DTYPES) + X = check_array(X, accept_sparse=None, copy=copy, + warn_on_dtype=True, dtype=FLOAT_DTYPES) Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) - [n_rows,n_cols] = np.shape(X) + [n_rows, n_cols] = np.shape(X) if sparse.issparse(X): print('Input is sparse') @@ -84,21 +78,19 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, err_norm = np.inf oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - while n_iter <= max_iter and err_norm > tol : + while n_iter <= max_iter and err_norm > tol: Xcol_polish = scale(Xrow_polish.T, axis=1, - with_mean=True, - with_std=with_std - ) + with_mean=True, with_std=with_std) Xrow_polish = scale(Xcol_polish.T, axis=1, - with_mean=True, - with_std=with_std - ) + with_mean=True, with_std=with_std) n_iter += 1 - err_norm_row = np.linalg.norm(oldXrow-Xrow_polish,'fro') - err_norm_col = np.linalg.norm(oldXcol-Xcol_polish,'fro') - err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) + err_norm_row = np.linalg.norm(oldXrow-Xrow_polish, 'fro') + err_norm_col = np.linalg.norm(oldXcol-Xcol_polish, 'fro') + err_norm = .5 * err_norm_row/(n_rows*n_cols) + \ + .5 * err_norm_col/(n_rows*n_cols) if verbose: - print('Iteration: {}, Convergence Err: {}'.format(n_iter,err_norm)) + print('Iteration: {}, Convergence Err: {}'.format( + n_iter, err_norm)) oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) @@ -171,7 +163,8 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): def __init__(self, copy=True, with_mean=True, with_std=True): """Unlike StandardScaler, with_mean is always set to True, to ensure that two-way standardization is always performed with centering. The - argument `with_mean` is retained for the sake of model API compatibility. + argument `with_mean` is retained for the sake of sklearn + API compatibility. """ self.with_mean = True self.with_std = with_std @@ -234,8 +227,10 @@ def partial_fit(self, X, y=None): self.col_var_ = None self.col_mean_, self.col_var_, self.n_rows_seen_ = \ - _incremental_mean_and_var(X, self.col_mean_, self.col_var_, - self.n_rows_seen_) + _incremental_mean_and_var(X, self.col_mean_, + self.col_var_, + self.n_rows_seen_ + ) if not hasattr(self, 'n_cols_seen_'): self.row_mean_ = .0 self.n_cols_seen_ = 0 @@ -244,8 +239,10 @@ def partial_fit(self, X, y=None): else: self.row_var_ = None self.row_mean_, self.row_var_, self.n_cols_seen_ = \ - _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, - self.n_cols_seen_) + _incremental_mean_and_var(X.T, self.row_mean_, + self.row_var_, + self.n_cols_seen_ + ) if self.with_std: self.row_scale_ = _handle_zeros_in_scale(np.sqrt(self.row_var_)) @@ -269,7 +266,7 @@ def transform(self, X, y='deprecated', copy=None): Copy the input X or not. 
""" if not isinstance(y, string_types) or y != 'deprecated': - warnings.warn("The parameter y on transform() is " + warnings.warn("The parameter y on transform() is " # noqa "deprecated since 0.19 and will be removed in 0.21", DeprecationWarning) @@ -309,7 +306,7 @@ def inverse_transform(self, X, copy=None): 'Algorithm for sparse matrices currently not supported.') else: raise NotImplementedError( - 'Two Way standardization cannot currently be reversed with accuracy') + 'Two Way standardization not reversible with accuracy') X = np.asarray(X) if copy: X = X.copy() @@ -323,4 +320,4 @@ def inverse_transform(self, X, copy=None): X *= self.col_scale_ if self.with_mean: X += self.col_mean_ - return X \ No newline at end of file + return X From 5904f60c6214b8f0aa7ed799f724cf1b11b3aa03 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Wed, 11 Oct 2017 22:46:01 +0000 Subject: [PATCH 14/31] More delinting --- inverse_covariance/tests/clean_test.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py index eb22291..0a7e893 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/clean_test.py @@ -1,36 +1,35 @@ import numpy as np from scipy import sparse -import pytest from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_allclose -from sklearn import datasets from clean import ( twoway_standardize ) + def custom_init(n_rows, n_cols, with_mean=False): prng = np.random.RandomState(1) - X = prng.normal(0, np.ones(shape=(n_rows,n_cols))) + X = prng.normal(0, np.ones(shape=(n_rows, n_cols))) if with_mean: mu = np.ones(shape=(n_rows, 1)) * \ prng.randint(1, 5, size=(1, n_cols)) else: - mu = np.zeros(shape=(n_rows,n_cols)) + mu = np.zeros(shape=(n_rows, n_cols)) var_rows = prng.lognormal(2, 1, size=(n_rows, 1)) sqcov_rows = np.diag(np.sqrt(var_rows)) var_cols = prng.lognormal(2, 1, size=(1, n_cols)) sqcov_cols = np.diag(np.sqrt(var_cols)) return mu + sqcov_rows * X * sqcov_cols + def test_invalid_argument(): ''' - Test behavior of invalid sparse inputs. + Test behavior of invalid sparse data matrix inputs. 
''' - X = np.zeros(shape=(10,10)) + X = np.zeros(shape=(10, 10)) X_csc = sparse.csc_matrix(X) assert_raises(TypeError, twoway_standardize, X_csc) - + X_csr = sparse.csr_matrix(X) assert_raises(TypeError, twoway_standardize, X_csr) From a17a5309b202129df46406514ed3e1a2bcc51ebd Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Thu, 12 Oct 2017 17:57:25 +0000 Subject: [PATCH 15/31] Fixed import error --- inverse_covariance/tests/clean_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/clean_test.py index 0a7e893..409979d 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/clean_test.py @@ -3,7 +3,7 @@ from sklearn.utils.testing import assert_raises -from clean import ( +from inverse_covariance.clean import ( twoway_standardize ) From e5395bdfd6e97a87474fae4068dfce7838ee3937 Mon Sep 17 00:00:00 2001 From: "mnarayan (SMC 2)" Date: Thu, 12 Oct 2017 17:57:47 +0000 Subject: [PATCH 16/31] Added clean.py --- inverse_covariance/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/inverse_covariance/__init__.py b/inverse_covariance/__init__.py index c9d58a7..1398af5 100644 --- a/inverse_covariance/__init__.py +++ b/inverse_covariance/__init__.py @@ -8,6 +8,10 @@ QuicGraphLassoCV, QuicGraphLassoEBIC, ) +from .clean import ( + twoway_standardize, + TwoWayStandardScaler, +) from .metrics import ( log_likelihood, kl_loss, @@ -22,12 +26,15 @@ from .adaptive_graph_lasso import AdaptiveGraphLasso from .cross_validation import RepeatedKFold + __all__ = [ 'InverseCovarianceEstimator', 'quic', 'QuicGraphLasso', 'QuicGraphLassoCV', 'QuicGraphLassoEBIC', + 'twoway_standardize', + 'TwoWayStandardScaler', 'log_likelihood', 'kl_loss', 'quadratic_loss', From a2940dff6bdf91515d85f01491b653fdaf030ce0 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:27:49 -0700 Subject: [PATCH 17/31] Rename files from `clean` to `two_way_standard_scaler` --- inverse_covariance/__init__.py | 6 +++--- .../{clean_test.py => two_way_standard_scaler_test.py} | 8 ++++---- .../{clean.py => two_way_standard_scaler.py} | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) rename inverse_covariance/tests/{clean_test.py => two_way_standard_scaler_test.py} (81%) rename inverse_covariance/{clean.py => two_way_standard_scaler.py} (98%) diff --git a/inverse_covariance/__init__.py b/inverse_covariance/__init__.py index c20068d..6f5ce1d 100644 --- a/inverse_covariance/__init__.py +++ b/inverse_covariance/__init__.py @@ -9,8 +9,8 @@ QuicGraphicalLassoCV, QuicGraphicalLassoEBIC, ) -from .clean import ( - twoway_standardize, +from .two_way_standard_scaler import ( + two_way_standardize, TwoWayStandardScaler, ) from .metrics import ( @@ -37,7 +37,7 @@ "QuicGraphicalLasso", "QuicGraphicalLassoCV", "QuicGraphicalLassoEBIC", - 'twoway_standardize', + 'two_way_standardize', 'TwoWayStandardScaler', "log_likelihood", "kl_loss", diff --git a/inverse_covariance/tests/clean_test.py b/inverse_covariance/tests/two_way_standard_scaler_test.py similarity index 81% rename from inverse_covariance/tests/clean_test.py rename to inverse_covariance/tests/two_way_standard_scaler_test.py index 409979d..2ca23b4 100644 --- a/inverse_covariance/tests/clean_test.py +++ b/inverse_covariance/tests/two_way_standard_scaler_test.py @@ -3,8 +3,8 @@ from sklearn.utils.testing import assert_raises -from inverse_covariance.clean import ( - twoway_standardize +from inverse_covariance.two_way_standard_scaler import ( + 
two_way_standardize ) @@ -29,7 +29,7 @@ def test_invalid_argument(): ''' X = np.zeros(shape=(10, 10)) X_csc = sparse.csc_matrix(X) - assert_raises(TypeError, twoway_standardize, X_csc) + assert_raises(TypeError, two_way_standardize, X_csc) X_csr = sparse.csr_matrix(X) - assert_raises(TypeError, twoway_standardize, X_csr) + assert_raises(TypeError, two_way_standardize, X_csr) diff --git a/inverse_covariance/clean.py b/inverse_covariance/two_way_standard_scaler.py similarity index 98% rename from inverse_covariance/clean.py rename to inverse_covariance/two_way_standard_scaler.py index 014b105..f00c88f 100644 --- a/inverse_covariance/clean.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -16,7 +16,7 @@ ) -def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, +def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, max_iter=50, tol=1e-6, verbose=False): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. @@ -100,7 +100,7 @@ def twoway_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """Standardize features by removing the mean and scaling to unit variance - in both row and column dimensions. + in both row and column dimensions. This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. Parameters @@ -150,14 +150,14 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): [ 1. 1.]] See also -------- - twoway_standardize: Equivalent function without the estimator API. + two_way_standardize: Equivalent function without the estimator API. :class:`sklearn.preprocessing.StandardScaler` :class:`sklearn.decomposition.PCA` Further removes the linear correlation across features with 'whiten=True'. Notes ----- See the implications of one-way vs. two-way standardization in here. TBD - + """ # noqa def __init__(self, copy=True, with_mean=True, with_std=True): @@ -281,7 +281,7 @@ def transform(self, X, y='deprecated', copy=None): raise NotImplemented( "Algorithm for sparse matrices currently not supported.") else: - X = twoway_standardize(X) + X = two_way_standardize(X) return X def inverse_transform(self, X, copy=None): From 238f393c0b1b577ec74a222fc66dde2a1e66a93c Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:29:58 -0700 Subject: [PATCH 18/31] Add estimator check --- inverse_covariance/tests/common_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inverse_covariance/tests/common_test.py b/inverse_covariance/tests/common_test.py index aadfd96..f7d1b4b 100644 --- a/inverse_covariance/tests/common_test.py +++ b/inverse_covariance/tests/common_test.py @@ -5,6 +5,7 @@ QuicGraphicalLassoEBIC, AdaptiveGraphicalLasso, ModelAverage, + TwoWayStandardScaler, ) @@ -26,3 +27,7 @@ def test_adaptive_graphical_lasso(): def test_model_average(): return check_estimator(ModelAverage) + + +def test_two_way_standard_scaler(): + return check_estimator(TwoWayStandardScaler) From 34dc9368b5ea9d57044fa7fccf015952944648e2 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:31:39 -0700 Subject: [PATCH 19/31] Rename commont_test to sklearn_test as is more descriptive of this test. 
--- inverse_covariance/tests/{common_test.py => sklearn_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename inverse_covariance/tests/{common_test.py => sklearn_test.py} (100%) diff --git a/inverse_covariance/tests/common_test.py b/inverse_covariance/tests/sklearn_test.py similarity index 100% rename from inverse_covariance/tests/common_test.py rename to inverse_covariance/tests/sklearn_test.py From 78696598b31746a3e2631034a963705ee3f56c25 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 11:53:08 -0700 Subject: [PATCH 20/31] Address initial comments and some cleanup. --- inverse_covariance/two_way_standard_scaler.py | 162 +++++++----------- 1 file changed, 58 insertions(+), 104 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index f00c88f..15e65eb 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -1,3 +1,4 @@ +import warnings import numpy as np from scipy import sparse @@ -60,7 +61,7 @@ def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, -------- StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). - """ # noqa + """ X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, dtype=FLOAT_DTYPES) @@ -86,16 +87,16 @@ def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, n_iter += 1 err_norm_row = np.linalg.norm(oldXrow-Xrow_polish, 'fro') err_norm_col = np.linalg.norm(oldXcol-Xcol_polish, 'fro') - err_norm = .5 * err_norm_row/(n_rows*n_cols) + \ - .5 * err_norm_col/(n_rows*n_cols) + err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) + if verbose: print('Iteration: {}, Convergence Err: {}'.format( n_iter, err_norm)) + oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - X = Xrow_polish.T - return X + return Xrow_polish.T class TwoWayStandardScaler(BaseEstimator, TransformerMixin): @@ -132,7 +133,7 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): `scale_` n_samples_seen_ : int The number of samples processed by the estimator. Will be reset on - new calls to fit, but increments across ``partial_fit`` calls. + new calls to fit, but increments across ``fit`` calls. Examples -------- >>> from inverse_covariance.clean import TwoWayStandardScaler @@ -158,7 +159,7 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): ----- See the implications of one-way vs. two-way standardization in here. TBD - """ # noqa + """ def __init__(self, copy=True, with_mean=True, with_std=True): """Unlike StandardScaler, with_mean is always set to True, to ensure @@ -166,27 +167,10 @@ def __init__(self, copy=True, with_mean=True, with_std=True): argument `with_mean` is retained for the sake of sklearn API compatibility. """ - self.with_mean = True + self.with_mean = with_mean self.with_std = with_std self.copy = copy - def _reset(self): - """Reset internal data-dependent state of the scaler, if necessary. - __init__ parameters are not touched. - """ - - # Checking one attribute is enough, becase they are all set together - # in partial_fit - if hasattr(self, 'col_scale_'): - del self.row_scale_ - del self.row_mean_ - del self.row_var_ - del self.col_scale_ - del self.col_mean_ - del self.col_var_ - del self.n_rows_seen_ - del self.n_cols_seen_ - def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. 
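[review note] In PATCH 20's consolidated `fit()` here, column statistics are
computed from X and row statistics from X.T in a single pass. For one batch this
is equivalent to the plain-numpy computation sketched below (function name ours;
the real code additionally routes the scales through `_handle_zeros_in_scale` to
guard zero-variance rows/columns):

import numpy as np

def row_col_stats(X):
    X = np.asarray(X, dtype=float)
    return {
        'col_mean_': X.mean(axis=0), 'col_scale_': X.std(axis=0),  # per column
        'row_mean_': X.mean(axis=1), 'row_scale_': X.std(axis=1),  # per row
    }
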
Parameters @@ -196,19 +180,6 @@ def fit(self, X, y=None): along both row and column axes y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - - return self.partial_fit(X, y) - - def partial_fit(self, X, y=None): - """Compute the mean and std for both row and column dimensions. - Equivalent to fit. Online algorithm not supported at this time. - Parameters - ---------- - X : {array-like}, shape [n_rows, n_cols] - The data used to compute the mean and standard deviation - used for later scaling along the features axis. - y : Passthrough for ``Pipeline`` compatibility. - """ X = check_array(X, accept_sparse=None, copy=self.copy, warn_on_dtype=True, dtype=FLOAT_DTYPES) @@ -217,80 +188,59 @@ def partial_fit(self, X, y=None): raise NotImplemented( "Algorithm for sparse matrices currently not supported.") else: - # First pass - if not hasattr(self, 'n_rows_seen_'): - self.col_mean_ = .0 - self.n_rows_seen_ = 0 - if self.with_std: - self.col_var_ = .0 - else: - self.col_var_ = None - - self.col_mean_, self.col_var_, self.n_rows_seen_ = \ - _incremental_mean_and_var(X, self.col_mean_, - self.col_var_, - self.n_rows_seen_ - ) - if not hasattr(self, 'n_cols_seen_'): - self.row_mean_ = .0 - self.n_cols_seen_ = 0 - if self.with_std: - self.row_var_ = .0 - else: - self.row_var_ = None - self.row_mean_, self.row_var_, self.n_cols_seen_ = \ - _incremental_mean_and_var(X.T, self.row_mean_, - self.row_var_, - self.n_cols_seen_ - ) + self.col_mean_ = 0. + self.n_rows_seen_ = 0 + + self.col_var_ = None + if self.with_std: + self.col_var_ = 0. + + self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var(X, self.col_mean_, self.col_var_, self.n_rows_seen_) + + self.row_mean_ = 0. + self.n_cols_seen_ = 0 + + self.row_var_ = None + if self.with_std: + self.row_var_ = 0. + + self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, self.n_cols_seen_) + self.row_scale_ = None + self.col_scale_ = None if self.with_std: self.row_scale_ = _handle_zeros_in_scale(np.sqrt(self.row_var_)) self.col_scale_ = _handle_zeros_in_scale(np.sqrt(self.col_var_)) - else: - self.row_scale_ = None - self.col_scale_ = None return self - def transform(self, X, y='deprecated', copy=None): + def transform(self, X, copy=None): """Perform standardization by centering and scaling Parameters ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. - y : (ignored) - .. deprecated:: 0.19 - This parameter will be removed in 0.21. copy : bool, optional (default: None) Copy the input X or not. 
""" - if not isinstance(y, string_types) or y != 'deprecated': - warnings.warn("The parameter y on transform() is " # noqa - "deprecated since 0.19 and will be removed in 0.21", - DeprecationWarning) - check_is_fitted(self, 'row_scale_') - copy = copy if copy is not None else self.copy X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES) if sparse.issparse(X): - print('Input is sparse') raise NotImplemented( - "Algorithm for sparse matrices currently not supported.") - else: - X = two_way_standardize(X) - return X + "Input is sparse: Algorithm for sparse matrices currently not supported.") - def inverse_transform(self, X, copy=None): + return two_way_standardize(X) + + def inverse_transform(self, X, copy=False): """Scale back the data to the original representation Parameters ---------- X : array-like, shape [n_samples, n_features] The data used to scale along the features axis. - copy : bool, optional (default: None) + copy : bool, optional (default: False) Copy the input X or not. Returns ------- @@ -298,26 +248,30 @@ def inverse_transform(self, X, copy=None): Transformed array. """ check_is_fitted(self, 'row_scale_') - - copy = copy if copy is not None else self.copy if sparse.issparse(X): - print('Input is sparse') - raise NotImplementedError( - 'Algorithm for sparse matrices currently not supported.') - else: raise NotImplementedError( - 'Two Way standardization not reversible with accuracy') - X = np.asarray(X) - if copy: - X = X.copy() - X = X.T - if self.with_std: - X *= self.row_scale_ - if self.with_mean: - X += self.row_mean_ - X = X.T - if self.with_std: - X *= self.col_scale_ - if self.with_mean: - X += self.col_mean_ + 'Input is sparse: Algorithm for sparse matrices currently not supported.') + + warnings.warn('Two Way standardization not reversible with accuracy') + + X = np.asarray(X) + if copy: + X = X.copy() + + X = X.T + + if self.with_std: + X *= self.row_scale_ + + if self.with_mean: + X += self.row_mean_ + + X = X.T + + if self.with_std: + X *= self.col_scale_ + + if self.with_mean: + X += self.col_mean_ + return X From 9cbb212e6c13fc43b56fd390340a881725e926d9 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:00:41 -0700 Subject: [PATCH 21/31] Black formatting and more simplification and cleanup. 
--- inverse_covariance/__init__.py | 21 +-- .../tests/two_way_standard_scaler_test.py | 13 +- inverse_covariance/two_way_standard_scaler.py | 145 ++++++++++-------- 3 files changed, 90 insertions(+), 89 deletions(-) diff --git a/inverse_covariance/__init__.py b/inverse_covariance/__init__.py index 6f5ce1d..817ea30 100644 --- a/inverse_covariance/__init__.py +++ b/inverse_covariance/__init__.py @@ -9,20 +9,9 @@ QuicGraphicalLassoCV, QuicGraphicalLassoEBIC, ) -from .two_way_standard_scaler import ( - two_way_standardize, - TwoWayStandardScaler, -) -from .metrics import ( - log_likelihood, - kl_loss, - quadratic_loss, - ebic, -) -from .rank_correlation import ( - spearman_correlation, - kendalltau_correlation, -) +from .two_way_standard_scaler import two_way_standardize, TwoWayStandardScaler +from .metrics import log_likelihood, kl_loss, quadratic_loss, ebic +from .rank_correlation import spearman_correlation, kendalltau_correlation from .model_average import ModelAverage from .adaptive_graph_lasso import AdaptiveGraphLasso, AdaptiveGraphicalLasso from .cross_validation import RepeatedKFold @@ -37,8 +26,8 @@ "QuicGraphicalLasso", "QuicGraphicalLassoCV", "QuicGraphicalLassoEBIC", - 'two_way_standardize', - 'TwoWayStandardScaler', + "two_way_standardize", + "TwoWayStandardScaler", "log_likelihood", "kl_loss", "quadratic_loss", diff --git a/inverse_covariance/tests/two_way_standard_scaler_test.py b/inverse_covariance/tests/two_way_standard_scaler_test.py index 2ca23b4..93bcd19 100644 --- a/inverse_covariance/tests/two_way_standard_scaler_test.py +++ b/inverse_covariance/tests/two_way_standard_scaler_test.py @@ -3,30 +3,29 @@ from sklearn.utils.testing import assert_raises -from inverse_covariance.two_way_standard_scaler import ( - two_way_standardize -) +from inverse_covariance.two_way_standard_scaler import two_way_standardize def custom_init(n_rows, n_cols, with_mean=False): prng = np.random.RandomState(1) X = prng.normal(0, np.ones(shape=(n_rows, n_cols))) if with_mean: - mu = np.ones(shape=(n_rows, 1)) * \ - prng.randint(1, 5, size=(1, n_cols)) + mu = np.ones(shape=(n_rows, 1)) * prng.randint(1, 5, size=(1, n_cols)) else: mu = np.zeros(shape=(n_rows, n_cols)) + var_rows = prng.lognormal(2, 1, size=(n_rows, 1)) sqcov_rows = np.diag(np.sqrt(var_rows)) var_cols = prng.lognormal(2, 1, size=(1, n_cols)) sqcov_cols = np.diag(np.sqrt(var_cols)) + return mu + sqcov_rows * X * sqcov_cols def test_invalid_argument(): - ''' + """ Test behavior of invalid sparse data matrix inputs. 
- ''' + """ X = np.zeros(shape=(10, 10)) X_csc = sparse.csc_matrix(X) assert_raises(TypeError, two_way_standardize, X_csc) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 15e65eb..3f033ec 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -2,23 +2,25 @@ import numpy as np from scipy import sparse -from sklearn.preprocessing.data import ( - scale, - _handle_zeros_in_scale - ) +from sklearn.preprocessing.data import scale, _handle_zeros_in_scale from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array from sklearn.externals.six import string_types from sklearn.utils.extmath import _incremental_mean_and_var -from sklearn.utils.validation import ( - check_is_fitted, - FLOAT_DTYPES - ) - - -def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, - max_iter=50, tol=1e-6, verbose=False): +from sklearn.utils.validation import check_is_fitted, FLOAT_DTYPES + + +def two_way_standardize( + X, + axis=0, + with_mean=True, + with_std=True, + copy=True, + max_iter=50, + tol=1e-6, + verbose=False, +): """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. Read more in the :ref:`User Guide `. @@ -62,39 +64,37 @@ def two_way_standardize(X, axis=0, with_mean=True, with_std=True, copy=True, StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). """ + X = check_array( + X, accept_sparse=None, copy=copy, warn_on_dtype=True, dtype=FLOAT_DTYPES + ) + if sparse.issparse(X): + raise NotImplemented( + "Input is sparse: Algorithm for sparse matrices currently not supported." 
+ ) - X = check_array(X, accept_sparse=None, copy=copy, - warn_on_dtype=True, dtype=FLOAT_DTYPES) Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) [n_rows, n_cols] = np.shape(X) - if sparse.issparse(X): - print('Input is sparse') - raise NotImplemented( - "Algorithm for sparse matrices currently not supported.") + n_iter = 0 + err_norm = np.inf + oldXrow = np.copy(Xrow_polish) + oldXcol = np.copy(Xcol_polish) + while n_iter <= max_iter and err_norm > tol: + Xcol_polish = scale(Xrow_polish.T, axis=1, with_mean=True, with_std=with_std) + Xrow_polish = scale(Xcol_polish.T, axis=1, with_mean=True, with_std=with_std) + n_iter += 1 + err_norm_row = np.linalg.norm(oldXrow - Xrow_polish, "fro") + err_norm_col = np.linalg.norm(oldXcol - Xcol_polish, "fro") + err_norm = .5 * err_norm_row / (n_rows * n_cols) + .5 * err_norm_col / ( + n_rows * n_cols + ) + + if verbose: + print("Iteration: {}, Convergence Err: {}".format(n_iter, err_norm)) - else: - n_iter = 0 - err_norm = np.inf oldXrow = np.copy(Xrow_polish) oldXcol = np.copy(Xcol_polish) - while n_iter <= max_iter and err_norm > tol: - Xcol_polish = scale(Xrow_polish.T, axis=1, - with_mean=True, with_std=with_std) - Xrow_polish = scale(Xcol_polish.T, axis=1, - with_mean=True, with_std=with_std) - n_iter += 1 - err_norm_row = np.linalg.norm(oldXrow-Xrow_polish, 'fro') - err_norm_col = np.linalg.norm(oldXcol-Xcol_polish, 'fro') - err_norm = .5 * err_norm_row/(n_rows*n_cols) + .5 * err_norm_col/(n_rows*n_cols) - - if verbose: - print('Iteration: {}, Convergence Err: {}'.format( - n_iter, err_norm)) - - oldXrow = np.copy(Xrow_polish) - oldXcol = np.copy(Xcol_polish) return Xrow_polish.T @@ -180,31 +180,39 @@ def fit(self, X, y=None): along both row and column axes y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - X = check_array(X, accept_sparse=None, copy=self.copy, - warn_on_dtype=True, dtype=FLOAT_DTYPES) - + X = check_array( + X, + accept_sparse=None, + copy=self.copy, + warn_on_dtype=True, + dtype=FLOAT_DTYPES, + ) if sparse.issparse(X): - print('Input is sparse') raise NotImplemented( - "Algorithm for sparse matrices currently not supported.") - else: - self.col_mean_ = 0. - self.n_rows_seen_ = 0 + "Input is sparse: Algorithm for sparse matrices currently not supported." + ) - self.col_var_ = None - if self.with_std: - self.col_var_ = 0. + self.col_mean_ = 0. + self.n_rows_seen_ = 0 - self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var(X, self.col_mean_, self.col_var_, self.n_rows_seen_) + self.col_var_ = None + if self.with_std: + self.col_var_ = 0. - self.row_mean_ = 0. - self.n_cols_seen_ = 0 + self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var( + X, self.col_mean_, self.col_var_, self.n_rows_seen_ + ) - self.row_var_ = None - if self.with_std: - self.row_var_ = 0. + self.row_mean_ = 0. + self.n_cols_seen_ = 0 - self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var(X.T, self.row_mean_, self.row_var_, self.n_cols_seen_) + self.row_var_ = None + if self.with_std: + self.row_var_ = 0. 
+ + self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var( + X.T, self.row_mean_, self.row_var_, self.n_cols_seen_ + ) self.row_scale_ = None self.col_scale_ = None @@ -214,23 +222,27 @@ def fit(self, X, y=None): return self - def transform(self, X, copy=None): + def transform(self, X, y=None, copy=False): """Perform standardization by centering and scaling Parameters ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. - copy : bool, optional (default: None) - Copy the input X or not. """ - check_is_fitted(self, 'row_scale_') - copy = copy if copy is not None else self.copy - X = check_array(X, accept_sparse=None, copy=copy, warn_on_dtype=True, - estimator=self, dtype=FLOAT_DTYPES) + check_is_fitted(self, "row_scale_") + X = check_array( + X, + accept_sparse=None, + copy=copy, + warn_on_dtype=True, + estimator=self, + dtype=FLOAT_DTYPES, + ) if sparse.issparse(X): raise NotImplemented( - "Input is sparse: Algorithm for sparse matrices currently not supported.") + "Input is sparse: Algorithm for sparse matrices currently not supported." + ) return two_way_standardize(X) @@ -247,12 +259,13 @@ def inverse_transform(self, X, copy=False): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - check_is_fitted(self, 'row_scale_') + check_is_fitted(self, "row_scale_") if sparse.issparse(X): raise NotImplementedError( - 'Input is sparse: Algorithm for sparse matrices currently not supported.') + "Input is sparse: Algorithm for sparse matrices currently not supported." + ) - warnings.warn('Two Way standardization not reversible with accuracy') + warnings.warn("Two Way standardization not reversible with accuracy") X = np.asarray(X) if copy: From a8e980ffe9114fd5e567a4c54c959308ba29e514 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:02:32 -0700 Subject: [PATCH 22/31] Black formatting and more simplification and cleanup. --- inverse_covariance/two_way_standard_scaler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 3f033ec..d27d23f 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -240,7 +240,7 @@ def transform(self, X, y=None, copy=False): ) if sparse.issparse(X): - raise NotImplemented( + raise NotImplementedError( "Input is sparse: Algorithm for sparse matrices currently not supported." ) @@ -265,7 +265,7 @@ def inverse_transform(self, X, copy=False): "Input is sparse: Algorithm for sparse matrices currently not supported." ) - warnings.warn("Two Way standardization not reversible with accuracy") + warnings.warn("Reversing two way transformation is not accurate.") X = np.asarray(X) if copy: From e864e7288728479fd483b6b2fc503cd2d31d6d1f Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:21:43 -0700 Subject: [PATCH 23/31] Ensure interface can be validated. --- inverse_covariance/two_way_standard_scaler.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index d27d23f..9db570f 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -64,17 +64,9 @@ def two_way_standardize( StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). 
""" - X = check_array( - X, accept_sparse=None, copy=copy, warn_on_dtype=True, dtype=FLOAT_DTYPES - ) - if sparse.issparse(X): - raise NotImplemented( - "Input is sparse: Algorithm for sparse matrices currently not supported." - ) - Xrow_polish = np.copy(X.T) Xcol_polish = np.copy(X) - [n_rows, n_cols] = np.shape(X) + n_rows, n_cols = np.shape(X) n_iter = 0 err_norm = np.inf @@ -182,16 +174,20 @@ def fit(self, X, y=None): """ X = check_array( X, - accept_sparse=None, + accept_sparse=False, copy=self.copy, warn_on_dtype=True, dtype=FLOAT_DTYPES, + estimator=self, + ensure_min_features=2, ) if sparse.issparse(X): raise NotImplemented( "Input is sparse: Algorithm for sparse matrices currently not supported." ) + self.n_rows_, self.n_cols_ = np.shape(X) + self.col_mean_ = 0. self.n_rows_seen_ = 0 @@ -222,7 +218,7 @@ def fit(self, X, y=None): return self - def transform(self, X, y=None, copy=False): + def transform(self, X, copy=False): """Perform standardization by centering and scaling Parameters ---------- @@ -232,12 +228,15 @@ def transform(self, X, y=None, copy=False): check_is_fitted(self, "row_scale_") X = check_array( X, - accept_sparse=None, + accept_sparse=False, copy=copy, warn_on_dtype=True, - estimator=self, dtype=FLOAT_DTYPES, + estimator=self, ) + n_rows, n_cols = np.shape(X) + if self.n_cols_ != n_cols: + raise ValueError("Number of features must be same as for fit().") if sparse.issparse(X): raise NotImplementedError( From 7f86bb3f0f46b33cbb6ebf9a154da0e2eecb7832 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:30:20 -0700 Subject: [PATCH 24/31] More simplification. --- inverse_covariance/two_way_standard_scaler.py | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 9db570f..5d85a56 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -11,6 +11,7 @@ from sklearn.utils.validation import check_is_fitted, FLOAT_DTYPES + def two_way_standardize( X, axis=0, @@ -64,31 +65,30 @@ def two_way_standardize( StandardScaler: Performs scaling to unit variance using the``Transformer`` API (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). 
""" - Xrow_polish = np.copy(X.T) - Xcol_polish = np.copy(X) n_rows, n_cols = np.shape(X) - n_iter = 0 err_norm = np.inf - oldXrow = np.copy(Xrow_polish) - oldXcol = np.copy(Xcol_polish) + rows_X = np.copy(X.T) + cols_X = np.copy(X) + n_iter = 0 while n_iter <= max_iter and err_norm > tol: - Xcol_polish = scale(Xrow_polish.T, axis=1, with_mean=True, with_std=with_std) - Xrow_polish = scale(Xcol_polish.T, axis=1, with_mean=True, with_std=with_std) - n_iter += 1 - err_norm_row = np.linalg.norm(oldXrow - Xrow_polish, "fro") - err_norm_col = np.linalg.norm(oldXcol - Xcol_polish, "fro") + col_polish = scale(row_polish.T, axis=1, with_mean=True, with_std=with_std) + row_polish = scale(col_polish.T, axis=1, with_mean=True, with_std=with_std) + + err_norm_row = np.linalg.norm(rows_X - row_polish, "fro") + err_norm_col = np.linalg.norm(cols_X - col_polish, "fro") err_norm = .5 * err_norm_row / (n_rows * n_cols) + .5 * err_norm_col / ( n_rows * n_cols ) + n_iter += 1 if verbose: print("Iteration: {}, Convergence Err: {}".format(n_iter, err_norm)) - oldXrow = np.copy(Xrow_polish) - oldXcol = np.copy(Xcol_polish) + rows_X = np.copy(row_polish) + cols_X = np.copy(col_polish) - return Xrow_polish.T + return row_polish.T class TwoWayStandardScaler(BaseEstimator, TransformerMixin): @@ -188,26 +188,14 @@ def fit(self, X, y=None): self.n_rows_, self.n_cols_ = np.shape(X) - self.col_mean_ = 0. - self.n_rows_seen_ = 0 - - self.col_var_ = None - if self.with_std: - self.col_var_ = 0. - + self.col_var_ = 0. if self.with_std else None self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var( - X, self.col_mean_, self.col_var_, self.n_rows_seen_ + X, 0.0, self.col_var_, 0 ) - self.row_mean_ = 0. - self.n_cols_seen_ = 0 - - self.row_var_ = None - if self.with_std: - self.row_var_ = 0. - + self.row_var_ = 0. if self.with_std else None self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var( - X.T, self.row_mean_, self.row_var_, self.n_cols_seen_ + X.T, 0.0, self.row_var_, 0 ) self.row_scale_ = None @@ -266,7 +254,6 @@ def inverse_transform(self, X, copy=False): warnings.warn("Reversing two way transformation is not accurate.") - X = np.asarray(X) if copy: X = X.copy() From 748fe33d031b830e98116e568e3b20b8527fb344 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 12:32:02 -0700 Subject: [PATCH 25/31] Autoformat. --- inverse_covariance/two_way_standard_scaler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 5d85a56..eb7ca30 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -11,7 +11,6 @@ from sklearn.utils.validation import check_is_fitted, FLOAT_DTYPES - def two_way_standardize( X, axis=0, From d2800fc78a0f384bd64f7096ed5cfd0067215ee0 Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 15:34:01 -0700 Subject: [PATCH 26/31] Bring back partial_fit capability, add tests, ask questions. 
---
 .../tests/quic_graph_lasso_test.py            |   3 +-
 .../tests/two_way_standard_scaler_test.py     | 189 +++++++++++++++++-
 inverse_covariance/two_way_standard_scaler.py | 144 ++++++++-----
 3 files changed, 281 insertions(+), 55 deletions(-)

diff --git a/inverse_covariance/tests/quic_graph_lasso_test.py b/inverse_covariance/tests/quic_graph_lasso_test.py
index dec6b63..a45bb98 100644
--- a/inverse_covariance/tests/quic_graph_lasso_test.py
+++ b/inverse_covariance/tests/quic_graph_lasso_test.py
@@ -1,8 +1,7 @@
 import numpy as np
 import pytest
 
-from sklearn.utils.testing import assert_raises
-from sklearn.utils.testing import assert_allclose
+from sklearn.utils.testing import assert_raises, assert_allclose
 from sklearn import datasets
 
 from inverse_covariance import (
diff --git a/inverse_covariance/tests/two_way_standard_scaler_test.py b/inverse_covariance/tests/two_way_standard_scaler_test.py
index 93bcd19..e677a29 100644
--- a/inverse_covariance/tests/two_way_standard_scaler_test.py
+++ b/inverse_covariance/tests/two_way_standard_scaler_test.py
@@ -1,9 +1,13 @@
+import pytest
 import numpy as np
 from scipy import sparse
+from sklearn.utils.testing import assert_raises, assert_allclose
+from sklearn.exceptions import NotFittedError
 
-from sklearn.utils.testing import assert_raises
-
-from inverse_covariance.two_way_standard_scaler import two_way_standardize
+from inverse_covariance.two_way_standard_scaler import (
+    two_way_standardize,
+    TwoWayStandardScaler,
+)
 
 
 def custom_init(n_rows, n_cols, with_mean=False):
@@ -22,13 +26,182 @@ def custom_init(n_rows, n_cols, with_mean=False):
     return mu + sqcov_rows * X * sqcov_cols
 
 
-def test_invalid_argument():
+def test_fit_exception_on_sparse_input():
+    """
+    Test behavior of invalid sparse data matrix inputs.
+    """
+    X = np.zeros(shape=(10, 10))
+    sparse_Xs = [sparse.csc_matrix(X), sparse.csr_matrix(X)]
+    for sparse_X in sparse_Xs:
+        scaler = TwoWayStandardScaler()
+        assert_raises(TypeError, scaler.fit, sparse_X)
+
+
+def test_transform_exception_not_fitted():
+    """
+    Test that transform raises NotFittedError when called before fit.
+    """
+    scaler = TwoWayStandardScaler()
+    assert_raises(NotFittedError, scaler.transform, np.zeros(shape=(10, 10)))
+
+
+def test_transform_exception_on_sparse_input():
     """
     Test behavior of invalid sparse data matrix inputs.
""" X = np.zeros(shape=(10, 10)) - X_csc = sparse.csc_matrix(X) - assert_raises(TypeError, two_way_standardize, X_csc) + sparse_Xs = [sparse.csc_matrix(X), sparse.csr_matrix(X)] + for sparse_X in sparse_Xs: + scaler = TwoWayStandardScaler() + scaler.fit(X) + assert_raises(TypeError, scaler.transform, sparse_X) + + +@pytest.mark.parametrize( + "data, with_std, expected", + [ + ( + [[1, 0], [1, 0], [2, 1], [2, 1]], + True, # with_std=True + [ + [0.5, 0.5, 1.5, 1.5], # row_mean_ + [0.25, 0.25, 0.25, 0.25], # row_var_ + [1.5, 0.5], # col_mean_ + [0.25, 0.25], # col_var_ + [0.5, 0.5, 0.5, 0.5], # row_scale_ + [0.5, 0.5], # col_scale_ + [4], # [n_rows_seen_] + [2], # [n_cols_seen_] + ], + ), + ( + [[1, 0], [1, 0], [2, 1], [2, 1]], + False, # with_std=False + [ + [0.5, 0.5, 1.5, 1.5], # row_mean_ + None, # row_var_ + [1.5, 0.5], # col_mean_ + None, # col_var_ + None, # row_scale_ + None, # col_scale_ + [4], # [n_rows_seen_] + [2], # [n_cols_seen_] + ], + ), + ], +) +def test_fit(data, with_std, expected): + scaler = TwoWayStandardScaler(with_std=with_std) + scaler.fit(data) + result = [ + scaler.row_mean_, + scaler.row_var_, + scaler.col_mean_, + scaler.col_var_, + scaler.row_scale_, + scaler.col_scale_, + [scaler.n_rows_seen_], + [scaler.n_cols_seen_], + ] + assert_allclose( + [i for e in expected if e is not None for i in e], + [j for r in result if r is not None for j in r], + ) + + +@pytest.mark.parametrize( + "data, with_std, expected", + [ + ( + [ + [[1, 0], [1, 0], [2, 1], [2, 1]], + [[1, 0], [1, 0], [2, 1], [2, 1]], + [[1, 0], [1, 0], [2, 1], [2, 1]], + ], # multiple data examples for "online" estimation + True, # with_std=True + [ + [0.5, 0.5, 1.5, 1.5], # row_mean_ + [0.25, 0.25, 0.25, 0.25], # row_var_ + [1.5, 0.5], # col_mean_ + [0.25, 0.25], # col_var_ + [0.5, 0.5, 0.5, 0.5], # row_scale_ + [0.5, 0.5], # col_scale_ + [12], # [n_rows_seen_] + [6], # [n_cols_seen_] + ], + ) + ], +) +def test_partial_fit(data, with_std, expected): + scaler = TwoWayStandardScaler(with_std=with_std) + for d in data: + scaler.partial_fit(d) + + result = [ + scaler.row_mean_, + scaler.row_var_, + scaler.col_mean_, + scaler.col_var_, + scaler.row_scale_, + scaler.col_scale_, + [scaler.n_rows_seen_], + [scaler.n_cols_seen_], + ] + print(result) + assert_allclose( + [i for e in expected if e is not None for i in e], + [j for r in result if r is not None for j in r], + ) + - X_csr = sparse.csr_matrix(X) - assert_raises(TypeError, two_way_standardize, X_csr) +@pytest.mark.parametrize( + "n_rows, n_cols, with_mean, with_std, expected", + [ + ( + 6, # n_rows + 2, # n_cols + False, # with_mean + True, # with_std + [ + [1.24852525, -0.47021609], + [-1.66629192, -3.38503326], + [0.46966753, -1.24907381], + [1.1966711, -0.52207024], + [0.96470187, -0.75403946], + [0.71346052, -1.00528082], + ], + ), + ( + 6, # n_rows + 2, # n_cols + True, # with_mean + False, # with_std + [ + [1.85393809, -1.85393809], + [-19.71891691, 19.71891691], + [13.72700391, -13.72700391], + [5.29676963, -5.29676963], + [-19.41773454, 19.41773454], + [18.25893982, -18.25893982], + ], + ), + ( + 6, # n_rows + 2, # n_cols + True, # with_mean + True, # with_std + [ + [-1.41421356, 1.41421356], + [-1.41421356, 1.41421356], + [0.70710678, -0.70710678], + [0.70710678, -0.70710678], + [0.70710678, -0.70710678], + [0.70710678, -0.70710678], + ], + ), + ], +) +def test_two_way_standardize(n_rows, n_cols, with_mean, with_std, expected): + X = custom_init(n_rows, n_cols, with_mean=with_mean) + result = two_way_standardize(X, with_mean=with_mean, 
with_std=with_std) + assert_allclose(result, expected) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index eb7ca30..176eb4a 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -24,61 +24,69 @@ def two_way_standardize( """Standardize a two-dimensional data matrix along both axes. Center to the mean and component wise scale to unit variance. Read more in the :ref:`User Guide `. + Parameters ---------- X : {array-like, sparse matrix} The data to center and scale. + axis : int (0 by default) axis used to compute the means and standard deviations along. If 0, independently standardize each feature, otherwise (if 1) standardize each sample. + with_mean : boolean, True by default Is always true for two-way standardize + with_std : boolean, True by default If True, scale the data to unit variance (or equivalently, unit standard deviation). + copy : boolean, optional, default True set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSC matrix and if axis is 1). + max_iter : int, optional (50 by default) Set the maximum number of iterations of successive normalization algorithm + tol : float, optional (1e-6 by default) Set the convergence threshold for successive normalization + Notes ----- - This function invokes sklearn's scale function. Thus, the same restrictions + This function invokes sklearn's scale function, thus the same restrictions for scale, apply here as well. - This implementation will refuse to center scipy.sparse matrices - since it would make them non-sparse and would potentially crash the - program with memory exhaustion problems. - Instead the caller is expected to either set explicitly - `with_mean=False` (in that case, only variance scaling will be - performed on the features of the CSC matrix) or to call `X.toarray()` - if he/she expects the materialized dense array to fit in memory. - To avoid memory copy the caller should pass a CSC matrix. + + The caller should pass a CSC matrix. The caller is expected to either set + explicitly `with_mean=False`(in that case, only variance scaling will be + performed on the features of the CSC matrix) or to call `X.toarray()` if + the array fits in memory. + For a comparison of the different scalers, transformers, and normalizers, see sklearn documentation `examples/preprocessing/plot_all_scaling.py + See also -------- - StandardScaler: Performs scaling to unit variance using the``Transformer`` API - (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`). + StandardScaler: Performs scaling to unit variance using the``Transformer`` API, + e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`. 
""" n_rows, n_cols = np.shape(X) - err_norm = np.inf + row_polish = np.copy(X.T) rows_X = np.copy(X.T) cols_X = np.copy(X) + + err_norm = np.inf n_iter = 0 + while n_iter <= max_iter and err_norm > tol: - col_polish = scale(row_polish.T, axis=1, with_mean=True, with_std=with_std) - row_polish = scale(col_polish.T, axis=1, with_mean=True, with_std=with_std) + col_polish = scale(row_polish.T, axis=1, with_mean=with_mean, with_std=with_std) + row_polish = scale(col_polish.T, axis=1, with_mean=with_mean, with_std=with_std) - err_norm_row = np.linalg.norm(rows_X - row_polish, "fro") - err_norm_col = np.linalg.norm(cols_X - col_polish, "fro") - err_norm = .5 * err_norm_row / (n_rows * n_cols) + .5 * err_norm_col / ( - n_rows * n_cols - ) + err_row = np.linalg.norm(rows_X - row_polish, "fro") + err_col = np.linalg.norm(cols_X - col_polish, "fro") + err_norm = .5 * err_row / (n_rows * n_cols) + .5 * err_col / (n_rows * n_cols) n_iter += 1 if verbose: @@ -93,8 +101,10 @@ def two_way_standardize( class TwoWayStandardScaler(BaseEstimator, TransformerMixin): """Standardize features by removing the mean and scaling to unit variance in both row and column dimensions. + This class is modeled after StandardScaler in scikit-learn. Read more in the :ref:`User Guide `. + Parameters ---------- copy : boolean, optional, default True @@ -102,54 +112,51 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned. + with_mean : boolean, True by default If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory. + with_std : boolean, True by default If True, scale the data to unit variance (or equivalently, unit standard deviation). + Attributes ---------- scale_ : ndarray, shape (n_features,) Per feature relative scaling of the data. .. versionadded:: 0.17 *scale_* - mean_ : array of floats with shape [n_features] + + row_mean_ : array of floats with shape [n_examples] + The mean value for each feature in the training set. + + col_mean_ : array of floats with shape [n_features] The mean value for each feature in the training set. - var_ : array of floats with shape [n_features] + + row_var_ : array of floats with shape [n_examples] + The variance for each feature in the training set. Used to compute + `scale_` + + col_var_ : array of floats with shape [n_features] The variance for each feature in the training set. Used to compute `scale_` + n_samples_seen_ : int The number of samples processed by the estimator. Will be reset on new calls to fit, but increments across ``fit`` calls. - Examples - -------- - >>> from inverse_covariance.clean import TwoWayStandardScaler - >>> - >>> data = [[1, 0], [1, 0], [2, 1], [2, 1]] - >>> scaler = StandardScaler() - >>> print(scaler.fit(data)) - StandardScaler(copy=True, with_mean=True, with_std=True) - >>> print(scaler.mean_) - [ 3.0 0.5] - >>> print(scaler.transform(data)) - [[-1. -1.] - [-1. -1.] - [ 1. 1.] - [ 1. 1.]] + See also -------- - two_way_standardize: Equivalent function without the estimator API. :class:`sklearn.preprocessing.StandardScaler` :class:`sklearn.decomposition.PCA` - Further removes the linear correlation across features with 'whiten=True'. + Notes ----- See the implications of one-way vs. two-way standardization in here. 
TBD - """ def __init__(self, copy=True, with_mean=True, with_std=True): @@ -162,15 +169,43 @@ def __init__(self, copy=True, with_mean=True, with_std=True): self.with_std = with_std self.copy = copy + def _reset(self): + fit_attrs = [ + "n_rows_", + "n_cols_", + "col_var_", + "col_mean_", + "row_var_", + "row_mean_", + "row_scale_", + "col_scale_", + "n_rows_seen_", + "n_cols_seen_", + ] + for attr in fit_attrs: + if hasattr(self, attr): + delattr(self, attr) + + def _initial_state(self): + return not hasattr(self, "n_rows_seen_") + def fit(self, X, y=None): """Compute the mean and std for both row and column dimensions. + Parameters ---------- X : {array-like}, shape [n_rows, n_cols] The data used to compute the mean and standard deviation along both row and column axes + y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ + # Reset internal state before fitting + self._reset() + return self.partial_fit(X, y) + + def partial_fit(self, X, y=None): + """Online computation of mean and std on X for later scaling.""" X = check_array( X, accept_sparse=False, @@ -187,14 +222,23 @@ def fit(self, X, y=None): self.n_rows_, self.n_cols_ = np.shape(X) - self.col_var_ = 0. if self.with_std else None + # Q: This doesnt seem to actually get used in the transform, only in + # the inverse transform which it sounds like we should not support. + + # initialize variables on first pass + if self._initial_state(): + self.col_mean_ = 0. + self.col_var_ = 0. if self.with_std else None + self.n_cols_seen_ = 0 + self.row_mean_ = 0. + self.row_var_ = 0. if self.with_std else None + self.n_rows_seen_ = 0 + self.col_mean_, self.col_var_, self.n_rows_seen_ = _incremental_mean_and_var( - X, 0.0, self.col_var_, 0 + X, self.col_mean_, self.col_var_, self.n_rows_seen_ ) - - self.row_var_ = 0. if self.with_std else None self.row_mean_, self.row_var_, self.n_cols_seen_ = _incremental_mean_and_var( - X.T, 0.0, self.row_var_, 0 + X.T, self.row_mean_, self.row_var_, self.n_cols_seen_ ) self.row_scale_ = None @@ -211,6 +255,8 @@ def transform(self, X, copy=False): ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. + copy : bool, optional (default: False) + Copy the input X or not. """ check_is_fitted(self, "row_scale_") X = check_array( @@ -230,16 +276,21 @@ def transform(self, X, copy=False): "Input is sparse: Algorithm for sparse matrices currently not supported." ) - return two_way_standardize(X) + return two_way_standardize( + X, with_mean=self.with_mean, with_std=self.with_std, copy=self.copy + ) def inverse_transform(self, X, copy=False): """Scale back the data to the original representation + Parameters ---------- X : array-like, shape [n_samples, n_features] The data used to scale along the features axis. + copy : bool, optional (default: False) Copy the input X or not. + Returns ------- X_tr : array-like, shape [n_samples, n_features] @@ -253,6 +304,9 @@ def inverse_transform(self, X, copy=False): warnings.warn("Reversing two way transformation is not accurate.") + # Q: Should ^ be a warning or should we just rais here and delete the + # rest of the code? + if copy: X = X.copy() From 7c370307252f9a333e9848f076735376a1d2662a Mon Sep 17 00:00:00 2001 From: Jaska Date: Sun, 9 Sep 2018 15:39:23 -0700 Subject: [PATCH 27/31] Minor cleanup. 
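
For context while touching these docstrings: the successive normalization
that two_way_standardize performs (reworked in the previous patch) can be
written out in a few lines of plain numpy. This is an illustrative sketch
of the iteration only, not the library code, and it assumes generic dense
data with no constant rows or columns (the library routes through
sklearn's scale, which also guards against zero variance):

    import numpy as np

    def successive_normalize(X, max_iter=50, tol=1e-6):
        """Alternately standardize rows then columns until the update is small."""
        Z = np.asarray(X, dtype=float).copy()
        for _ in range(max_iter):
            prev = Z.copy()
            # center/scale each row, then each column
            Z = (Z - Z.mean(axis=1, keepdims=True)) / Z.std(axis=1, keepdims=True)
            Z = (Z - Z.mean(axis=0, keepdims=True)) / Z.std(axis=0, keepdims=True)
            if np.linalg.norm(Z - prev, "fro") / Z.size < tol:
                return Z
        return Z

Each pass re-centers and re-scales one axis, which slightly perturbs the
other; iterating drives both row and column means toward 0 and standard
deviations toward 1 at the fixed point.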
--- inverse_covariance/two_way_standard_scaler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 176eb4a..0d2779f 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -200,7 +200,6 @@ def fit(self, X, y=None): y : Passthrough for ``Pipeline`` compatibility. Input is ignored. """ - # Reset internal state before fitting self._reset() return self.partial_fit(X, y) @@ -251,10 +250,12 @@ def partial_fit(self, X, y=None): def transform(self, X, copy=False): """Perform standardization by centering and scaling + Parameters ---------- X : array-like, shape [n_rows, n_cols] The data used to scale along the features axis. + copy : bool, optional (default: False) Copy the input X or not. """ From 1757216631ad8f7b6eba009ca7e36f910c51fa6a Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 08:00:28 -0700 Subject: [PATCH 28/31] Raise on inverse transform, remove code. --- inverse_covariance/two_way_standard_scaler.py | 33 +++---------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 0d2779f..0efadb8 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -126,10 +126,11 @@ class TwoWayStandardScaler(BaseEstimator, TransformerMixin): Attributes ---------- - scale_ : ndarray, shape (n_features,) + row_scale_ : ndarray, shape (n_examples,) + Per feature relative scaling of the data. + + col_scale_ : ndarray, shape (n_features,) Per feature relative scaling of the data. - .. versionadded:: 0.17 - *scale_* row_mean_ : array of floats with shape [n_examples] The mean value for each feature in the training set. @@ -303,28 +304,4 @@ def inverse_transform(self, X, copy=False): "Input is sparse: Algorithm for sparse matrices currently not supported." ) - warnings.warn("Reversing two way transformation is not accurate.") - - # Q: Should ^ be a warning or should we just rais here and delete the - # rest of the code? - - if copy: - X = X.copy() - - X = X.T - - if self.with_std: - X *= self.row_scale_ - - if self.with_mean: - X += self.row_mean_ - - X = X.T - - if self.with_std: - X *= self.col_scale_ - - if self.with_mean: - X += self.col_mean_ - - return X + raise NotImplementedError("Reversing two way transformation is not accurate.") From 17806e85f69ee23048c2608f49f263cef3ca100c Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 13:14:58 -0700 Subject: [PATCH 29/31] Remove unneeded check. --- inverse_covariance/two_way_standard_scaler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 0efadb8..7321df0 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -298,7 +298,6 @@ def inverse_transform(self, X, copy=False): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - check_is_fitted(self, "row_scale_") if sparse.issparse(X): raise NotImplementedError( "Input is sparse: Algorithm for sparse matrices currently not supported." From f1f682e7842fcab4d73dc21cda4eb11d2b65bdee Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 13:28:34 -0700 Subject: [PATCH 30/31] Remove redundant raise. 
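
The sparse check is redundant because inverse_transform now raises
NotImplementedError unconditionally (two patches back). For the record,
the inverse really is ill-posed: the polish is many-to-one, so the stored
means and scales cannot recover the input. A quick demonstration on
made-up data (hypothetical snippet, not part of the patch):

    import numpy as np
    from inverse_covariance.two_way_standard_scaler import two_way_standardize

    prng = np.random.RandomState(0)
    X = prng.normal(size=(5, 3))

    # An affine change to a row is erased by the very first row polish,
    # so distinct inputs map to the same standardized output.
    Y = X.copy()
    Y[0, :] = 3.0 * Y[0, :] + 7.0
    assert np.allclose(two_way_standardize(X), two_way_standardize(Y))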
--- inverse_covariance/two_way_standard_scaler.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 7321df0..7969d85 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -298,9 +298,4 @@ def inverse_transform(self, X, copy=False): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - if sparse.issparse(X): - raise NotImplementedError( - "Input is sparse: Algorithm for sparse matrices currently not supported." - ) - raise NotImplementedError("Reversing two way transformation is not accurate.") From eb8c54b7fc6e66aeeac0f61caedb2f36be390881 Mon Sep 17 00:00:00 2001 From: Jaska Date: Mon, 10 Sep 2018 13:29:13 -0700 Subject: [PATCH 31/31] Remove unneeded comments. --- inverse_covariance/two_way_standard_scaler.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/inverse_covariance/two_way_standard_scaler.py b/inverse_covariance/two_way_standard_scaler.py index 7969d85..93412ff 100644 --- a/inverse_covariance/two_way_standard_scaler.py +++ b/inverse_covariance/two_way_standard_scaler.py @@ -283,19 +283,5 @@ def transform(self, X, copy=False): ) def inverse_transform(self, X, copy=False): - """Scale back the data to the original representation - - Parameters - ---------- - X : array-like, shape [n_samples, n_features] - The data used to scale along the features axis. - - copy : bool, optional (default: False) - Copy the input X or not. - - Returns - ------- - X_tr : array-like, shape [n_samples, n_features] - Transformed array. - """ + """Scale back the data to the original representation.""" raise NotImplementedError("Reversing two way transformation is not accurate.")
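
End-to-end usage of the API as it stands at the end of this series (an
illustrative sketch assuming the patches are applied on top of the
inverse_covariance package; the data is made up):

    import numpy as np
    from inverse_covariance.two_way_standard_scaler import (
        TwoWayStandardScaler,
        two_way_standardize,
    )

    prng = np.random.RandomState(1)
    X = prng.lognormal(mean=2, sigma=1, size=(20, 5))

    # Function form: returns the doubly standardized matrix directly.
    Z = two_way_standardize(X, verbose=True)

    # Estimator form: fit() records row/column moments for inspection,
    # transform() re-runs the polish on the data it is given.
    scaler = TwoWayStandardScaler()
    Z2 = scaler.fit(X).transform(X)

    # At convergence both axes are approximately centered and unit scale.
    print(np.abs(Z.mean(axis=0)).max(), np.abs(Z.mean(axis=1)).max())

    # inverse_transform is intentionally unsupported:
    # scaler.inverse_transform(Z2)  # raises NotImplementedError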