From d0a31851f499fef5d61db155e35e01c998bc51d9 Mon Sep 17 00:00:00 2001
From: Sebastin Santy
Date: Thu, 13 Jul 2017 21:07:25 +0530
Subject: [PATCH 01/86] [MRG+1] AffinityPropagation damping factor not
 explained (#9335)

* AffinityPropagation damping factor not explained

* Added for API also

* Add equation for damping

* formatting text

* Add suggestions
---
 doc/modules/clustering.rst               | 11 ++++++++++-
 sklearn/cluster/affinity_propagation_.py |  6 +++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index f7977845a8ce2..7189474752005 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -301,7 +301,9 @@ is given.
 Affinity Propagation can be interesting as it chooses the number of
 clusters based on the data provided. For this purpose, the two important
 parameters are the *preference*, which controls how many exemplars are
-used, and the *damping factor*.
+used, and the *damping factor*, which damps the responsibility and
+availability messages to avoid numerical oscillations when updating these
+messages.

 The main drawback of Affinity Propagation is its complexity. The
 algorithm has a time complexity of the order :math:`O(N^2 T)`, where :math:`N`
@@ -350,6 +352,13 @@ to be the exemplar of sample :math:`i` is given by:

 To begin with, all values for :math:`r` and :math:`a` are set to zero,
 and the calculation of each iterates until convergence.

+As discussed above, in order to avoid numerical oscillations when updating the
+messages, the damping factor :math:`\lambda` is introduced to the iteration process:
+
+.. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k)
+.. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k)
+
+where :math:`t` indicates the iteration number.

 .. _mean_shift:

diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py
index 398529793880f..8bf94cee95cda 100644
--- a/sklearn/cluster/affinity_propagation_.py
+++ b/sklearn/cluster/affinity_propagation_.py
@@ -197,7 +197,11 @@ class AffinityPropagation(BaseEstimator, ClusterMixin):
     Parameters
     ----------
     damping : float, optional, default: 0.5
-        Damping factor between 0.5 and 1.
+        Damping factor (between 0.5 and 1) is the extent to
+        which the current value is maintained relative to
+        incoming values (weighted 1 - damping). This is done in order
+        to avoid numerical oscillations when updating these
+        values (messages).

     max_iter : int, optional, default: 200
         Maximum number of iterations.

From 2cc0673f118fdf1afb4708d96e6c51cb1533549a Mon Sep 17 00:00:00 2001
From: Sebastin Santy
Date: Thu, 13 Jul 2017 21:13:02 +0530
Subject: [PATCH 02/86] covariance.graph_lasso does not pass eps to
 linear_model.lars_path (#9346)

---
 sklearn/covariance/graph_lasso_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py
index 3345f5193e598..2cae73de9b6c2 100644
--- a/sklearn/covariance/graph_lasso_.py
+++ b/sklearn/covariance/graph_lasso_.py
@@ -221,7 +221,7 @@ def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
                 _, _, coefs = lars_path(
                     sub_covariance, row, Xy=row, Gram=sub_covariance,
                     alpha_min=alpha / (n_features - 1), copy_Gram=True,
-                    method='lars', return_path=False)
+                    eps=eps, method='lars', return_path=False)
             # Update the precision matrix
             precision_[idx, idx] = (
                 1.
/ (covariance_[idx, idx]

From 66ef768c60364439bad06b2166bf3a10b1d9a433 Mon Sep 17 00:00:00 2001
From: pravarmahajan
Date: Thu, 13 Jul 2017 12:13:26 -0700
Subject: [PATCH 03/86] [MRG+1] Deprecating the use of size_threshold
 parameter in manhattan_distances (#9295)

* deprecating size_threshold in manhattan_distances

* fixing a minor pep8 error

* version number for deprecation
---
 sklearn/metrics/pairwise.py            | 7 ++++++-
 sklearn/metrics/tests/test_pairwise.py | 9 +++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 4e82328f6fc53..0fa3ad793524a 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -11,6 +11,7 @@
 import itertools
 from functools import partial
+import warnings

 import numpy as np
 from scipy.spatial import distance
@@ -467,7 +468,7 @@ def pairwise_distances_argmin(X, Y, axis=1, metric="euclidean",

 def manhattan_distances(X, Y=None, sum_over_features=True,
-                        size_threshold=5e8):
+                        size_threshold=None):
     """ Compute the L1 distances between the vectors in X and Y.

     With sum_over_features equal to False it returns the componentwise
@@ -520,6 +521,10 @@ def manhattan_distances(X, Y=None, sum_over_features=True,
     array([[ 1.,  1.],
            [ 1.,  1.]]...)
     """
+    if size_threshold is not None:
+        warnings.warn('Use of the "size_threshold" parameter is deprecated '
+                      'in 0.19 and it will be removed in version '
+                      '0.21 of scikit-learn', DeprecationWarning)
     X, Y = check_pairwise_arrays(X, Y)

     if issparse(X) or issparse(Y):
diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py
index d8b64b58ca481..242523034e7af 100644
--- a/sklearn/metrics/tests/test_pairwise.py
+++ b/sklearn/metrics/tests/test_pairwise.py
@@ -12,6 +12,7 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raises_regexp
 from sklearn.utils.testing import assert_true
+from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import ignore_warnings

 from sklearn.externals.six import iteritems
@@ -74,10 +75,10 @@ def test_pairwise_distances():
     assert_equal(S.shape[0], X.shape[0])
     assert_equal(S.shape[1], Y.shape[0])
     assert_array_almost_equal(S, S2)
-    # Low-level function for manhattan can divide in blocks to avoid
-    # using too much memory during the broadcasting
-    S3 = manhattan_distances(X, Y, size_threshold=10)
-    assert_array_almost_equal(S, S3)
+    # Using size_threshold argument should raise
+    # a deprecation warning
+    assert_warns(DeprecationWarning,
+                 manhattan_distances, X, Y, size_threshold=10)
     # Test cosine as a string metric versus cosine callable
     # The string "cosine" uses sklearn.metric,
     # while the function cosine is scipy.spatial

From 981af69a383e63664fc0d578410b9933588fbace Mon Sep 17 00:00:00 2001
From: Sebastin Santy
Date: Fri, 14 Jul 2017 03:18:04 +0530
Subject: [PATCH 04/86] [MRG + 1] Too few arguments in formatting call (#9298)

* Too few arguments in formatting call

* Add test

* Covered with tests
---
 sklearn/ensemble/bagging.py            | 4 ++--
 sklearn/ensemble/tests/test_bagging.py | 8 ++++++++
 sklearn/multiclass.py                  | 2 +-
 sklearn/tests/test_multiclass.py       | 3 +++
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
index cc7e1b95e89b3..7ea3030bdf120 100644
--- a/sklearn/ensemble/bagging.py
+++ b/sklearn/ensemble/bagging.py
@@ -773,8 +773,8 @@ def decision_function(self, X):

         if self.n_features_ != X.shape[1]:
             raise ValueError("Number of features of the model must "
-                             "match the 
input. Model n_features is {1} and " - "input n_features is {2} " + "match the input. Model n_features is {0} and " + "input n_features is {1} " "".format(self.n_features_, X.shape[1])) # Parallel loop diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index c0a46d6c15036..e71462daa3a14 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -19,6 +19,7 @@ from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message +from sklearn.utils.testing import assert_raise_message from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.model_selection import GridSearchCV, ParameterGrid @@ -449,6 +450,13 @@ def test_parallel_classification(): decisions2 = ensemble.decision_function(X_test) assert_array_almost_equal(decisions1, decisions2) + X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1)))) + assert_raise_message(ValueError, "Number of features of the model " + "must match the input. Model n_features is {0} " + "and input n_features is {1} " + "".format(X_test.shape[1], X_err.shape[1]), + ensemble.decision_function, X_err) + ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'), n_jobs=1, random_state=0).fit(X_train, y_train) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 3ca3b1ad42a28..a8510cf0a0a85 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -721,7 +721,7 @@ def fit(self, X, y): """ X, y = check_X_y(X, y) if self.code_size <= 0: - raise ValueError("code_size should be greater than 0, got {1}" + raise ValueError("code_size should be greater than 0, got {0}" "".format(self.code_size)) _check_estimator(self.estimator) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 7008fff41aaa1..45222a1c12a68 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -704,6 +704,9 @@ def test_ecoc_float_y(): ovo = OutputCodeClassifier(LinearSVC()) assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y) + ovo = OutputCodeClassifier(LinearSVC(), code_size=-1) + assert_raise_message(ValueError, "code_size should be greater than 0," + " got -1", ovo.fit, X, y) def test_pairwise_indices(): From 1a4e37c953fdf946bc014b7fd47bcf4cfd6a3db4 Mon Sep 17 00:00:00 2001 From: Minghui Liu Date: Sat, 15 Jul 2017 07:32:14 -0700 Subject: [PATCH 05/86] [MRG+1] supress deprecation warnings for non_negative option (#9356) * supress deprecation warnings for non_negative option * more non_negative option deprecation warnings * fix flake8 warning --- sklearn/feature_extraction/tests/test_feature_hasher.py | 1 + sklearn/feature_extraction/tests/test_text.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index 0204910607f32..d258625897e27 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -20,6 +20,7 @@ def test_feature_hasher_dicts(): assert_array_equal(X1.toarray(), X2.toarray()) +@ignore_warnings(category=DeprecationWarning) def test_feature_hasher_strings(): # mix byte and Unicode strings; note that "foo" is a duplicate in row 0 raw_X = [["foo", "bar", "baz", "foo".encode("ascii")], diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 
de6674646c981..11060007b8355 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -28,7 +28,8 @@ assert_not_equal, assert_almost_equal, assert_in, assert_less, assert_greater, assert_warns_message, assert_raise_message, - clean_warning_registry, SkipTest) + clean_warning_registry, ignore_warnings, + SkipTest) from collections import defaultdict, Mapping from functools import partial @@ -480,6 +481,7 @@ def test_tfidf_vectorizer_setters(): assert_true(tv._tfidf.sublinear_tf) +@ignore_warnings(category=DeprecationWarning) def test_hashing_vectorizer(): v = HashingVectorizer() X = v.transform(ALL_FOOD_DOCS) @@ -651,6 +653,7 @@ def test_count_binary_occurrences(): assert_equal(X_sparse.dtype, np.float32) +@ignore_warnings(category=DeprecationWarning) def test_hashed_binary_occurrences(): # by default multiple occurrences are counted as longs test_data = ['aaabc', 'abbde'] @@ -784,6 +787,7 @@ def test_vectorizer_pipeline_cross_validation(): assert_array_equal(cv_scores, [1., 1., 1.]) +@ignore_warnings(category=DeprecationWarning) def test_vectorizer_unicode(): # tests that the count vectorizer works with cyrillic. document = ( From bc40aae850c32177b452f6a4eccc20a05bfdd3d4 Mon Sep 17 00:00:00 2001 From: Melanie Goetz Date: Sat, 15 Jul 2017 16:31:20 -0500 Subject: [PATCH 06/86] Adding note to the docstring that BayesianGaussianMixture parameter weight_concentration_prior is commonly called gamma in the literature, per https://github.com/scikit-learn/scikit-learn/issues/8631 (#9371) [MRG+2] BayesianGaussianMixture docstring change: weight_concentration_prior is commonly called gamma --- sklearn/mixture/bayesian_mixture.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/bayesian_mixture.py b/sklearn/mixture/bayesian_mixture.py index 51c57c7c475a1..642c0aade30d0 100644 --- a/sklearn/mixture/bayesian_mixture.py +++ b/sklearn/mixture/bayesian_mixture.py @@ -131,7 +131,8 @@ class BayesianGaussianMixture(BaseMixture): weight_concentration_prior : float | None, optional. The dirichlet concentration of each component on the weight - distribution (Dirichlet). The higher concentration puts more mass in + distribution (Dirichlet). This is commonly called gamma in the + literature. The higher concentration puts more mass in the center and will lead to more components being active, while a lower concentration parameter will lead to more mass at the edge of the mixture weights simplex. The value of the parameter must be greater From 7330bde29431032707a28015459208b1170ce172 Mon Sep 17 00:00:00 2001 From: Vlad Niculae Date: Sat, 15 Jul 2017 18:26:52 -0400 Subject: [PATCH 07/86] [MRG + 1] DOC developer quality of life notes (#9082) * DOC developer quality of life notes * rename debugging.rst to tips.rst * more tips, better internal consistency --- doc/developers/debugging.rst | 51 --------------- doc/developers/index.rst | 2 +- doc/developers/tips.rst | 119 +++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 52 deletions(-) delete mode 100644 doc/developers/debugging.rst create mode 100644 doc/developers/tips.rst diff --git a/doc/developers/debugging.rst b/doc/developers/debugging.rst deleted file mode 100644 index f3e28110f1da8..0000000000000 --- a/doc/developers/debugging.rst +++ /dev/null @@ -1,51 +0,0 @@ -.. 
_developers-debugging: - -============================== -Developers' Tips for Debugging -============================== - -Memory errors: debugging Cython with valgrind -============================================= - -While python/numpy's built-in memory management is relatively robust, it can -lead to performance penalties for some routines. For this reason, much of -the high-performance code in scikit-learn in written in cython. This -performance gain comes with a tradeoff, however: it is very easy for memory -bugs to crop up in cython code, especially in situations where that code -relies heavily on pointer arithmetic. - -Memory errors can manifest themselves a number of ways. The easiest ones to -debug are often segmentation faults and related glibc errors. Uninitialized -variables can lead to unexpected behavior that is difficult to track down. -A very useful tool when debugging these sorts of errors is -valgrind_. - - -Valgrind is a command-line tool that can trace memory errors in a variety of -code. Follow these steps: - - 1. Install `valgrind`_ on your system. - - 2. Download the python valgrind suppression file: `valgrind-python.supp`_. - - 3. Follow the directions in the `README.valgrind`_ file to customize your - python suppressions. If you don't, you will have spurious output coming - related to the python interpreter instead of your own code. - - 4. Run valgrind as follows:: - - $> valgrind -v --suppressions=valgrind-python.supp python my_test_script.py - -.. _valgrind: http://valgrind.org -.. _`README.valgrind`: http://svn.python.org/projects/python/trunk/Misc/README.valgrind -.. _`valgrind-python.supp`: http://svn.python.org/projects/python/trunk/Misc/valgrind-python.supp - - -The result will be a list of all the memory-related errors, which reference -lines in the C-code generated by cython from your .pyx file. If you examine -the referenced lines in the .c file, you will see comments which indicate the -corresponding location in your .pyx source file. Hopefully the output will -give you clues as to the source of your memory error. - -For more information on valgrind and the array of options it has, see the -tutorials and documentation on the `valgrind web site `_. diff --git a/doc/developers/index.rst b/doc/developers/index.rst index 5ac2d4f202bb6..4463bf50d8b50 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -10,7 +10,7 @@ Developer's Guide .. toctree:: contributing - debugging + tips utilities performance advanced_installation diff --git a/doc/developers/tips.rst b/doc/developers/tips.rst new file mode 100644 index 0000000000000..bbf46965d379c --- /dev/null +++ b/doc/developers/tips.rst @@ -0,0 +1,119 @@ +.. _developers-tips: + +=========================== +Developers' Tips and Tricks +=========================== + +Productivity and sanity-preserving tips +======================================= + +In this section we gather some useful advice and tools that may increase your +quality-of-life when reviewing pull requests, running unit tests, and so forth. +Some of these tricks consist of userscripts that require a browser extension +such as `TamperMonkey`_ or `GreaseMonkey`_; to set up userscripts you must have +one of these extensions installed, enabled and running. We provide userscripts +as GitHub gists; to install them, click on the "Raw" button on the gist page. + +.. _TamperMonkey: https://tampermonkey.net +.. 
_GreaseMonkey: http://www.greasespot.net
+
+Viewing the rendered HTML documentation for a pull request
+----------------------------------------------------------
+
+We use CircleCI to build the HTML documentation for every pull request. To
+access that documentation, we provide a redirect as described in the
+:ref:`documentation section of the contributor guide
+`. Instead of typing the address by hand, we provide a
+`userscript `_
+that adds a button to every PR. After installing the userscript, navigate to any
+GitHub PR; a new button labeled "See CircleCI doc for this PR" should appear in
+the top-right area.
+
+Folding and unfolding outdated diffs on pull requests
+-----------------------------------------------------
+
+GitHub hides discussions on PRs when the corresponding lines of code have been
+changed in the meantime. This `userscript
+`_ provides a button to
+unfold all such hidden discussions at once, so you can catch up.
+
+Checking out pull requests as remote-tracking branches
+------------------------------------------------------
+
+In your local fork, add to your ``.git/config``, under the ``[remote
+"upstream"]`` heading, the line::
+
+    fetch = +refs/pull/*/head:refs/remotes/upstream/pr/*
+
+You may then use ``git checkout pr/PR_NUMBER`` to navigate to the code of the
+pull-request with the given number. (`Read more in this gist.
+`_)
+
+Display code coverage in pull requests
+--------------------------------------
+
+To overlay the code coverage reports generated by the CodeCov continuous
+integration, consider `this browser extension
+`_. The coverage of each line
+will be displayed as a color background behind the line number.
+
+Useful pytest aliases and flags
+-------------------------------
+
+We recommend using pytest to run unit tests. When a unit test fails, the
+following tricks can make debugging easier:
+
+ 1. The command line argument ``pytest -l`` instructs pytest to print the local
+    variables when a failure occurs.
+
+ 2. The argument ``pytest --pdb`` drops into the Python debugger on failure. To
+    instead drop into the rich IPython debugger ``ipdb``, you may set up a
+    shell alias to::
+
+        pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no
+
+Debugging memory errors in Cython with valgrind
+===============================================
+
+While python/numpy's built-in memory management is relatively robust, it can
+lead to performance penalties for some routines. For this reason, much of
+the high-performance code in scikit-learn is written in cython. This
+performance gain comes with a tradeoff, however: it is very easy for memory
+bugs to crop up in cython code, especially in situations where that code
+relies heavily on pointer arithmetic.
+
+Memory errors can manifest themselves in a number of ways. The easiest ones to
+debug are often segmentation faults and related glibc errors. Uninitialized
+variables can lead to unexpected behavior that is difficult to track down.
+A very useful tool when debugging these sorts of errors is
+valgrind_.
+
+
+Valgrind is a command-line tool that can trace memory errors in a variety of
+code. Follow these steps:
+
+  1. Install `valgrind`_ on your system.
+
+  2. Download the python valgrind suppression file: `valgrind-python.supp`_.
+
+  3. Follow the directions in the `README.valgrind`_ file to customize your
+     python suppressions. If you don't, you will have spurious output
+     related to the python interpreter instead of your own code.
+
+  4. 
Run valgrind as follows:: + + $> valgrind -v --suppressions=valgrind-python.supp python my_test_script.py + +.. _valgrind: http://valgrind.org +.. _`README.valgrind`: http://svn.python.org/projects/python/trunk/Misc/README.valgrind +.. _`valgrind-python.supp`: http://svn.python.org/projects/python/trunk/Misc/valgrind-python.supp + + +The result will be a list of all the memory-related errors, which reference +lines in the C-code generated by cython from your .pyx file. If you examine +the referenced lines in the .c file, you will see comments which indicate the +corresponding location in your .pyx source file. Hopefully the output will +give you clues as to the source of your memory error. + +For more information on valgrind and the array of options it has, see the +tutorials and documentation on the `valgrind web site `_. From f91d2261d315f8a37104bc167dea1bb667e96b83 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 15 Jul 2017 18:09:06 -0500 Subject: [PATCH 08/86] minor sphinx fixes (#9370) --- doc/modules/model_evaluation.rst | 4 ---- doc/modules/multiclass.rst | 5 +++-- doc/whats_new.rst | 4 ++-- sklearn/multioutput.py | 8 +++----- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 813a39339e848..d20716c528697 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -670,10 +670,6 @@ binary classification and multilabel indicator format. for an example of :func:`precision_recall_curve` usage to evaluate classifier output quality. - * See :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_recovery.py` - for an example of :func:`precision_recall_curve` usage to select - features for sparse linear models. - Binary classification ^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst index 5094372aca960..983fd416b5a05 100644 --- a/doc/modules/multiclass.rst +++ b/doc/modules/multiclass.rst @@ -353,7 +353,7 @@ Classifier Chain Classifier chains (see :class:`ClassifierChain`) are a way of combining a number of binary classifiers into a single multi-label model that is capable - of exploiting correlations among targets. +of exploiting correlations among targets. For a multi-label classification problem with N classes, N binary classifiers are assigned an integer between 0 and N-1. These integers @@ -373,5 +373,6 @@ typically many randomly ordered chains are fit and their predictions are averaged together. .. topic:: References: + Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, - "Classifier Chains for Multi-label Classification", 2009. \ No newline at end of file + "Classifier Chains for Multi-label Classification", 2009. diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 9e89422fde331..57b331cab8700 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -4348,7 +4348,7 @@ Highlights - :ref:`out_of_bag` of generalization error for :ref:`ensemble` by `Andreas Müller`_. - - :ref:`randomized_l1`: Randomized sparse linear models for feature + - Randomized sparse linear models for feature selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ - :ref:`label_propagation` for semi-supervised learning, by Clay @@ -4809,7 +4809,7 @@ Changelog `Mathieu Blondel`_ and `Lars Buitinck`_ - Documentation improvements: thumbnails in - :ref:`example gallery ` by `Fabian Pedregosa`_. + example gallery by `Fabian Pedregosa`_. - Important bugfixes in :ref:`svm` module (segfaults, bad performance) by `Fabian Pedregosa`_. 
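The classifier chain scheme described in the ``multiclass.rst`` hunk above can be exercised end to end. The sketch below is illustrative only and is not part of any patch in this series; it assumes nothing beyond the public :class:`sklearn.multioutput.ClassifierChain` API added in 0.19, whose ``order='random'`` and ``random_state`` parameters it uses::

    import numpy as np
    from sklearn.datasets import make_multilabel_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.multioutput import ClassifierChain

    # Small multi-label problem: 100 samples, 5 binary targets.
    X, Y = make_multilabel_classification(n_samples=100, n_classes=5,
                                          random_state=0)

    # Fit several chains with random label orderings and average their
    # binary predictions, as the documentation above recommends.
    chains = [ClassifierChain(LogisticRegression(), order='random',
                              random_state=i) for i in range(10)]
    for chain in chains:
        chain.fit(X, Y)

    Y_soft = np.mean([chain.predict(X) for chain in chains], axis=0)
    print(Y_soft.shape)  # (100, 5); averaged votes lie in [0, 1]

Averaging the hard predictions of the ensemble yields a soft vote per label, which can then be thresholded at 0.5 to recover a binary indicator matrix.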
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 6906d95869f2b..a84a6ce36b218 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -14,8 +14,6 @@ # # License: BSD 3 clause -from abc import ABCMeta - import numpy as np import scipy.sparse as sp from abc import ABCMeta, abstractmethod @@ -309,7 +307,7 @@ class MultiOutputClassifier(MultiOutputEstimator, ClassifierMixin): Attributes ---------- - estimators_ : list of `n_output` estimators + estimators_ : list of ``n_output`` estimators Estimators used for predictions. """ @@ -420,7 +418,7 @@ class ClassifierChain(BaseEstimator): Attributes ---------- classes_ : list - A list of arrays of length len(estimators_) containing the + A list of arrays of length ``len(estimators_)`` containing the class labels for each estimator in the chain. estimators_ : list @@ -456,7 +454,7 @@ def fit(self, X, Y): self : object Returns self. """ - X, Y = check_X_y(X, Y, multi_output=True, accept_sparse=True) + X, Y = check_X_y(X, Y, multi_output=True, accept_sparse=True) random_state = check_random_state(self.random_state) check_array(X, accept_sparse=True) From 8282e4a46b864449d6639e09d3beeda9dfcddecc Mon Sep 17 00:00:00 2001 From: Dmitry Petrov Date: Sat, 15 Jul 2017 23:54:24 -0500 Subject: [PATCH 09/86] [MRG+3] Added examples to RandomForestClassifier and RandomForestRegressor (#9368) * added examples to RandomForestClassifier and RandomForestRegressor * changed example for RandomForestClassifier using make_classification * changed example for RandomForestRegressor using make_regression * made more clear which features are important in examples --- sklearn/ensemble/forest.py | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 51792383eb0cb..53538866be1fc 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -922,6 +922,27 @@ class labels (multi-output problem). was never left out during the bootstrap. In this case, `oob_decision_function_` might contain NaN. + Examples + -------- + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.datasets import make_classification + >>> + >>> X, y = make_classification(n_samples=1000, n_features=4, + ... n_informative=2, n_redundant=0, + ... random_state=0, shuffle=False) + >>> clf = RandomForestClassifier(max_depth=2, random_state=0) + >>> clf.fit(X, y) + RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', + max_depth=2, max_features='auto', max_leaf_nodes=None, + min_impurity_decrease=0.0, min_impurity_split=None, + min_samples_leaf=1, min_samples_split=2, + min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1, + oob_score=False, random_state=0, verbose=0, warm_start=False) + >>> print(clf.feature_importances_) + [ 0.17287856 0.80608704 0.01884792 0.00218648] + >>> print(clf.predict([[0, 0, 0, 0]])) + [1] + Notes ----- The default values for the parameters controlling the size of the trees @@ -1142,6 +1163,26 @@ class RandomForestRegressor(ForestRegressor): oob_prediction_ : array of shape = [n_samples] Prediction computed with out-of-bag estimate on the training set. + Examples + -------- + >>> from sklearn.ensemble import RandomForestRegressor + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=4, n_informative=2, + ... 
random_state=0, shuffle=False)
+    >>> regr = RandomForestRegressor(max_depth=2, random_state=0)
+    >>> regr.fit(X, y)
+    RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
+               max_features='auto', max_leaf_nodes=None,
+               min_impurity_decrease=0.0, min_impurity_split=None,
+               min_samples_leaf=1, min_samples_split=2,
+               min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
+               oob_score=False, random_state=0, verbose=0, warm_start=False)
+    >>> print(regr.feature_importances_)
+    [ 0.17339552  0.81594114  0.          0.01066333]
+    >>> print(regr.predict([[0, 0, 0, 0]]))
+    [-2.50699856]
+
     Notes
     -----
     The default values for the parameters controlling the size of the trees

From 8d6439b03b56eb81e5a988e46eff6921f0c5aec6 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Mon, 17 Jul 2017 11:27:02 +1000
Subject: [PATCH 10/86] DOC markup fixes and grammar

---
 doc/modules/cross_validation.rst | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index ab7d2227447b1..a242cf9ab62bb 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -275,7 +275,7 @@ validation strategies.
 Cross-validation iterators for i.i.d. data
 ==========================================

-Assuming that some data is Independent Identically Distributed (i.i.d.) is
+Assuming that some data is Independent and Identically Distributed (i.i.d.) is
 making the assumption that all samples stem from the same generative process
 and that the generative process is assumed to have no memory of past generated
 samples.
@@ -287,10 +287,10 @@ The following cross-validators can be used in such cases.
 While i.i.d. data is a common assumption in machine learning theory, it rarely
 holds in practice. If one knows that the samples have been generated using a
 time-dependent process, it's safer to
-use a `time-series aware cross-validation scheme `
+use a :ref:`time-series aware cross-validation scheme `.
 Similarly if we know that the generative process has a group structure
 (samples collected from different subjects, experiments, measurement
-devices) it safer to use `group-wise cross-validation `.
+devices) it is safer to use :ref:`group-wise cross-validation `.


 K-fold
@@ -613,8 +613,6 @@ Example of Leave-2-Group Out::
 Group Shuffle Split
 -------------------

-:class:`GroupShuffleSplit`
-
 The :class:`GroupShuffleSplit` iterator behaves as a combination of
 :class:`ShuffleSplit` and :class:`LeavePGroupsOut`, and generates a
 sequence of randomized partitions in which a subset of groups are held

From cb359807c0a86f4635a2f318e22363a402c32550 Mon Sep 17 00:00:00 2001
From: Dmitry Petrov
Date: Sun, 16 Jul 2017 07:18:47 -0500
Subject: [PATCH 11/86] added examples to docstrings of
 PassiveAgressiveClassifier and PassiveAggresiveRegressor (#9373)

---
 sklearn/linear_model/passive_aggressive.py | 38 ++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/sklearn/linear_model/passive_aggressive.py b/sklearn/linear_model/passive_aggressive.py
index 183049e4fdb55..a82b1c12ffdb6 100644
--- a/sklearn/linear_model/passive_aggressive.py
+++ b/sklearn/linear_model/passive_aggressive.py
@@ -105,6 +105,25 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
         The actual number of iterations to reach the stopping criterion.
         For multiclass fits, it is the maximum over every binary fit.
+ Examples + -------- + >>> from sklearn.linear_model import PassiveAggressiveClassifier + >>> from sklearn.datasets import make_classification + >>> + >>> X, y = make_classification(n_features=4, random_state=0) + >>> clf = PassiveAggressiveClassifier(random_state=0) + >>> clf.fit(X, y) + PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None, + fit_intercept=True, loss='hinge', max_iter=5, n_iter=None, + n_jobs=1, random_state=0, shuffle=True, tol=None, verbose=0, + warm_start=False) + >>> print(clf.coef_) + [[ 0.49324685 1.0552176 1.49519589 1.33798314]] + >>> print(clf.intercept_) + [ 2.18438388] + >>> print(clf.predict([[0, 0, 0, 0]])) + [1] + See also -------- @@ -291,6 +310,25 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): n_iter_ : int The actual number of iterations to reach the stopping criterion. + Examples + -------- + >>> from sklearn.linear_model import PassiveAggressiveRegressor + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=4, random_state=0) + >>> regr = PassiveAggressiveRegressor(random_state=0) + >>> regr.fit(X, y) + PassiveAggressiveRegressor(C=1.0, average=False, epsilon=0.1, + fit_intercept=True, loss='epsilon_insensitive', max_iter=5, + n_iter=None, random_state=0, shuffle=True, tol=None, + verbose=0, warm_start=False) + >>> print(regr.coef_) + [ 20.48736655 34.18818427 67.59122734 87.94731329] + >>> print(regr.intercept_) + [-0.02306214] + >>> print(regr.predict([[0, 0, 0, 0]])) + [-0.02306214] + See also -------- From 807b5267fefa6a71dd93966dacc82980b905436e Mon Sep 17 00:00:00 2001 From: Dmitry Petrov Date: Sun, 16 Jul 2017 07:19:35 -0500 Subject: [PATCH 12/86] added examples to docstrings of LinearSVC and LinearSVR (#9375) --- sklearn/svm/classes.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 4833042827361..7c6642a504ad1 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -108,6 +108,24 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, intercept_ : array, shape = [1] if n_classes == 2 else [n_classes] Constants in decision function. + Examples + -------- + >>> from sklearn.svm import LinearSVC + >>> from sklearn.datasets import make_classification + >>> X, y = make_classification(n_features=4, random_state=0) + >>> clf = LinearSVC(random_state=0) + >>> clf.fit(X, y) + LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, + intercept_scaling=1, loss='squared_hinge', max_iter=1000, + multi_class='ovr', penalty='l2', random_state=0, tol=0.0001, + verbose=0) + >>> print(clf.coef_) + [[ 0.08551385 0.39414796 0.49847831 0.37513797]] + >>> print(clf.intercept_) + [ 0.28418066] + >>> print(clf.predict([[0, 0, 0, 0]])) + [1] + Notes ----- The underlying C implementation uses a random number generator to @@ -302,6 +320,22 @@ class LinearSVR(LinearModel, RegressorMixin): intercept_ : array, shape = [1] if n_classes == 2 else [n_classes] Constants in decision function. 
+ Examples + -------- + >>> from sklearn.svm import LinearSVR + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(n_features=4, random_state=0) + >>> regr = LinearSVR(random_state=0) + >>> regr.fit(X, y) + LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True, + intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000, + random_state=0, tol=0.0001, verbose=0) + >>> print(regr.coef_) + [ 16.35750999 26.91499923 42.30652207 60.47843124] + >>> print(regr.intercept_) + [-4.29756543] + >>> print(regr.predict([[0, 0, 0, 0]])) + [-4.29756543] See also -------- From a92fc7ac4097246f21df01c9651e18d57b7d4994 Mon Sep 17 00:00:00 2001 From: Balakumaran Manoharan Date: Sun, 16 Jul 2017 07:20:45 -0500 Subject: [PATCH 13/86] [MRG+1] copy not passed from linear_model/base.py:_pre_fit to _preprocess_data (#9347) * copy not passed from linear_model/base.py:_pre_fit to _preprocess_data in the sparse case * Pass copy as False for sparse matrix to _preprocess_data * Add comment with reason for copy=False --- sklearn/linear_model/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 2d003429815c9..08a18deef577a 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -520,10 +520,11 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy): n_samples, n_features = X.shape if sparse.isspmatrix(X): + # copy was not done as X is not modified inplace when X is sparse precompute = False X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, fit_intercept=fit_intercept, normalize=normalize, - return_mean=True) + copy=False, return_mean=True) else: # copy was done in fit if necessary X, y, X_offset, y_offset, X_scale = _preprocess_data( From b2fd6835e79708d783a819b581dee37f95e87dad Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Sun, 16 Jul 2017 14:33:29 +0200 Subject: [PATCH 14/86] misc --- sklearn/linear_model/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 08a18deef577a..6bcdd624083e9 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -105,8 +105,8 @@ def sparse_center_data(X, y, fit_intercept, normalize=False): return X, y, X_offset, y_offset, X_std -@deprecated("center_data was deprecated in version 0.18 and will be removed in " - "0.20. Use utilities in preprocessing.data instead") +@deprecated("center_data was deprecated in version 0.18 and will be removed " + "in 0.20. 
Use utilities in preprocessing.data instead") def center_data(X, y, fit_intercept, normalize=False, copy=True, sample_weight=None): """ @@ -520,7 +520,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy): n_samples, n_features = X.shape if sparse.isspmatrix(X): - # copy was not done as X is not modified inplace when X is sparse + # copy is not needed here as X is not modified inplace when X is sparse precompute = False X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, fit_intercept=fit_intercept, normalize=normalize, From a19f0df0c4b570a7558d389ba953387cd76d723e Mon Sep 17 00:00:00 2001 From: "(Venkat) Raghav, Rajagopalan" Date: Sun, 16 Jul 2017 17:58:59 -0500 Subject: [PATCH 15/86] [MRG] Add few more tests + Documentation for re-entrant cross-validation estimators (#7823) * DOC Add NOTE that unless random_state is set, split will not be identical * TST use np.testing.assert_equal for nested lists/arrays * TST Make sure cv param can be a generator * DOC rank_ becomes a link when rendered * Use test_... * Remove blank line; Add if shuffle is True * Fix tests * Explicitly test for GeneratorType * TST Add the else clause * TST Add comment on usage of np.testing.assert_array_equal * TYPO * MNT Remove if ; * Address Joel's comments * merge the identical points in doc * DOC address Andy's comments * Move comment to before the check for generator type --- doc/modules/cross_validation.rst | 3 +- sklearn/model_selection/_search.py | 2 +- sklearn/model_selection/_split.py | 41 +++++++++++++++++-- sklearn/model_selection/tests/test_search.py | 42 +++++++++++++++----- sklearn/model_selection/tests/test_split.py | 7 +++- 5 files changed, 76 insertions(+), 19 deletions(-) diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index a242cf9ab62bb..a43c5cf675cb8 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -723,8 +723,7 @@ to shuffle the data indices before splitting them. Note that: shuffling will be different every time ``KFold(..., shuffle=True)`` is iterated. However, ``GridSearchCV`` will use the same shuffling for each set of parameters validated by a single call to its ``fit`` method. -* To ensure results are repeatable (*on the same platform*), use a fixed value - for ``random_state``. +* To get identical results for each split, set ``random_state`` to an integer. Cross validation and model selection ==================================== diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 17c588c293eda..db41c19218fa7 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -924,7 +924,7 @@ class GridSearchCV(BaseSearchCV): For instance the below given table +------------+-----------+------------+-----------------+---+---------+ - |param_kernel|param_gamma|param_degree|split0_test_score|...|..rank...| + |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...| +============+===========+============+=================+===+=========+ | 'poly' | -- | 2 | 0.8 |...| 2 | +------------+-----------+------------+-----------------+---+---------+ diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 4bcc0ae1c5349..386d439184117 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -83,6 +83,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. 
+ + Note + ---- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ X, y, groups = indexable(X, y, groups) indices = np.arange(_num_samples(X)) @@ -308,6 +314,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. + + Note + ---- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ X, y, groups = indexable(X, y, groups) n_samples = _num_samples(X) @@ -567,10 +579,7 @@ def __init__(self, n_splits=3, shuffle=False, random_state=None): super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state) def _make_test_folds(self, X, y=None): - if self.shuffle: - rng = check_random_state(self.random_state) - else: - rng = self.random_state + rng = self.random_state y = np.asarray(y) n_samples = y.shape[0] unique_y, y_inversed = np.unique(y, return_inverse=True) @@ -645,6 +654,12 @@ def split(self, X, y, groups=None): test : ndarray The testing set indices for that split. + + Note + ---- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ y = check_array(y, ensure_2d=False, dtype=None) return super(StratifiedKFold, self).split(X, y, groups) @@ -726,6 +741,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. + + Note + ---- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ X, y, groups = indexable(X, y, groups) n_samples = _num_samples(X) @@ -1164,6 +1185,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. + + Note + ---- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ X, y, groups = indexable(X, y, groups) for train, test in self._iter_indices(X, y, groups): @@ -1578,6 +1605,12 @@ def split(self, X, y, groups=None): test : ndarray The testing set indices for that split. + + Note + ---- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. 
""" y = check_array(y, ensure_2d=False, dtype=None) return super(StratifiedShuffleSplit, self).split(X, y, groups) diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 9dfd49714ee08..5e667727d9dda 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -7,6 +7,7 @@ from itertools import chain, product import pickle import sys +from types import GeneratorType import re import numpy as np @@ -1070,16 +1071,10 @@ def test_search_cv_results_rank_tie_breaking(): cv_results['mean_test_score'][1]) assert_almost_equal(cv_results['mean_train_score'][0], cv_results['mean_train_score'][1]) - try: - assert_almost_equal(cv_results['mean_test_score'][1], - cv_results['mean_test_score'][2]) - except AssertionError: - pass - try: - assert_almost_equal(cv_results['mean_train_score'][1], - cv_results['mean_train_score'][2]) - except AssertionError: - pass + assert_false(np.allclose(cv_results['mean_test_score'][1], + cv_results['mean_test_score'][2])) + assert_false(np.allclose(cv_results['mean_train_score'][1], + cv_results['mean_train_score'][2])) # 'min' rank should be assigned to the tied candidates assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3]) @@ -1421,6 +1416,33 @@ def test_grid_search_cv_splits_consistency(): cv=KFold(n_splits=n_splits)) gs2.fit(X, y) + # Give generator as a cv parameter + assert_true(isinstance(KFold(n_splits=n_splits, + shuffle=True, random_state=0).split(X, y), + GeneratorType)) + gs3 = GridSearchCV(LinearSVC(random_state=0), + param_grid={'C': [0.1, 0.2, 0.3]}, + cv=KFold(n_splits=n_splits, shuffle=True, + random_state=0).split(X, y)) + gs3.fit(X, y) + + gs4 = GridSearchCV(LinearSVC(random_state=0), + param_grid={'C': [0.1, 0.2, 0.3]}, + cv=KFold(n_splits=n_splits, shuffle=True, + random_state=0)) + gs4.fit(X, y) + + def _pop_time_keys(cv_results): + for key in ('mean_fit_time', 'std_fit_time', + 'mean_score_time', 'std_score_time'): + cv_results.pop(key) + return cv_results + + # Check if generators are supported as cv and + # that the splits are consistent + np.testing.assert_equal(_pop_time_keys(gs3.cv_results_), + _pop_time_keys(gs4.cv_results_)) + # OneTimeSplitter is a non-re-entrant cv where split can be called only # once if ``cv.split`` is called once per param setting in GridSearchCV.fit # the 2nd and 3rd parameter will not be evaluated as no train/test indices diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index b1bb44efe59c2..300bb8953efae 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -446,9 +446,11 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility(): for cv in (kf, skf): for data in zip((X, X2), (y, y2)): + # Test if the two splits are different + # numpy's assert_equal properly compares nested lists try: - np.testing.assert_equal(list(cv.split(*data)), - list(cv.split(*data))) + np.testing.assert_array_equal(list(cv.split(*data)), + list(cv.split(*data))) except AssertionError: pass else: @@ -1188,6 +1190,7 @@ def test_cv_iterable_wrapper(): # results kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y) kf_randomized_iter_wrapped = check_cv(kf_randomized_iter) + # numpy's assert_array_equal properly compares nested lists np.testing.assert_equal(list(kf_randomized_iter_wrapped.split(X, y)), list(kf_randomized_iter_wrapped.split(X, y))) From 28f51235c8618214d3030392677da27a29a35557 Mon Sep 17 
00:00:00 2001 From: Joel Nothman Date: Mon, 17 Jul 2017 11:41:47 +1000 Subject: [PATCH 16/86] DOC Move some things around in related projects --- doc/related_projects.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/related_projects.rst b/doc/related_projects.rst index 877a6beeed60e..70971e934ccac 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -43,9 +43,6 @@ enhance the functionality of scikit-learn's estimators. **Experimentation frameworks** -- `PyMC `_ Bayesian statistical models and - fitting algorithms. - - `REP `_ Environment for conducting data-driven research in a consistent and reproducible way @@ -222,18 +219,19 @@ Other packages useful for data analysis and machine learning. statistical models. More focused on statistical tests and less on prediction than scikit-learn. +- `PyMC `_ Bayesian statistical models and + fitting algorithms. + - `Sacred `_ Tool to help you configure, organize, log and reproduce experiments -- `gensim `_ A library for topic modelling, - document indexing and similarity retrieval - - `Seaborn `_ Visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics. - `Deep Learning `_ A curated list of deep learning software libraries. + Domain specific packages ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -243,6 +241,9 @@ Domain specific packages - `Natural language toolkit (nltk) `_ Natural language processing and some machine learning. +- `gensim `_ A library for topic modelling, + document indexing and similarity retrieval + - `NiLearn `_ Machine learning for neuro-imaging. - `AstroML `_ Machine learning for astronomy. From 4cce37441835a650019259932919ba6b9af5301a Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 17 Jul 2017 12:40:06 +1000 Subject: [PATCH 17/86] DOC Use - instead of * for bullets --- doc/whats_new.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 57b331cab8700..3469811416e03 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -49,21 +49,21 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. 
- * :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix) - * :class:`cross_decomposition.PLSRegression` + - :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix) + - :class:`cross_decomposition.PLSRegression` with ``scale=True`` (bug fix) - * :class:`ensemble.GradientBoostingClassifier` and + - :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix) - * gradient boosting ``loss='quantile'`` (bug fix) - * :class:`ensemble.IsolationForest` (bug fix) - * :class:`feature_selection.SelectFdr` (bug fix) - * :class:`linear_model.RANSACRegressor` (bug fix) - * :class:`linear_model.LassoLars` (bug fix) - * :class:`linear_model.LassoLarsIC` (bug fix) - * :class:`manifold.TSNE` (bug fix) - * :class:`semi_supervised.LabelSpreading` (bug fix) - * :class:`semi_supervised.LabelPropagation` (bug fix) - * tree based models where ``min_weight_fraction_leaf`` is used (enhancement) + - gradient boosting ``loss='quantile'`` (bug fix) + - :class:`ensemble.IsolationForest` (bug fix) + - :class:`feature_selection.SelectFdr` (bug fix) + - :class:`linear_model.RANSACRegressor` (bug fix) + - :class:`linear_model.LassoLars` (bug fix) + - :class:`linear_model.LassoLarsIC` (bug fix) + - :class:`manifold.TSNE` (bug fix) + - :class:`semi_supervised.LabelSpreading` (bug fix) + - :class:`semi_supervised.LabelPropagation` (bug fix) + - tree based models where ``min_weight_fraction_leaf`` is used (enhancement) Details are listed in the changelog below. From 5d0b93e3f04a87ff6d3842aa42c1b19872926c86 Mon Sep 17 00:00:00 2001 From: Taehoon Lee Date: Mon, 17 Jul 2017 14:51:46 +0900 Subject: [PATCH 18/86] DOC Fix typos (#9386) --- doc/tutorial/machine_learning_map/svg2imagemap.py | 2 +- examples/covariance/plot_covariance_estimation.py | 2 +- sklearn/mixture/gmm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/tutorial/machine_learning_map/svg2imagemap.py b/doc/tutorial/machine_learning_map/svg2imagemap.py index c2e592d5232fb..c34bf17fab0ef 100644 --- a/doc/tutorial/machine_learning_map/svg2imagemap.py +++ b/doc/tutorial/machine_learning_map/svg2imagemap.py @@ -4,7 +4,7 @@ This script converts a subset of SVG into an HTML imagemap Note *subset*. It only handles elements, for which it only pays -attention to the M and L commands. Futher, it only notices the "translate" +attention to the M and L commands. Further, it only notices the "translate" transform. 
It was written to generate the examples in the documentation for maphilight,

diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index adb57f003cfbb..d33b77d68a438 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -98,7 +98,7 @@
 # Plot results
 fig = plt.figure()
 plt.title("Regularized covariance: likelihood and shrinkage coefficient")
-plt.xlabel('Regularizaton parameter: shrinkage coefficient')
+plt.xlabel('Regularization parameter: shrinkage coefficient')
 plt.ylabel('Error: negative log-likelihood on test data')
 # range shrinkage curve
 plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")
diff --git a/sklearn/mixture/gmm.py b/sklearn/mixture/gmm.py
index 79ff8d169dcd8..2c90cb7b92fdf 100644
--- a/sklearn/mixture/gmm.py
+++ b/sklearn/mixture/gmm.py
@@ -781,7 +781,7 @@ def _validate_covars(covars, covariance_type, n_components):
                          "'spherical', 'tied', 'diag', 'full'")


-@deprecated("The functon distribute_covar_matrix_to_match_covariance_type"
+@deprecated("The function distribute_covar_matrix_to_match_covariance_type "
             "is deprecated in 0.18 and will be removed in 0.20.")
 def distribute_covar_matrix_to_match_covariance_type(
         tied_cv, covariance_type, n_components):

From fb5c85ee1b63769fe36740d9549038c808fdb09f Mon Sep 17 00:00:00 2001
From: Gael Varoquaux
Date: Mon, 17 Jul 2017 14:55:14 +0200
Subject: [PATCH 19/86] MISC: typo in rst

---
 examples/multioutput/plot_classifier_chain_yeast.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py
index af649268a6151..4fcdaaf150512 100644
--- a/examples/multioutput/plot_classifier_chain_yeast.py
+++ b/examples/multioutput/plot_classifier_chain_yeast.py
@@ -5,7 +5,7 @@
 Example of using classifier chain on a multilabel dataset.

 For this example we will use the `yeast
-http://mldata.org/repository/data/viewslug/yeast/`_ dataset which
+<http://mldata.org/repository/data/viewslug/yeast/>`_ dataset which
 contains 2417 datapoints each with 103 features and 14 possible labels. Each
 datapoint has at least one label. As a baseline we first train a logistic
 regression classifier for each of the 14 labels. To evaluate the performance

From 565bc393cefcede56e13c9366b1ed0b8d24823a7 Mon Sep 17 00:00:00 2001
From: Warut Vijitbenjaronk
Date: Mon, 17 Jul 2017 12:02:18 -0500
Subject: [PATCH 20/86] [MRG] Add Explanation of MSE vs Friedman MSE vs MAE
 criterion in Regression Tree Building (#9367)

* clarified documentation for regression tree criterion

* added explanation on doc/modules/tree.rst
---
 doc/modules/tree.rst | 5 ++++-
 sklearn/tree/tree.py | 7 +++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst
index f793c34b7f53d..3f577795e24be 100644
--- a/doc/modules/tree.rst
+++ b/doc/modules/tree.rst
@@ -481,7 +481,10 @@ Regression criteria

 If the target is a continuous value, then for node :math:`m`,
 representing a region :math:`R_m` with :math:`N_m` observations, common
-criteria to minimise are
+criteria to minimise, as well as for determining locations for future
+splits, are Mean Squared Error, which minimizes the L2 error
+using mean values at terminal nodes, and Mean Absolute Error, which
+minimizes the L1 error using median values at terminal nodes.
Mean Squared Error: diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 93db4eb98f34e..099f3da39a45b 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -879,8 +879,11 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): criterion : string, optional (default="mse") The function to measure the quality of a split. Supported criteria are "mse" for the mean squared error, which is equal to variance - reduction as feature selection criterion, and "mae" for the mean - absolute error. + reduction as feature selection criterion and minimizes the L2 loss + using the mean of each terminal node, "friedman_mse", which uses mean + squared error with Friedman's improvement score for potential splits, + and "mae" for the mean absolute error, which minimizes the L1 loss + using the median of each terminal node. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion. From e37504ce8f29da4cf6e8a162db5e2a745ffbc6d5 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 17 Jul 2017 16:47:34 -0500 Subject: [PATCH 21/86] use ignore_warnings to catch FutueWarning (#9374) --- sklearn/utils/tests/test_estimator_checks.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 8ac31764e89ad..1b3a1ea7e597a 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -6,7 +6,7 @@ from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.testing import (assert_raises_regex, assert_true, - assert_equal) + assert_equal, ignore_warnings) from sklearn.utils.estimator_checks import check_estimator from sklearn.utils.estimator_checks import set_random_state from sklearn.utils.estimator_checks import set_checking_parameters @@ -203,7 +203,9 @@ def test_check_estimator_clones(): for Estimator in [GaussianMixture, LinearRegression, RandomForestClassifier, NMF, SGDClassifier, MiniBatchKMeans]: - est = Estimator() + with ignore_warnings(category=FutureWarning): + # when 'est = SGDClassifier()' + est = Estimator() set_checking_parameters(est) set_random_state(est) # without fitting @@ -211,7 +213,9 @@ def test_check_estimator_clones(): check_estimator(est) assert_equal(old_hash, joblib.hash(est)) - est = Estimator() + with ignore_warnings(category=FutureWarning): + # when 'est = SGDClassifier()' + est = Estimator() set_checking_parameters(est) set_random_state(est) # with fitting From 54232c9df7e1d9f67d856eaf7ea373a2ade54a0b Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 18 Jul 2017 15:09:50 +1000 Subject: [PATCH 22/86] Markup in release notes --- doc/whats_new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 3469811416e03..61ee24234d8df 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -312,7 +312,7 @@ Model evaluation and meta-estimators - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` now support online learning using ``partial_fit``. - :issue: `8053` by :user:`Peng Yu `. + :issue:`8053` by :user:`Peng Yu `. - Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit` :issue:`8282` by :user:`Aman Dalmia `. 
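The difference between the ``'mse'``, ``'friedman_mse'`` and ``'mae'`` criteria documented in [PATCH 20/86] above is easiest to see on data with outliers, since ``'mse'`` leaves predict the mean of their training targets while ``'mae'`` leaves predict the median. The sketch below is illustrative only and is not part of any patch; it relies solely on the public :class:`sklearn.tree.DecisionTreeRegressor` API, which has accepted all three criterion values since 0.18::

    import numpy as np
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.RandomState(0)
    X = np.sort(rng.rand(200, 1), axis=0)
    y = np.sin(4 * X.ravel())
    y[::20] += 3 * rng.randn(10)  # outliers separate the mean and median fits

    for criterion in ('mse', 'friedman_mse', 'mae'):
        tree = DecisionTreeRegressor(criterion=criterion, max_depth=3,
                                     random_state=0).fit(X, y)
        # Leaf values are means for 'mse'/'friedman_mse' and medians for
        # 'mae', so the predictions diverge most near the injected outliers.
        print(criterion, tree.predict(X[:3]))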
From ecdc76c0b82ac2180b2643f8c3bf49a7e038f54b Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 18 Jul 2017 15:12:11 +1000 Subject: [PATCH 23/86] DOC reorder what's new paragraphs --- doc/whats_new.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 61ee24234d8df..50685087a593f 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -20,18 +20,18 @@ algorithms in existing estimators, such as multiplicative update in :class:`decomposition.NMF` and multinomial :class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``). -You can also learn faster. For instance, the :ref:`new option to cache -transformations ` in :class:`pipeline.Pipeline` makes grid -search over pipelines including slow transformations much more efficient. And -you can predict faster: if you're sure you know what you're doing, you can turn -off validating that the input is finite using :func:`config_context`. - Cross validation is now able to return the results from multiple metric evaluations. The new :func:`model_selection.cross_validate` can return many scores on the test data as well as training set performance and timings, and we have extended the ``scoring`` and ``refit`` parameters for grid/randomized search :ref:`to handle multiple metrics `. +You can also learn faster. For instance, the :ref:`new option to cache +transformations ` in :class:`pipeline.Pipeline` makes grid +search over pipelines including slow transformations much more efficient. And +you can predict faster: if you're sure you know what you're doing, you can turn +off validating that the input is finite using :func:`config_context`. + We've made some important fixes too. We've fixed a longstanding implementation error in :func:`metrics.average_precision_score`, so please be cautious with prior results reported from that function. A number of errors in the From 9aeab464b417e97dfad9f23c97b9f5858fab1495 Mon Sep 17 00:00:00 2001 From: Sharan Yalburgi Date: Tue, 18 Jul 2017 15:48:51 +0530 Subject: [PATCH 24/86] [MRG+1] - DeprecationWarning for n_components parameter for linkage_tree (#9309) * Depreciation warning for n_components in sklearn/cluster/hierarchical.py * typo fix * Whitespace fix * Update hierarchical.py * Added test * Added test-v2 * Test for deprecation * Test function name change * Updated test * Update test_hierarchical.py * fixing flake8 errors * removed blank line * modifying test * Change condition for deprecation warning * Change indentation and compare function output * fix flake8 errors * Update test_hierarchical.py --- sklearn/cluster/hierarchical.py | 6 +++++- sklearn/cluster/tests/test_hierarchical.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 29d725bd8ce54..100b25d5271f3 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -289,7 +289,7 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False): # average and complete linkage -def linkage_tree(X, connectivity=None, n_components=None, +def linkage_tree(X, connectivity=None, n_components='deprecated', n_clusters=None, linkage='complete', affinity="euclidean", return_distance=False): """Linkage agglomerative clustering based on a Feature matrix. 
@@ -368,6 +368,10 @@ def linkage_tree(X, connectivity=None, n_components=None, -------- ward_tree : hierarchical clustering with ward linkage """ + if n_components != 'deprecated': + warnings.warn("n_components was deprecated in 0.18" + "will be removed in 0.21", DeprecationWarning) + X = np.asarray(X) if X.ndim == 1: X = np.reshape(X, (-1, 1)) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 986b92e0ce9f4..b9ca301971715 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -36,6 +36,20 @@ from sklearn.utils.testing import assert_warns +def test_deprecation_of_n_components_in_linkage_tree(): + rng = np.random.RandomState(0) + X = rng.randn(50, 100) + # Test for warning of deprecation of n_components in linkage_tree + children, n_nodes, n_leaves, parent = assert_warns(DeprecationWarning, + linkage_tree, + X.T, + n_components=10) + children_t, n_nodes_t, n_leaves_t, parent_t = linkage_tree(X.T) + assert_array_equal(children, children_t) + assert_equal(n_nodes, n_nodes_t) + assert_equal(n_leaves, n_leaves_t) + assert_equal(parent, parent_t) + def test_linkage_misc(): # Misc tests on linkage rng = np.random.RandomState(42) From 6dd0ab91474697979adc459dd39afbf1bfc95be5 Mon Sep 17 00:00:00 2001 From: Breno Freitas Date: Tue, 18 Jul 2017 06:24:21 -0400 Subject: [PATCH 25/86] Pass affinity to fix connectivity in linkage tree (#9357) --- sklearn/cluster/hierarchical.py | 9 ++++---- sklearn/cluster/tests/test_hierarchical.py | 27 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 100b25d5271f3..b7560ce970b90 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -30,8 +30,7 @@ # For non fully-connected graphs -def _fix_connectivity(X, connectivity, n_components=None, - affinity="euclidean"): +def _fix_connectivity(X, connectivity, affinity): """ Fixes the connectivity matrix @@ -190,7 +189,8 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False): else: return children_, 1, n_samples, None - connectivity, n_components = _fix_connectivity(X, connectivity) + connectivity, n_components = _fix_connectivity(X, connectivity, + affinity='euclidean') if n_clusters is None: n_nodes = 2 * n_samples - 1 else: @@ -422,7 +422,8 @@ def linkage_tree(X, connectivity=None, n_components='deprecated', return children_, 1, n_samples, None, distances return children_, 1, n_samples, None - connectivity, n_components = _fix_connectivity(X, connectivity) + connectivity, n_components = _fix_connectivity(X, connectivity, + affinity=affinity) connectivity = connectivity.tocoo() # Put the diagonal to zero diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index b9ca301971715..c4534663236b0 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -532,3 +532,30 @@ def test_agg_n_clusters(): msg = ("n_clusters should be an integer greater than 0." " %s was provided." 
% str(agc.n_clusters)) assert_raise_message(ValueError, msg, agc.fit, X) + + +def test_affinity_passed_to_fix_connectivity(): + # Test that the affinity parameter is actually passed to the pairwise + # function + + size = 2 + rng = np.random.RandomState(0) + X = rng.randn(size, size) + mask = np.array([True, False, False, True]) + + connectivity = grid_to_graph(n_x=size, n_y=size, + mask=mask, return_as=np.ndarray) + + class FakeAffinity: + def __init__(self): + self.counter = 0 + + def increment(self, *args, **kwargs): + self.counter += 1 + return self.counter + + fa = FakeAffinity() + + linkage_tree(X, connectivity=connectivity, affinity=fa.increment) + + assert_equal(fa.counter, 3) From afa26da8c01abc6dec944bb6d3d9fa7d00b014e8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 18 Jul 2017 13:26:27 +0200 Subject: [PATCH 26/86] [MRG + 1] FIX gil acquisition in dist_metric (#9311) --- sklearn/neighbors/dist_metrics.pyx | 33 ++++++++++++++++++------------ 1 file changed, 20 insertions(+), 13 deletions(-) mode change 100644 => 100755 sklearn/neighbors/dist_metrics.pyx diff --git a/sklearn/neighbors/dist_metrics.pyx b/sklearn/neighbors/dist_metrics.pyx old mode 100644 new mode 100755 index 4a76a9eb63476..eb4b292dbdc38 --- a/sklearn/neighbors/dist_metrics.pyx +++ b/sklearn/neighbors/dist_metrics.pyx @@ -1093,22 +1093,29 @@ cdef class PyFuncDistance(DistanceMetric): self.func = func self.kwargs = kwargs + # in cython < 0.26, GIL was required to be acquired during definition of + # the function and inside the body of the function. This behaviour is not + # allowed in cython >= 0.26 since it is a redundant GIL acquisition. The + # only way to be back compatible is to inherit `dist` from the base class + # without GIL and called an inline `_dist` which acquire GIL. cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, - ITYPE_t size) except -1 with gil: + ITYPE_t size) nogil except -1: + return self._dist(x1, x2, size) + + cdef inline DTYPE_t _dist(self, DTYPE_t* x1, DTYPE_t* x2, + ITYPE_t size) except -1 with gil: cdef np.ndarray x1arr cdef np.ndarray x2arr - with gil: - x1arr = _buffer_to_ndarray(x1, size) - x2arr = _buffer_to_ndarray(x2, size) - d = self.func(x1arr, x2arr, **self.kwargs) - try: - # Cython generates code here that results in a TypeError - # if d is the wrong type. - return d - except TypeError: - raise TypeError("Custom distance function must accept two " - "vectors and return a float.") - + x1arr = _buffer_to_ndarray(x1, size) + x2arr = _buffer_to_ndarray(x2, size) + d = self.func(x1arr, x2arr, **self.kwargs) + try: + # Cython generates code here that results in a TypeError + # if d is the wrong type. + return d + except TypeError: + raise TypeError("Custom distance function must accept two " + "vectors and return a float.") cdef inline double fmax(double a, double b) nogil: From da16809a73bb6ec3bf06319d3c44c888ca13ab0d Mon Sep 17 00:00:00 2001 From: Dmitry Petrov Date: Tue, 18 Jul 2017 15:13:10 -0700 Subject: [PATCH 27/86] [MRG+1] Added examples to docstrings of MinMaxScaler and StandardScaler (#9380) [MRG+2] Added examples to docstrings of MinMaxScaler and StandardScaler --- sklearn/preprocessing/data.py | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index eb19494c83b75..b1c767eedb364 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -244,6 +244,24 @@ class MinMaxScaler(BaseEstimator, TransformerMixin): .. 
versionadded:: 0.17 *data_range_* + Examples + -------- + >>> from sklearn.preprocessing import MinMaxScaler + >>> + >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]] + >>> scaler = MinMaxScaler() + >>> print(scaler.fit(data)) + MinMaxScaler(copy=True, feature_range=(0, 1)) + >>> print(scaler.data_max_) + [ 1. 18.] + >>> print(scaler.transform(data)) + [[ 0. 0. ] + [ 0.25 0.25] + [ 0.5 0.5 ] + [ 1. 1. ]] + >>> print(scaler.transform([[2, 2]])) + [[ 1.5 0. ]] + See also -------- minmax_scale: Equivalent function without the estimator API. @@ -504,6 +522,24 @@ class StandardScaler(BaseEstimator, TransformerMixin): The number of samples processed by the estimator. Will be reset on new calls to fit, but increments across ``partial_fit`` calls. + Examples + -------- + >>> from sklearn.preprocessing import StandardScaler + >>> + >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]] + >>> scaler = StandardScaler() + >>> print(scaler.fit(data)) + StandardScaler(copy=True, with_mean=True, with_std=True) + >>> print(scaler.mean_) + [ 0.5 0.5] + >>> print(scaler.transform(data)) + [[-1. -1.] + [-1. -1.] + [ 1. 1.] + [ 1. 1.]] + >>> print(scaler.transform([[2, 2]])) + [[ 3. 3.]] + See also -------- scale: Equivalent function without the estimator API. From 8c08f58a563e85b6427358615c553be54c8e57c1 Mon Sep 17 00:00:00 2001 From: Balakumaran Manoharan Date: Tue, 18 Jul 2017 18:25:27 -0500 Subject: [PATCH 28/86] [MRG + 1] Fix wrong error message in StratifiedKFold (#9396) * Fix wrong error message in StratifiedKFold * Remove groups in warning message --- sklearn/model_selection/_split.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 386d439184117..85ba2c086c25c 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -586,13 +586,13 @@ def _make_test_folds(self, X, y=None): y_counts = np.bincount(y_inversed) min_groups = np.min(y_counts) if np.all(self.n_splits > y_counts): - raise ValueError("All the n_groups for individual classes" - " are less than n_splits=%d." + raise ValueError("n_splits=%d cannot be greater than the" + " number of members in each class." % (self.n_splits)) if self.n_splits > min_groups: warnings.warn(("The least populated class in y has only %d" " members, which is too few. The minimum" - " number of groups for any class cannot" + " number of members in any class cannot" " be less than n_splits=%d." % (min_groups, self.n_splits)), Warning) From f5c2a831edf59b9ee45719f4d87e1f69b49fc5f2 Mon Sep 17 00:00:00 2001 From: Balakumaran Manoharan Date: Wed, 19 Jul 2017 02:57:33 -0500 Subject: [PATCH 29/86] DOC Fix multi metric link in model selection (#9410) --- doc/modules/grid_search.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 1867a66594ad4..3851392ed2d88 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -84,7 +84,7 @@ evaluated and the best combination is retained. dataset. This is the best practice for evaluating the performance of a model with grid search. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation` + - See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` for an example of :class:`GridSearchCV` being used to evaluate multiple metrics simultaneously. @@ -183,7 +183,7 @@ the ``best_estimator_`` on the whole dataset. If the search should not be refit, set ``refit=False``. 
Leaving refit to the default value ``None`` will result in an error when using multiple metrics. -See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation` +See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` for an example usage. Composite estimators and parameter spaces From 4641cfc0d791bbd1cd09ab1958fe5e3f86099f1b Mon Sep 17 00:00:00 2001 From: Sailesh Choyal Date: Wed, 19 Jul 2017 22:34:57 +0530 Subject: [PATCH 30/86] [MRG+1] Add links for [RW2006] (#9412) * Add links for [RW2006] * Update [RW2006] reference link --- doc/modules/gaussian_process.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 7fae49349f342..94cca8999e489 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -601,12 +601,7 @@ shown in the following figure: References ---------- - * `[RW2006] - `_ - **Gaussian Processes for Machine Learning**, - Carl Eduard Rasmussen and Christopher K.I. Williams, MIT Press 2006. - Link to an official complete PDF version of the book - `here `_ . +.. [RW2006] Carl Eduard Rasmussen and Christopher K.I. Williams, "Gaussian Processes for Machine Learning", MIT Press 2006, Link to an official complete PDF version of the book `here `_ . .. currentmodule:: sklearn.gaussian_process From 1dcaeb829eb7b16e9b0c8f2e1c0eeaa91d54dacd Mon Sep 17 00:00:00 2001 From: Sebastin Santy Date: Thu, 20 Jul 2017 01:03:14 +0530 Subject: [PATCH 31/86] [MRG] Formatting error in cross_validation.rst (#9415) * Formatting error in cross_validation.rst * Formatting error in cross_validation.rst * Minor change --- doc/modules/cross_validation.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index a43c5cf675cb8..b47726979351f 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -270,7 +270,7 @@ The following sections list utilities to generate indices that can be used to generate dataset splits according to different cross validation strategies. -.. _iid_cv +.. _iid_cv: Cross-validation iterators for i.i.d. data ========================================== @@ -287,7 +287,7 @@ The following cross-validators can be used in such cases. While i.i.d. data is a common assumption in machine learning theory, it rarely holds in practice. If one knows that the samples have been generated using a time-dependent process, it's safer to -use a :ref:`time-series aware cross-validation scheme ` +use a :ref:`time-series aware cross-validation scheme ` Similarly if we know that the generative process has a group structure (samples from collected from different subjects, experiments, measurement devices) it safer to use :ref:`group-wise cross-validation `. @@ -506,7 +506,7 @@ Stratified Shuffle Split stratified splits, *i.e* which creates splits by preserving the same percentage for each target class as in the complete set. -.. _group_cv +.. _group_cv: Cross-validation iterators for grouped data. ============================================ @@ -532,11 +532,11 @@ parameter. Group k-fold ------------ -class:GroupKFold is a variation of k-fold which ensures that the same group is +:class:`GroupKFold` is a variation of k-fold which ensures that the same group is not represented in both testing and training sets. 
For example if the data is obtained from different subjects with several samples per-subject and if the model is flexible enough to learn from highly person specific features it -could fail to generalize to new subjects. class:GroupKFold makes it possible +could fail to generalize to new subjects. :class:`GroupKFold` makes it possible to detect this kind of overfitting situations. Imagine you have three subjects, each with an associated number from 1 to 3:: @@ -653,7 +653,7 @@ e.g. when searching for hyperparameters. For example, when using a validation set, set the ``test_fold`` to 0 for all samples that are part of the validation set, and to -1 for all other samples. -.. _timeseries_cv +.. _timeseries_cv: Cross validation of time series data ==================================== From 0d5d315ae3d73c60682d7b2fd6b0d6224c928aab Mon Sep 17 00:00:00 2001 From: Clement Joudet Date: Thu, 20 Jul 2017 01:01:11 +0200 Subject: [PATCH 32/86] [MRG+1] Docstring parameters improvements for cross_decomposition and discriminant_analysis (#9392) * Fixing cross_decomposition docstring parameters * Fixing discriminant analysis docstring parameters * Fixing indentation and improving error message * Fixing docstring inconsistency * Fixing capitalised parameter * Removing semantics from type specification * Moving attributes to parameters * Fixing typo * Fixing syntax for pep8 --- sklearn/cross_decomposition/pls_.py | 93 ++++++++++++++-------- sklearn/discriminant_analysis.py | 26 +++--- sklearn/tests/test_docstring_parameters.py | 25 +++--- 3 files changed, 83 insertions(+), 61 deletions(-) diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index 266aa64facfbb..8ee7a128cb93f 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -235,11 +235,11 @@ def fit(self, X, Y): Parameters ---------- X : array-like, shape = [n_samples, n_features] - Training vectors, where n_samples in the number of samples and + Training vectors, where n_samples is the number of samples and n_features is the number of predictors. - Y : array-like of response, shape = [n_samples, n_targets] - Target vectors, where n_samples in the number of samples and + Y : array-like, shape = [n_samples, n_targets] + Target vectors, where n_samples is the number of samples and n_targets is the number of response variables. """ @@ -374,13 +374,13 @@ def transform(self, X, Y=None, copy=True): Parameters ---------- - X : array-like of predictors, shape = [n_samples, p] - Training vectors, where n_samples in the number of samples and - p is the number of predictors. + X : array-like, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of predictors. - Y : array-like of response, shape = [n_samples, q], optional - Training vectors, where n_samples in the number of samples and - q is the number of response variables. + Y : array-like, shape = [n_samples, n_targets] + Target vectors, where n_samples is the number of samples and + n_targets is the number of response variables. copy : boolean, default True Whether to copy X and Y, or perform in-place normalization. @@ -412,9 +412,9 @@ def predict(self, X, copy=True): Parameters ---------- - X : array-like of predictors, shape = [n_samples, p] - Training vectors, where n_samples in the number of samples and - p is the number of predictors. 
+ X : array-like, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of predictors. copy : boolean, default True Whether to copy X and Y, or perform in-place normalization. @@ -432,27 +432,24 @@ def predict(self, X, copy=True): Ypred = np.dot(X, self.coef_) return Ypred + self.y_mean_ - def fit_transform(self, X, y=None, **fit_params): + def fit_transform(self, X, y=None): """Learn and apply the dimension reduction on the train data. Parameters ---------- - X : array-like of predictors, shape = [n_samples, p] - Training vectors, where n_samples in the number of samples and - p is the number of predictors. - - Y : array-like of response, shape = [n_samples, q], optional - Training vectors, where n_samples in the number of samples and - q is the number of response variables. + X : array-like, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of predictors. - copy : boolean, default True - Whether to copy X and Y, or perform in-place normalization. + y : array-like, shape = [n_samples, n_targets] + Target vectors, where n_samples is the number of samples and + n_targets is the number of response variables. Returns ------- x_scores if Y is not given, (x_scores, y_scores) otherwise. """ - return self.fit(X, y, **fit_params).transform(X, y) + return self.fit(X, y).transform(X, y) class PLSRegression(_PLS): @@ -607,7 +604,11 @@ class PLSCanonical(_PLS): Parameters ---------- - scale : boolean, scale data? (default True) + n_components : int, (default 2). + Number of components to keep + + scale : boolean, (default True) + Option to scale data algorithm : string, "nipals" or "svd" The algorithm used to estimate the weights. It will be called @@ -624,8 +625,6 @@ class PLSCanonical(_PLS): Whether the deflation should be done on a copy. Let the default value to True unless you don't care about side effect - n_components : int, number of components to keep. (default 2). - Attributes ---------- x_weights_ : array, shape = [p, n_components] @@ -784,6 +783,18 @@ def __init__(self, n_components=2, scale=True, copy=True): self.copy = copy def fit(self, X, Y): + """Fit model to data. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of predictors. + + Y : array-like, shape = [n_samples, n_targets] + Target vectors, where n_samples is the number of samples and + n_targets is the number of response variables. + """ # copy since this will contains the centered data check_consistent_length(X, Y) X = check_array(X, dtype=np.float64, copy=self.copy) @@ -820,7 +831,19 @@ def fit(self, X, Y): return self def transform(self, X, Y=None): - """Apply the dimension reduction learned on the train data.""" + """ + Apply the dimension reduction learned on the train data. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of predictors. + + Y : array-like, shape = [n_samples, n_targets] + Target vectors, where n_samples is the number of samples and + n_targets is the number of response variables. 
+ """ check_is_fitted(self, 'x_mean_') X = check_array(X, dtype=np.float64) Xr = (X - self.x_mean_) / self.x_std_ @@ -833,21 +856,21 @@ def transform(self, X, Y=None): return x_scores, y_scores return x_scores - def fit_transform(self, X, y=None, **fit_params): + def fit_transform(self, X, y=None): """Learn and apply the dimension reduction on the train data. Parameters ---------- - X : array-like of predictors, shape = [n_samples, p] - Training vectors, where n_samples in the number of samples and - p is the number of predictors. + X : array-like, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples and + n_features is the number of predictors. - Y : array-like of response, shape = [n_samples, q], optional - Training vectors, where n_samples in the number of samples and - q is the number of response variables. + y : array-like, shape = [n_samples, n_targets] + Target vectors, where n_samples is the number of samples and + n_targets is the number of response variables. Returns ------- x_scores if Y is not given, (x_scores, y_scores) otherwise. """ - return self.fit(X, y, **fit_params).transform(X, y) + return self.fit(X, y).transform(X, y) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index a646e9e6ba0a5..8506d35a76c9a 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -174,7 +174,7 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, .. versionadded:: 0.17 - tol : float, optional + tol : float, optional, (default 1.0e-4) Threshold used for rank estimation in SVD solver. .. versionadded:: 0.17 @@ -554,6 +554,17 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Regularizes the covariance estimate as ``(1-reg_param)*Sigma + reg_param*np.eye(n_features)`` + store_covariances : boolean + If True the covariance matrices are computed and stored in the + `self.covariances_` attribute. + + .. versionadded:: 0.17 + + tol : float, optional, default 1.0e-4 + Threshold used for rank estimation. + + .. versionadded:: 0.17 + Attributes ---------- covariances_ : list of array-like, shape = [n_features, n_features] @@ -576,17 +587,6 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): of the Gaussian distributions along its principal axes, i.e. the variance in the rotated coordinate system. - store_covariances : boolean - If True the covariance matrices are computed and stored in the - `self.covariances_` attribute. - - .. versionadded:: 0.17 - - tol : float, optional, default 1.0e-4 - Threshold used for rank estimation. - - .. versionadded:: 0.17 - Examples -------- >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis @@ -626,7 +626,7 @@ def fit(self, X, y): Parameters ---------- X : array-like, shape = [n_samples, n_features] - Training vector, where n_samples in the number of samples and + Training vector, where n_samples is the number of samples and n_features is the number of features. y : array, shape = [n_samples] diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 584c4f2e7ceed..7a0894e1ea2de 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -20,15 +20,14 @@ from sklearn.utils.testing import ignore_warnings from sklearn.utils.deprecation import _is_deprecated -PUBLIC_MODULES = set(['sklearn.' 
+ pckg[1] - for pckg in walk_packages('sklearn.*') - if not pckg[1].startswith('_')]) +PUBLIC_MODULES = set(['sklearn.' + modname + for _, modname, _ in walk_packages(sklearn.__path__) + if not modname.startswith('_') and + '.tests.' not in modname]) # TODO Uncomment all modules and fix doc inconsistencies everywhere # The list of modules that are not tested for now PUBLIC_MODULES -= set([ - 'sklearn.cross_decomposition', - 'sklearn.discriminant_analysis', 'sklearn.ensemble', 'sklearn.feature_selection', 'sklearn.kernel_approximation', @@ -54,12 +53,12 @@ # Methods where y param should be ignored if y=None by default _METHODS_IGNORE_NONE_Y = [ - 'fit', - 'score', - 'fit_predict', - 'fit_transform', - 'partial_fit', - 'predict' + 'fit', + 'score', + 'fit_predict', + 'fit_transform', + 'partial_fit', + 'predict' ] @@ -71,8 +70,8 @@ def test_docstring_parameters(): import numpydoc # noqa assert sys.version_info >= (3, 5) except (ImportError, AssertionError): - raise SkipTest( - "numpydoc is required to test the docstrings") + raise SkipTest("numpydoc is required to test the docstrings, " + "as well as python version >= 3.5") from numpydoc import docscrape From d25d8f7ab0a502b78ea739eaf09488becb80dab2 Mon Sep 17 00:00:00 2001 From: Balakumaran Manoharan Date: Wed, 19 Jul 2017 22:26:54 -0500 Subject: [PATCH 33/86] [MRG] DOC Fix known issues link in faq (#9418) * Fix known issues link in faq * Realign to keep 80 chars per line --- doc/faq.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/faq.rst b/doc/faq.rst index f11f1e013d434..dcaee6da8b928 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -24,9 +24,9 @@ Apart from scikit-learn, another popular one is `scikit-image `_. Please do not contact the contributors of scikit-learn directly -regarding contributing to scikit-learn. +usually a major and lengthy undertaking, it is recommended to start with +:ref:`known issues `. Please do not contact the contributors +of scikit-learn directly regarding contributing to scikit-learn. What's the best way to get help on scikit-learn usage? -------------------------------------------------------------- From d4cd4015316a6719e652ecc6ec7d88e0d35936eb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 20 Jul 2017 08:01:30 -0400 Subject: [PATCH 34/86] Note->Notes, fix underline in multioutput examples (#9416) --- examples/multioutput/README.txt | 4 ++-- sklearn/model_selection/_split.py | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/multioutput/README.txt b/examples/multioutput/README.txt index 57adada325e43..6121721d5fc73 100644 --- a/examples/multioutput/README.txt +++ b/examples/multioutput/README.txt @@ -1,6 +1,6 @@ .. _multioutput_examples: Multioutput methods ----------------- +------------------- -Examples concerning the :mod:`sklearn.multioutput` module. \ No newline at end of file +Examples concerning the :mod:`sklearn.multioutput` module. diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 85ba2c086c25c..fbc00f3069e51 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -84,8 +84,8 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. - Note - ---- + Notes + ----- Randomized CV splitters may return different results for each call of split. You can make the results identical by setting ``random_state`` to an integer. 
@@ -315,8 +315,8 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. - Note - ---- + Notes + ----- Randomized CV splitters may return different results for each call of split. You can make the results identical by setting ``random_state`` to an integer. @@ -655,8 +655,8 @@ def split(self, X, y, groups=None): test : ndarray The testing set indices for that split. - Note - ---- + Notes + ----- Randomized CV splitters may return different results for each call of split. You can make the results identical by setting ``random_state`` to an integer. @@ -742,8 +742,8 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. - Note - ---- + Notes + ----- Randomized CV splitters may return different results for each call of split. You can make the results identical by setting ``random_state`` to an integer. @@ -1186,8 +1186,8 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. - Note - ---- + Notes + ----- Randomized CV splitters may return different results for each call of split. You can make the results identical by setting ``random_state`` to an integer. @@ -1606,8 +1606,8 @@ def split(self, X, y, groups=None): test : ndarray The testing set indices for that split. - Note - ---- + Notes + ----- Randomized CV splitters may return different results for each call of split. You can make the results identical by setting ``random_state`` to an integer. From 61874269fe48e28d23a50ca8dbc53bb86e97d504 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 20 Jul 2017 14:10:46 +0200 Subject: [PATCH 35/86] [MRG] DOC add non support of COO safe indexing (#9423) --- sklearn/utils/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index fc71c387903a3..332e856c641db 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -135,6 +135,11 @@ def safe_indexing(X, indices): ------- subset Subset of X on first axis + + Notes + ----- + CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are + not supported. """ if hasattr(X, "iloc"): # Pandas Dataframes and Series From 32f452d3aa2c848e139a71dc1289624621173ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 20 Jul 2017 17:34:35 +0200 Subject: [PATCH 36/86] Add download_if_missing argument to fetch_20newsgroups_vectorized (#9425) --- sklearn/datasets/twenty_newsgroups.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index fe838b1be5fd0..ec6b698dad645 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -283,7 +283,8 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, return data -def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None): +def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None, + download_if_missing=True): """Load the 20 newsgroups dataset and transform it into tf-idf vectors. This is a convenience function; the tf-idf transformation is done using the @@ -313,6 +314,10 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None): Specify an download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders. 
+ download_if_missing : optional, True by default + If False, raise an IOError if the data is not locally available + instead of trying to download the data from the source site. + Returns ------- bunch : Bunch object @@ -332,14 +337,16 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None): categories=None, shuffle=True, random_state=12, - remove=remove) + remove=remove, + download_if_missing=download_if_missing) data_test = fetch_20newsgroups(data_home=data_home, subset='test', categories=None, shuffle=True, random_state=12, - remove=remove) + remove=remove, + download_if_missing=download_if_missing) if os.path.exists(target_file): X_train, X_test = joblib.load(target_file) From 477225efafe3b644d0e9f40d1bcb207f375dabfe Mon Sep 17 00:00:00 2001 From: filipj8 Date: Thu, 20 Jul 2017 18:04:51 -0400 Subject: [PATCH 37/86] Fix: typo in DistanceMetric docstring example (#9427) --- sklearn/neighbors/dist_metrics.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/neighbors/dist_metrics.pyx b/sklearn/neighbors/dist_metrics.pyx index eb4b292dbdc38..29c83a341b7ba 100755 --- a/sklearn/neighbors/dist_metrics.pyx +++ b/sklearn/neighbors/dist_metrics.pyx @@ -114,7 +114,7 @@ cdef class DistanceMetric: >>> dist = DistanceMetric.get_metric('euclidean') >>> X = [[0, 1, 2], - [3, 4, 5]]) + [3, 4, 5]] >>> dist.pairwise(X) array([[ 0. , 5.19615242], [ 5.19615242, 0. ]]) From c1cf87e8b87b04000d76659b8a7998104622ae42 Mon Sep 17 00:00:00 2001 From: RAKOTOARISON Herilalaina Date: Fri, 21 Jul 2017 20:30:41 +0200 Subject: [PATCH 38/86] [MRG+1] TST Add test coverage for countVectorizer with ngram_range > 1 (#9318) * Add coverage countVectorizer * Add test for analyser=word * remove redundant test * Update test * Change index * Remove indexing --- sklearn/feature_extraction/tests/test_text.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 11060007b8355..9e613b1bca8c1 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -223,6 +223,25 @@ def test_char_wb_ngram_analyzer(): assert_equal(cnga(text)[:6], expected) +def test_word_ngram_analyzer(): + cnga = CountVectorizer(analyzer='word', strip_accents='unicode', + ngram_range=(3, 6)).build_analyzer() + + text = "This \n\tis a test, really.\n\n I met Harry yesterday" + expected = ['this is test', 'is test really', 'test really met'] + assert_equal(cnga(text)[:3], expected) + + expected = ['test really met harry yesterday', + 'this is test really met harry', + 'is test really met harry yesterday'] + assert_equal(cnga(text)[-3:], expected) + + cnga_file = CountVectorizer(input='file', analyzer='word', + ngram_range=(3, 6)).build_analyzer() + file = StringIO(text) + assert_equal(cnga_file(file), cnga(text)) + + def test_countvectorizer_custom_vocabulary(): vocab = {"pizza": 0, "beer": 1} terms = set(vocab.keys()) From d6ff52d62f8588414a20f918215aeade1e99b68a Mon Sep 17 00:00:00 2001 From: RAKOTOARISON Herilalaina Date: Fri, 21 Jul 2017 21:49:36 +0200 Subject: [PATCH 39/86] [MRG+1] - Voting classifier flatten transform (Continuation) (#9188) * flatten_transform parameter added to VotingClassifier * Regression test added * What's new section added * flake8 fix * Improve test and docstring * Add what's new entry * default value flatten_transofrm * Add test for warning msg * Fix bug in assert_warns_message * Move warn msg into transform * Add 
deprecation warning * Merge warning * Change warn msg * Move what's content into Trees and ensembles * Fixes minor bug * update what's new * update test --- doc/whats_new.rst | 5 +++ .../ensemble/tests/test_voting_classifier.py | 38 ++++++++++++++++++- sklearn/ensemble/voting_classifier.py | 38 ++++++++++++++++--- 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 50685087a593f..cd78a7e48c002 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -169,6 +169,11 @@ Trees and ensembles - :func:`tree.export_graphviz` now shows configurable number of decimal places. :issue:`8698` by :user:`Guillaume Lemaitre `. + + - Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier` + to change output shape of `transform` method to 2 dimensional. + :issue:`7794` by :user:`Ibraim Ganiev ` and + :user:`Herilalaina Rakotoarison `. Linear, kernelized and related models diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index d61d8bfac62be..4765d0e32d0bb 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -4,6 +4,7 @@ from sklearn.utils.testing import assert_almost_equal, assert_array_equal from sklearn.utils.testing import assert_equal, assert_true, assert_false from sklearn.utils.testing import assert_raise_message +from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression from sklearn.naive_bayes import GaussianNB @@ -223,7 +224,7 @@ def test_gridsearch(): grid.fit(iris.data, iris.target) -def test_parallel_predict(): +def test_parallel_fit(): """Check parallel backend of VotingClassifier on toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) @@ -364,3 +365,38 @@ def test_estimator_weights_format(): eclf1.fit(X, y) eclf2.fit(X, y) assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) + + +def test_transform(): + """Check transform method of VotingClassifier on toy dataset.""" + clf1 = LogisticRegression(random_state=123) + clf2 = RandomForestClassifier(random_state=123) + clf3 = GaussianNB() + X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) + y = np.array([1, 1, 2, 2]) + + eclf1 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft').fit(X, y) + eclf2 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft', + flatten_transform=True).fit(X, y) + eclf3 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft', + flatten_transform=False).fit(X, y) + + warn_msg = ("'flatten_transform' default value will be " + "changed to True in 0.21." 
+ "To silence this warning you may" + " explicitly set flatten_transform=False.") + res = assert_warns_message(DeprecationWarning, warn_msg, + eclf1.transform, X) + assert_array_equal(res.shape, (3, 4, 2)) + assert_array_equal(eclf2.transform(X).shape, (4, 6)) + assert_array_equal(eclf3.transform(X).shape, (3, 4, 2)) + assert_array_equal(res.swapaxes(0, 1).reshape((4, 6)), + eclf2.transform(X)) + assert_array_equal(eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)), + eclf2.transform(X)) diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index c4832d7e49a9e..88b329d836978 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -12,6 +12,7 @@ # License: BSD 3 clause import numpy as np +import warnings from ..base import ClassifierMixin from ..base import TransformerMixin @@ -61,6 +62,13 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): The number of jobs to run in parallel for ``fit``. If -1, then the number of jobs is set to the number of cores. + flatten_transform : bool, optional (default=None) + Affects shape of transform output only when voting='soft' + If voting='soft' and flatten_transform=True, transform method returns + matrix with shape (n_samples, n_classifiers * n_classes). If + flatten_transform=False, it returns + (n_classifiers, n_samples, n_classes). + Attributes ---------- estimators_ : list of classifiers @@ -94,18 +102,23 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): [1 1 1 2 2 2] >>> eclf3 = VotingClassifier(estimators=[ ... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], - ... voting='soft', weights=[2,1,1]) + ... voting='soft', weights=[2,1,1], + ... flatten_transform=True) >>> eclf3 = eclf3.fit(X, y) >>> print(eclf3.predict(X)) [1 1 1 2 2 2] + >>> print(eclf3.transform(X).shape) + (6, 6) >>> """ - def __init__(self, estimators, voting='hard', weights=None, n_jobs=1): + def __init__(self, estimators, voting='hard', weights=None, n_jobs=1, + flatten_transform=None): self.estimators = estimators self.voting = voting self.weights = weights self.n_jobs = n_jobs + self.flatten_transform = flatten_transform @property def named_estimators(self): @@ -163,6 +176,7 @@ def fit(self, X, y, sample_weight=None): if n_isnone == len(self.estimators): raise ValueError('All estimators are None. At least one is ' 'required to be a classifier!') + self.le_ = LabelEncoder().fit(y) self.classes_ = self.le_.classes_ self.estimators_ = [] @@ -256,16 +270,30 @@ def transform(self, X): Returns ------- - If `voting='soft'`: - array-like = [n_classifiers, n_samples, n_classes] + If `voting='soft'` and `flatten_transform=True`: + array-like = (n_classifiers, n_samples * n_classes) + otherwise array-like = (n_classifiers, n_samples, n_classes) Class probabilities calculated by each classifier. If `voting='hard'`: array-like = [n_samples, n_classifiers] Class labels predicted by each classifier. """ check_is_fitted(self, 'estimators_') + if self.voting == 'soft': - return self._collect_probas(X) + probas = self._collect_probas(X) + if self.flatten_transform is None: + warnings.warn("'flatten_transform' default value will be " + "changed to True in 0.21." 
+ "To silence this warning you may" + " explicitly set flatten_transform=False.", + DeprecationWarning) + return probas + elif not self.flatten_transform: + return probas + else: + return np.hstack(probas) + else: return self._predict(X) From 7a5da82f317251ea651c6036bbda6486edbfacd6 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 22 Jul 2017 07:38:42 -0400 Subject: [PATCH 40/86] [MRG] Add Alfred P. Sloan foundation to sponsors and footer (#9402) * add Sloan banner to about, replace NYU logo with sloan logo in footer * fix logo sizes for sloan --- doc/about.rst | 9 ++++++++- doc/images/sloan_banner.png | Bin 0 -> 22729 bytes doc/index.rst | 2 +- .../scikit-learn/static/img/sloan_logo.jpg | Bin 0 -> 96721 bytes 4 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 doc/images/sloan_banner.png create mode 100644 doc/themes/scikit-learn/static/img/sloan_logo.jpg diff --git a/doc/about.rst b/doc/about.rst index c4208efdc247a..7be981836a535 100644 --- a/doc/about.rst +++ b/doc/about.rst @@ -104,13 +104,20 @@ work on scikit-learn. :target: http://www.telecom-paristech.fr/ -`Columbia University `_ funds Andreas Mueller since 2016. +`Columbia University `_ funds Andreas Müller since 2016. .. image:: themes/scikit-learn/static/img/columbia.png :width: 100pt :align: center :target: http://www.columbia.edu/ +Andreas Müller also received a grant to improve scikit-learn from the `Alfred P. Sloan Foundation `_ in 2017. + +.. image:: images/sloan_banner.png + :width: 200pt + :align: center + :target: https://sloan.org/ + The following students were sponsored by `Google `_ to work on scikit-learn through the `Google Summer of Code `_ diff --git a/doc/images/sloan_banner.png b/doc/images/sloan_banner.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb98e84030069034b6d4812eea791de918da49c GIT binary patch literal 22729 zcmcG#WmH>V^eq~sXp!PY3q@Mo-JRm@uEpJ*QVJ9=?oiy_-QC@byE{ql`Mvu-{O`D* z-iMblLPAb*Wbd`tnscss!j%-H&`=0bKp+sBjI_842n3@BydOqF0RA?KeB=RMkR7G9 zT|gjI?EjuHptKAE5C}!_yO@}gl9hw2gNv1e<0lz0u}_Z94i?{S%|Rf~)l5}OHPvH0 zfyd1|QQ2RCe`Fn0aFIT#h(-qD#8K0dBV&I1MV`BYtK9QJLIR#>IOi8!badcvTxI(A z5h%-ud*pf1A^E=|MqhV)@@*E|9}gxV4NHQD<+qu2Q;6M2?~|n2lsN+5mxz*n*bW)$ zAKckv6b?e6bOd1{)tQmIfIq{4p8Waw=_tApJ3%m>bErt5Zu#_XX5ug?#))XE9!%g5 zm~PiNZds&29FVYgv}hqnSP~{MGlg0elmibk8Z$N71t~FrjA#N5=0Jg&PpLjIAl*OY zxG*_!pikK5zr;aSe4z3vjR*;l7BdLPLbjg=w9EivmeI73230qKx+gJFYe7iwLCng( zLcfCGeLzOTpFewoex!nMByTkNujwi=4{3psO0DH@q2!SW(nVr$Le$h`B>gfegHMOU zX$)(eCd}CDmHLr2h#h15b{quCiNgi1_7?0piC#H5$r;^F!NFLP%yp;iJj@x6TMJGA3{-X?H)en+_f7aQA|-3vp`~Jqp#(`qF;VaRFlveG0h8; zuy-q4{Y-E~XY5sPMi+3zcrixG>iN)cV`U zeawCKeFXCqRuT35G?k6wT(u7zF*>3Zxs57S%3;OW>~3j16CrpqvIP}+iF1Tj_*ReK ztBzb-KWrqDDkM*9&W;^R9VH*Z-Fdu6`-vMe^vm+g$8-Wj0?Usc1XU?=g&*f4KMtue zbFrMJ9HwHVI;d;WHq!dy#*LyReoLfIB%)#q$CN6_w*>r)i_tDI9~xZ6JX3DxvtohLoH z4O-bxw;+yw>Y9laUw7KJ6fj9!q(Es&E{_=7H#W^WmD@iSLWsL5PEQ%G3do0Fijr^r zBQ|j^zyAA5RLCyZ!eXvxQ<{{Vq*W$erd%d<+QC_QUX)qbAT3M#?f~ z@?GYk>Wb<{5lRuBya4ZgC1a^g0a0$7xJQ?L(M3vDr$JzroI}y3SI6NCu^$sy4Q%&P zbT9Q@3!&~kCBoYKNdo=>4krhG5~iWGYh^M0K4Gx}<3!`+ogZ;6GyM4rl2MWvzrPRa z?NaXA4cZOnQzKB5Q_ISq$X}$X$hQ=JE##BqlN*{CpU9h7Oyx)yU|C{4Og~ItO5bWS z((%-xZ!B(XHkYa0(<#)xXl%C+)#1@D&{EgYuam6Ete{$Kt7t3wP}HK3piq{zXxUeN z-8j-%()6)AmO0YgHGV7vC?!*0aaiv%h9(v$;(74U26n@6QOKW+yV+ zY?b>fiYW$JHXMFtGG&U>tXQgUD)1TVit8dL0Z z$TAGw5)1eYHG_IXXFx@PR4|rs;Xxuny0EWL-oE5r(L%gy<`1GQBW)$8y=~{# z+xczSzs!HAyFU0SOvCz)Ub&OKccDdiC5%|~yh*}xzZAItT!~=P7UTVQHdHw!P#{gWN5 z0B;9NNPJwZlV~Zqd_tq;iRFQX`&hYoXf0&GKC+34G!b#crY^Spoi+nXqiErgn$DLc 
[... base85-encoded PNG data for doc/images/sloan_banner.png (22729 bytes) elided ...]

diff --git a/doc/index.rst b/doc/index.rst
index 439e70dd94758..a04d529121de3 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -329,7 +329,7 @@
[the -/+ markup lines of this hunk were lost in extraction; per the commit
message, it replaces the NYU logo with the Sloan logo in the footer]

diff --git a/doc/themes/scikit-learn/static/img/sloan_logo.jpg b/doc/themes/scikit-learn/static/img/sloan_logo.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ea714312753a294546013fbea1783c5445ac25a2
GIT binary patch
literal 96721
[... base85-encoded JPEG data (96721 bytes) elided; the patch series breaks off here ...]
zl>BBk6iQ#A3{0Z}dc3%lib^WNHs_U9XMM*DXi?ps}dua~K%7eg^1Or_1m_h~{ zy5bm5G+{$chuDxE8>(_O9zY><>$UFvG5pV=LM zcNMn79!amCx*n3*U$lf_ywgM4_&_MqasT{%Z6P1^8q0A1AkHn;VJn`e*BHq< zF>%iuJ8(F$l{TCaMMV>>9$@6V?c(i@cllxi0v0ZFZ`C)5WQYhWGR0Z(6kWRfXzZnM zdJ&b0X(+4>hNJN=7WYuG)(h^2+Fv$`YI)mq;5D2@%Iocqx2Np24pbcSG^(Or>)!0- zr=pCR)Iu&$7w#4d91?SHv+Isy6b04x4h4mTQ&vdCHPXe0-mIdBgx{h!IbN(^2%yl}Tg-mE!w*B*IrVB7d`$9_wu@5U=^h}{G_FL4c* zNXwm!@(Txh(yqujG$!A*lbDKkmX9K~t}u$f)Z!Z-{;VR~{#+NO8hs(eikNJ^V8hze z44aUME^G%jnX#*bQC^YSui$yR+8R*%wDL7JC;1>FT;fDw7tVzhVzHizV+fM7%dZT5 zko7S3*l^02fyQByYxzeq$W^&fd-{VaQWK|0R?_PzF0R?lEzwPSxs4ZUJ~&J&;gp{D z8CAqJ2W+E~XtdX@)j_1eY|pg(iOA6S7QGI7zQ#s3Vm$!DP^X-R_T6@+Ra!3RzB_dX`(~g)D$f|n7hCXZ& zlvEgw&mssUB5qV|Q~018oV~bH7q&rm#e9$6yDZP!nLzHuu$MsIgD3p@6N4Rv7|)Lp^JPr+l0MQ-e|XH}ABVu9AwaLx8tH_eZgxp`<9EQQzENg+ffCHCbn| z6iWxl`!0l?Kh{_SS46ZG=oOJG9(k)FpQk{;~W`Z&Ed3RUz^8Z%-(aefrdU zWxNWbxBl#2?WOMpyIJWpye8MfD5Q5Xj7qH9nCiN1YM*U$$Q@cmP9yJW3CAmCG9t08 z%bwUQ!#IMaAyeLhuB$lU$dDeCt!1qE)U)ZnWvhrs?&X4xBe#0$1u5@(T(Lz{G6~}( z)=-XV3Na(#f^DMFKs)hX}+2Ys6v?|)v>P#)Sv6NQX#tM{XA0nuGyv%FWj4G(r zAeV;|(0Nq_y^pgSs0zdtDKruBn2@ys>bm5`sDkKK&h0xp-2%gra@gB7 zjRDd(kN027mz8*PpEgob(Mxj$&#K1xF3#gCwQ{vl;GX$lx+Q!>R=*iMIwJho(8SVH zjku5}^!qo9S8LM+%8VzonW#l&>3KDIxp&5b+7$>PF{|Fv7BRQ!;6hkMY$(F9{OIVz zhly$N1(F@nM$Olf&8t{ zqpbI!&!G!CiqIvB;N{+SqSG$SYG&a1T9uRDVa|Q zDf)PpAVwXPG+NQ&QuOA{o8>odJTGDtIo?61UHDzCBGx>hv%M-Q^-ngm;*)zH6UB%} zyez1Anh|&;x9ET_cn(KQBUKmk>b{?#8|uSz{+WEVPyA3 zvQ$HI-^3cJ3Cq0jrLv)JqafA^dU=uB-3ze+5-!_&5}tN@7-?u$)Qo0l8l`dw&`qT8>Su8cOac9g7-avn9<(Zz1QlOY^cow_dS zG?_LU%!=Ke-=1;sfP3F7MBnLl{$=(KlKCIcd7~WH)+bdFuRXNPjCQmKc57TR#sV*4 zvi?DJYbMhlC7j4>W1g+jtNpz;MpwU;*tjvZ+~8^T!JxOY2WXY!+lSzA=v!`*iPE1*3}%t*%Y3*SHNjFtIf0%*XVMO0NYdWuoIl zonhqB1%cqAzQ^|h8)IpCa1uQvzEvqYws~~L9_B{BHnC*eOonY|Lp>rgH8%=g8*JGA z&Z)n6r_`WR|CK8f=vXsqLt85$ULlG`%uGT*dLly$)LVC@bnG;vHl&JmVm-+zXVR1^ z-}~N(Fh+2!2ul*#-Q|@P`VofKsK^<1CDbgLLNJXf-5NsO#H3?kww8x#A7UbpAu44b zKaza0wkEEQmQQomr3dLc8N z9ag9`2Z2`wn6-x5dMX_Ox^`-8=s=Z!DEvZqNw-1v_{L;b!{y0B;^;Pg&^MfocY5%o zEP*ausN3~E>+}tgR#Op0Pl`NM!`8JtYrMnuoD(tE1(Tj zwi#r-C_aEtJ%cL0Uq{_mp0*|4sVt?R=|FMpD7N?X=tmTk>+~sA^t*=WcM^7;aCOcI zq_R@8So51LpK05-i4|ByUXB=zJpQaX&^&JdFOK%P$cC&{6p8I!o=qKL_}ks$LxSdm zy?aa7dfMD=HoEqSruOneelI0h_e#N7aE#^XIwa5o%n8)tj3i{?a1A!4kc`nYbn5Rw zB*6fl=7a-!vPIe(pu#%3GwgmH)gJ$|L(a&zoAOk)K*jdRlG2OUX9gq7j?>i@12Tx{^&18=q0Uy4Uay(mA(P;m% zC)U2DD?T5aM#4U0YJ5ZqQh)Cwf)|3HQVkB#`zF1?jTX?_O|(`nY^tW;dpcB=R|8M4 zLN8O#+0&^GZ%CMQr8Ebp_o9KmW+f%*yDBNY3rWClRl1$C`ZOaT$W~SD1dUcvSD+dc zGUzy(lRB!O5U5|$s835Lkr^&a3A-7Ur(CVd(#j*d(S`0#UFfXgv?yj=wau#hiIn!I zO#OFfV+pwBozd74ZIjzo_d=2W&y4#+(KwgTvd)N0K?Hr~5>oA8Ji~Dh}pgH#mq@Xm4|!uHonSh2zXl;v@5u?r%M)nPPy%m{`mD9 zEs?QqNBzNU@DPUWHDf~$>{96mtDWhnq|Ol+kLBB9->cc#X5VV@3G$HrsC{aS@Q@pk2S0$x!XvEYpu-8yF?QA1L}ep9$V}5-f-&=+%*t=$^OcEkRMl z`_HniSn8!!GK1PhhQ5z67ii2~VysGcYg7yy!YUfnQ9rFzFO2Ew!*vdco7;BaW5g3j zXugj4thDTH1edrt`nr~P@h58;`~BeFFmlI*>fYkD!yYQ=+}{3;sTV4-`x*4hM%}K0 zWA@~GCWY@q-AkX>IA@s`wIE&7>}kCWNsESCjOvfY<*x+RViW@89j^rxP6fx5W;nPn zw0`xb&tuQ?8_wc*vDm0PH73_JrhQ+fwuS)SkxXw#Fp`nXoz2dQRGeOZLXs)DhF8Ipx?{SQsy%rHv) zWK}vHQ(zcm9GpS5sknbWph`m~RkN6Z>HO=d?<&-I$Qw*gMBE8#h28QMt8MvX_WQ2QS&2$Y`DEmF64D-n8ttHlVCbST&sewc_V)2j@H<%nYMBL5 zuLN?*9nPl;uuHu=Mm;|U5LB=95j@6;=k+truQ|KlvXUM@b#|Ao0UJ8ZhSs>Koo^d< z@ND^T{UdmtQf9j`BA!id$q5kr^!kp3IN66u+6ubc5K35bO+BN1`1uV?d^5aUILv5O zhob^QQVxBA>2y}1jD`(No_f`NoTY8RyTV(a0mQva6|CsUBACt1xDS2^WKCo4gEyVq+hJn8*}8xV+7u zl<8uh9QAzc^*y|fdRqw26v?V>J*#c4h#ka5m-X~c?um#XoMnAbfGkZ|lc8oknS;WJ0`wl0iXWZRW$avqiBkb@`IIThu--&|WAqHadm<=yo3 z^&kDU*M~ce8Syl9T5n=0snMCCCJTQ*l4*gVJS(SVKK}uon`3%Wb@pZ(3i6 
zgz;pHM(0Q3Xt4hHMmEGU%p5KA*zX~&E8nsIHnqgY?b3eK+jTj)h5N_%jK+qR7B<>~ z&d+k#d2InO>r_}hL;90XPc*FzN^Pay&AsctbwOcaOEt zEAzIg#Z}i;4ift>6lw=EDy03_1U4EPMXwxH=pDPUYwbu8A>Gl!uAw>PbUYZzGMY|F z7!HZ*@1?lNY)Dp4anQbCXT-a7e_Ih+GcNRFw0hBo=EswkpTzPR`%{PJb(;(=_cW-e zY*Buv;#+g$y=7+Y?$Y*s0}Qq1%bp$i%+-bPaQYp&dznPwr6O zlibsJZ%xaF(ZQ^E5^Y<@Fg?P$hG`d<%u=biLzgEdR(f_Ayz@~*5JyjM-<8wy`@S34 z+Y-T9#h0R6WS^E4)~7aRgXgx%6=SKK-k}wkkyWlcb*-sUc7M5GgjV5?r9_5`gfk-v z)bW;{JN0FB)O`4B$AJ5GHkIL7_JdhCQeOb0_kBQC9`7;H#iA$I&fAZrdo?yHW{%*63bLY-oeeG`VK$x&)s5Rn?BN1)(ow~bX7-EhiMYM{7Lb|~z0Vt=-C&wl*jKb+zWgxLH`1AE$6!M-Q%^B8owu3S zio=Npm5FJz%o^9LVZM4N9ET*vt?YEMH8iS+NqT=?4Z*31VP{u&X3KNzClcfKv(>fE z4y}W&MY3D6c<*OocA~%wqFVh7$w5Un)Lu(sQo*-fG&2OetJNV+5_%{2CA;GL1ldsR z6p?vGe}eA{g;-m0Y_i04QX-lSm79K-GSe7)`{_O#8X$rCAY2hF1XY(dZ~x)&cse14d)YqT)#Y8J9@?d`Xn>I{NxVqg9?=}@~a ze4(S={QZx+u^q8z@R0~Mbh@@kY)W2jV@jmt-XOcp686-z>V%r>@@YjawV5alp8n;X zZ`*P^Jo z_O{P5LIlZE*dpU?L#gB^Vp76kDU%N4^>xLQg;>`1SFE%GHss7IdBgNBPaPv#Fa<48 zp+^FfAL~&mg|%hG#@8OUtTWQOkwp8!3~C9Z%z3QEr~5RT*OlNAqtG?^78iTi%#r(M zw2f|C$2`io-H=%>)11Dl1-_|PRy3|}ZL(Nke_KfFBq1+ag+MdV=fFOdWQB@@+4gE?m>Owr`Wsd zRn0hR0wb>uo9`iCy@_Pk)@ElpA{xg4_Q`_CGtrhA6bb>)xHy`D((c_s?9d)l)ynZ| zR}^6^$Yn#(mz@cB-HC(hCWHFdo-mT>mlFwjwR&mIzCo6)g)8h7@=}_*GJVD{xmmqo z0fw;*CsNE4R-kw*ov`r=i5yoJ+7eW~^U(wP;m7wMH>7v&U)Sqa5dO59UWm@YH_^vA8^ltW2Ph{{*B#MXn)PEHd0hk)rH(RAObU>1ceI(LGG#s< zbvP=ykHCBBYO^@er0lj;w1@GAa+mDXtr>Yip%#z4LTs-cWu3@wuL@?OR0f&gzBQNL z^mtOy3}MnLdREDGoA=Y|`<0I$Cg>R&%=tv^nrC6ElRdYGfw&h4>Tkuv7wb*T&*ib@)`CY^t`(rK6gYJy+ zwy|Jc+2w@t4axY%=HVyzDOZYg3!BHTQ;wjc+s7Qu$wi9uWf%2aKyAqzmgp(Uiy^&; zu=eeiO}$%Lia!v(26GwoJVGQ7|Axt9_N+=;Jaf=2kziL@F_mfjnyzp;`gniXosNso z&jvV_w0Eow^gwhCA@;TEf81ISMaJ8a8ESVzQt$n#i_+ik|4?!=A>zhq+6}>LSPMk> zA5|0?@HD$H@4dDHFMsr>y9ez?CGir{e^igw8xOD{H_J4p6$POjcYO$aOAD}eJ@n|w zQ?)%$uQ_%3`BhZfDxjT*k+$lk<6trZ=;i5{q-Fl~v5j7}nn$|TZyB|Ja)`AYX0%13 z<1V;V6k;q;nV#lpbTAu-xe1~85Pa~XT{$69%(Td#6uh;jT3c7{My804x26m>YwWU1 z_~R_n;Cn}^uDYVpQu@B#!)KK~3@2)5nn~KOzp*RRw<|<{_roNOk)xWem`Rhp4a7l7 z#+o3#W^)pOlXMd{B!;-il)zwT7~yEAjlld-H3mz_AD9;8s1 z4yTT}*y>E?Fh;_ToqX9$t^BL1wDNrHjThZlUw5FK$)%@NOR4HATUfF7DorVq@s}WsElx&1NZpbMLfJnEA#k#k3Ivb@9&_?CyY_j^%eZMMsi`At3JA)zCyq9w0| zH!IXq9SWJgRK>rR+4P}YaU-Sa(Z*xXwmwL0%8^_jzqhRo-+P!U{(%@>N!P2*Mp(6$ z@^+hRxOvj+6Z##4*L1~S{@qCb(Br4{&<-ECbD1YK@>Szhm9FcdvgUlnp+FZ(G{LQ0 zgIZ)!-N2|son|Gw*p`sXa|JSwcjr7w#L|k&8YyguG!%0Fj2Y9L@u;ODm$ZZ$x-+M# zz7~z4zCvDEi7s+;{5?44Jear>>rBKgndll$4s=#RBBgC27t$hRRAl#Rbq`vRdI#aXac`U49T~yIMsm}}(}NM;SdnL4q+T8^hv~%09z*escH|Ux1`>~zfrqwz<5>?Qtlu1s-8SS(_Nk&< z#GAC%pE)-#d8}1FFUo)+(CY5~N&}65%ZfhRKr4`qWcm)46LnlJq4Jap3+~baJD+Q` zzK4x%YpIBfZ6%=8y^$qk(xUh}H+@=^;!X!$ELER;p>Uuh`ef^Cy+HS7H7C=lfzo1h zJsYZAP#~>Sg(Wv+l@kXnLti;olqX%-jDk=*jS*1WZrRjAz;p6h0X8cs=JOvHO; zG}1GfdPexvIz~opbC8~$*#Za6lR@DW|JScy&)@&5JF;klT$fpXo;+$*%uckJ*maWZmKls`*h1f@sBxiO{-QzctOeCJOI1`LcOr&MsWSFY;y5Np_nYfEotw{{9%IEWrazuMC3o)0G-st$Ipc$%wmkC|O)jV}xV z_7oc$)MZ@{V7fCVJPZcw25s`w^SeE8!-4I(>jU!Fl@WKpwPy7Nk-(SV`?A@8GTyS3 z;I$qETSAiGA^HY3#OvX7m8BW)tUu9E3sBh5_$g_bA)>Y3o!5qZ`@#AI^VL7q20>6b$Q>*8}~Ae?OJo|5Wb_y z6yck(rXs4`J zG!gmuR$aFR!_PZ8E>hzqgWQdON}75?q>zL_pyzlwIijs%gU2=5!zbL^0 z6{K8piyx{YgJ7B8_q?HLDKShTmptE<9zVoCfsPnJG;f2kp%VcH_yw%``72BtLlT(| zgBOt3j_eLF+<-A}M&AstM~rk2ma*0qY)n1#@Gl0Lo)w;&{KA@UebZg6d3DAz?f#36 zZ)siOA)bRh%(9T0Gx;4u`^p+Jh_*o{){GY>!pww1lHXP04*S}(7tXyD`Hjg&IPqJ8lGNIylz9X30p_Utyb zwvhtF6cvpu)=RBFYJ|SJH7za-3#HbCoH6cLLSuvBt8@K@dNXu752D!B8{FX_d=kM{t5^`q?(hWFRc9ojep%Z4iUh*x1NTwjv_b0q5DgY5Z+!$jQ>}50P<|w z`fJ)~tosf(f8WhGXxIh25j9LHCystVm*Zpur@x{FPx4R!4{-DjY)$8L1tZ-K^j0e_r z+LHPA2CnbSQUmwHc$`1dodetpCO&^5F~hi>jp;N?YxGy7|89B<>3lH?IfDCQDstlg 
z^3mcSo5?9XgDOaIheRa>V48j#9RCGaLq+vx*lHT;-+{l`B%s!xGyTOP{n5yurCfha z{qJTzkmld*d=)iqRpsyO{9p3?GeiHIh5Cb~KSS?lDE_gXeUY`@lN=xcjxwo|W!`%V!GuYVe|C52xd{a6^B4(*D+) z{Z#S4bPJ$In)#aJpLzuaAGE?AEY9B=;pgQ8y7X`FI(~Tv|Mz>`Up0}Vt)CgexXF7X z{lSZ;?^yeFr}ek@9$%^b_)h7c8~^JXe$_3i$SY~cE5U8x8rrH_U@`xHs$2ZqgZqD1 zwQMnN|7S+@{|j{N7j6BAu5oTy|Jm!iUv>)rM~3xp{l?5|3ZVak>YuGDB_#!A@R;?@*!z_!2|m9CfJeUFSOn@DArCii98!_{g3+0FkC-Z z_oETUHE}uSt_}BH#SFH-zVY}~9e;1;7u)g2R{o00Pa3K7wT-_jIJ1dgD*k&5zkIs< zv58+=sH4dJAgU+K{dP;?e%C$1b?L-)w*iX!?uv0&bG?t@;QTbJf@MS}xU(+_Vrk1OaTilrT3998Pg?fYU6_ zA^eqd7`G&y3*8ARdU${wZx8}kSKxqshTMw2{uQFkg*$=p3yvBWBL5A-rTGb@A;<@~ zv-;`5JIKlYX%_pxlgFu>Zytv>#Az#NH_va==VvK$+>1e62VK+hSwnKV~3&&G^hBCyl{- zdxIa2;t3qY;xXES;!z1<&+o=#7* zvDfp^=X`FX1aRJ72qezVdp8aghkaKMKQ|A67lTufn=sumgSMcj9~STP9Y_Go7x47L zATf9}C?jHI&7lsylmZw?FL1;AV@;44q#x+#IJXdF13IqH$imae90(`J6zwSme*UN1 z&x`l-wlQ+B0abk#E=Ty^L(FlW$nPKmZr=VjZl2%4L=i|(4td1i6t~mHeD^^t)|*3c z-WTw9$U=v(enAG_UY=hhB{to7Fx_Gr3`z>RBR$;k-u@se6aYSH{REx!I-d*v9)12n zPeZIX)^CPz7fsXOZt^t)6oD+T7>=xk{INckcz+ynrs4>D14a59ROBGmA1Kt{;Pb&H zKJ+6x_x{_=sR$AQCrtkxH$Hu@aoqV$t1u52kAnfphJk=1wyWU038If7m;{Idhj}Z3 zySq6Dd2TbZ_Yv9oP)eM2YGW2^5z`m%{j=MbC5UZAaBk=-kgKH zIR|-j4)W$4-#_=&Hbr1>WmFup5TSi(h<>jus-d@dI z$=t^fdB|&LkRQ@I$ifB@ghFV!b3N}J(LRFqK_mU$q=2VB3{Lxq?s_h9Z4l>x71v8~ zQT$Q5>s1us3UJ_FO&Pe*lai9s@pJdk-n+y2b4ifWo#~1%P(=aj=cx$S($Z2?QdU$} zmIoN}xTC=F*%5gRZo@Q@FLZVwaojcFV7-tOhpro#;qR}zem$q6ug5ft?uf7T_<(up z+*;fbib!uH8hBO10dc?;zY+&!b=(o!9$eRbdR}PYJw)-a4-&Y}`Ud@62k=DzDxSdz zx0M_MKfwPLgW#HcpSiMwzlE)dU+c#JzkHymH9ty)~=)YBl3q#Xa-8GM zbcP4^bywEZR)fofo!*te(yJdZf1qKGb@%c(`V$)5z2Rrln1T5t4WK)~RfpTZMP(-L z|G>pCH~;nJK%*6Zre`?+y~uAW{nHDH<18jn1TM9(cs~SiOXKlPt2ufD#B6S^y#tHD zb2QU*haMj9<*se8U2}(#vVqceLoE$Be8&zgRk)I-y0Mz7mXWdAj_qI(-rvf7FI+&= z6a$t!V-UzMGz|^al(hgs@UO|Csi~}?VW6b3Lsiw#(AZFQ`jUTN?r$`AAi+{Qq`R>n z7R}XgA2&Z7lG8xl^

zQ*k*Bj!408>nUc&kg)rxo^yTBkTTCx&3RhGs=A< z`&Fpl8u|71{!iWa{$3vcpf8*pZZ|zsfI1wg_6&!Kob{J~gUx6HH_w4{Exhp@-;sK% zno68Q5zYxemz|maV_C<4QFdnjk7d6VwZ(Y(>j5{QTy}l{|5(ZbIJpc!?(p(N;y@WS zbvb1<4RC6LQ%g=+U0F_9Llv9=rl~5YrlJH+IEVwdriz@hmb#pZ5?oFNt_e;pIiTVo zUm3sv9StRL0$dGFS`FlbQ$sp!<7tF zc6?_1JI+3{{T~|vkVm}!K@&JRf7b*KRC|xt5u~@n4lmGGaF)&LDQl_eDE>5OhLpeM zJANyv0#}(?az@UNCAkZqtbyTxSVfoKhOx5L{Dmy#NG+0HUg?ro;`w^#pNnH9-ijzlI9X4o(R0L8!$E zH323FId!Q3UEzeBT2z1zaY9fQgxs<~gH++FAeR#YZb3K=(cs24IiVKd6NFqGEj19= zQ0BljfVP1U=%gxK3((|*oU&S)oU&S)92_l84vv-<7e@=ujdQ4Jaj9u(acb1!($wN` zqO1g0mjik$2XtLdT~0$zQ%*|`hz1UqgDcCyRpj8RazKv(5?n(LD7`Yk1-AmY6TnTP z2JRQ2?%>V<3cn4E9y@lZYpSRl8}86B)HG1qp{1?~->w8VR^PFGyBg4P?#2A>iv6dx z+~*M1AB#JL{ReC3Yxdvu{cDysSKELYfz$&k`!nzi9sHJUYG?>tJ9~I}BlU25O}0y! z8X2m?wba$+l@)**{}k`1ban#2&{)5tU?n*ib~q!?PltkOD6n*ak7&iO2J@FP( z!f$N$~pRYiVq{+laebR%GXHFASCk?CgZb`1^6D?>jg;NeR}2 zot=cha#yg2lN$o(V{UEC*?tHDQ~RYjbNo5+&j(m>JJveAF;TPp&OK66Uo!s9-$e)? zKd^xiY!AR!aR&=eLHs0$dk6aaaNwK`d6phTapF9j-K3WK0R};QB`5AV9hc+cOviuY z#NE*tuy+#=hi4ymv^yvM5XAosz;pI=;@u76rvto@fgs)j;v2p3XfF`wYy!3vjdTP1 zO!09x``PG^L>vNfB@h?!v)OA1;yPf%BoWUU@q;ts{$O(@fD0L7eU5Us8|=w!HQm>|4oiFl++YhzxAF#(VJ_>|itm6`g{hm7U*U8*hSz*C!#Uuwh1fmUHyw z^Frysz(55rBtn5B(67h8MEHvN&ygARDRAohy7okB$Mo)0QXH*DfZe9>eo{Ej>MAMu zAC>sOTyO?kGx(4Li%*e$NU$VS$_{9m7seA@Zj8GZXPuyw7v{SaGBYUr7mLlHz==-l zHHa)pVnIt?6`+L;OCjEoT8MASe28~n9LRxvy>7dO?7#|n2(p(PnAUp`2l?FayMQMg zBzbUN+#Sy#!@V|A2)tjwGzuT*haYUyCk8ErRzYi_4bWz=^_()K0d0eRgN&eEkR@aT zIYRp(1XzQQhWwyF=y&KCbQ%hSE<%yebtoQ6g3_RDC=V)v?m_pVDyRl}29cpI=mqo! z8iL+IW6&fF1`~uWfGvTof=R-pVX`n~m?lgQW(3;}+Y57q9e{blFfcsqci0J77%T#I z9d-+r2Frz&!0yAIz#3s4u$M3j>;sI!!^c1fMTo2;Vur>wLHQ3i%%LHS@jX zd&kG(U%0Re&K0x|*`0y_m91r7@Y z3!D>(7040zOQ2DpPk=5cD7Z>cR!~>aQqWD%U+}bGv|y$nNw7)qjo_zw^XEykt{^R*C=TC?%7TGFdD6&rkFLFU7 zRpc*`4v~)w7A)Ab;I{=%3;Y*cSdg}$azW36Pohgi6+}%%-9?F_F`|W{O`@Y>LSoWl z24efgg2bZ4^2Hj&h8GGg+_cbeq1(ccg)s|D7Pc*>5HB$8eA;6 zc++B|#qNtwE>2urxwvl$?~)Bm43{95oLG{y0(qG~f99 z#*B@f(n8Xj(ubt4N>@vdZ<5($zbSN6(WXI}l`^I>K{6RK&o?jFtiRcJbHe79EdpCK zx1hGfZh5wqXRF%QLtA6EHpueGs>^!G#>zIz@q=}Z7`X(wHhB?w1Ni{?4EdJ|%N2Gj z99Jk(7**V;=%jc_v09O(q^9Jpl%&)JUku*`KMpU2e^B14j8KkIZc$mFVyr?`DOP!} zDyxcAjZ*Ur;^yG?!@YFqlYK^++#q)w7fpYD2HH{DyhFZ87J4(Q#~d!fHh-%US3|J83B zesljV<+lNYEe3}TG7UzzD{c4NUa)=KP}`7T_}32J9VR=@?x;6fY-DS6-KfWSy|Jfp zhVeTSHIrbIzjpHPG~0P;XPc>nDZ(_(^xZCvU4&f^%|y(s&90lh+P!tR-|kX#9`jx1 zm(8DBNLyemiYy^ZQ_IVi-FsyA`0gpQ60q816=T)E7ryuRz17xBtX-|st;cM(+l1S6 z*>1MQ+di-pv)gBvW;brX!#=|Pg@d94!J)=+wWF6~v6G;aol}Yv-Py=F()rCkwSA}e zwYtc-9C3NFfAxOv{pGG=u5PaR2lx-zA4oqi;QqmQi5X`dIq+P;y#@BGaCQgASw3$ECIDKMsL@tg6d@Gk@O0%8I_ z1=R4g?toB?t2bBZ43NzV7#9zrQ3H5N?IQLJo#JAZ{R@B)<8> z3R71Yv|MK~3 z_`c`;z6Y)kx+)ziTOL|Htbb(o=y8>CRb{n7HR-YL<9koEpOie+d|FhaUQ<}BR$EY~ zR##B3USHUt(NO$M>se`|PGd!re$)Nt9nDoOrY$vO3vy$tZEJhmzP1#+{A~Ba?x>zMJqf*AdUIZ=zbJpX<7MqD>sMWUh`yoMxYtu}PWCV8j~b91 zNE=ihyhkynJfk{O-wgQ-O%9(PSu}EEbo1!lx4*ruedqA*&3nK1><{NYuKJitQ>8ti z@1ggMp~oi2&wg6<>Gp)?L^Z>nF)$f8B{+4BxrJHEGGjewqrtYgqmU@qRSM+(8?Xgn z>@Mge#Pjvw1uPf$=mNJtoLA1(6z0Rwv%^Yifw2?z)+ z5Ec?%ASMb9F)=RE=O2;j)c57Srr3W$i{`;9xwwi})b^b&KH! zRsu`h*83h?s(k5|;IiE98}9FIe5tbh;Jb}}$KOjEs#-@RAig0!~Mz6jrtNk7=0L zdj^JHO)aW!8yMHz>2T=CpVw{|KW-oVv_;F*(d+1$=(Lh29h8Z!+Pj<%2c5m1Ui!3? 
z$^gyh0bKF}aSHIw65X3d9z1neNh0SFkot3LLsT-}9ard{?_>h=ixp_---^Y_zlJ0)bva#Ja zrvH)WU$D&XghY6`Y%YTIp~;D}Wy=D#4%@b8rf?WddauK3QjGTXwp>`;hFt{L;;n@9m$g4P-;{C)rTF^n_HMNbmIkmPf^(7v=61 zp>XK}$p(Em5u)u#kK0+V4_B|Vp@Fx(QFi8AkIHEXNVHacs!?=BJ=*(z#h7iuaCSQcvoH z*@5blLG^Zr*RfL5j=KIPcT?8k(BM>v!oJWp>5}K?i@Lg&y%bRxVucv7A+1X~RSqQe zggj#kTJS1uxzSBtghKoV?5M9fp9XU&j)hZULy9si@g5g8bk9T9V1L)=a(omyQjG6@ z3HRulpDT-=1*Y{~}4nb@1tpGnjqhWa+{| z^>?XGvF{E33>rPghV(Af8Vix#j_q8 z(54EOovZ4W0oSMTDidBwQaaa-$Z>bhxRfk`H@E*0)M%ROjb5XesS^%J!r0Ia^N%&v zSoA~=@=eBoq(o};f#RlRvQusLR#`dS@JCA-&4wSBAL}RR>AKc9b%$Q_k3O@IdL3D7 zA29qI-!VBkYQTny|BsC~kB90F`+%oao0Kd?Axn0XB3UP7OR{B;DU>bdXG>$5Q$$6Q zb?jpwyX?EEBs*E6kSS&mzHdL%6*l zi}M8^gJn5*6T!LcVg3514@_-~`}}F;E|wo(3~)GZI6s%1(6T_Tz6uO!xc8sTc4tdw zK=c6e`pfnY4J7wh0R`3*bGLM<6I5KG!O^gz$XH&*fo}mh(SzvTShQLEzB?H+)V~aij6m_>;Y-9A#PlJ;CC9uS}H3?e}Kv$6og1HbUocS|`)G z@0!_hBzglf8vVHriyhjHEK;gQZLyJb?L@%tmo3#R)N~N%k>)0YSb*)iR|*rt_$GG zkTCl@4(uNEa3%H;?yzwQmFXRs#{Rfs`vdEW!>FV&_`GuiC_GGIe**?hL{%#Rg>BS% zjdteoyb=d_``Iu)IDdo0H}WI~P{mTnCZrn^DVChTY0qdz-5XcO$lXox*=U}9sr zDSXT9$d&G}JE9V;erXaHOSr~XdMO~d0?kh{RO`n=7v-6Bu2eVK6EXIM?@~rDjK`Tj zbk7Mo-+knG^;g0Z5BrYf%I|9}4Npt#7uBp-`|k#-pU7`&Og}#|pK`(V){nzdS9fsm z;{K=0rEHJ<&ka^`fx_e#%}vU%;}e$1S-+ebP7 zI}X{?Lsm%hz*pBFpJPihvh#U5VuvUII={dp&zkm4B5|~?-VqEo--GFUG?%es!w6K} zMrl186LD~~WKXkqn&|uSPWL%k2t7NXPrp5`VAdLU^rZBayUa?da|9B>2Kn-fumNmh za0F_EYDY9$L%1JpseTLdvbc%KhH3qNvr6YI`Ox0E$IHHu&H+Q(1W(wTa35@C9fW;L zvFLBU1iQ>C~ez<0R8 z6*R#Xd90m61Vi{VIK%am%+!DAxe;ujZ(HCsZ87jb|7~#J1D-=C`7O?t{RU5+vc7!t z+m~}KFSI%S(O!9oJ%`-v3bN)|o3L*q+u-_`V-%(0dqlsJeU!|J0?MSTquS{sns%E0 zBHJh%UarRgb9(j;o1cy*A+`rFxiDwxw#q9~)ZR+))_x5MlpgPw2QP(pSJ&|$tq8Kd zw`rXbL_tq;Ks4-&V|6SVk5trswj5HkU+i${h!(!)_34M>l(d`6 zFGbrcGLtv=zH+Xrt+tNCWq&L?&ctnB&fw8MLZpX63hP^5-XVOe1L2_*S$gV)czl6=>y?Z03XWqb6$ z?ct|IGGGSmX9@dNu{vSj#C}WxG#|EMIwg53@JkyS!w(K{Uq&Jjwp#nMSC2tmyWbcdB_f)luK-huyUsbVfAl3+>zqH%9N)ojPXnOqcv!E#T<1gZA-n)m_}BQs5!8Kc=vM1nh@pC&IqN zS6M%D=@^OcyLz*oJ02UiLw(KZ4&ahxvc0}`;XjSfEO#oKc^_vw2s?N)p7<^kug>#vq%zfSTJi~{Y1kL%7P)C`Wg+1X7&^0up#Mh8yqkpjTR&Pcf+Ec@Oj38 zVFs)f?C7ejL(5?=-rtKwCZ~WD-GE1Nyq43vT!jGh$m(a)0f7CC6u;3?{g2P;{6(|IX%+(?8Gv5&_+zOsM7kU1*x2#6Gl*N)sRv z>jlSdxyVye%ml64(sYmV)b!0IX+%H#!5iw04E_>@{eqMLkbq0g3XhnlD5T6Qd%B7IgAiNxeF{!2T%6LJ3EH?; z@3sdfh8!(5!TAgH^OPEx^?Z3|+abm7KU0N3slCwiG5mhPKH+58d9wYq=mchzU!EK# zrAJT@zb=ptB>37ei4gN~V%snh6g$BY#>!xo$W$>)7@~D2D{rG@!G^co4U0hOC*^il z)3wM?`EAcg^`bp$yJAGPh{)XWBbZL$1tno>zeSCQkUndxeAObdFz(ueD_ut7e8Gt~ zY>CxKzJN`ouzzz6#4M|@NPR$plYPZx21O%q(Oc`3n%W7WH+}aDZc00yF(xwp z>^gmp1phNUh)KXCmsJ2m=Lom)t$o<^Y+KfEOGmu5`s%m_h2dy1wa1HOV=`{x?vnDo z^QVjzFDXYba?b`oy=k>4v^D27BP12A(R(<{J^N-`z3H|6xD;M~G`Ce#zTxqQj!`m1 z{8&%+HR#=YbZ7;$O5S7Ei?#Ywbl!M8$7t@%BmZkU#fz3*<58APV!Q+;V8{>{E*UVV zf@O%~zm9+7mu=cFf9DII2(Hs~{LC$<42k}rsBRW1clYsUciOIf*4^#W&9~d~`zx;ossDls+hhOD!Fw&=^G48+EneXcyNt2`=vyP7Kh7-{_5B*~iSpA1|%}rm~ zX`j%{&b>b~E$|fW)8ldql~v{PRX4NOO%yBh-Je&8@3mqImp=G_SD0%zxM~Kyf;Ny> z?gf2>s*&H1$B=YZfx0qbKlcp*ANj~k{qZ|6W&i&{=YnSY* zY*r`;ehLHUk!4S$pF*=t`zi6+UQik#`oFXe81mv90a( zwhm^sAugyB_ET77OAfH`QJf+mp|fwKGPC8nz0v7G;t4rgYHmvH`Te`nIPsBx6s;{m zDtL@W0(*pIr$80st{LIm?rtCXjw$l{s#!7U9lZek* zJB1tg1>Y3387q`8Rcb&N^>E(RDmkZCNI}o;xuyg>6LOd<1=xw(sQ8v3h=!qcX8`h* z17@C$zVhoytJ2nalh$=*Q~gWA_es#>yH?9nLK#9a`5poqdROhOlH1dHc~0Jpb+O-R z!qGX#Yf^2UL5`AvtQ%Qm#G^4)F-%R$*^I#p5d)+IYJ9L<`jHyDA_uSL?lR8@M!Tei zDrm%GWnzbo5B#|F*<@*p6MBZcY5#3hvNU65;FL2)6O0>#u@m!SOBxUJ!N=EIKe)cH z>%TwHc%fRe{|KK0`OZm$PuF*+l*kD6g0bLJx^Q2d#NsT#o~ROfN{>^R-q?s-41b%T zW58pmB`qHRQB;KYjtG(-lR@Mus>VnIkcMTPvS7o$%$||dx=8r1z7iE 
z#uuJ4G*3`RRT_kCw|-wu*lQDQvL+suvO`z$uPGzV9x6tDCbGb>P1)bzo!vK{k}xjt#0O>Hwx^gdj^(sae#$la(koiZB^7xon5`CNe^ zTX$jhGhrgt;emD;>!^Qj7mHSJg>XfB}LZ z@~8v&ZuVo={1E2FMjYcz1VWmqEx`&R75U^nsN4?=I|d{5(v+A;wIm ztz~nf2ZD!5KXQ4_fJI~h}WCoSa*(sXvvWQH7HRMqf-WN_TTGF~-u}1}3N2sF&k4M);p@PZrC{E87X3Drc z=I%DCIm2Z-$A#79E)@q3lfVH$nhTu8Foi=)7~;>unf*2v>O3cRRDHm$Ub25Q9=VNj z`T4%=O>f)Z`3AEDhf+wXq(p)%tD-NP-_5YSaeM%0&+XFUsQKWBJ}rRC`xRXoiZG6< zTgWQrgWBx2m;wYmu8``hZj+hqjue z?;Sp@%66PY{hU}g-YxK8cxB{^nzP`kYTaJS8N`zcVq~{bBC8}-PfT(Of40Mz2Ftr( zBDB)Sp7B~i{!oFKxda&$)RVvqz~=r!VY2~1{(TG&Y&=P+s3?@T#}BSYK`>Ut4n^M!(er_ z!)MW74w=@(gf~aJMU(eR6Q2ZMH-CM+b}yBTWk1BSvtAPJ*?)vBU{9xRZ?Kl^a5QhR ze5Wo$_WqF+XP!OAb=tk-W*5uI`c}5Y1?3Rus9%4HdF=Lw|8#zA;UV;3oHW4*z5!;i zC5M6tQL*%C7g2}PLhbZ^=)zjcm#vaBe!L-Eb#JAdc>#2-$zS?a6^X#Ag7#rvgIa+t zvOHEUw{qRzbKft)+?ky?XY^yq(@UJh{i%2B0<1?c{L2+=qgYIRsF`KbC}^U2xuu~> zRAy$uW(bweqUG=giJ;VEBDHIt+0VY&v3tGC;bQ5O@A3^Runa`#I8}8z7udrkOF*n_ z=sfI5xPd1RQqtGYFJv3Ey|1iWHBm8Xjhb^fU-o%H(OB+?!?ZDf`Lm_tHO2Fm(H(S% z%+euP!`3hs#%lhdY@>GJgRYH`$EUv<*}wZNnqbCxv5Z%I*L&}2-cSHUJQ+q1ii3*V zn_1n79kpOL^CQj6Jpiq6){Cd*kj*H`d|uSAIMBeo8Ah}l%m}2*ZAe3&(N$Z&a%mor`V~Vh~KUC?MP0=oI%6o$M3c}QV zXQ@_U*Z;G#3h5MvDoJ9K-M^X1>Or$Ta~1F-wnr86jrl#BtJ^65Y|Cv_jhqz?n+mX> zr;yU5|C#EBHd_rwaOAz|6r#pz{qNnQ7$p@&ZFQVLOln zAqvYydLBN5)u(E-F+>Vsns6^QssBn|CY11uJnxk|bYXXbugNsE0$^UCFlCU0s8T)v z0Q-k~rnB5GD^%-&aXI2g{N+OWeO$-fUOey+XycsY2=r>+(EUq3s&azb;A(zl8R*7T z*Yt6pw+lSAV|rnukf<Nc(f{Ca)6P_SrXPYiw3c z9;xh z?u4(=T&f;#kp^zPFg`9 zf#Gf>S!O`rp;08bW9foxEl@L5+ZJ+tqm?hla#GK^xn&1o>W$ZK$6)o6g4Eg_8->WQ z_aLp7ADXAq{qQjZ8-87gOL~0BCfgm4I`_Td5l6!qZ=;NvIU{#gSJzy?-1*u{$7^zX z$Xw)yF2@^I9?H9B=Bl`%5$HXH@Vc2^tb4W0xQoMNyz$j>^qtm(p*OR~SL(B_`MJK( z-W@71H)mmd{@7^wQt3hGs&`!Kz|bWE4jA&nr(hMlgq#t-n;DEWzf`|?@85%#<|NBH zIkTs(W6PPJdtM2252*(Tdu8lVs1ej;_Fxev{}GVVSnZ`{3^AZjtd-Q18Euu+fJIH~ ziSG}2{Avtxc4iLIq<8VwtSRC_ZZ*VO?U8nA8#U^LxXJq(<|bSIECkg%Pr`n?fsxt2 zomb7TZ7^x3a91Nl+rN#;JeAna$Q$MHA&!YERr&3>L{7H;maFNSD)pdo%}}03pf9{i|jxm4wTP zSNnfQiA}|y}=%+H!8raXjfKtSzrS za?S`%W8x5D0DKpRe(i-A^E<>DSE&=Qggf6X>L=ntO5HaWrw5Te75H>1AO>IYwB+Qt zq4dwB+)pO${(m_Sf@T#y#PTK7fIO&f^v?TAS6t~gO8Vk$`8pM?j13**O&5W~azPt! 
zW%zxElzBtVbk}6w!QI!Z>&EN)KPtAV z0=1n83v1uU=cwZz&5lVpYG;3bb2G8U@Ap^oTdUgvTTW~s15ss>TLqAWTVf(Gs+xtN zuOuu@Oq+}@F9y`B!OfQ?K?fw4Xy z5u46`#8XHIqD=U>c>_ib^|j#>QwtX-E<|5ku|6vIqE`OYvzThFysA75l6FdKcEuVdoiS@QGxOfhYdzzICrwM1w;*rcC9X9uP%w`S*$0eC2C`~wI z0jy4=@Cg>D!l>P)BePMB4v($mN=LQtEw$q#C3!v)st7h9W0cZpi2Cm0f7wS*XJx+%2{?#DR2GBT^HcF8Rjm^i$u_zAXVd%ln zgbgc)EVcK8+_kSb$HjRc7Q|oamXtIrinwE?iL6*S1hdf4JdwV&k`4U|7)^2Urc==l z@vDQ@e@a{%9tP$7_@Q?+2sX1K)7ImW4T)ijQkcqZBV=I61jY%YKjJ+d;?eXKIysGK zrWVj*pY6N5q3(WxgSDqXZB*0esbcNh$z#9aek0^*#bbAu1GdywSblHb^ofV}y`vgW zibfsJH!C^mTlmGwYUhI;+R;bPaDn~i+bCmPANtk?2C>(g8JR>a1Qn^u^2G6U_(So} z4SVgx?gu8z=)XOJ+jwS`i};oy!)?^~3Mjmd60MKhM8B$3LlR$F+}1UOWI_K{V+oaof%UwE z*Rauz8vyKg`T900b{i!wrA9LTkFS(zX0I^oGgrLvU>3N?!=G2v0Z+XiJCSqL%KP%s z>aGJARy9Q(U@anJ)Y?Y%EE~ft+cNC`)d~FVr2MZdDRdV?5I{%9n2AK|_$lHR4N>hF zDoh_Lix(NVaM1V7f4{R32bJT7W)XH6A#L8YZ+ROd0P%T>46AB@Du*W`D!4I;l`$lQ zVD(HqJ3Cj+$hXMj9=GQ?FY9wMdsXh~$m=nlCL4-Bd|NqmV*eh)Pb?r28V1)9lsU?_ zA+{BQResboEIT&)ZnAUTWWb10S;0)BBypkaEa#z8FR@GA&-$%nn;*|rIIIQrBRMM~ z6DBFGoWtWR8ri~08lP`1+G2H#q8U?xGTM8eaar9~x-AmEW4Ch`xCy#>gCj^lw;>~i z2-%%18z+bbk+)>=rLZ+v)pzdA`r3P{V$%1i!pxylSN(w2#IMR{qS~yCZj*?>3m_Gn zEu1O3pr z(KQM2*xiuAzR}6qwOM!Xh~Yxj`8=g()g>a$`<}f${3<(wm*52q_hXm>V$dh36c|Ly zdCXx4g!{@oe8@)%Z;va<&$4!+{-kpc=lFdpLx|HFjC5bISvCP~zwmjXi=e8NUL zS5}y}O;4;WbYZGZk=@*IWr=TrM{W<$_6&9KimwqD^@Udq-!Geg>u>6UovY~mu>UYS zVIB74K*YuCqZ)NsYt}X@fh=Ug(#xuFZJ@=zWwqr-s^{g|sb&ZBMUtD?oEHi!wjg#xf zJ_E3Ekbq^Wq$sak1j{UA5Fs$-?wxVvSbb++=kVQ(cUi^)=GT2}AM6l!u6pe#W{pUs z41k#ftwBRj3iO7(jiPZR-)R|xVrV#-DF@SIsV5HITgL?fH!1w5<@?_b4u*HFY4r`i z2&+rypoPoiA(Prc@R1RX)D!SYMeWK#;S*A;Zv>!w1%SmJuuquUy$n;xNYSPTV81wm zw_ytoQHWG_EG8ac$05FlHpzILz2e+5Szo1Wz{7vfUX2ad@2MSMd6RRbSFU`<;ZdlW zN{Ly~_tk77vcvwm8?T13@`_bQ?2vtB;USsEjReoL4sxuq`S@E7Zu^e^cGAz5#1#2o&_)@zr2aOsdRyr zvd(RkYidQ&JRF8Yc3L*7GL~VWgsPq>D$zO#_=400AO zPb1!C8?)s5I|z?&;i(!>kNDo zbQzjO0C46_sON*YshkPZ_@YS1C|xTHcX8^5DYc3=#$GL1A& zNk^ozi{+*>jsyz?1S^}0$Mtnw!#chD&~?2YF3tTCc{K7Q`5dW%2NzAbD334_7^WNr zQBf{ZWvAV=l63ZVdzz=b`0CgspKj3aPVh9CTSy2XT*vdXF@{nNvp%DFEzJ$j z#vNwmo!c5pO0;6%7@Z3F)y3gfI`{bB@~RG`awhW)G>1V?oDv;7K>=)JeZksw4ryEdigCpTf&n~fr z+FHFL{Z)>C7R48vW`*qCK2#8{2F`7_IXd{?GH+CdUsAZ{} z&kAlzb8Xg4ET|cssHE-cYEgTMa9Dwe#r(CJr{GiYAlS*s;DaXF`eh&|z3e^?gNCX# ze>ePeTCjg~H!-8t3%^^TA$s4fE^7mK>VvOWt=Q`jB*}g*JjgYO69Y*!I631(HED;P$#bF1~RELF00GRYy zE7p3lM&n(1B}dWSP+LHhh_OK1i?-(j19qQ3rKP6aku#F1srr?LxF%(>ek=>kV#2+@ zun}>YOBAp~$X;ufL?7NV>mqQSL{|IgXuU8Z3xK`DU@tHXDB|Cl4ivH8R)dm#S|Ls0 zqn`r>17EyV_!|CNwBebc*Kxz&BieRG3j$~pWi{M>?rpjRwYdP9US1Hj

*VP0acaitbKgt*Pw&_> zL)6=1vBu|xU+{KehgE14q>PHj2*)tRG0gMZD228}+Gku}w&w>leG=+sax{mOc{!*? zzWMmk!ESaM!`*l!msVhXRu#>CfVgZ5&0`oG85CuT7=Q-OX{-vRaiwW6Z=WFtdF8)W zZcj2d(g^=7`W#PAbWfJCo2sz5P$Y%!MT2jL&v{G;xgf7C&H+K%Sz@qnt9CQg*+II@ z+WN`r@5ntih!K3Gws8frP!zBhGi6Zu^nPMm1LwoRo*5 zGT`h-r*6Qb*yCQMb%=vkQ`&r*$duuM>T z-fhk0`HN#7ljrUGi$MbIVX24D&-CO>QQiujn$5Y#nJ@uA%<6{!ae#d%?GX}*)euK_ zbTHp#$T}0_b)an4lP$Kpxy668|Dn&r!l3x4r*-)Dyp#EZR6Yq03#VY%F;0dVPVDO~ zL3o{Fz0!WfQ-*47X#@M&)=Av9!_=$I;EwMJt*;jVIOy42b?r7uRF37_ik)b9` zDgy)Z#_Z`HGyM6>Y|R;gVLBt@Fxd+@lj!xW+v`knz-bxiJNL}qIc>ZI{Z-^uGtdnt z)Pian^PqG-?D2gf%F&BeW^P{G=G8lG@Zpq5cCMajAtrCWrt18Bv~EAZ91+g2aQcQ# z6HZ7^?!_w^<+I&VoSlsidgV2=giA@fKDUZ|wKKEOCE9f>hbDc)_HVh`8iFX_2!or$ zjH=1dUeE(nzUAu>c6b7-jE~;2CcyZWJ=*0EmVR+Q6)NQ5^W~~T0>vk4(jxdbR!`=G z&so&bNs2N~f-SN5GQR@xs|I8Fm9I*|F?PVbbk|eH*1_V9P(|H`yRSP}O7h}^m^O^4 zcn}TRFyjU}aWS7jgR2QY1odxH>{^^f>l!#bPmlwi0Tt2zicI?eIm*RAdC9@QC9Y~z zgrf&k0x3h&(=pQ~xWNwBuybA^0WHKuq2vtrmb}R9K-%#pjiXZ1t#UCd==YQiWpN~? ziooV1=p_F(>V%vb^&4(5ZtD?O9|n>;hV+~?>w;D!8MtJK$XGd#1%@PGOItg9kRIg& z8lBI+;jf`+H{Gk=Q%#Z)@yp$Rz}Dt%V3(XO9OgxT5`YG~OwcyQz!jV-7-##zEjy}a zgS#@bs8GLi!PSgg)^X8{SA&{woBOfsZ!I7e_M^al8GTrG%er`Rk3;J&q#&o6cG;F0X9r{iBr;N%>^D}cuPA8w@^zSijP#F z#IBR3d)X_{rUC5N*t5oR`~*5&fnL=`;0sv|d9@dXq;dCCh+Guc*yqyRq38;Cduqz6 zaXbG&u;!15@=q#ce8GY>%ii&FcioGD(uJU{+-RpZ1lU|$1&O7wFToggBHURFzASLKjL9jD zs|qNy6EGA5%;)LHFkM}5{LMTsDT$9Z)wgFL#nQjfV6RCqP7WB-goC^q2CP@Ev9#LO zu3eZ{x4jyFq03~~%QI0;Wxv=gf7s6e_6^ZP_MKoG^5Y)+aW*c%R23v%p3I$ak{5v8 zzl;aUmZdG9Du~`|WBdA%vfw{;(>yP_CxrH>S9@E&cyBJ#(?$ht9%giHz{NfBX47SzrKO%Yt0=?1m3qiVzC zT80`c7H7Tq)(3rqdCJed3T+M+br-$))AM;j93sSIvG)3{Z=^(WqCjsBa2U2EiG6R|d_k9sRuPuA{{B*ORUzC;m^yh1G zrYf(6v#9JRm{eq+5lLCtIrO0LSqgof_Bh_^H$@EZ^zJgrcHB(Ur+#PMefd4p#dFxq zaOF!Q(v>otT4T`P1SJ^Cg`!3QJ=hjh=)RQT{x&$O!De#2N?O+;rWb!@E-%;Xm2eFL z&g(ckbc(Bm26r11iv0 zVg|xih*6m6(8nGXq+<5u4~;lR>pgMNEbVDi8K!q$^KY8+H%s4`^Q{{t9L`?T-@77^ zO1Zzjjkd{NaTo`f5*VhU#;3!H>NgOM2KvR3tWhPkH5X=aVs7*<&5AcKb>42D;Pu3r zOgk3W!ehPBI-CUPa%Ds=+G7^W%1+x8o0j#aus!RMsJH_2$qunN|1r<3IsVa zR@+_d$AG`$jBwj)kdo`pZOpksB9MzsZ;M@~9P9GF<+CKl@z3a$M{%F*CwWHsS!9@y zoaY2KPn%1c5(rWkeKXiUNXGP@PVY-#)YkNY?Yu`QN zM`ot468YBmA%6&R80hm_TU%)FKBB$%F%RZ6!2C8F*2LbkPGdszPBB5gyxm!WhHbAI zBQD;Wt8c9p4JR6&-CY`Wc$%bArFdDVd*r0H7@v7?$J4(lABFx|%=mo(Rs(UE`W`L% zF&>=H)5udm-*iLxJ`wGczTJoT;=2V(i+7k?8R((BqmP-6V%g8Jf8zkMxU?W9$5!H8 z-OB&f`HKqnn0T?yO|vj;6R4R<9p3!xhn1%~d`}6c4>Q6&68e3QK9czQA@6z5o7X*o zSxx6Yn0Q^aCOQAjh`vkzO}8argHPT3d9~l-Lhb*AA2toeoY3xW5Ae9pfF`A%!^Ph13gq> zRSrlibB6rJ5$jstSWKKc_t7w*Up?GTlczQFbXMyL!Mq=3VlRQKYxnL22Nu>xM%G=p zaKWq_ciU~JoUbm=m7`*@JbSFHP^b&L_@qcFGM|J_`P#%Hj_neBoe=77noDrQ>kMP9 zo)u-4`yN@aR-RF=n6t25SC(~>-@Ky#u?+l-Y@Js~<7JbedA+wCp6`F$VUy?*v^iPa z`G0nbxfo5+0#1h_Vftb+hBi#k>P}}svs!mr zSX-GJE8k+!1J*D3E#3*;@|<$aus_}@{OHYYg4kzLz2x&gyxv(0h%`bEh||Rrf8s}m|a7{q}f;%k?qNvwWV1c=*9VZh+75|6 zF&U%dw-!}I+7*pf2u0xZFu@#ZcuyMCxT%C`Tw2nb8>9s9n#kvK`3HVJ#`Z*FiijY4(s>*(@YkPcC7hGpjKx<3_HC-#r=(38FEo&klU!>sc>VSJp78eD;Lo*ljCkBH!c_(;pui18C-vy2!s18A7}j zHMlW#l?1-U_6btX*Nr;qdI4_anH$`$iKeyVl@&{6j(^G(OC_Bj8WvwH=-AEj2Z!0) zDA<36UB!0jfdg5S$u=zahDdst%fQ4r&kI+I6Fd&QxJ10?-G{rfaPyt+HT{?-nS#5* zIq+kM&H=wQrSrqwli2m_uEW1EOetVZ6eC6XnT+I?ZEMm&xyX|0A}Z`r5Wy>tDkis> z)iy{*l$JmULqr6ma>m1yi5?p6k2?o?SKK{4UMJ7h-*_QCvM0l$?zvL9f7#3>kj%)Vzxi9foAo9SANaJto|| z0q%l*4cG$4`vv5U2j5Kgtyz?aM2^!B254~~UMo2}-Mli!%hk1l9Kajmz>l#7VPCIa zEQyl5&JE@dv}Cp*$3E#Slv{|gAh0gHr)kp?x!oHg@;XF7E za`Ffnvb!EbA9wBkOx34D4=j1h2sN+`Fx-n_ z@>3M84~Cfz24PZK#$$|>hLxu?+#`1Vve-DLTqX3SE!uE-B3gRwM+5^W4;FPPo66xM zOxQvh5{H7Lh(VmRLx;aSJso6ez7l;5S!^Gfcs#M+UN|$fy&Uf_&~jH`=po~v>Rsiq 
zb9CJ<&9(*ek%w#QLODNFx-7W*5Ut3bU<+gMx~z56mXObw-WFE@eNqs5J;7lr?iwG! zYTD6H<#lU9&P)$r+39?vh$|nGfkX86?V0QgN<4ce;*rReZPd?FkLV72WFy!iWMt-} zzeJl5Ag!QL4}J%4=ejF`goEC)`X54@H3yf=FO>h3vm8htY-z0`>s= z^1dTKU=w$xM8Z^M;je0jd*Q&=WeKxgdR}xd&VmvBHY48^e=$scE1p<_g(%c_St?ym z(1`%^5atZR%OW)@y0kb=5X0*1n`%rSyILPH)M=?(?quubafH9R#gkKAC(LZ{6t8FG z6-eBi1glP;R)l}d|C8<+S{)kjpKYQ$%Rf^JESSmv* z#gHtVbf2W?Nr|6zWe>HI8)9#R`Cuf&R0XuuznxPzrhFoQKevFi>*{=R{F9SO;aURD zRS%IR`Kfhnqbh3~CE2!ls!5ovlTWaI9} z5{_$0>&J_3P-~~2N9HZCKAM(!TnH}MxKZI-*dMg>syD-Z>P!On!p!k^OKnk*ZS?z0 z+YTQHp|~^Ukmdr7Z6Js1p~8`+di99j`u6>T`mN^xirvyxQ6w%ui+=UzuA)sRCY8iq zqAJ2p-<&+z9MC*J(y7Pv1u2 zz_luf$l~8(D(bZ&T`w?TI)#Pp!LDZ_Dgvt*Lf+&6!UrY5I*5TYWN~Q#QxT&OO3DR> zR0+-t=~(FN4k(9GzjgE*!TD)uUiY0|K~itkV9Xd97*oR=+5 zIM~!9n)xC2g0*>!;ORg8AR@46^+3qptPL47?OA&d|AV*ZyA$qxG)&%6j8~+;$gnG| zy<*lP6xAha8u5D&Hw>g~(3bNTHnveO+WMU#1(@s`ScuhG{Iw@>%$;tbwz8#o`%=d! zREta`1(}FB0bAeRQOhtE9;guI-en7Be{dx|;1O|}J}xOgTXTM8{G^%lmL7zN-=tjR z#INfO`){PwhB5dW82v@YAh2Yp(O#}bD93YOd}^pswvzT(TG!K9jIrE`xAH-Ukm-ZP zRd*}r?JCeBfeF$s|D13)K#jUwYzWgPXzBxObT)WpCaO(zC}sHAQrEo)OmwWMD*ut|Cv|wv zDyCwu^hv4;FmxD9t^%05F-&ERJk|-p{tf=LRjGN2xP7apa{WgRnqNBjNik=|B6u!w zaC_oAGRr@4=auXYXl{vfFY1CC(`V~L_KBNKNffrX9=#&4 zC-rLzULMz=Gh|=i3pDqy(35-b;2`I>&nWQOiyP=8`~M8`^JvN%Alh;1B%m0G>+?Q8 zsMW~05XCxEMpu=?Q#yL>6Fsk&`$6>4XMV0y+L)Bo5R>@ zKSl|@sjUQ?j_bvh6gkDX-_E9UFR1vScV0FwD=xhLgVWb$n8g0IazQ1T!sNy%iiT^S zqNclz{+6|tzY>%2u`qMThcX;bgiE7Scxr(bBGG0BO(NAN8f5Sk==fK14O)P8+T;4y zh{2>prDeB1Bkv+7EP7I*!rbS#@zcq%bhq6a}zn!$o?~nnhha zL_kD(FHsa}0wT>&vTcZfNbe;g-2f4!x2PDAE;R~BUXdgeA)3%OY~r_g&$;)EbMC!k zd_UmF9y`h2Yt1#+oX>pbvjE*OI|V5jh7?s|3I}S$QdCgV*eAs$cU07^2Bi0HEle05 zRkL3x^Z3K%XTutVJ3zHC1>QrSL9zg8b|%1UNp9`-%ZCMThOX;=bhL9cm3tOc&gH6n z@b!oEw>!TPr+g7w93u|-+1keiZXa%e6L%lIKS!WHRBoL$c@rpH{%a7W47^+ zPPiZ-fz9>=e-*5cA-UxRSPM4ha)P!d=FC?k9!k(ZkJsy&^T&Noo@#m-xNY>j!AvU8&BjEFn^BIKj$x1E zM{%rCQ*M8`l!F1IK!|0-Is3I7*8IO*Z2CX15#Li;ZvdQ#hVwVe!S#$p(w|&VtUL~$ z;2DJ^rZonm;JmyssQ11nC#*H8Yb7|uCMocMSEq?@sqbG^OtG3WTUynm4ZE^?M9+b@`tcui}jaHh9TK637SeJA()T z$w3C$k~AW{K%+QSelf;a(R8r+hqtSL$qdWE)BWOx+iay)yhe{;$*P_0mdiA&y~6!d zCVhP!nC||)@IJOtm$O@Ay>iUe>Y?jvGmxk01} z>K}JrwRbWM;*5s9Z6Dp=u&}7XI$Sqdk+`tPtd&8etrYEs*S(r`GvX#TIU@w2NC-TSC?;_2h?Pngnby z$KQcPVgKaZL$QytVpuyVeXV-PJN89;qte+*5=9atZ(otN&t6|RZ~uowmVa*Ull}5r z6epwG86JB>N^!Ed#D6^>;)k10}vUH-q%{~NgS(VFcJ!i#zzc_Cq zXC|7wbU7Z+&J<8cD{^1PnIj1f#xetfnpVE2;l$SL-J(|p^@JXfrQ?9xW~f557D(J* zt`CvGq$9|JYD=JK-h*FDL5rb(=dmR>e)u_Mf`|=3dK*{piKtam;HZDsSM(hL_LR8E zPOhUsC(-J^w7%1zj@9O>RotqKHHux zvlIuvE4P++ECTS)`l#%nGX2S zY7&wI48zZCj}GCzYx&|;mP<4BHI#s6FQYWwxn1)(;aDHEribh&>9C)#^m%j3kMZm@ zEcZ;ULy0Y#bpNuJ5b7IhKmjf^$3KlV;AOB-P3*=frc7-`>D%@z6kge76~8a$T!ggw z*7{xZ<(`$0I*OePfbd+f0f5bGU`Q*9;HX1*_5GuYq6C~=T-5k9d*So*e=OxHQ>0Hl ze!Dj=B8~4I=Q)Y74H*k%VBn9Wj&O`w7Nw|uu|BUjIhP3&i7#S=dABuntfSw zkzMV1X9rfe=2aDaLv~mtt_zSIJT+(|4X@qp4;K0rl0)f&1bBWP)Ox?=ie>(wxt1v@ zKd-q-HafiSPy%VEk^rSOhOgC_%}oQn?*(W)CX?bE$iC`qI~0&E&^D(Lu)^vn?o>IT z5=-k_7_BV0{vhW3tkjjva3|m|Lct#}(sSiSaQg)|DN743#hG$!#yn3?jDwP&M$U2> z#d`OT*ko&dwX-pn0Xuc>b&5M>JswCeRqQ9io!?K)0ZkE*_h^LVrJDSfX!75G!RT#= zhpT+&4y*A>ce3^$-A(ptw(7861fV<2Kh}6y4xF+!=eoVz9f#l-1A+GGqdoh-R|q_)Ij@`JwQTJ;HRUMtzWBm1=c|Uw`>tf$YHQAnyHu2W8Ar%QR}Rup@@kns*nQJpR_p+Q zTca}AhZeCNgN#hU$l4%#e-rcVH#(aQv^+W*WRMi_Hh3p{PfRAB9XVk&^h7Mum91v;Q;0A9Mt07EJRO}DcZ zIV|<^I*u?H0f)a_UpKdT@cjj-ZqoYa$uuaE#NY#5zZ0N92hHsI_ih5IjmnOJ)(@iD ziKNBHa2i0fPFRVvC6j^3O`xK}+*(i;m^5Ig=R^+XL&Z)GwnjQxaGbJ{a^;!JeJARE zy~ul3a{Yqf;pi59PPli40vK=g{DQB0F2&K}&dk~0dc9jhHPMnZdH(jvfnP$?!879} zX=>A-q=IBvm>y>L9I7)%t)r1cR1O-IsHD$Jb~TcRn&gx`e=S#bI=qteulDH<+AB~` 
z7~y$jtdkI3>t{)@1En*inK}*7s1qX}s^8+wpJ|=%D>Pi)g;eRD0e!QNqXBD96V0ID zahJmmy-43gvkmJxItS`Fp?Z(+{$G%$hC-888l}In5#-i|eb_5;|GTD7P zg>;g9Y-mUSxjl@WExBwHvj&U1>SZIJ9&d~Zlxpt0W4A|3ki}%n_MwwGo`c9797@;h zlY`@~hO|NMOLn3gCB?A~`pS29PX9<=bxD+8hCq0W(1JmB&yWTX7>E{^4qz@vo<}BG zs>EHjWJWYr;cnYw8zxfGFLG~|~0XhR(NgzA@Ka8#7FBdnUyn;p~b?^)z!5QGDV>YJz zY&MOMA2e;91GTAtueyP&x?8|1z>GV1P%}~NbYZK(>J44P{$EJj4TZP@PeU!6ATx)F z*=0^_cW_|6-J=lCNpnYDk-7@$q*)WVGY)QZQsdl9Vxw63*6f2VI+0UMNsrNHXSWU1 zJ5PQS<&k@K@3(!fl94sP*K7IbCD)NLIp=j4lmXP|VHNo3jU~k{MUj~aOnst~5WUf2 zPM~mLq&j!a$G^Vf2lfxD>ZNz|w@@qnA8fKMH@&6WX0!7>gYV4S-Ndl+vJ0Y@-N#+4 zGE9!@wyp!hxCs!SGF8QSAf1mXFH`^mErmYgeMJp5v4d5cclWT@MCHA)H|lS-@PGT+ z{0I_a2lnCYPUIguv0ww6u8zHDkMNqLEslujU$=`IYaqY52VKiGt9{zn>1$Lf-8U<@l$zNm}R1Pz5yuqr> zRDTOUM}J71x}20@Zgx^o@k95mF`j{sVBB-PMwuyL4O4%)L^uX>!oZGNAG&X2-)Uq%-f-aoqlj(jr-zv>rN$#T`1C)(k)_EBAHvssA-yJJb zIS#}DzuY!kP4@JNa#&x>o8~a-(E9Ps?ev4c0l+A0WqsBtbQI?klmHxN_5hkGA3fGu z14jakA3qhB-<7GqL94XsA+hJ+%a6H{DJJdy@Qq>8JA8VTdUNfM>8lEtMlMc@oJc%r z2kBe-iJ9(I;vu{Ctd&O#MUzgW8NYBhV~oI1c=aorNv`Q&J|7pNcVFBtHFTm>Hn^}5=EZdVf%q!@WW>{5q}vQl2w4KnI|y+|Cv-5}ur} zTFnFN5+GwbYb<&eZ60EiSc>{_m%I;3*C=YkTZav5LNA@5w+#N|dUo|Zf1=1GS?g=! z_ZvU+|3oE*$y*f6m%MZDUWpJHk^?>r{5|26O?l*a!z8~+EioV>w>hd`gx-D$V zn!ry{Wni~Xfh)Tg@iH^2_Bv_Dwp<+h`Y!bk*RCrv`|ZE+$qWXl6Ii=gosg0{2yik; z>`V=b2)GOQBq4pXFM+ZROW|imHhgD%<(^dv5(uVG70Yui1eoXl`YK{9@W4ClLrvfwK)7r{n^L^PhD)9I z_!hEt@I0qSsW#>ZDSJ+wxe_%i_+Z>B{lLl}3&~$co$cHG4Wh-XxZE6?p#xykUO$Bk%cSXt?AkzR(!sb}? zfS^f*NcRCjC5hjeQ~!4QFW27zfI}h(0QnuxZi@HpU?Tbf-444X7yok6%8?pi2kW;- zZ8(>TTjv!vF7*nKkcoUyiucOKKIIt~x){3*6drD`O+QmOOc~(?) zxNZM3_vTGa>`=8|!@YecRCl5>(YO%ss`_u5>$xK5nq zV0pKyK~k*oLPcIseY3~ap(wkSAg z-be>uzQb7GxTZ7ipKX+%V?2?=KfBU+P>3&6-lA>|lN7pf>}4HCs2<>J5Y~&6q?NUy z1oj;tG>o4BwZVcuiN9P|t*Af7;y7+Asic{+9i%~Cclh2v(NI6Sxk{Eghco7+6X*cl zu4`an5QZA4?Zp$D(@CjBdhE`w?%GxZLwVPy!>1BQtX>?Ko7={vB60nc^46EprLPh< z0xP8aW=7HpfEb4>2J&AuaNHENE8OC;Tok{=o;hlNOjD!g~f8~UhAim6Zf-{@9Cd9)cG#< zg&a(}4>67cl@DiUws=LxlnMN~t3>N9-F^7_8>DORsX@gVQ?$XU2lBVRZacX#c+_HM z#WtPf{EPT2P}9g;*SI;->`im*?dsOr1lOmw<5o^$7p;pI#H9%3`A*25R-Ed(T^{b`N&W3A~^+E7|W#9y3o#_f{XJ6i^udTaRQ#=-LI(z^S_^G*g#r?p$mHR|4O%mI? 
z%GJq0TS5R#{nNpGBy$kb%yME`M-j+00<+&tK5@+6gO3;>K5SX6V0`fqOKaAvoo6xJ zh$93Fu5=vypOdW%{piAT+?fAsZi82YK-7}4LBp?IW%(|Gphc*D9=J*li?ShsRaEqAx!ttG9Sw15Jdd2vPZ#bGj z-^%ubHO6)1GvUF&y1~||KEnV=!e?U)E0OvcXvz9>tlFpGB|^Dx@5trbEf0PEsf|A7 zJ^5&Ic)Zi90k>o+r6TsnJddrvar7~}_7R?)!?|C4V@U`s76C1bn+-oeTn>iv2`Ph` zf==LsvzePEH?4j~59(s|!wRje(lO`v-lfMYTp~^eXE`=pt2nmpsETj)5;D zZGXA$jFI3ot9i&XXtV9HGnfd`tC60+Tp#(J$awv~ga7&xU*JpB*hzIrK6UlQOnU7> zlbQ9d%dHMyc0v8Mc$TY@*R_Y^L#y0ubwVKV+kABG4z)jI%ySewm+fAQVrNj<&okpP zwR#9x(Jqfye}=SvylVbJwUe3nelKyTeT6ZWCNFW`L#K1i!eS3rc za@}jBZ2YLS1|T>{?(pjZq@2H8D|{m)=7H!z(tlo~{>L@w|8))XQxX1z*@(sT0@oKk zhgo?_Hfn=pnmgd|0#N@2#acxPq#5aEqv%0V45qmAOd~0?xnI6JZE{1NSjbn^73k}} z@VfT&-HmTtZ-i`Iju#nJR#@uVpa1C^OvAnd!b0` zN657ru&N;B&7)6lEG7+=8K`|#Rxs52%4}!nR?wm*;O;#CU}d+T^AUJYGqB%tLg02~ zqph z6b2~{{^=~gGZyIpvW~YOf}Zw$R=#Ipjmc04_Q~|^bvg4d>*GJsM+&x|di^Bfs`)u> zHN*`pw=S^16W|}ZgBzXyPPzx_51{rbay}6m&;~h>xZdVc&D^{s`F~#eUw75uH}9gX z0v=B?a0|KwcmkTAF9UgxAa8Lh)Vs~Nz!PF;09n>XVhYkufcv`i4Hl~bd^T5QsE{8L z8}4(&AvIO~)@@(o5<8wRKC*!Gj~Cup51k}`_p`~txp2&JjyUg`WepR$Ns7D{wQwdW ziy*Iqe7ng=ouu0x&sDpqEw6>@9nze8-WS+~9eL)m*BcYR!L=h4?QYrKO7 zh8r`+GNaV=(qAI`>ilBVS&$(rRV}6zC_Q03_h>p6uB29HMbzUQ3yysI_#;k6Ad?bi ztFxJvD86O!r=aD4FamuzaLhys?sH^Q0D;h}J2<33-*j>D`^CwLp6j6_@u$W_@y6~Q zeM#}yTA6(Cluf{j_RxeR4UmB>b{nW7U7)CJ2=-+%Qx}hGk%{gj(h_m6xt?wr63It~*)K|`h+UAn9Tr&0r2)~Az;L502|jYyqdmG0 ze{{1T7m6LR^7R4WU?>w^TXmuJp?|A^9@n*Df?hNfq!$eZXX&k?D_ z@sMj3H+N3)@4fKyE3+zo{qv@Ow_g;fqa7;VAk@r4Lq0OpVGt$ClV+Kl9H6@!p`5O} z1PFX*HdsZ3IvW2*+Y)nKo@~EHLI4N`;x#GK0*VNI^O5-pyy!VEstp22wDmzPf4N-i zd4DS+Q;#?vuo34TXotz_gEIMXRyj4?${m1egO||__dQ|e==AM8)n1KqWG{-Sw8P-5 zn#_DO>4u&xlggt2mP-BP>!V}3f!k{-r`!E}@5Ty!JR%hkaO1MIxzwhNjFJr*$^e@L z&^00xd-nJ!y0Q3IMQs|VZVxx-1E@ugTC?PQd#my5w^Eu|85g!VqsvhF zITztGIhL!UjpytaC2OFtRer(>HU#6vmFo*Si>5H_ILgGu#xL_)?R2P%u%Xav93mU} zX6IKg|IdTvWeEnfFHg=3&ijFy{aZaS@)e6pdj6>XLDV3*V(G8*LXOb8S z0Vds~uFnAH2rK3&KI-clt)b)ulpD(@T~4Aj)Hq8XhcaZY+3O#x(&H3zt@M85!MhVz)M zsuBzFO;+h@5KI64eD@uR_%zAuTuvR+O1dy`so74IiOgvrhdq${^=+KwLNW4l zX9ZZc$j`Vb>r%0&4l!{e)B6IY3+?aU5sAL}d%pSESP(fX3<7fGOxDdF9WPLas-yF# zhmlLXbb{H|brrfr({-k{Gt1T?PxG{U%67+jiIvUlj~0sgBkwH3Rcih11s+Tkz%a5e zSEMan*X_`{puMPXkP%eGFM(BsdG~y`P=I0-v6* zDU=s-;+tFRas(!2pJdjt9SvSQJw@6VadNSwLQwYy9$w#(6PrUqe!68ZXHyTjR!0$JuuBx&2 z{pGq&V9RoT0Q3)=ZO;7dJH^j=Z$r1)_+Y^S5f$k@DIT-F1CxPS*<-TaJz0*D;9Riy z%VqoThd?GY;l>;01)kl%5a$G6W3u-3IO`)(?BJ=5i8wf>SbL$eVrFK$n#{4Qm4I`G zX(W_BjyHdB*utsA3itb9(lMG=_+pjd_*CY*{<2F&h0KC3mBA)EnUQK` zC?b?Pe-_zcXFcIQLBDvGi~-_Exyu^jb)f$a`9UWF+NU$N?fi)NuyPDj?%Soram;|% zE!&gUsRyt1+^^n2VMOav(BMtN!GhXLIzw9E!LMY1VEX{_2!PQtNGEWLSD*CzW=p^tSQ^=#89*b8&^Nf-glUWvsYX z>TX`4W-!?_xDkhcJtnAqV_!SXU%8iDl7H3TM6k(7jaoGr1&A%(TF+7EnCNpof_Ovk2=fRhQm9G*eo&=J2EW2Koa>wVd zZ)zW;PlNzv3=$Z#L?mvYBCNk?0%wHtnwSj{`t+pno_>klto52JdALsYQ_vZ)s!!FE zJkf`+-d4`MAlS2GeH|e&j?JqS+IIjTP~9V#c~bLP#i@ojfzzJ*#tS{J;V(=As%bmk7u+gBbdlu_PARN9~KcE*0*ClVd5rKx_fN|bPUF1Dp@Ti0ZT zv@7PPrXt7FB$YIU0nN%uKWw1qo88Mg_Mm40N<)K|g(k$!^)FYCA?TurK^lOs4zr^E z_YX}HqalW7^QSsevW{Z|;1pz2`q5}&0Ca9EVY_xCrCNskc~T@h#3nELfQ*JKt+-3Ks$xeV(6$r^yKl~R`( z2tsR1jwKw_*Nx89z0i`_W=>H#U^Eyiud?t`R?hmvaclj5-qrrIbb0R^D%=4CYle}B zNG%Cj2Jh}MyyV*zr-ri);a_WwI9M8vEtm_ro~gbd(puS)dq4YA`Ru)yVmo(D{?kdA zCv}oE0c5&Kk`unl{#q3(tJtlmFy=hr5hWMB%nasoc*b{2>gV9S&q4;8_jt&deVdv> zjx$g)(CcinX8{icsb2vDRik^@Hx*ufb$qiqH= z1E2nKl<5)7TO56d=G$R%Xm71F_eEj+{k8vXkYF?HG59Zoln!3)?E|x( zKqJEZoBGkWRtcm}K^SBRFAsetb>TsmjWZUmgOk^|X~A^i%FV>{^P=y$*+wWWP&$Al z?eQDCV%tq$^$6e>H-*`3`IIi*kEQ9K1r1t%dBQ z&}mOqg_}+Fb%~>Bdv&vRo*!ZkK1P+Y0+CT55b70&`pA&9k6<@76~j*8N2ObgZIg9R znyagz?0=~u&~AU^Yp3laS+NxA_+-590;rpF!yVuK*=;6lw?y<=q=KV|EFf=E1${MP 
zPlMcX6P^zV&gA_hfqSkZgLiAJtz}O=$cupbdXXM*bcCO5k9|nbbO(LIORxWDzTfSi zL5+`Pz8vway`BG|p1}43rx|s*aUwv|4Yg)6QzEF=5UV?5%%x(rdyKtW8Z8Wbr|eMrD-NnicF^1ZPD=Zcf}+tO{vXPLjn9@;g}v!kxM6KVRCSe>@rd+=k7pr zkH?gk&FOX-`|*dFk6Wg!n>KUyT;?ikSTF>!^ejUI8&-}xq82YIhE%+1e0eiIy z3|3S`Q4V0VFUTyQv#IObFsYLGSC@rJ95-}XTq&tiMv#E%6EQ>)oNclBxat z_YtZ4hCa`mg1%0YlU&M$(+L10nN{yYkT}l0gD%mxO}eyq@Lkz@v0<>obf{$e1CzM^ z$X7)=I2M|YnF6`O&O$JH;#1t>&Kx(GpRK0*_6wvIR`@FYMV+h4BQkGAvFrBUN3=_( zua5N-me=;4wo(U!n1>8($b;vOgV!sl@L3G&a*kvgMt=}ZK6K%RKUSw`?)G+Tg4SEf zT`A|!$lR0su|r>O?=ncjd}Td#u7WS{LsD21m8iVewpb(M@QdFX+vK}X{rKU_%0F{g zS-E8Fj@2KajifdO4oZp3ybaRka}dl;^c23t?n_pzUS{`3Ej7iIt9A47uPiS=J^oLw z4cDIA_9d_UrY=Tptrx*qbHaea_bceLk+BI&?K1^by7ch@>jvfKBVLx_JGC!ThI1FQ zO7jeb$Au3+fSP4YJ{;T;2B>%y*fG`$=Pt94nhdbJXq+DllQNIgDch5Bfa;NMy}bX~ ze4soGvZ)3S*HjPB1uMf+F0PkcR?FO=KR8#5Nk@a_*Ezq<5h)x7M%S>UY;fg4u80DJg*l#J;GdTGFY#*=?Kqo+UQ zI-QCz&^%$8Uyt*T+wjA8VGy}RCgSF24Qv?H1^>&4&qa^@53>eWCV>n%qy{s)NbwYV zZm?F4Hz&as+<28xA%(#n4aM|3OAa*EzO@Sj>LUHWa;!!ukL%N}kGo@C;xVsD?6?Vi zbSeoPs^WYpP&SxGl%1)~%+VK^*l-f~HCR&ii~h9W{v;VL1@e3s;WW-&exMtTN`|sH zSBH=`Sc%R&Gn;!GdCTUcmJD9-dOaTQVyn$BdtxFv5oH#BDbTZuAp4Sxv7f=Pe>35a z2OO?#oPyMD4Bjt2YfGI8DX`gnzSR6te?ulDa^tISvc zI(bDnYg?>97i;bU#rFm{hC%HQl@s6CVtdRVZDVDitG(O#RLWEdIKvP|B(u%I9)A_y zdM+y5#BvCEA9_cpv;6tt(eq4B9YuWGl-EL$2vGCj?)0Srh#PPkrWBgwu2x~^&Li)J zuctg^?x>BC$b1>_)zD2`doDWH_wSdrqhPM3SV?d`$4{~^*!0q%6ZYcz6-s%3NW{}W z@(iDxBm1I`t?xHK*NOuBwl=n9l7iM~ek@nxFeW66dGjD=XtYD6yi1BWAMSDpr`yic zm9=v6>$jSzkjM5PD8D}dExKcr5rbQrmMWk@BU(i~Rv{o^r z$w5T}MdynU>X}FqjS3g_KMus6^i8*g-1ComwzXQF@JE;9cTn*Z&K9P!*(_N$rWc)w z!?du_=~KS)->^qb*^l%CQdu3Yp@G_IDUY{UMGr+4#7*wcPwiB?6&(I;4Rr2y`NF#a zZj<<-MHc;UM|X7a3<`AnAfR*w=s2K(`!Ue^vH$;nF$}D&bLHr%atiWuB||^CnF{xp z!;fS)asTy zdq9a-^*iLnjN$W-uGaIj>2-ES9@T#VEKV(eK%U#5dyikWHH1}DoJwcuPyh4u`@F!H z!Y?F#u@=3igTm1pjjYvRnKgyT)*WHqj6R7u3_Cg3o5e_5irrA*t!n*wzwA zF)aU8-z$&Il%8zR%RNx#JS&hx{%G)z+N;9C3-*ALM5r!ihTb!5tVtvVOlAu;x(<(EoOZ19t)F zH=siGZ*#c42DlXz&O774)B=n|6u4aUuPoj$J-DzCE6D~*DxfFVkIHgmt#m`_&YrBI zv|zqEMtT?w(X&cVo$jR6nHHvHboT7HQ@)GIyS481t1|2KqSEy){}NlfzT(mE6YstK z3D-tO9gj2;#;?YAXzCxy`JO73^O<+mGWWsclb9ZkAq%4KjPj=uwXip@vTr{QZeF`a z@D>`{L2~`@xnL)6$&LD7pu|3cXFq`0&s0O2yzX+Yc*NbuWIPR>zcxA0;AEBy69 zC?HOCFkCdp^&pQRo-kFayXrt{o0$0cukh#D2(+PSFS_ z3WYFOz7}snuZf`s8Y|+o()N5xnV|n@vKiyk#sC#GU$|w81lv+mpmffKy+tPKu$N{3 zd`$e-9;}y`nfA4|$8T=1zzGLH`aP+a0Tc{Var!mQ5~GyfyPm>UL$X$`p(W32XDvZ& zI@~O)JJ5P6{cY-x4ZUqELFC6kYY#-K-M(F*_7yHR<~ayO{pC8%oTLE<-8Hv|f6knL z{m6GU^3r|7jaU2Be#r{-1N>-|4=dpIn6h@X$_N{YbgZm`%3C%t&@M78(z>K$ zJA(dGN(ODAz+A;a3qZ^!r*==yO;9w=wjzRnx7W}V4?AKKd4wQ_-M=?GEp5u8DI(37 zL_vu9whY=l_uwq}R=63N0cGp7UTKD6w=W3(m8RvP{0%ol)~BeFZ=U9AGrPmj@=yfk z*LEDZ`0j&e4X==&0WKMLC0`%bgiQoX^+85_r15m#c8}kdhilZxF@r!7xq@tes-AG( zf_L+H=OCG3zfHs&oGEd5vlw|Z5n#L~y_cd@QyxtpIa4C`#N@O`*?b*1d`kQ2YJpPU z=WuY2As^I%>gcGZ_TW3#wRL<&u|YJb&U8UaTf zC&q!WXQF7dxWX?FdSa02wKkqe0I;$ba?W-`IhbrHcQft8+A63BdHx7%ppBKBOx)+P z@TR4Gcrw*4Q{~a9pnLprp+|3JRflCv{1-@lVA$Ke-Pj~M*5!s`oG)nFS%K0dW>-2B zu_4DcE)7?txTpo6F<46AJ7~5$SzNcp@#)VP{#ZQNse1izdM|W>Svs3;$z+OfaT_+j zV{t(1OM>-N!|GQ>=O*wAO#xynxcZV16tIp>s)$h7WLJ?BT(t6e;x51H@==a6j5ltq z92sEI64lEShs`nemxK56@%dx!SS=M5mVEQj2;>k-#>IuxX*Aa9>($!;8obw%bvuWiFS#q_vK1 zE>aqIlmO{vQ|)W)krrr&iUbd@MEh>U@%zc~=k84tkh?sAk%K#_z0PJ_{d}?+uZHJ& zccX5Lo{TI0mGE`4FCDUQwqy%uFcjsm!9AnmAI7IS9)t2K-U@-&JC|4D3;IvX*3SOq zpM!pm-{eRQi8dQlSmlOc#w`3}yQs~yex#ixq8WRdHyB50a9BHdG(X0M%Uq{@OI^AaQF}Nw^DmeA@EthZdG>TA@niN^^Xijd zCa-{k2vEZNNn-3u1Y1LZVh^`6n4U|s=q$cB)4?2vk!tyyMvt#qRW;mdd}?HNi!OR< z{6lt3;gzHMJd^J&0dR*Fp&@Hv5X0H32?e9!UcJty=t&ca%Y;tDiaYs`eSe7M_`5!V 
z=)sIDi)4}C+{k266RZXUai{#tB=+M&I1g-7$CtKznA+VapO!kH8B;=8Q9B~OL@OI- zJll0!DdLR(WDr@*V#fD3iWX_ICgD>6P4{)8R#M1(OmOJ|l><(c2T7~~&IBON6*6$7 z15l)WInw;29Nq=UI^hkjj5rlo!O(@x$?|tAKI>7{e)8ERj8U9vY*xCf=jYw7vbvWi zi%MzKY#mdi1L^JKz+3itYYWprs`a3Z>D&RO1IJl$#Z#omUgGFRd0(=aF>y~ zK=xklO9ho7G(%def%bjdc{KZhPC)aORhT`tUY&;->{XcRRy%J64 zVg9#EC~(Pxx~L7NjI_Q5lZ{&89)g&>fRQmB3Vs1v-bzx37EmRiHUSh?P%z+K^jd%r zn-_k+I5QfzG5J*1khHe}XLm9Z>i&4?B*K7 z-%@8YMca?H*1nBabasf6&)^+Cf|}s-k>S7h&T_JP83oo^H@7?xkM7-pbHTY_FHaf~ z6|kf(RgI_VgC`9WFs8NKJSF@55+!$>*m~ae}Zq!N&mZqGuCku>i0&dIcP3_C+3k`|YW#1wO!07LC=Bf3)&oaoh5FW*ccOnh_a z4qm$N7Bh#ka5}RvYbN)+V{fX}c1=GYU2y0aW=f5=Gw)v{b)9C?6(?=)QOEJ=S0*{H z&#fF8vvcyYaT{sI7C)8E;MKNqH7{w8F5x;Dz2`d&AnS7g?b_Ti(Y#2N*Qa`<9E9SD z&4hhn!eOl8Clf9;(Wi%&ZZ~8w4AMAf?LUQA>Vl_73c)-jgTLCroy9;%JIo{4`m9uv zDT|rRWJ)yFMS+5<@xV!e)3J5RHdg|uJY#clAhLv)>u7E?emv-z$su6Jy7B44cUe6Y z%3^<*e0kY{FV>d1w%hM@&kG)!zPh-73zb(q2kQHXR<+nX_A;`!(*FA~6=wG}VbgwI zllFE~8#Us!YCezCCDY_EKV5blCtL`&U8B-5c#*8n9^eO^i?yARPqCN@X z>MtW_R6$1rMI^e$KDKd^_yN-wCk=9*2s}GEs5Okm%tYEjat_`* zvo7V6<=_v5GXHWAD)P$x!1<28#k@Fu@Qo)ub&P?K#Z($DFk? z6ZPXE=~8=O@QF8ZPJJhD)YM$wJ9Hv!H(8mbeQ{Gh9a#_M+#|6TKFyFxG&On(GdX~j z$dc)RAd70`4d{Hc%mmei?v2sr5YQk43mE8$&U`E^nQ^RT5=vLrqvd^;g(*a=-yKk)V zKGV^*-Ucut-|~i)!T>>4$H2Sbtgx^T94kl!J8|Y#HG5N1UPHK?r*Lmb^L%@u=mXqP zWy8F^OBo1F+gQY2YKpTdWfAbXAhahb!`W*%?0tzN6crfyjd^?H==P$qE6I~R-04WMNg2_zd`q^MH5n5deT2CcP1^ZgnSwW#jPd{3XXS=d$*Z!*q+&hDd zfDX$|mq?&$1xGiFa_(VRxH0w^OBV!usfn+IbnOQ-edWiM_D#>$-g5j(5?!|J5g-5! z4BKF`2=WK~)^1K9;4=VNrdNMvM8_Q)5GN0JTC*Z^y%h+Ds#ECuytQ5rPkL88u#x2t4YN^eId7N9=$E;6G;B=BH6F`wFqx!g5?2ln) zO;@Pp*-sw4-<$OBTzZqb{e{Y@my<*@ZzES$=VTJPg z^y?8y{JrTvYMu`|?YKX*^Wu!bYw3UlD;%7-3@-p-45p69gcWJ1EO47I>_v}L17~5T z-IOmM1e=}x*mwJxwHQO*wxlJBHoqXZWp3qAp%9^AuV90vaBX=s~ zd@{6t61R%hN9Bh=y>I{dTFJc^7_sZD#VF^Z7a04@5(nQi95d{gBWt0hDCrMbB;7Ib z0@s%=xz~FHQ|_2!UVQ};sa9l%3Lskdl|YDY+!R#;xe1`;eUFSHVW~-39_vpIYI=tQ{r&x1#rV&3@Y-aU%8QuGE@wzLnZ8Ed zf%ygjMXCe90Kv&WSty$JN1RQ+@`i|HR?pgK2M|4bF|JvFv%h72-!j5w%d|&ys>Rw( z&b==WAQ}c|1_pup6Qt%p&yMlpZbH4fI6qD()N9GP3{$!2+O;AmvKUHH!q0yM386M zgGfWcrEvw7@=ILrUi+N7BJ3k|;hT_6D(5nGUpJIQVy8IYVBKy22RNX5_VsDw%)i0- z?p7BH4J+(~F`keAn!A=9A4u)ZkA8SQ|7G!crRyYcTG-}+h}A*7 zg&xtQY$Ch{POTUNV1l@>aw(&>KDa>c8 z`Rmz7gNE=z5<5-9on@1bJ_Q=27890$m}^^aW>6%(tJ-HDItA&|n4wf&y|Cqxk(@C~ z{_&B^4Bqgvyi6)7uIEAv(IJgT_+G?LUo&(}Y z2G|~+4=5K>AVs{HvlrIakN@?ctqIwF;gipD94?J^T3f{WaZ>o`;k8Dr~Kx77Ke1B>H~i(w^|t~kk&o>?qf4VDm)!efL(ay^cSZ;A^|Rljze?iZ1Oe)DmM#02OWOy0{{DXJ$0Rb9 z{hNw3;4?7rTwve&I~VXPioi8J1#wIl0vuSMLj^RkjrS$dn*qVA5oKM=|#K9SK=r?5Z4*?YrG&RO*eVCN}#5WAiLj~&H`L)bQ? 
z)$YkAbq?6Y``Wq0a5v!bH@lvJyHrACJZel#Sm zJo*PiXa%OOsZN0gHh!u04ba z<($CjLAcl$X=lq_jYi8Z9{eNdLG)%x-f#09W26qO zz0+EAC^{(h0WEGC+(DPu`06mNWA_F0(uXS=SGkIe+-j(<9W1y={3COLM>KsunbRw~ z5H1x4hE~1qKYK2vjb}Iy;WQA$dmpIO1=5~)b(}f8V%tv=hZu)IT8lnC#)h1SGP|sY zmZ+z(RTJSS#?Z{0YbncwQ@c*s_K1JD8@0{*GfNMvlk}w~t_uSLA_O2uM5Ypw*-8dNQua4cKukla>A~>?7IB#N7{zzY^`m@yxU1mWs2T`+B#k4Zz20W z)4CUv17^CN(xw`y0#&K7O}`&k+|GcMaG_wR4LrS;Vr|%GdxO_@B`@T3=SHR=mC}P{ zlTQ*a?h%jHkC*(=&9mTVImmJ_HR(aa9q2yaUhQ+;6DF*uIYrj^(UojhD7vprX4rXTnLd)_khX)MAv=jor z9f|T8(z3PtIJBJb{n|{7ROAGTJrBX{2{0O#N96|2@zt@Z^vMgaC&(IpiG7CG^sm)& zJ?QGWsTWkLpEI-92@R{Pn+aY8x9a4>t0Ro4Th>fGKc>m4%M zc)N&4D^=i$WKQAuP4CJ0t$O<6Q;|PzMnB@XLUJIdOaOy(0W5vg+1cfz4xYGcLt_15 zFs)EI&V1l|^5@Fb$>Xlqj=tmQWCeUJ`&_rAIpwQ~^DxI6bD|nTu{0miF{z~N`aKaGU(hVVXBew%}$P6wU@CuxmMh8f8ynz zlZK*qg@lZ)9=-Yh`uYy2rnYTu?1d{RN8|`nMTAfyNN-V)E}$SFE%aU@O^iTtEQmDe zy+ovUQJPYsB2of`jsk*03Qdp%#EqMrf8l-S-S@^D;~&Gt5XP=+uQk_PbAI#trk!g^ zwNQ`7c|h9@tXTkwd7cr~rz$;5&!CnrEb#T9auqpHrVG!#wr-)dUT+p~C)z&N_1~GX^C;~CbiggQa8+966fj?16AB7*Cl0Mh4(HNI(-S-!dfph2_$#6sD zX-s6eARfYCpjhzwhahRFl7h`J1W2`Lh94m-`~hv*&7rzO<{p0tpTMQ$_1;DXvgCdF zt=6b(7MukQf~47AJVeHTq?j<4IPlucYipbJ7%5Mb`xL;FE%U75(WX;UB(G?5Ww_f)2m#$HN zldQXJ|CVpl%<^FwWk3Z4_I7X>kbyocKMQ-VTTBDVanuhLytE$cNOmYo{$>IAiwdZ{ zS0PH{zsHRb*PnITlbMA;Wjcc~@i*Jyeqhjf)&FzezYi+>{~h$tmB2wHCa{YxIsdtm zl*Kgh|K9HZ9`w&+{&PDUIt2Xpx_$_Fk`LWM55}KDtm1P)P0g9v^cMVVR%z%oencmx zsNIQ)q~X(o{j~ddw_rMfn)fKF_`OHlYmDf}ZVNp({nmE_D<-{V#hEL6l#9Q!18Z;v zG{E_9?1T1!YvD||)82|PryPg&hUqD!B&_DlhaPQ(_e{GT|IKP=@#6Gksey){JOMGc zIbXwR&_&1ze9t|gtph#5tfEey+*W3ZCuc|%H2IC&b=1G;OM3b(Hrv65u&B8GmZB3W z7aPQ%%@{aAei$g^_t+bQYU$VymC#G*8-+x^h~4U+xNJQ7VtLVCwfmy^!vN_%mk}Gz z>)8)h%7i)FKa4Y;9)LE7q5oFUi>Q8UY3+6-M)|S~- zj)f|rbw&c#5rEFGjlr3Qk_^uQxf>zqypgdWNfY>-_KKZ|4{1^ieOeX6keWBW+GoT+ z@EezkVg)*PumUO>F~96yrk^HWP$w^W;WJ)eCGtpqaCowMWsSSy`R$edw+NL9Zrq>v zfn%Wm)c|lTg^))_Gam+dw_jmIeqkP>i4}+Rd?*Z(uNJkGQ~6~ail|E+#jO|+MG(7Q zASbVscUcTK_az_-7%}Di)*PfWfGONnd~D(Ni~$KWP)iH)qhFVCSgaAtGL*{7JO_Vd z7#AM#b#$0n&bABBtG;t@oqWuw<8!-s6D9Y?eRv$qtb?F+gJ2+#z`H@n5X`g+66`gV zpDAm`R@joZPN}3;x6y}%l~zu%C#QD7|3=I+x9seJwr5~1 zGew)Z|6aDrX8&!J>>}o@)vq17AA9&pHonBg8kZYhx1#7KRf0~_0gV^5%h0_3P)%!# zSLM`{7?CC@R21t(B`Yra8e@XHUGEH^UhH`&d&|{HIQ>E~;#uiMijLQ;6a*SPnXz~N-t~noV%i+4yBr*9joQnrUfjz_Ew`9)d(GQ^(I?b(P+o`tW7}CgKsuZS zBOImNcMHqO3efJhdR-KELcI--ryjd_-TkAPni1uZ^(C>iceM~3p=L3LfI9E4_Q_R?+V7iPAac7!a@feAtS!cPbƸaxxnmWev0W0+!6#ED*Wk!e2niev=|Dpz*@$#L`J6DvIKsUg@M+hryOj zPKj00RL=gSgk=^}xnH?heOa}wOw|M{79rm2^&=4dRStumXJdmlFBYQF@6xK$$G6*Z zNDi+1Mo~*&tlkOkVo|g3>0lkpa7c7Wea$}w>I4+?EtyKRZ~3X~bzw&AVPR8#|LW~5 zG!RR>*5~;_Mx??v**W`;=F18&Lv~jNPaF>7(;r(4X0Ntk9AqBKAR^l^HCTMF%?;n9 z=%s)wt0^W2j>MSwuk@yf6y8-XP1#%nu05E~qKUSju~V$TN@j)~bH9^~SH1LYl#G|> zT+|k?ed<94k}^tvq!Qq3*23(g9CFighDnEg2@$g4{yN zrX3c4P?q;f<`~(!zoG`oU%R5jLAn;|Yw(vI0q@Y)mSk zX9fB99(kY(m?+?zu$0dGay!``>+S9&W%jOD!1VGtc9Hc11-JMSDiSxMmhEdPT`)Db z-F|q3lLY#zFG7*dSf!1N4XUqWsX6whefx})w6zX=%zS%0LQX=%VmjR7VALt82fYO5 zJ;5YPKWsJ5U#sGt)vH&nx+EJ|%2@mq)%j`Cca#umTmV=Bh;->- zPLlLR5{emzVkY6xm~LVpk~GC`yic!fY)_Ql*wd!VC#?i+6`q_=6n9=>r9NaBAg1?5 zfgwF01hscCep&qn)LdsqtSAcjGIWWRKKP8JtMkJXt(7(QJ>|CcpVTw$n_`*pYve9R^|ZX{!bL8Lb2S1XVuKGl5xWDPaW*pkaH9XaC?{l)p@QH%H^ zqPZ55Q83D`~-v+=+`4 z>VcPPd57{pcAmKxx_|9bq7m8&Jm>B_DTEUo1_>#6X7+`y(yIj;8E2_ph0ZU#^PS>t8Ul7Y>FIvGiM^J&FOwiN9GhZXaL1bQSI-`)ye&1somio#V_{$^Nv8h6U|Q* z)~;HX?*+jwLVmtykPpcBxPYth)%1uTocZB*XS$psc+XQh{JGBIJ<*$9m%_qt)^vYU z0t217DX98cV>~5*mLRn7xOH;Ak9RD-F5d7FRcYMr>ieS;aY9I~J)O7jrbhw87z8eQ z8kz#E(E#r3rr;wkLa%@>_P$R0P*rkaR8k#X9f!E=Se(tn_N^^IryRHv%0$q%AUFC; z$QY7^P{21;QGq@e+3Ez)@>~Wc?+I8jhNn5rBorQCPTuaE4!=HiINK7v)7!XhMH&X3 
z2e`UfSp0w@%K>U({?U68(q_t-DjaiQkb5+5JTMww@!K%wGRo{zN|B$?%$_E3diLMB ze^b^@+ytod*oV(Tz!)iKK|E!+EfOkg=I*QU@F`~<=e8G&>Q<{q1CHLjD(H}N`qO3K z-;{v@N`MPm1fk8lr2soZ{{t=zJy<{^Yxqo>yo4SmHgD7mA{IgemXwr2pcoRSHf4b7;ovn8RYKwi)SwWeI#IUuZ?@8z1)Kiy%o6+a2KQYzN}H7= zucYh0VsKP1d>Hjeu6$FjgC77a@bv(T8BHPCCVdM^EAK1+ZTLuUy!m&(R27>$yTI&hTIF)ftZt&!t zY|9w^>7#N(2D5W~bC39Ff+S9WqmqV@eWwW7U?tl|r$AQ4pSoFY%+02B>H4(`@fybb zTvT`2w`a@0h&q@VCUvC%maP}oLxRG#{>4|f-xCs$f)>EZW)XE%vF zyuor3j&_N>q2{`dWr*^gaZ#dnWY~^!K`RK@*IE4**W6tbRxF-L#t`{{zz&N3d0&4M zFPY9m;Yd%ymfTL>>Ib=i?+>3y>ySIojc1DQ58Xdj6jPtNnws)8$(8-fEY!&f0sb{0 zYSVTd&Z61!%_`umbwWgKh)JtGF~1B5qb};{$D}nR_MMW`+R#;~J$UY2vh*)ope1I5 z9{>?{rDY{SW;{M8!$yWE41#V%JW*4&ww>Qja}h6qBOT)dJpmF zzL?9UG$J5f`$`7l$}~cfdLsBERXH=59QCd|iQWO{k2pE$khB6Zqz z`|Z5y!MJiyBG{|ZLuv2c{P;1lfbW>iWAypxEyOEG;Ot)nenUFRUJiIE>d}^}h zApsrzZS}1CH8=f9kt4-|cg>zkc7SOq5=@Z7xnTFP0-9KXWLpM0hn^sbb$&;_4wD(3 zn{ig_s5JE<4|eqQVK-|{4Mnq;WhW8)lHVMxsC6Ah(RtuDw<23?El_bI+ayqGhzifp z3&)<#knmIu6O|jrM`t~oI5jG@#r^UR=v~8dIJj2i;0JbtsXDVb3?5{Qki-kY;&2A^ z6*9(dt1&ry*Oer_+7l`%u{;StwV(9kXmOym`=aRGwrz5f>1@Dr{gF zGyd0$Tnl#ApN!cZqixKNh&HGjb1wU3tl4D}|LpmT`@LfRx_o!W6{Mfna8XZty8%v$ep5{5^5K@Xw0RJKwvY+M3t zkSFh>ln`11nC|KT^p@}VEa&+kQ=qdcK^t{0|G*i-)?A@)jIxYwAeDM($eY8 zBcZ>z!>r&rRy1`M$gy^rBC+#imgC4qd=T^CSU3a~(*2z0_?$G0YtBy;De-WnI!x;c z#b1`+b9>p&A`)i>-3MVEW@#VD5ZP2MQrCI_*aK-wS?lG;+;no`DDwBP4#^AOajY$7 zKWTKw`Y}hEsN}wQ$*;S<8j^ewEGrkGI!K3V3cVL)9?B(x6;;xI)5|aX+tc%fUYv{i zUmiJVt?FKiAzGoWIJhowb_kaX>xUQa7HBB%u1XIedrE>LG&LYhVYC;mJK$WhMXI(tcftZONm6O6rNlz;B60is1a=CC4G(o%N z2lObB!5Ee0An)uvCdY|sP@)s3xPAdC`j^mC$CeF0&Syl)&BI{)bAS>>dD0rxgXxcL z!`R;|8C8hgG5g`C`^A1b(sfv#Y&~QB{<2n?xz_aTSCe}VQAB1cfZBq*8J?NZ?hgo} zpL$tMdL+5D>xXASex;Su_UP$X8}sSkgv*R@n6k|Aq|S;<^>Xp| zqph}L#5EPJWtPl7R85ZtUkFS=WmzbPyMx~p0JHaG`8I@EZhuIqp1SfT%}QwM^Ly_j zdT4{|J=eFT7BqhWDk~}m)Uv}=etHIR!O?G(D1_Ijm%rKi3Yw=HeG0DUd>;?Y-yOfv z)hnUS`l){y?D4&M{1;0Q>I_@cnDk0yxq~@xfSqS1Fld*#DL7NF;2qS}vBERxb40?` zM=D4m$2|yi{CIOgCd(H;a0MI6AeezYw#2TcZb1+TQV{X3htdFV1KjNLaMDIArmnwi z`nc1`w=~1QhB;E7t(PAKo9b?04MF~3m#85(jp>u}?Ew;bK$GNw0T@b0YSi5&>n+K>ZftvmxH3m<2xp zr(nh_aLOnI2pj5a-7VUW&X>H-nec5LCp=i`9o=Su%=RtrHXpylY z=uEXsOr6m({@_fRap}u9#v@leHdf6`zv;?4|1i1M^THAuT=h$T`p@ z7;1XF!U}KSAW`|t_-12buism?*5hyg#7sFKkhQt%KnJ~T3ezxja^mVP7YG96MioF@ zhb-=MK_;i#ekssaJ47QUCc@-|a{c{Du}cMIHLl7l1U(umqFFcvm9Btt*2vNNIN!E8Hl4DL>q+OFGwK*5-MkF5>iWij))7wSGTS zYrq-#o)I!Z^`Nl`uK4$a)O=T26QmSXm&>$dnoW3A+FE;Wx9Yy0JF8lC7T|lBjH_2~ z_A4;-Xl_}h<967rAH;M@v`d(dc?&Q>q_jM&E^6g z=PtHTWdf?^y3hug%BBPKMHrUkR@Es8%&{&Y+<880alQ2y~|W zM{OGa^{32Q8kKTD0jWe1+}8Lj?M{zh@%y>DV$D-&QRPC_ch25*ayD9hDsHge$^Kt> zl@Iu!nG@a2G z^tRHl)G~dj9=%Jou3`+VZz#=!QLi*Pk&4&TNB?3>^q*poN(|2=b=&k=#tO4-H?9^F z-4+6j4}CfXw%&qC4tbM5l4u}_eLfTTT||^TDWqY9s2-kWuVR@A zc|~a7NXIGUC6HjLg8Rpb7ya>cVH90xAf*&4G#sK0;bl-V!AD%%UIHG^TUaznVaP(s zDs+$5OT;?+_iFFG+~F&;S%Ls@$IQuhV`@f1C+LrX%n4DbwK9CtvJHA5c#3IKE9WON zHCb8oLtp2rTb%J_v-gZ>tu}2CW*}gIUnnKwpzg_XhDRnSI@_~=6OJZ*b;j^rQL?X$ z^zpq0@&(JIo1G34AQcfwNZX}w%`G0mi-srAOch9gwe~Xyry?!@I%SUztCs6sr^qhy z24AJr2ilcN{Hfw`e6+AuSjVAqo{_+4g%oEQ>%jX|Z$T#cIc@mCr7}5w=k80hmE9Yx zMeMOWo1~UvXE~eWKmh!%oWj^dJE)Mc?EzUx1$Ya5qvNr5DCXlMHPCZxRr`47ylUb|^EU43M0Smo7wg+*bHYo~4_V_w zGkBmou#6Ic@Jln^;BD@YyTT-9#L)H_J^forwzP=EGy`LEx4M5maJo*meEOtQyZE_n z17L#_7|mF@dB!@ZO>5yvs+rsH4=)wS@lEGwy)*izE>h}{Zn00BPmEy7B*?FLP(3mL ze!PLRhf^IJ)t`hjfyh^ynGTeRr1>#=)9@P70mKkU4?*>XKXvp4v?8)lVtkxzPzv*&7-6Csi+P;8$pJU<2I14Hniig0vE9=oX zr50xM^Fv~RKpbbkO{$G%6n)XKG`k*4@z&eS@d(Y|Jb^opaQF5okQ=!D$g_%(Yw!nVQ?B!bP@}e4zn_mE#x-7C(9I3Ef3vK^@F#Ab z6}Zd_$N{S`daQMs`ppWIQKthimQC(4zcQBZvXIwy#OuNGrjfq7wE72gLg%Xfb=tcG 
zt$s@^I{f->Ih+b`L{yNVsg)7LOA4`012iDI?Ki+z*Q8U z0>Zd+z(EAH9_=M8mj}H9F3)-KXF2oXlo07j(U8xR5h`tWPCdnk*R4&JNVIr3WH>Mx zwSa@0M0-TCWb#MRWb(YH1x=Y2iOIJrzrV<7v21(=RLj%`H_N6@nh)0i-&p}Kg1XS! ztq#!=NJtf&HqN*AD4OMiyS{vCIRxALux%%A*rqeEwD3rTp5$}fTQt|UU@xATnw}pT zqj74Ug_C1hj?|o$?R(usEv`wcdR+MfZy1!^FZg8U8H{EVjAiEqNip!Hc0$1qGN{L$ z_9Uh`WY5?$L&lr#ZYN~H{Q)#JX>*&gO^N%_G9gEH(0&pp&)g|P-_t!1p|}70v~G%R zZ^~Q5qZn7m-ZvXU_kHXvY+cr33Jwxnk$3}k<%5L_5mw7{3Y%>lb~{M3+~&a9crW|) z8`t)S%yOG&-|?GE4vRKB>~x?wTlZ+pA+wt+fJ_L(WHCXZ8Ck$KN)H}|Cm6G-HT6bg zqv~}uPxT(R;$>CcQZ~m7)o|AHq;qH~<#oqV;nWc~hrQTJR^WX=ZP1>80ai)yZ#FKk z=jV%o_ASZQ-pb}e!Y9>C*B2h_O%?ANrptvI5zvngR**T6d1ZhcHl2*b*{b~MH&}gZT`tuIUgkC($p!)>1!q;a2?o+t^}wLP=i#P@!!QDpsxOJw_AaPdd}+k zruvQC(R!=wqR0F?dL2hFjsYfbw@>VHu4_JywjrbfSapATH9EYXznzDGs zoDX#luywzGQm+==dE1_K8XUDdIVu5kpJzt|3@y)2HEB&P@aG@n*=lM8NmiJ>I6$%e znCgo@?shP~_g2}lBPBlzJ&^F%S>SBvS_}MQ07!1EfO(;hdC~rh7dU+I>cOIeYp<5u zWbN~lsB6#<9>CA{+=VwPtsW)8P|Gv7txuFzIZ7Dnzl(PbUKiM~d=tSA0uxL+VRiB% zJz>BUx!`?RuB*Un>u$Mxfv^p=e*4Bm;4yX1t5(sQR>>&(2!Z)z#zEGU_6^gI#=Ww+ zJ3jKcv*eS4#ITxpatN?0>+7)sB0!&Sixp7I;A2!j&FOYXW!VlQtGzZO9Iej{JO%rv zjAer5{8|ckQf5YJh&-ME)A6=y68 vX6YbPSAREw@V(P!MHAn}Z5F$Ne*us)1Bj~MJcql;>i<6ZKi2{u;nV*Es2}}P literal 0 HcmV?d00001 From 1577a5e32e7e8267412733c96de21c844b1656b9 Mon Sep 17 00:00:00 2001 From: Sebastin Santy Date: Sun, 23 Jul 2017 07:41:49 +0530 Subject: [PATCH 41/86] [MRG] FIX Examples use int / int without __future__.division (#9426) --- examples/applications/plot_tomography_l1_reconstruction.py | 2 +- examples/linear_model/plot_sparse_logistic_regression_mnist.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/applications/plot_tomography_l1_reconstruction.py b/examples/applications/plot_tomography_l1_reconstruction.py index a8d45938fef30..dc0a1265e27bd 100644 --- a/examples/applications/plot_tomography_l1_reconstruction.py +++ b/examples/applications/plot_tomography_l1_reconstruction.py @@ -101,7 +101,7 @@ def generate_synthetic_data(): rs = np.random.RandomState(0) n_pts = 36 x, y = np.ogrid[0:l, 0:l] - mask_outer = (x - l / 2) ** 2 + (y - l / 2) ** 2 < (l / 2) ** 2 + mask_outer = (x - l / 2.) ** 2 + (y - l / 2.) ** 2 < (l / 2.) ** 2 mask = np.zeros((l, l)) points = l * rs.rand(2, n_pts) mask[(points[0]).astype(np.int), (points[1]).astype(np.int)] = 1 diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py index 2b889d25013d3..5610f471b5d05 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py +++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py @@ -52,7 +52,7 @@ X_test = scaler.transform(X_test) # Turn up tolerance for faster convergence -clf = LogisticRegression(C=50 / train_samples, +clf = LogisticRegression(C=50. / train_samples, multi_class='multinomial', penalty='l1', solver='saga', tol=0.1) clf.fit(X_train, y_train) From 2f8a0dac471f19e33bc3ebb8525ac9c791e972f4 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Sun, 23 Jul 2017 16:40:40 +0200 Subject: [PATCH 42/86] update grants funding info for CDS, Telecom + Inria (#9436) --- doc/about.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/about.rst b/doc/about.rst index 7be981836a535..9f15362dadd6d 100644 --- a/doc/about.rst +++ b/doc/about.rst @@ -67,7 +67,7 @@ Funding `INRIA `_ actively supports this project. 
From 2f8a0dac471f19e33bc3ebb8525ac9c791e972f4 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date: Sun, 23 Jul 2017 16:40:40 +0200
Subject: [PATCH 42/86] update grants funding info for CDS, Telecom + Inria (#9436)

---
 doc/about.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/about.rst b/doc/about.rst
index 7be981836a535..9f15362dadd6d 100644
--- a/doc/about.rst
+++ b/doc/about.rst
@@ -67,7 +67,7 @@ Funding
 `INRIA `_ actively supports this project.
 It has provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler
-(2012-2013) and Olivier Grisel (2013-2015) to work on this project
+(2012-2013) and Olivier Grisel (2013-2017) to work on this project
 full-time. It also hosts coding sprints and other events.
 
 .. image:: images/inria-logo.jpg
@@ -77,7 +77,7 @@ full-time. It also hosts coding sprints and other events.
 
 `Paris-Saclay Center for Data Science `_
 funded one year for a developer to work on the project full-time
-(2014-2015).
+(2014-2015) and 50% of the time of Guillaume Lemaitre (2016-2017).
 
 .. image:: images/cds-logo.png
    :width: 200pt
@@ -94,9 +94,9 @@ Environment also funds several students to work on the project part-time.
    :target: http://cds.nyu.edu/mooresloan/
 
-`Télécom Paristech `_ funds Manoj Kumar (2014),
-Tom Dupré la Tour (2015), Raghav RV (2015-2016) and Thierry Guillemot (2016) to
-work on scikit-learn.
+`Télécom Paristech `_ funded Manoj Kumar (2014),
+Tom Dupré la Tour (2015), Raghav RV (2015-2017), Thierry Guillemot (2016-2017)
+and Albert Thomas (2017) to work on scikit-learn.
 
 .. image:: themes/scikit-learn/static/img/telecom.png
    :width: 100pt

From bcd442a75191bddb8dce33371a5ce8fc3965ed28 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Mon, 24 Jul 2017 20:17:39 +1000
Subject: [PATCH 43/86] [MRG] DOC Dedent what's new lists (#9349)

---
 doc/whats_new.rst | 6457 +++++++++++++++++++++++----------------------
 1 file changed, 3231 insertions(+), 3226 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index cd78a7e48c002..8d7728ccbcd39 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -49,21 +49,21 @@ parameters, may produce different models from the previous version. This often
 occurs due to changes in the modelling logic (bug fixes or enhancements), or in
 random sampling procedures.
 
-  - :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix)
-  - :class:`cross_decomposition.PLSRegression`
-    with ``scale=True`` (bug fix)
-  - :class:`ensemble.GradientBoostingClassifier` and
-    :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix)
-  - gradient boosting ``loss='quantile'`` (bug fix)
-  - :class:`ensemble.IsolationForest` (bug fix)
-  - :class:`feature_selection.SelectFdr` (bug fix)
-  - :class:`linear_model.RANSACRegressor` (bug fix)
-  - :class:`linear_model.LassoLars` (bug fix)
-  - :class:`linear_model.LassoLarsIC` (bug fix)
-  - :class:`manifold.TSNE` (bug fix)
-  - :class:`semi_supervised.LabelSpreading` (bug fix)
-  - :class:`semi_supervised.LabelPropagation` (bug fix)
-  - tree based models where ``min_weight_fraction_leaf`` is used (enhancement)
+- :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix)
+- :class:`cross_decomposition.PLSRegression`
+  with ``scale=True`` (bug fix)
+- :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix)
+- gradient boosting ``loss='quantile'`` (bug fix)
+- :class:`ensemble.IsolationForest` (bug fix)
+- :class:`feature_selection.SelectFdr` (bug fix)
+- :class:`linear_model.RANSACRegressor` (bug fix)
+- :class:`linear_model.LassoLars` (bug fix)
+- :class:`linear_model.LassoLarsIC` (bug fix)
+- :class:`manifold.TSNE` (bug fix)
+- :class:`semi_supervised.LabelSpreading` (bug fix)
+- :class:`semi_supervised.LabelPropagation` (bug fix)
+- tree based models where ``min_weight_fraction_leaf`` is used (enhancement)
 
 Details are listed in the changelog below.
@@ -78,95 +78,97 @@ New features

 Classifiers and regressors

-  - Added :class:`multioutput.ClassifierChain` for multi-label
-    classification. By `Adam Kleczewski `_.
+- Added :class:`multioutput.ClassifierChain` for multi-label
+  classification. By `Adam Kleczewski `_.

-  - Added solver ``'saga'`` that implements the improved version of Stochastic
-    Average Gradient, in :class:`linear_model.LogisticRegression` and
-    :class:`linear_model.Ridge`. It allows the use of L1 penalty with
-    multinomial logistic loss, and behaves marginally better than 'sag'
-    during the first epochs of ridge and logistic regression.
-    :issue:`8446` by `Arthur Mensch`_.
+- Added solver ``'saga'`` that implements the improved version of Stochastic
+  Average Gradient, in :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.Ridge`. It allows the use of L1 penalty with
+  multinomial logistic loss, and behaves marginally better than 'sag'
+  during the first epochs of ridge and logistic regression.
+  :issue:`8446` by `Arthur Mensch`_.

 Other estimators

-  - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly
-    detection based on nearest neighbors.
-    :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_.
+- Added the :class:`neighbors.LocalOutlierFactor` class for anomaly
+  detection based on nearest neighbors.
+  :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_.

-  - Added :class:`preprocessing.QuantileTransformer` class and
-    :func:`preprocessing.quantile_transform` function for features
-    normalization based on quantiles.
-    :issue:`8363` by :user:`Denis Engemann `,
-    :user:`Guillaume Lemaitre `, `Olivier Grisel`_, `Raghav RV`_,
-    :user:`Thierry Guillemot `, and `Gael Varoquaux`_.
+- Added :class:`preprocessing.QuantileTransformer` class and
+  :func:`preprocessing.quantile_transform` function for features
+  normalization based on quantiles.
+  :issue:`8363` by :user:`Denis Engemann `,
+  :user:`Guillaume Lemaitre `, `Olivier Grisel`_, `Raghav RV`_,
+  :user:`Thierry Guillemot `, and `Gael Varoquaux`_.

-  - The new solver ``'mu'`` implements a Multiplicate Update in
-    :class:`decomposition.NMF`, allowing the optimization of all
-    beta-divergences, including the Frobenius norm, the generalized
-    Kullback-Leibler divergence and the Itakura-Saito divergence.
-    :issue:`5295` by `Tom Dupre la Tour`_.
+- The new solver ``'mu'`` implements a Multiplicative Update in
+  :class:`decomposition.NMF`, allowing the optimization of all
+  beta-divergences, including the Frobenius norm, the generalized
+  Kullback-Leibler divergence and the Itakura-Saito divergence.
+  :issue:`5295` by `Tom Dupre la Tour`_.

 Model selection and evaluation

-  - :class:`model_selection.GridSearchCV` and
-    :class:`model_selection.RandomizedSearchCV` now support simultaneous
-    evaluation of multiple metrics. Refer to the
-    :ref:`multimetric_grid_search` section of the user guide for more
-    information. :issue:`7388` by `Raghav RV`_
-
-  - Added the :func:`model_selection.cross_validate` which allows evaluation
-    of multiple metrics. This function returns a dict with more useful
-    information from cross-validation such as the train scores, fit times and
-    score times.
-    Refer to :ref:`multimetric_cross_validation` section of the userguide
-    for more information. :issue:`7388` by `Raghav RV`_
-
-  - Added :func:`metrics.mean_squared_log_error`, which computes
-    the mean square error of the logarithmic transformation of targets,
-    particularly useful for targets with an exponential trend.
-    :issue:`7655` by :user:`Karan Desai `.
-
-  - Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
-    compute Discounted cumulative gain (DCG) and Normalized discounted
-    cumulative gain (NDCG).
-    :issue:`7739` by :user:`David Gasquez `.
-
-  - Added the :class:`model_selection.RepeatedKFold` and
-    :class:`model_selection.RepeatedStratifiedKFold`.
-    :issue:`8120` by `Neeraj Gangwar`_.
+- :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` now support simultaneous
+  evaluation of multiple metrics. Refer to the
+  :ref:`multimetric_grid_search` section of the user guide for more
+  information. :issue:`7388` by `Raghav RV`_
+
+- Added the :func:`model_selection.cross_validate` which allows evaluation
+  of multiple metrics. This function returns a dict with more useful
+  information from cross-validation such as the train scores, fit times and
+  score times.
+  Refer to :ref:`multimetric_cross_validation` section of the user guide
+  for more information. :issue:`7388` by `Raghav RV`_
+
+- Added :func:`metrics.mean_squared_log_error`, which computes
+  the mean square error of the logarithmic transformation of targets,
+  particularly useful for targets with an exponential trend.
+  :issue:`7655` by :user:`Karan Desai `.
+
+- Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
+  compute Discounted cumulative gain (DCG) and Normalized discounted
+  cumulative gain (NDCG).
+  :issue:`7739` by :user:`David Gasquez `.
+
+- Added the :class:`model_selection.RepeatedKFold` and
+  :class:`model_selection.RepeatedStratifiedKFold`.
+  :issue:`8120` by `Neeraj Gangwar`_.

 Miscellaneous

-  - Validation that input data contains no NaN or inf can now be suppressed
-    using :func:`config_context`, at your own risk. This will save on runtime,
-    and may be particularly useful for prediction time. :issue:`7548` by
-    `Joel Nothman`_.
+- Validation that input data contains no NaN or inf can now be suppressed
+  using :func:`config_context`, at your own risk. This will save on runtime,
+  and may be particularly useful for prediction time. :issue:`7548` by
+  `Joel Nothman`_.

-  - Added a test to ensure parameter listing in docstrings match the
-    function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and
-    `Raghav RV`_.
+- Added a test to ensure parameter listing in docstrings match the
+  function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and
+  `Raghav RV`_.

 Enhancements
 ............

 Trees and ensembles

-  - The ``min_weight_fraction_leaf`` constraint in tree construction is now
-    more efficient, taking a fast path to declare a node a leaf if its weight
-    is less than 2 * the minimum. Note that the constructed tree will be
-    different from previous versions where ``min_weight_fraction_leaf`` is
-    used. :issue:`7441` by :user:`Nelson Liu `.
+- The ``min_weight_fraction_leaf`` constraint in tree construction is now
+  more efficient, taking a fast path to declare a node a leaf if its weight
+  is less than 2 * the minimum. Note that the constructed tree will be
+  different from previous versions where ``min_weight_fraction_leaf`` is
+  used. :issue:`7441` by :user:`Nelson Liu `.

-  - :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
-    now support sparse input for prediction.
-    :issue:`6101` by :user:`Ibraim Ganiev `.
+- :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
+  now support sparse input for prediction.
+  :issue:`6101` by :user:`Ibraim Ganiev `.
-  - :class:`ensemble.VotingClassifier` now allows changing estimators by using
-    :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
-    removed by setting it to ``None``.
-    :issue:`7674` by :user:`Yichuan Liu `.
+- :class:`ensemble.VotingClassifier` now allows changing estimators by using
+  :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
+  removed by setting it to ``None``.
+  :issue:`7674` by :user:`Yichuan Liu `.
+
+- :func:`tree.export_graphviz` now shows configurable number of decimal
+  places. :issue:`8698` by :user:`Guillaume Lemaitre `.

-  - :func:`tree.export_graphviz` now shows configurable number of decimal
-    places. :issue:`8698` by :user:`Guillaume Lemaitre `.

@@ -177,659 +179,662 @@ Trees and ensembles

 Linear, kernelized and related models

-  - :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`,
-    :class:`linear_model.PassiveAggressiveClassifier`,
-    :class:`linear_model.PassiveAggressiveRegressor` and
-    :class:`linear_model.Perceptron` now expose ``max_iter`` and
-    ``tol`` parameters, to handle convergence more precisely.
-    ``n_iter`` parameter is deprecated, and the fitted estimator exposes
-    a ``n_iter_`` attribute, with actual number of iterations before
-    convergence. :issue:`5036` by `Tom Dupre la Tour`_.
-
-  - Added ``average`` parameter to perform weight averaging in
-    :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939`
-    by :user:`Andrea Esuli `.
-
-  - :class:`linear_model.RANSACRegressor` no longer throws an error
-    when calling ``fit`` if no inliers are found in its first iteration.
-    Furthermore, causes of skipped iterations are tracked in newly added
-    attributes, ``n_skips_*``.
-    :issue:`7914` by :user:`Michael Horrell `.
-
-  - In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict``
-    is a lot faster with ``return_std=True``. :issue:`8591` by
-    :user:`Hadrien Bertrand `.
-
-  - Added ``return_std`` to ``predict`` method of
-    :class:`linear_model.ARDRegression` and
-    :class:`linear_model.BayesianRidge`.
-    :issue:`7838` by :user:`Sergey Feldman `.
-
-  - Memory usage enhancements: Prevent cast from float32 to float64 in:
-    :class:`linear_model.MultiTaskElasticNet`;
-    :class:`linear_model.LogisticRegression` when using newton-cg solver; and
-    :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr
-    solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich ` and :user:`Nicolas
-    Cordier ` and :user:`Thierry Guillemot `.
+- :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.PassiveAggressiveClassifier`,
+  :class:`linear_model.PassiveAggressiveRegressor` and
+  :class:`linear_model.Perceptron` now expose ``max_iter`` and
+  ``tol`` parameters, to handle convergence more precisely.
+  ``n_iter`` parameter is deprecated, and the fitted estimator exposes
+  an ``n_iter_`` attribute, with actual number of iterations before
+  convergence. :issue:`5036` by `Tom Dupre la Tour`_.
+
+- Added ``average`` parameter to perform weight averaging in
+  :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939`
+  by :user:`Andrea Esuli `.
+
+- :class:`linear_model.RANSACRegressor` no longer throws an error
+  when calling ``fit`` if no inliers are found in its first iteration.
+  Furthermore, causes of skipped iterations are tracked in newly added
+  attributes, ``n_skips_*``.
+  :issue:`7914` by :user:`Michael Horrell `.
+
+- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict``
+  is a lot faster with ``return_std=True``.
+  :issue:`8591` by
+  :user:`Hadrien Bertrand `.
+
+- Added ``return_std`` to ``predict`` method of
+  :class:`linear_model.ARDRegression` and
+  :class:`linear_model.BayesianRidge`.
+  :issue:`7838` by :user:`Sergey Feldman `.
+
+- Memory usage enhancements: Prevent cast from float32 to float64 in:
+  :class:`linear_model.MultiTaskElasticNet`;
+  :class:`linear_model.LogisticRegression` when using newton-cg solver; and
+  :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr
+  solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich ` and :user:`Nicolas
+  Cordier ` and :user:`Thierry Guillemot `.

 Other predictors

-  - Custom metrics for the :mod:`neighbors` binary trees now have
-    fewer constraints: they must take two 1d-arrays and return a float.
-    :issue:`6288` by `Jake Vanderplas`_.
+- Custom metrics for the :mod:`neighbors` binary trees now have
+  fewer constraints: they must take two 1d-arrays and return a float.
+  :issue:`6288` by `Jake Vanderplas`_.

-  - ``algorithm='auto`` in :mod:`neighbors` estimators now chooses the most
-    appropriate algorithm for all input types and metrics. :issue:`9145` by
-    :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala
-    `.
+- ``algorithm='auto'`` in :mod:`neighbors` estimators now chooses the most
+  appropriate algorithm for all input types and metrics. :issue:`9145` by
+  :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala
+  `.

 Decomposition, manifold learning and clustering

-  - :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans`
-    now use significantly less memory when assigning data points to their
-    nearest cluster center. :issue:`7721` by :user:`Jon Crall `.
+- :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans`
+  now use significantly less memory when assigning data points to their
+  nearest cluster center. :issue:`7721` by :user:`Jon Crall `.

-  - :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and
-    :class:`decomposition.TruncatedSVD` now expose the singular values
-    from the underlying SVD. They are stored in the attribute
-    ``singular_values_``, like in :class:`decomposition.IncrementalPCA`.
-    :issue:`7685` by :user:`Tommy Löfstedt `
+- :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and
+  :class:`decomposition.TruncatedSVD` now expose the singular values
+  from the underlying SVD. They are stored in the attribute
+  ``singular_values_``, like in :class:`decomposition.IncrementalPCA`.
+  :issue:`7685` by :user:`Tommy Löfstedt `

-  - :class:`decomposition.NMF` now faster when ``beta_loss=0``.
-    :issue:`9277` by :user:`hongkahjun`.
+- :class:`decomposition.NMF` now faster when ``beta_loss=0``.
+  :issue:`9277` by :user:`hongkahjun`.

-  - Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE`
-    :issue:`7089` by :user:`Thomas Moreau ` and `Olivier Grisel`_.
+- Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE`
+  :issue:`7089` by :user:`Thomas Moreau ` and `Olivier Grisel`_.

-  - Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE`
-    so the results are closer to the one from the reference implementation
-    `lvdmaaten/bhtsne `_ by :user:`Thomas
-    Moreau ` and `Olivier Grisel`_.
+- Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE`
+  so the results are closer to the one from the reference implementation
+  `lvdmaaten/bhtsne `_ by :user:`Thomas
+  Moreau ` and `Olivier Grisel`_.
- - Memory usage enhancements: Prevent cast from float32 to float64 in - :class:`decomposition.PCA` and - :func:`decomposition.randomized_svd_low_rank`. - :issue:`9067` by `Raghav RV`_. +- Memory usage enhancements: Prevent cast from float32 to float64 in + :class:`decomposition.PCA` and + :func:`decomposition.randomized_svd_low_rank`. + :issue:`9067` by `Raghav RV`_. Preprocessing and feature selection - - Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` - to enable selection of the norm order when ``coef_`` is more than 1D. - :issue:`6181` by :user:`Antoine Wendlinger `. +- Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` + to enable selection of the norm order when ``coef_`` is more than 1D. + :issue:`6181` by :user:`Antoine Wendlinger `. - - Added ability to use sparse matrices in :func:`feature_selection.f_regression` - with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. +- Added ability to use sparse matrices in :func:`feature_selection.f_regression` + with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. - - Small performance improvement to n-gram creation in - :mod:`feature_extraction.text` by binding methods for loops and - special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke ` +- Small performance improvement to n-gram creation in + :mod:`feature_extraction.text` by binding methods for loops and + special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke ` - - Relax assumption on the data for the - :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2 - kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`, - the transform function should not check whether ``X < 0`` but whether ``X < - -self.skewedness``. :issue:`7573` by :user:`Romain Brault `. +- Relax assumption on the data for the + :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2 + kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`, + the transform function should not check whether ``X < 0`` but whether ``X < + -self.skewedness``. :issue:`7573` by :user:`Romain Brault `. - - Made default kernel parameters kernel-dependent in - :class:`kernel_approximation.Nystroem`. - :issue:`5229` by :user:`Saurabh Bansod ` and `Andreas Müller`_. +- Made default kernel parameters kernel-dependent in + :class:`kernel_approximation.Nystroem`. + :issue:`5229` by :user:`Saurabh Bansod ` and `Andreas Müller`_. Model evaluation and meta-estimators - - :class:`pipeline.Pipeline` is now able to cache transformers - within a pipeline by using the ``memory`` constructor parameter. - :issue:`7990` by :user:`Guillaume Lemaitre `. +- :class:`pipeline.Pipeline` is now able to cache transformers + within a pipeline by using the ``memory`` constructor parameter. + :issue:`7990` by :user:`Guillaume Lemaitre `. - - :class:`pipeline.Pipeline` steps can now be accessed as attributes of its - ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina - Rakotoarison `. +- :class:`pipeline.Pipeline` steps can now be accessed as attributes of its + ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina + Rakotoarison `. - - Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. - :issue:`7723` by :user:`Mikhail Korobov `. +- Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. + :issue:`7723` by :user:`Mikhail Korobov `. - - Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`. - A ``TypeError`` will be raised for any other kwargs. 
-    :issue:`8028`
-    by :user:`Alexander Booth `.
+- Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`.
+  A ``TypeError`` will be raised for any other kwargs. :issue:`8028`
+  by :user:`Alexander Booth `.

-  - :class:`model_selection.GridSearchCV`,
-    :class:`model_selection.RandomizedSearchCV` and
-    :func:`model_selection.cross_val_score` now allow estimators with callable
-    kernels which were previously prohibited.
-    :issue:`8005` by `Andreas Müller`_ .
+- :class:`model_selection.GridSearchCV`,
+  :class:`model_selection.RandomizedSearchCV` and
+  :func:`model_selection.cross_val_score` now allow estimators with callable
+  kernels which were previously prohibited.
+  :issue:`8005` by `Andreas Müller`_.

-  - :func:`model_selection.cross_val_predict` now returns output of the
-    correct shape for all values of the argument ``method``.
-    :issue:`7863` by :user:`Aman Dalmia `.
+- :func:`model_selection.cross_val_predict` now returns output of the
+  correct shape for all values of the argument ``method``.
+  :issue:`7863` by :user:`Aman Dalmia `.

-  - Added ``shuffle`` and ``random_state`` parameters to shuffle training
-    data before taking prefixes of it based on training sizes in
-    :func:`model_selection.learning_curve`.
-    :issue:`7506` by :user:`Narine Kokhlikyan `.
+- Added ``shuffle`` and ``random_state`` parameters to shuffle training
+  data before taking prefixes of it based on training sizes in
+  :func:`model_selection.learning_curve`.
+  :issue:`7506` by :user:`Narine Kokhlikyan `.

-  - :class:`model_selection.StratifiedShuffleSplit` now works with multioutput
-    multiclass (or multilabel) data. :issue:`9044` by `Vlad Niculae`_.
+- :class:`model_selection.StratifiedShuffleSplit` now works with multioutput
+  multiclass (or multilabel) data. :issue:`9044` by `Vlad Niculae`_.

-  - Speed improvements to :class:`model_selection.StratifiedShuffleSplit`.
-    :issue:`5991` by :user:`Arthur Mensch ` and `Joel Nothman`_.
+- Speed improvements to :class:`model_selection.StratifiedShuffleSplit`.
+  :issue:`5991` by :user:`Arthur Mensch ` and `Joel Nothman`_.

-  - Add ``shuffle`` parameter to :func:`model_selection.train_test_split`.
-    :issue:`8845` by :user:`themrmax `
+- Add ``shuffle`` parameter to :func:`model_selection.train_test_split`.
+  :issue:`8845` by :user:`themrmax `
+
+- :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier`
+  now support online learning using ``partial_fit``.
+  :issue:`8053` by :user:`Peng Yu `.

-  - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier`
-    now support online learning using ``partial_fit``.
-    :issue:`8053` by :user:`Peng Yu `.

-  - Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit`
-    :issue:`8282` by :user:`Aman Dalmia `.
+- Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit`
+  :issue:`8282` by :user:`Aman Dalmia `.

-  - More clustering metrics are now available through :func:`metrics.get_scorer`
-    and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_.
+- More clustering metrics are now available through :func:`metrics.get_scorer`
+  and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_.

 Metrics

-  - :func:`metrics.matthews_corrcoef` now support multiclass classification.
-    :issue:`8094` by :user:`Jon Crall `.
+- :func:`metrics.matthews_corrcoef` now support multiclass classification.
+  :issue:`8094` by :user:`Jon Crall `.

-  - Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`.
-    :issue:`8335` by :user:`Victor Poughon `.
+- Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`.
+  :issue:`8335` by :user:`Victor Poughon `.

 Miscellaneous

-  - :func:`utils.check_estimator` now attempts to ensure that methods
-    transform, predict, etc. do not set attributes on the estimator.
-    :issue:`7533` by :user:`Ekaterina Krivich `.
+- :func:`utils.check_estimator` now attempts to ensure that methods
+  transform, predict, etc. do not set attributes on the estimator.
+  :issue:`7533` by :user:`Ekaterina Krivich `.

-  - Added type checking to the ``accept_sparse`` parameter in
-    :mod:`utils.validation` methods. This parameter now accepts only boolean,
-    string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and
-    should be replaced by ``accept_sparse=False``.
-    :issue:`7880` by :user:`Josh Karnofsky `.
+- Added type checking to the ``accept_sparse`` parameter in
+  :mod:`utils.validation` methods. This parameter now accepts only boolean,
+  string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and
+  should be replaced by ``accept_sparse=False``.
+  :issue:`7880` by :user:`Josh Karnofsky `.

-  - Make it possible to load a chunk of an svmlight formatted file by
-    passing a range of bytes to :func:`datasets.load_svmlight_file`.
-    :issue:`935` by :user:`Olivier Grisel `.
+- Make it possible to load a chunk of an svmlight formatted file by
+  passing a range of bytes to :func:`datasets.load_svmlight_file`.
+  :issue:`935` by :user:`Olivier Grisel `.

-  - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`
-    now accept non-finite features. :issue:`8931` by :user:`Attractadore`.
+- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`
+  now accept non-finite features. :issue:`8931` by :user:`Attractadore`.

 Bug fixes
 .........

 Trees and ensembles

-  - Fixed a memory leak in trees when using trees with ``criterion='mae'``.
-    :issue:`8002` by `Raghav RV`_.
+- Fixed a memory leak in trees when using trees with ``criterion='mae'``.
+  :issue:`8002` by `Raghav RV`_.

-  - Fixed a bug where :class:`ensemble.IsolationForest` uses an
-    an incorrect formula for the average path length
-    :issue:`8549` by `Peter Wang `_.
+- Fixed a bug where :class:`ensemble.IsolationForest` uses an
+  incorrect formula for the average path length.
+  :issue:`8549` by `Peter Wang `_.

-  - Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws
-    ``ZeroDivisionError`` while fitting data with single class labels.
-    :issue:`7501` by :user:`Dominik Krzeminski `.
+- Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws
+  ``ZeroDivisionError`` while fitting data with single class labels.
+  :issue:`7501` by :user:`Dominik Krzeminski `.

-  - Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and
-    :class:`ensemble.GradientBoostingRegressor` where a float being compared
-    to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by
-    :user:`He Chen `.
+- Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` where a float being compared
+  to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by
+  :user:`He Chen `.

-  - Fix a bug where :class:`ensemble.GradientBoostingClassifier` and
-    :class:`ensemble.GradientBoostingRegressor` ignored the
-    ``min_impurity_split`` parameter.
-    :issue:`8006` by :user:`Sebastian Pölsterl `.
+- Fix a bug where :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` ignored the
+  ``min_impurity_split`` parameter.
+  :issue:`8006` by :user:`Sebastian Pölsterl `.

-  - Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`.
-    :issue:`8936` by :user:`Michael Lewis `
+- Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`.
+  :issue:`8936` by :user:`Michael Lewis `

-  - Fixed excessive memory usage in prediction for random forests estimators.
-    :issue:`8672` by :user:`Mike Benfield `.
+- Fixed excessive memory usage in prediction for random forest estimators.
+  :issue:`8672` by :user:`Mike Benfield `.

-  - Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2
-    :issue:`8068` by :user:`xor`.
+- Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2.
+  :issue:`8068` by :user:`xor`.

-  - Fixed a bug where :class:`ensemble.IsolationForest` fails when
-    ``max_features`` is less than 1.
-    :issue:`5732` by :user:`Ishank Gulati `.
+- Fixed a bug where :class:`ensemble.IsolationForest` fails when
+  ``max_features`` is less than 1.
+  :issue:`5732` by :user:`Ishank Gulati `.

-  - Fix a bug where gradient boosting with ``loss='quantile'`` computed
-    negative errors for negative values of ``ytrue - ypred`` leading to wrong
-    values when calling ``__call__``.
-    :issue:`8087` by :user:`Alexis Mignon `
+- Fix a bug where gradient boosting with ``loss='quantile'`` computed
+  negative errors for negative values of ``ytrue - ypred`` leading to wrong
+  values when calling ``__call__``.
+  :issue:`8087` by :user:`Alexis Mignon `

-  - Fix a bug where :class:`ensemble.VotingClassifier` raises an error
-    when a numpy array is passed in for weights. :issue:`7983` by
-    :user:`Vincent Pham `.
+- Fix a bug where :class:`ensemble.VotingClassifier` raises an error
+  when a numpy array is passed in for weights. :issue:`7983` by
+  :user:`Vincent Pham `.

-  - Fixed a bug where :func:`tree.export_graphviz` raised an error
-    when the length of features_names does not match n_features in the decision
-    tree. :issue:`8512` by :user:`Li Li `.
+- Fixed a bug where :func:`tree.export_graphviz` raised an error
+  when the length of ``feature_names`` does not match ``n_features`` in the
+  decision tree. :issue:`8512` by :user:`Li Li `.

 Linear, kernelized and related models

-  - Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until
-    ``max_iter`` if it finds a large inlier group early. :issue:`8251` by
-    :user:`aivision2020`.
+- Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until
+  ``max_iter`` if it finds a large inlier group early. :issue:`8251` by
+  :user:`aivision2020`.

-  - Fixed a bug where :class:`naive_bayes.MultinomialNB` and
-    :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by
-    :user:`Yichuan Liu ` and :user:`Herilalaina Rakotoarison
-    `.
+- Fixed a bug where :class:`naive_bayes.MultinomialNB` and
+  :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by
+  :user:`Yichuan Liu ` and :user:`Herilalaina Rakotoarison
+  `.

-  - Fixed a bug where :class:`linear_model.LassoLars` does not give
-    the same result as the LassoLars implementation available
-    in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `.
+- Fixed a bug where :class:`linear_model.LassoLars` does not give
+  the same result as the LassoLars implementation available
+  in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `.
-  - Fixed a bug in :class:`linear_model.RandomizedLasso`,
-    :class:`linear_model.Lars`, :class:`linear_model.LassoLars`,
-    :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`,
-    where the parameter ``precompute`` was not used consistently across
-    classes, and some values proposed in the docstring could raise errors.
-    :issue:`5359` by `Tom Dupre la Tour`_.
+- Fixed a bug in :class:`linear_model.RandomizedLasso`,
+  :class:`linear_model.Lars`, :class:`linear_model.LassoLars`,
+  :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`,
+  where the parameter ``precompute`` was not used consistently across
+  classes, and some values proposed in the docstring could raise errors.
+  :issue:`5359` by `Tom Dupre la Tour`_.

-  - Fix inconsistent results between :class:`linear_model.RidgeCV` and
-    :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302`
-    by `Alexandre Gramfort`_.
+- Fix inconsistent results between :class:`linear_model.RidgeCV` and
+  :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302`
+  by `Alexandre Gramfort`_.

-  - Fix a bug where :func:`linear_model.LassoLars.fit` sometimes
-    left ``coef_`` as a list, rather than an ndarray.
-    :issue:`8160` by :user:`CJ Carey `.
+- Fix a bug where :func:`linear_model.LassoLars.fit` sometimes
+  left ``coef_`` as a list, rather than an ndarray.
+  :issue:`8160` by :user:`CJ Carey `.

-  - Fix :func:`linear_model.BayesianRidge.fit` to return
-    ridge parameter ``alpha_`` and ``lambda_`` consistent with calculated
-    coefficients ``coef_`` and ``intercept_``.
-    :issue:`8224` by :user:`Peter Gedeck `.
+- Fix :func:`linear_model.BayesianRidge.fit` to return
+  ridge parameter ``alpha_`` and ``lambda_`` consistent with calculated
+  coefficients ``coef_`` and ``intercept_``.
+  :issue:`8224` by :user:`Peter Gedeck `.

-  - Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of
-    integer classes. :issue:`8676` by :user:`Vathsala Achar `.
+- Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of
+  integer classes. :issue:`8676` by :user:`Vathsala Achar `.

-  - Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`.
-    :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug `.
+- Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`.
+  :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug `.

-  - Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
-    :user:`Sergei Lebedev `
+- Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
+  :user:`Sergei Lebedev `

-  - Fix bug where stratified CV splitters did not work with
-    :class:`linear_model.LassoCV`. :issue:`8973` by
-    :user:`Paulo Haddad `.
+- Fix bug where stratified CV splitters did not work with
+  :class:`linear_model.LassoCV`. :issue:`8973` by
+  :user:`Paulo Haddad `.

-  - Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor`
-    when the standard deviation and covariance predicted without fit
-    would fail with a unmeaningful error by default.
-    :issue:`6573` by :user:`Quazi Marufur Rahman ` and
-    `Manoj Kumar`_.
+- Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor`
+  when the standard deviation and covariance predicted without fit
+  would fail with an unmeaningful error by default.
+  :issue:`6573` by :user:`Quazi Marufur Rahman ` and
+  `Manoj Kumar`_.
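Editor's note: the :class:`linear_model.RidgeCV` entry above (:issue:`9302`)
restores consistency with :class:`linear_model.Ridge` under
``normalize=True``. A minimal sketch of the property being checked, on
assumed synthetic data and illustrative alphas, not part of the patch::

    import numpy as np
    from sklearn.linear_model import Ridge, RidgeCV

    rng = np.random.RandomState(0)
    X = rng.randn(100, 3)
    y = X.dot(np.array([1., 2., -1.])) + 0.1 * rng.randn(100)

    # RidgeCV selects an alpha; refitting Ridge with that alpha and the same
    # normalize setting should give (nearly) identical coefficients.
    ridge_cv = RidgeCV(alphas=[0.1, 1.0, 10.0], normalize=True).fit(X, y)
    ridge = Ridge(alpha=ridge_cv.alpha_, normalize=True).fit(X, y)
    print(np.allclose(ridge_cv.coef_, ridge.coef_))  # expected True after the fix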
 Other predictors

-  - Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement
-    ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced
-    papers. :issue:`9239`
-    by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
-    `, and `Joel Nothman`_.
+- Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement
+  ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced
+  papers. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
+  `, and `Joel Nothman`_.

 Decomposition, manifold learning and clustering

-  - Fixed the implementation of :class:`manifold.TSNE`:
-  - ``early_exageration`` parameter had no effect and is now used for the
-    first 250 optimization iterations.
-  - Fixed the ``AssertionError: Tree consistency failed`` exception
-    reported in :issue:`8992`.
-  - Improve the learning schedule to match the one from the reference
-    implementation `lvdmaaten/bhtsne `_.
+- Fixed the implementation of :class:`manifold.TSNE`:
+- ``early_exaggeration`` parameter had no effect and is now used for the
+  first 250 optimization iterations.
+- Fixed the ``AssertionError: Tree consistency failed`` exception
+  reported in :issue:`8992`.
+- Improve the learning schedule to match the one from the reference
+  implementation `lvdmaaten/bhtsne `_.
   by :user:`Thomas Moreau ` and `Olivier Grisel`_.

-  - Fix a bug in :class:`decomposition.LatentDirichletAllocation`
-    where the ``perplexity`` method was returning incorrect results because
-    the ``transform`` method returns normalized document topic distributions
-    as of version 0.18. :issue:`7954` by :user:`Gary Foreman `.
+- Fix a bug in :class:`decomposition.LatentDirichletAllocation`
+  where the ``perplexity`` method was returning incorrect results because
+  the ``transform`` method returns normalized document topic distributions
+  as of version 0.18. :issue:`7954` by :user:`Gary Foreman `.

-  - Fix output shape and bugs with n_jobs > 1 in
-    :class:`decomposition.SparseCoder` transform and
-    :func:`decomposition.sparse_encode`
-    for one-dimensional data and one component.
-    This also impacts the output shape of :class:`decomposition.DictionaryLearning`.
-    :issue:`8086` by `Andreas Müller`_.
+- Fix output shape and bugs with n_jobs > 1 in
+  :class:`decomposition.SparseCoder` transform and
+  :func:`decomposition.sparse_encode`
+  for one-dimensional data and one component.
+  This also impacts the output shape of :class:`decomposition.DictionaryLearning`.
+  :issue:`8086` by `Andreas Müller`_.

-  - Fixed the implementation of ``explained_variance_``
-    in :class:`decomposition.PCA`,
-    :class:`decomposition.RandomizedPCA` and
-    :class:`decomposition.IncrementalPCA`.
-    :issue:`9105` by `Hanmin Qin `_.
+- Fixed the implementation of ``explained_variance_``
+  in :class:`decomposition.PCA`,
+  :class:`decomposition.RandomizedPCA` and
+  :class:`decomposition.IncrementalPCA`.
+  :issue:`9105` by `Hanmin Qin `_.

-  - Fixed a bug where :class:`cluster.DBSCAN` gives incorrect
-    result when input is a precomputed sparse matrix with initial
-    rows all zero. :issue:`8306` by :user:`Akshay Gupta `
+- Fixed a bug where :class:`cluster.DBSCAN` gives incorrect
+  result when input is a precomputed sparse matrix with initial
+  rows all zero. :issue:`8306` by :user:`Akshay Gupta `

-  - Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse
-    array X and initial centroids, where X's means were unnecessarily being
-    subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `.
+- Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse + array X and initial centroids, where X's means were unnecessarily being + subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `. - - Fixes to the input validation in :class:`covariance.EllipticEnvelope`. - :issue:`8086` by `Andreas Müller`_. +- Fixes to the input validation in :class:`covariance.EllipticEnvelope`. + :issue:`8086` by `Andreas Müller`_. - - Fixed a bug in :class:`covariance.MinCovDet` where inputting data - that produced a singular covariance matrix would cause the helper method - ``_c_step`` to throw an exception. - :issue:`3367` by :user:`Jeremy Steward ` +- Fixed a bug in :class:`covariance.MinCovDet` where inputting data + that produced a singular covariance matrix would cause the helper method + ``_c_step`` to throw an exception. + :issue:`3367` by :user:`Jeremy Steward ` - - Fixed a bug in :class:`manifold.TSNE` affecting convergence of the - gradient descent. :issue:`8768` by :user:`David DeTomaso `. +- Fixed a bug in :class:`manifold.TSNE` affecting convergence of the + gradient descent. :issue:`8768` by :user:`David DeTomaso `. - - Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect - ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger `. +- Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect + ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger `. - - Fixed improper scaling in :class:`cross_decomposition.PLSRegression` - with ``scale=True``. :issue:`7819` by :user:`jayzed82 `. +- Fixed improper scaling in :class:`cross_decomposition.PLSRegression` + with ``scale=True``. :issue:`7819` by :user:`jayzed82 `. - - :class:`cluster.bicluster.SpectralCoclustering` and - :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms - with API by accepting ``y`` and returning the object. :issue:`6126`, - :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja - Nandana `. +- :class:`cluster.bicluster.SpectralCoclustering` and + :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms + with API by accepting ``y`` and returning the object. :issue:`6126`, + :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja + Nandana `. - - Fix bug where :mod:`mixture` ``sample`` methods did not return as many - samples as requested. :issue:`7702` by :user:`Levi John Wolf `. +- Fix bug where :mod:`mixture` ``sample`` methods did not return as many + samples as requested. :issue:`7702` by :user:`Levi John Wolf `. Preprocessing and feature selection - - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` - will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with - norm 'max' the norms returned will be the same as for dense matrices. - :issue:`7771` by `Ang Lu `_. +- For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` + will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with + norm 'max' the norms returned will be the same as for dense matrices. + :issue:`7771` by `Ang Lu `_. - - Fix a bug where :class:`feature_selection.SelectFdr` did not - exactly implement Benjamini-Hochberg procedure. It formerly may have - selected fewer features than it should. - :issue:`7490` by :user:`Peng Meng `. +- Fix a bug where :class:`feature_selection.SelectFdr` did not + exactly implement Benjamini-Hochberg procedure. It formerly may have + selected fewer features than it should. + :issue:`7490` by :user:`Peng Meng `. 
- - Fixed a bug where :class:`linear_model.RandomizedLasso` and - :class:`linear_model.RandomizedLogisticRegression` breaks for - sparse input. :issue:`8259` by :user:`Aman Dalmia `. +- Fixed a bug where :class:`linear_model.RandomizedLasso` and + :class:`linear_model.RandomizedLogisticRegression` breaks for + sparse input. :issue:`8259` by :user:`Aman Dalmia `. - - Fix a bug where :class:`feature_extraction.FeatureHasher` - mandatorily applied a sparse random projection to the hashed features, - preventing the use of - :class:`feature_extraction.text.HashingVectorizer` in a - pipeline with :class:`feature_extraction.text.TfidfTransformer`. - :issue:`7565` by :user:`Roman Yurchak `. +- Fix a bug where :class:`feature_extraction.FeatureHasher` + mandatorily applied a sparse random projection to the hashed features, + preventing the use of + :class:`feature_extraction.text.HashingVectorizer` in a + pipeline with :class:`feature_extraction.text.TfidfTransformer`. + :issue:`7565` by :user:`Roman Yurchak `. - - Fix a bug where :class:`feature_selection.mutual_info_regression` did not - correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre - `. +- Fix a bug where :class:`feature_selection.mutual_info_regression` did not + correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre + `. Model evaluation and meta-estimators - - Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform` - returns ``self.best_estimator_.transform()`` instead of - ``self.best_estimator_.inverse_transform()``. - :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `. +- Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform` + returns ``self.best_estimator_.transform()`` instead of + ``self.best_estimator_.inverse_transform()``. + :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `. - - Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, - :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, - and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` - attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` - by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, - and :user:`Stephen Hoover `. +- Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, + :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, + and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` + attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` + by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, + and :user:`Stephen Hoover `. - - Fixed a bug where :func:`model_selection.validation_curve` - reused the same estimator for each parameter value. - :issue:`7365` by :user:`Aleksandr Sandrovskii `. +- Fixed a bug where :func:`model_selection.validation_curve` + reused the same estimator for each parameter value. + :issue:`7365` by :user:`Aleksandr Sandrovskii `. - - :func:`model_selection.permutation_test_score` now works with Pandas - types. :issue:`5697` by :user:`Stijn Tonk `. +- :func:`model_selection.permutation_test_score` now works with Pandas + types. :issue:`5697` by :user:`Stijn Tonk `. - - Several fixes to input validation in - :class:`multiclass.OutputCodeClassifier` - :issue:`8086` by `Andreas Müller`_. +- Several fixes to input validation in + :class:`multiclass.OutputCodeClassifier` + :issue:`8086` by `Andreas Müller`_. 
- - :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all - classes are provided up-front. :issue:`6250` by - :user:`Asish Panda `. +- :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all + classes are provided up-front. :issue:`6250` by + :user:`Asish Panda `. - - Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a - list of 2d arrays, rather than a 3d array. In the case where different - target columns had different numbers of classes, a ``ValueError`` would be - raised on trying to stack matrices with different dimensions. - :issue:`8093` by :user:`Peter Bull `. +- Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a + list of 2d arrays, rather than a 3d array. In the case where different + target columns had different numbers of classes, a ``ValueError`` would be + raised on trying to stack matrices with different dimensions. + :issue:`8093` by :user:`Peter Bull `. Metrics - - :func:`metrics.average_precision_score` no longer linearly - interpolates between operating points, and instead weighs precisions - by the change in recall since the last operating point, as per the - `Wikipedia entry `_. - (`#7356 `_). By - :user:`Nick Dingwall ` and `Gael Varoquaux`_. +- :func:`metrics.average_precision_score` no longer linearly + interpolates between operating points, and instead weighs precisions + by the change in recall since the last operating point, as per the + `Wikipedia entry `_. + (`#7356 `_). By + :user:`Nick Dingwall ` and `Gael Varoquaux`_. - - Fix a bug in :func:`metrics.classification._check_targets` - which would return ``'binary'`` if ``y_true`` and ``y_pred`` were - both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was - ``'multiclass'``. :issue:`8377` by `Loic Esteve`_. +- Fix a bug in :func:`metrics.classification._check_targets` + which would return ``'binary'`` if ``y_true`` and ``y_pred`` were + both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was + ``'multiclass'``. :issue:`8377` by `Loic Esteve`_. - - Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and - hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929` - by `Joel Nothman`_ and :user:`Jon Crall `. +- Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and + hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929` + by `Joel Nothman`_ and :user:`Jon Crall `. - - Fixed passing of ``gamma`` parameter to the ``chi2`` kernel in - :func:`metrics.pairwise.pairwise_kernels` :issue:`5211` by - :user:`Nick Rhinehart `, - :user:`Saurabh Bansod ` and `Andreas Müller`_. +- Fixed passing of ``gamma`` parameter to the ``chi2`` kernel in + :func:`metrics.pairwise.pairwise_kernels` :issue:`5211` by + :user:`Nick Rhinehart `, + :user:`Saurabh Bansod ` and `Andreas Müller`_. Miscellaneous - - Fixed a bug when :func:`datasets.make_classification` fails - when generating more than 30 features. :issue:`8159` by - :user:`Herilalaina Rakotoarison `. +- Fixed a bug when :func:`datasets.make_classification` fails + when generating more than 30 features. :issue:`8159` by + :user:`Herilalaina Rakotoarison `. - - Fixed a bug where :func:`datasets.make_moons` gives an - incorrect result when ``n_samples`` is odd. - :issue:`8198` by :user:`Josh Levy `. +- Fixed a bug where :func:`datasets.make_moons` gives an + incorrect result when ``n_samples`` is odd. + :issue:`8198` by :user:`Josh Levy `. 
-  - Some ``fetch_`` functions in :mod:`datasets` were ignoring the
-    ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `.
+- Some ``fetch_`` functions in :mod:`datasets` were ignoring the
+  ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `.

-  - Fix estimators to accept a ``sample_weight`` parameter of type
-    ``pandas.Series`` in their ``fit`` function. :issue:`7825` by
-    `Kathleen Chen`_.
+- Fix estimators to accept a ``sample_weight`` parameter of type
+  ``pandas.Series`` in their ``fit`` function. :issue:`7825` by
+  `Kathleen Chen`_.

-  - Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable,
-    raising an exception if instability is identified. :issue:`7376` and
-    :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`.
+- Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable,
+  raising an exception if instability is identified. :issue:`7376` and
+  :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`.

-  - Fix a bug where :meth:`base.BaseEstimator.__getstate__`
-    obstructed pickling customizations of child-classes, when used in a
-    multiple inheritance context.
-    :issue:`8316` by :user:`Holger Peters `.
+- Fix a bug where :meth:`base.BaseEstimator.__getstate__`
+  obstructed pickling customizations of child-classes, when used in a
+  multiple inheritance context.
+  :issue:`8316` by :user:`Holger Peters `.

-  - Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in
-    documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` by
-    :user:`Oscar Najera `
+- Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in
+  documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` by
+  :user:`Oscar Najera `

-  - Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`.
-    :issue:`9289` by `Loic Esteve`_.
+- Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`.
+  :issue:`9289` by `Loic Esteve`_.

-  - Fix dataset loaders using Python 3 version of makedirs to also work in
-    Python 2. :issue:`9284` by :user:`Sebastin Santy `.
+- Fix dataset loaders using Python 3 version of makedirs to also work in
+  Python 2. :issue:`9284` by :user:`Sebastin Santy `.

-  - Several minor issues were fixed with thanks to the alerts of
-    [lgtm.com](http://lgtm.com). :issue:`9278` by :user:`Jean Helie `,
-    among others.
+- Several minor issues were fixed with thanks to the alerts of
+  `lgtm.com <http://lgtm.com>`_. :issue:`9278` by :user:`Jean Helie `,
+  among others.

 API changes summary
 -------------------

 Trees and ensembles

-  - Gradient boosting base models are no longer estimators. By `Andreas Müller`_.
+- Gradient boosting base models are no longer estimators. By `Andreas Müller`_.

-  - All tree based estimators now accept a ``min_impurity_decrease``
-    parameter in lieu of the ``min_impurity_split``, which is now deprecated.
-    The ``min_impurity_decrease`` helps stop splitting the nodes in which
-    the weighted impurity decrease from splitting is no longer alteast
-    ``min_impurity_decrease``. :issue:`8449` by `Raghav RV`_.
+- All tree based estimators now accept a ``min_impurity_decrease``
+  parameter in lieu of the ``min_impurity_split``, which is now deprecated.
+  The ``min_impurity_decrease`` helps stop splitting the nodes in which
+  the weighted impurity decrease from splitting is no longer at least
+  ``min_impurity_decrease``. :issue:`8449` by `Raghav RV`_.
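Editor's note: a minimal migration sketch for the ``min_impurity_decrease``
entry above; the thresholds are illustrative only, not recommended values::

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)

    # Deprecated in 0.19: stop splitting once node impurity falls below a value.
    old = DecisionTreeClassifier(min_impurity_split=0.2).fit(X, y)

    # Replacement: split only if it decreases the weighted impurity by at
    # least this much.
    new = DecisionTreeClassifier(min_impurity_decrease=0.01).fit(X, y)
    print(old.tree_.node_count, new.tree_.node_count)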
 Linear, kernelized and related models

-  - ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`,
-    :class:`linear_model.SGDRegressor`,
-    :class:`linear_model.PassiveAggressiveClassifier`,
-    :class:`linear_model.PassiveAggressiveRegressor` and
-    :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_.
+- ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.PassiveAggressiveClassifier`,
+  :class:`linear_model.PassiveAggressiveRegressor` and
+  :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_.

 Other predictors

-  - :class:`neighbors.LSHForest` has been deprecated and will be
-    removed in 0.21 due to poor performance.
-    :issue:`9078` by :user:`Laurent Direr `.
+- :class:`neighbors.LSHForest` has been deprecated and will be
+  removed in 0.21 due to poor performance.
+  :issue:`9078` by :user:`Laurent Direr `.

-  - :class:`neighbors.NearestCentroid` no longer purports to support
-    ``metric='precomputed'`` which now raises an error. :issue:`8515` by
-    :user:`Sergul Aydore `.
+- :class:`neighbors.NearestCentroid` no longer purports to support
+  ``metric='precomputed'`` which now raises an error. :issue:`8515` by
+  :user:`Sergul Aydore `.

-  - The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now
-    has no effect and is deprecated to be removed in 0.21. :issue:`9239`
-    by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
-    `, and `Joel Nothman`_.
+- The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now
+  has no effect and is deprecated to be removed in 0.21. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
+  `, and `Joel Nothman`_.

 Decomposition, manifold learning and clustering

-  - Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method
-    in :class:`decomposition.LatentDirichletAllocation` because the
-    user no longer has access to the unnormalized document topic distribution
-    needed for the perplexity calculation. :issue:`7954` by
-    :user:`Gary Foreman `.
+- Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method
+  in :class:`decomposition.LatentDirichletAllocation` because the
+  user no longer has access to the unnormalized document topic distribution
+  needed for the perplexity calculation. :issue:`7954` by
+  :user:`Gary Foreman `.

-  - The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation`
-    has been renamed to ``n_components`` and will be removed in version 0.21.
-    :issue:`8922` by :user:`Attractadore`.
+- The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation`
+  has been renamed to ``n_components`` and will be removed in version 0.21.
+  :issue:`8922` by :user:`Attractadore`.

-  - :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is
-    deprecated in preference for class parameter.
-    :issue:`8137` by :user:`Naoya Kanai `.
+- :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is
+  deprecated in preference for the class parameter.
+  :issue:`8137` by :user:`Naoya Kanai `.

-  - :class:`cluster.DBSCAN` now has a ``metric_params`` parameter.
-    :issue:`8139` by :user:`Naoya Kanai `.
+- :class:`cluster.DBSCAN` now has a ``metric_params`` parameter.
+  :issue:`8139` by :user:`Naoya Kanai `.

 Preprocessing and feature selection

-  - :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
-    method only if the underlying estimator does. By `Andreas Müller`_.
+- :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
+  method only if the underlying estimator does. By `Andreas Müller`_.

-  - :class:`feature_selection.SelectFromModel` now validates the ``threshold``
-    parameter and sets the ``threshold_`` attribute during the call to
-    ``fit``, and no longer during the call to ``transform```. By `Andreas
-    Müller`_.
+- :class:`feature_selection.SelectFromModel` now validates the ``threshold``
+  parameter and sets the ``threshold_`` attribute during the call to
+  ``fit``, and no longer during the call to ``transform``. By `Andreas
+  Müller`_.

-  - The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher`
-    has been deprecated, and replaced with a more principled alternative,
-    ``alternate_sign``.
-    :issue:`7565` by :user:`Roman Yurchak `.
+- The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher`
+  has been deprecated, and replaced with a more principled alternative,
+  ``alternate_sign``.
+  :issue:`7565` by :user:`Roman Yurchak `.

-  - :class:`linear_model.RandomizedLogisticRegression`,
-    and :class:`linear_model.RandomizedLasso` have been deprecated and will
-    be removed in version 0.21.
-    :issue:`8995` by :user:`Ramana.S `.
+- :class:`linear_model.RandomizedLogisticRegression`,
+  and :class:`linear_model.RandomizedLasso` have been deprecated and will
+  be removed in version 0.21.
+  :issue:`8995` by :user:`Ramana.S `.

 Model evaluation and meta-estimators

-  - Deprecate the ``fit_params`` constructor input to the
-    :class:`model_selection.GridSearchCV` and
-    :class:`model_selection.RandomizedSearchCV` in favor
-    of passing keyword parameters to the ``fit`` methods
-    of those classes. Data-dependent parameters needed for model
-    training should be passed as keyword arguments to ``fit``,
-    and conforming to this convention will allow the hyperparameter
-    selection classes to be used with tools such as
-    :func:`model_selection.cross_val_predict`.
-    :issue:`2879` by :user:`Stephen Hoover `.
-
-  - In version 0.21, the default behavior of splitters that use the
-    ``test_size`` and ``train_size`` parameter will change, such that
-    specifying ``train_size`` alone will cause ``test_size`` to be the
-    remainder. :issue:`7459` by :user:`Nelson Liu `.
-
-  - :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``,
-    ``decision_function`` and ``predict_proba`` methods only when the
-    underlying estimator does. :issue:`7812` by `Andreas Müller`_ and
-    :user:`Mikhail Korobov `.
-
-  - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method
-    only if the underlying estimator does. By `Andreas Müller`_.
-
-  - The ``decision_function`` output shape for binary classification in
-    :class:`multiclass.OneVsRestClassifier` and
-    :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform
-    to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_.
-
-  - The :func:`multioutput.MultiOutputClassifier.predict_proba`
-    function used to return a 3d array (``n_samples``, ``n_classes``,
-    ``n_outputs``). In the case where different target columns had different
-    numbers of classes, a ``ValueError`` would be raised on trying to stack
-    matrices with different dimensions. This function now returns a list of
-    arrays where the length of the list is ``n_outputs``, and each array is
-    (``n_samples``, ``n_classes``) for that particular output.
-    :issue:`8093` by :user:`Peter Bull `.
-
-  - Replace attribute ``named_steps`` ``dict`` to :class:`utils.Bunch`
-    in :class:`pipeline.Pipeline` to enable tab completion in interactive
-    environment. In the case conflict value on ``named_steps`` and ``dict``
-    attribute, ``dict`` behavior will be prioritized.
-    :issue:`8481` by :user:`Herilalaina Rakotoarison `.
+- Deprecate the ``fit_params`` constructor input to the
+  :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` in favor
+  of passing keyword parameters to the ``fit`` methods
+  of those classes. Data-dependent parameters needed for model
+  training should be passed as keyword arguments to ``fit``,
+  and conforming to this convention will allow the hyperparameter
+  selection classes to be used with tools such as
+  :func:`model_selection.cross_val_predict`.
+  :issue:`2879` by :user:`Stephen Hoover `.
+
+- In version 0.21, the default behavior of splitters that use the
+  ``test_size`` and ``train_size`` parameter will change, such that
+  specifying ``train_size`` alone will cause ``test_size`` to be the
+  remainder. :issue:`7459` by :user:`Nelson Liu `.
+
+- :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``,
+  ``decision_function`` and ``predict_proba`` methods only when the
+  underlying estimator does. :issue:`7812` by `Andreas Müller`_ and
+  :user:`Mikhail Korobov `.
+
+- :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method
+  only if the underlying estimator does. By `Andreas Müller`_.
+
+- The ``decision_function`` output shape for binary classification in
+  :class:`multiclass.OneVsRestClassifier` and
+  :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform
+  to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_.
+
+- The :func:`multioutput.MultiOutputClassifier.predict_proba`
+  function used to return a 3d array (``n_samples``, ``n_classes``,
+  ``n_outputs``). In the case where different target columns had different
+  numbers of classes, a ``ValueError`` would be raised on trying to stack
+  matrices with different dimensions. This function now returns a list of
+  arrays where the length of the list is ``n_outputs``, and each array is
+  (``n_samples``, ``n_classes``) for that particular output.
+  :issue:`8093` by :user:`Peter Bull `.
+
+- Replace attribute ``named_steps`` ``dict`` to :class:`utils.Bunch`
+  in :class:`pipeline.Pipeline` to enable tab completion in interactive
+  environment. In case of a conflict between the ``named_steps`` and ``dict``
+  attributes, ``dict`` behavior will be prioritized.
+  :issue:`8481` by :user:`Herilalaina Rakotoarison `.

 Miscellaneous

-  - Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``.
-    The method should not accept ``y`` parameter, as it's used at the prediction time.
-    :issue:`8174` by :user:`Tahar Zanouda `, `Alexandre Gramfort`_
-    and `Raghav RV`_.
-
-  - SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
-    for scikit-learn. The following backported functions in
-    :mod:`utils` have been removed or deprecated accordingly.
-    :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai `
-
-    Removed in 0.19:
-
-    - ``utils.fixes.argpartition``
-    - ``utils.fixes.array_equal``
-    - ``utils.fixes.astype``
-    - ``utils.fixes.bincount``
-    - ``utils.fixes.expit``
-    - ``utils.fixes.frombuffer_empty``
-    - ``utils.fixes.in1d``
-    - ``utils.fixes.norm``
-    - ``utils.fixes.rankdata``
-    - ``utils.fixes.safe_copy``
-
-    Deprecated in 0.19, to be removed in 0.21:
-
-    - ``utils.arpack.eigs``
-    - ``utils.arpack.eigsh``
-    - ``utils.arpack.svds``
-    - ``utils.extmath.fast_dot``
-    - ``utils.extmath.logsumexp``
-    - ``utils.extmath.norm``
-    - ``utils.extmath.pinvh``
-    - ``utils.graph.graph_laplacian``
-    - ``utils.random.choice``
-    - ``utils.sparsetools.connected_components``
-    - ``utils.stats.rankdata``
-
-  - Estimators with both methods ``decision_function`` and ``predict_proba``
-    are now required to have a monotonic relation between them. The
-    method ``check_decision_proba_consistency`` has been added in
-    **utils.estimator_checks** to check their consistency.
-    :issue:`7578` by :user:`Shubham Bhardwaj `
-
-  - All checks in ``utils.estimator_checks``, in particular
-    :func:`utils.estimator_checks.check_estimator` now accept estimator
-    instances. Most other checks do not accept
-    estimator classes any more. :issue:`9019` by `Andreas Müller`_.
-
-  - Ensure that estimators' attributes ending with ``_`` are not set
-    in the constructor but only in the ``fit`` method. Most notably,
-    ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`)
-    now only have ``self.estimators_`` available after ``fit``.
-    :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_.
+- Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``.
+  These methods should not accept a ``y`` parameter, as they are used at
+  prediction time.
+  :issue:`8174` by :user:`Tahar Zanouda `, `Alexandre Gramfort`_
+  and `Raghav RV`_.
+
+- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
+  for scikit-learn. The following backported functions in
+  :mod:`utils` have been removed or deprecated accordingly.
+  :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai `
+
+  Removed in 0.19:
+
+  - ``utils.fixes.argpartition``
+  - ``utils.fixes.array_equal``
+  - ``utils.fixes.astype``
+  - ``utils.fixes.bincount``
+  - ``utils.fixes.expit``
+  - ``utils.fixes.frombuffer_empty``
+  - ``utils.fixes.in1d``
+  - ``utils.fixes.norm``
+  - ``utils.fixes.rankdata``
+  - ``utils.fixes.safe_copy``
+
+  Deprecated in 0.19, to be removed in 0.21:
+
+  - ``utils.arpack.eigs``
+  - ``utils.arpack.eigsh``
+  - ``utils.arpack.svds``
+  - ``utils.extmath.fast_dot``
+  - ``utils.extmath.logsumexp``
+  - ``utils.extmath.norm``
+  - ``utils.extmath.pinvh``
+  - ``utils.graph.graph_laplacian``
+  - ``utils.random.choice``
+  - ``utils.sparsetools.connected_components``
+  - ``utils.stats.rankdata``
+
+- Estimators with both methods ``decision_function`` and ``predict_proba``
+  are now required to have a monotonic relation between them. The
+  method ``check_decision_proba_consistency`` has been added in
+  **utils.estimator_checks** to check their consistency.
+  :issue:`7578` by :user:`Shubham Bhardwaj `
+
+- All checks in ``utils.estimator_checks``, in particular
+  :func:`utils.estimator_checks.check_estimator` now accept estimator
+  instances. Most other checks do not accept
+  estimator classes any more. :issue:`9019` by `Andreas Müller`_.
+
+- Ensure that estimators' attributes ending with ``_`` are not set
+  in the constructor but only in the ``fit`` method.
Most notably, + ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`) + now only have ``self.estimators_`` available after ``fit``. + :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. .. _changes_0_18_2: @@ -848,11 +853,11 @@ Version 0.18.2 Changelog --------- - - Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by - `Loic Esteve`_. +- Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by + `Loic Esteve`_. - - Minor compatibility changes in the examples :issue:`9010` :issue:`8040` - :issue:`9149`. +- Minor compatibility changes in the examples :issue:`9010` :issue:`8040` + :issue:`9149`. Code Contributors ----------------- @@ -872,132 +877,132 @@ Changelog Enhancements ............ - - Improved ``sample_without_replacement`` speed by utilizing - numpy.random.permutation for most cases. As a result, - samples may differ in this release for a fixed random state. - Affected estimators: +- Improved ``sample_without_replacement`` speed by utilizing + numpy.random.permutation for most cases. As a result, + samples may differ in this release for a fixed random state. + Affected estimators: - - :class:`ensemble.BaggingClassifier` - - :class:`ensemble.BaggingRegressor` - - :class:`linear_model.RANSACRegressor` - - :class:`model_selection.RandomizedSearchCV` - - :class:`random_projection.SparseRandomProjection` + - :class:`ensemble.BaggingClassifier` + - :class:`ensemble.BaggingRegressor` + - :class:`linear_model.RANSACRegressor` + - :class:`model_selection.RandomizedSearchCV` + - :class:`random_projection.SparseRandomProjection` - This also affects the :meth:`datasets.make_classification` - method. + This also affects the :meth:`datasets.make_classification` + method. Bug fixes ......... - - Fix issue where ``min_grad_norm`` and ``n_iter_without_progress`` - parameters were not being utilised by :class:`manifold.TSNE`. - :issue:`6497` by :user:`Sebastian Säger ` - - - Fix bug for svm's decision values when ``decision_function_shape`` - is ``ovr`` in :class:`svm.SVC`. - :class:`svm.SVC`'s decision_function was incorrect from versions - 0.17.0 through 0.18.0. - :issue:`7724` by `Bing Tian Dai`_ - - - Attribute ``explained_variance_ratio`` of - :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated - with SVD and Eigen solver are now of the same length. :issue:`7632` - by :user:`JPFrancoia ` - - - Fixes issue in :ref:`univariate_feature_selection` where score - functions were not accepting multi-label targets. :issue:`7676` - by :user:`Mohammed Affan ` - - - Fixed setting parameters when calling ``fit`` multiple times on - :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_ - - - Fixes issue in ``partial_fit`` method of - :class:`multiclass.OneVsRestClassifier` when number of classes used in - ``partial_fit`` was less than the total number of classes in the - data. :issue:`7786` by `Srivatsan Ramesh`_ - - - Fixes issue in :class:`calibration.CalibratedClassifierCV` where - the sum of probabilities of each class for a data was not 1, and - ``CalibratedClassifierCV`` now handles the case where the training set - has less number of classes than the total data. :issue:`7799` by - `Srivatsan Ramesh`_ - - - Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not - exactly implement Benjamini-Hochberg procedure. It formerly may have - selected fewer features than it should. - :issue:`7490` by :user:`Peng Meng `. 
-
-  - :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles
-    integer inputs. :issue:`6282` by `Jake Vanderplas`_.
-
-  - The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
-    regressors now assumes uniform sample weights by default if the
-    ``sample_weight`` argument is not passed to the ``fit`` function.
-    Previously, the parameter was silently ignored. :issue:`7301`
-    by :user:`Nelson Liu `.
-
-  - Numerical issue with :class:`linear_model.RidgeCV` on centered data when
-    `n_features > n_samples`. :issue:`6178` by `Bertrand Thirion`_
-
-  - Tree splitting criterion classes' cloning/pickling is now memory safe
-    :issue:`7680` by :user:`Ibraim Ganiev `.
-
-  - Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_``
-    attribute in `transform()`. :issue:`7553` by :user:`Ekaterina
-    Krivich `.
-
-  - :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles
-    string labels. :issue:`5874` by `Raghav RV`_.
-
-  - Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised
-    an error when ``stratify`` is a list of string labels. :issue:`7593` by
-    `Raghav RV`_.
-
-  - Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and
-    :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable
-    because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by
-    `Raghav RV`_.
-
-  - All cross-validation utilities in :mod:`sklearn.model_selection` now
-    permit one time cross-validation splitters for the ``cv`` parameter. Also
-    non-deterministic cross-validation splitters (where multiple calls to
-    ``split`` produce dissimilar splits) can be used as ``cv`` parameter.
-    The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each
-    parameter setting on the split produced by the first ``split`` call
-    to the cross-validation splitter. :issue:`7660` by `Raghav RV`_.
-
-  - Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform`
-    returned an invalid CSR matrix.
-    :issue:`7750` by :user:`CJ Carey `.
-
-  - Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a
-    small negative distance. :issue:`7732` by :user:`Artsion `.
+- Fix issue where ``min_grad_norm`` and ``n_iter_without_progress``
+  parameters were not being utilised by :class:`manifold.TSNE`.
+  :issue:`6497` by :user:`Sebastian Säger `
+
+- Fix bug for svm's decision values when ``decision_function_shape``
+  is ``ovr`` in :class:`svm.SVC`.
+  :class:`svm.SVC`'s decision_function was incorrect from versions
+  0.17.0 through 0.18.0.
+  :issue:`7724` by `Bing Tian Dai`_
+
+- The ``explained_variance_ratio`` attribute of
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated
+  with the SVD and Eigen solvers is now of the same length. :issue:`7632`
+  by :user:`JPFrancoia `
+
+- Fixes issue in :ref:`univariate_feature_selection` where score
+  functions were not accepting multi-label targets. :issue:`7676`
+  by :user:`Mohammed Affan `
+
+- Fixed setting parameters when calling ``fit`` multiple times on
+  :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_
+
+- Fixes issue in ``partial_fit`` method of
+  :class:`multiclass.OneVsRestClassifier` when number of classes used in
+  ``partial_fit`` was less than the total number of classes in the
+  data.
:issue:`7786` by `Srivatsan Ramesh`_
+
+- Fixes issue in :class:`calibration.CalibratedClassifierCV` where
+  the sum of the predicted probabilities over classes for a sample was
+  not 1, and ``CalibratedClassifierCV`` now handles the case where the
+  training set has fewer classes than the full data. :issue:`7799` by
+  `Srivatsan Ramesh`_
+
+- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not
+  exactly implement the Benjamini-Hochberg procedure. It formerly may have
+  selected fewer features than it should.
+  :issue:`7490` by :user:`Peng Meng `.
+
+- :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles
+  integer inputs. :issue:`6282` by `Jake Vanderplas`_.
+
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+  regressors now assumes uniform sample weights by default if the
+  ``sample_weight`` argument is not passed to the ``fit`` function.
+  Previously, the parameter was silently ignored. :issue:`7301`
+  by :user:`Nelson Liu `.
+
+- Fixed a numerical issue with :class:`linear_model.RidgeCV` on centered
+  data when `n_features > n_samples`. :issue:`6178` by `Bertrand Thirion`_
+
+- Tree splitting criterion classes' cloning/pickling is now memory safe.
+  :issue:`7680` by :user:`Ibraim Ganiev `.
+
+- Fixed a bug where :class:`decomposition.NMF` set its ``n_iters_``
+  attribute in `transform()`. :issue:`7553` by :user:`Ekaterina
+  Krivich `.
+
+- :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles
+  string labels. :issue:`5874` by `Raghav RV`_.
+
+- Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised
+  an error when ``stratify`` is a list of string labels. :issue:`7593` by
+  `Raghav RV`_.
+
+- Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and
+  :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable
+  because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by
+  `Raghav RV`_.
+
+- All cross-validation utilities in :mod:`sklearn.model_selection` now
+  permit one-time cross-validation splitters for the ``cv`` parameter. Also
+  non-deterministic cross-validation splitters (where multiple calls to
+  ``split`` produce dissimilar splits) can be used as the ``cv`` parameter.
+  The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each
+  parameter setting on the split produced by the first ``split`` call
+  to the cross-validation splitter. :issue:`7660` by `Raghav RV`_.
+
+- Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform`
+  returned an invalid CSR matrix.
+  :issue:`7750` by :user:`CJ Carey `.
+
+- Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a
+  small negative distance. :issue:`7732` by :user:`Artsion `.

 API changes summary
 -------------------

 Trees and forests

-  - The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
-    regressors now assumes uniform sample weights by default if the
-    ``sample_weight`` argument is not passed to the ``fit`` function.
-    Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson
-    Liu `.
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+  regressors now assumes uniform sample weights by default if the
+  ``sample_weight`` argument is not passed to the ``fit`` function.
+  Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson
+  Liu `.

-  - Tree splitting criterion classes' cloning/pickling is now memory safe.
-    :issue:`7680` by :user:`Ibraim Ganiev `.
+- Tree splitting criterion classes' cloning/pickling is now memory safe. + :issue:`7680` by :user:`Ibraim Ganiev `. Linear, kernelized and related models - - Length of ``explained_variance_ratio`` of - :class:`discriminant_analysis.LinearDiscriminantAnalysis` - changed for both Eigen and SVD solvers. The attribute has now a length - of min(n_components, n_classes - 1). :issue:`7632` - by :user:`JPFrancoia ` +- Length of ``explained_variance_ratio`` of + :class:`discriminant_analysis.LinearDiscriminantAnalysis` + changed for both Eigen and SVD solvers. The attribute has now a length + of min(n_components, n_classes - 1). :issue:`7632` + by :user:`JPFrancoia ` - - Numerical issue with :class:`linear_model.RidgeCV` on centered data when - ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_ +- Numerical issue with :class:`linear_model.RidgeCV` on centered data when + ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_ .. _changes_0_18: @@ -1016,101 +1021,101 @@ Version 0.18 Model Selection Enhancements and API Changes -------------------------------------------- - - **The model_selection module** +- **The model_selection module** - The new module :mod:`sklearn.model_selection`, which groups together the - functionalities of formerly :mod:`sklearn.cross_validation`, - :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new - possibilities such as nested cross-validation and better manipulation of - parameter searches with Pandas. + The new module :mod:`sklearn.model_selection`, which groups together the + functionalities of formerly :mod:`sklearn.cross_validation`, + :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new + possibilities such as nested cross-validation and better manipulation of + parameter searches with Pandas. - Many things will stay the same but there are some key differences. Read - below to know more about the changes. + Many things will stay the same but there are some key differences. Read + below to know more about the changes. - - **Data-independent CV splitters enabling nested cross-validation** +- **Data-independent CV splitters enabling nested cross-validation** - The new cross-validation splitters, defined in the - :mod:`sklearn.model_selection`, are no longer initialized with any - data-dependent parameters such as ``y``. Instead they expose a - :func:`split` method that takes in the data and yields a generator for the - different splits. + The new cross-validation splitters, defined in the + :mod:`sklearn.model_selection`, are no longer initialized with any + data-dependent parameters such as ``y``. Instead they expose a + :func:`split` method that takes in the data and yields a generator for the + different splits. - This change makes it possible to use the cross-validation splitters to - perform nested cross-validation, facilitated by - :class:`model_selection.GridSearchCV` and - :class:`model_selection.RandomizedSearchCV` utilities. + This change makes it possible to use the cross-validation splitters to + perform nested cross-validation, facilitated by + :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` utilities. 
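To make the new splitter contract concrete, here is a minimal sketch of nested cross-validation with the data-independent splitters (the iris data and the SVC grid are illustrative choices, not part of the changelog)::

    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)

    # Splitters are constructed without data; ``split(X, y)`` is called later.
    inner_cv = KFold(n_splits=4)
    outer_cv = KFold(n_splits=4)

    # Nested CV: the parameter search runs inside each outer training fold.
    search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=inner_cv)
    print(cross_val_score(search, X, y, cv=outer_cv).mean())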
-  - **The enhanced cv_results_ attribute**
+- **The enhanced cv_results_ attribute**

-    The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV`
-    and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
-    ``grid_scores_`` attribute is a dict of 1D arrays with elements in each
-    array corresponding to the parameter settings (i.e. search candidates).
+  The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV`
+  and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
+  ``grid_scores_`` attribute is a dict of 1D arrays with elements in each
+  array corresponding to the parameter settings (i.e. search candidates).

-    The ``cv_results_`` dict can be easily imported into ``pandas`` as a
-    ``DataFrame`` for exploring the search results.
+  The ``cv_results_`` dict can be easily imported into ``pandas`` as a
+  ``DataFrame`` for exploring the search results.

-    The ``cv_results_`` arrays include scores for each cross-validation split
-    (with keys such as ``'split0_test_score'``), as well as their mean
-    (``'mean_test_score'``) and standard deviation (``'std_test_score'``).
+  The ``cv_results_`` arrays include scores for each cross-validation split
+  (with keys such as ``'split0_test_score'``), as well as their mean
+  (``'mean_test_score'``) and standard deviation (``'std_test_score'``).

-    The ranks for the search candidates (based on their mean
-    cross-validation score) is available at ``cv_results_['rank_test_score']``.
+  The ranks for the search candidates (based on their mean
+  cross-validation score) are available at ``cv_results_['rank_test_score']``.

-    The parameter values for each parameter is stored separately as numpy
-    masked object arrays. The value, for that search candidate, is masked if
-    the corresponding parameter is not applicable. Additionally a list of all
-    the parameter dicts are stored at ``cv_results_['params']``.
+  The parameter values for each parameter are stored separately as numpy
+  masked object arrays. The value, for that search candidate, is masked if
+  the corresponding parameter is not applicable. Additionally, a list of all
+  the parameter dicts is stored at ``cv_results_['params']``.

-  - **Parameters n_folds and n_iter renamed to n_splits**
+- **Parameters n_folds and n_iter renamed to n_splits**

-    Some parameter names have changed:
-    The ``n_folds`` parameter in new :class:`model_selection.KFold`,
-    :class:`model_selection.GroupKFold` (see below for the name change),
-    and :class:`model_selection.StratifiedKFold` is now renamed to
-    ``n_splits``. The ``n_iter`` parameter in
-    :class:`model_selection.ShuffleSplit`, the new class
-    :class:`model_selection.GroupShuffleSplit` and
-    :class:`model_selection.StratifiedShuffleSplit` is now renamed to
-    ``n_splits``.
+  Some parameter names have changed:
+  The ``n_folds`` parameter in the new :class:`model_selection.KFold`,
+  :class:`model_selection.GroupKFold` (see below for the name change),
+  and :class:`model_selection.StratifiedKFold` is now renamed to
+  ``n_splits``. The ``n_iter`` parameter in
+  :class:`model_selection.ShuffleSplit`, the new class
+  :class:`model_selection.GroupShuffleSplit` and
+  :class:`model_selection.StratifiedShuffleSplit` is now renamed to
+  ``n_splits``.
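A hedged sketch of both points above, the ``n_splits`` rename and the tabular ``cv_results_`` (assuming pandas is installed; the toy grid is hypothetical)::

    import pandas as pd
    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV, KFold
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)

    # ``n_folds`` from the old cross_validation module is now ``n_splits``.
    search = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, cv=KFold(n_splits=5))
    search.fit(X, y)

    # ``cv_results_`` is a dict of 1D arrays, one entry per candidate.
    df = pd.DataFrame(search.cv_results_)
    print(df[['param_C', 'mean_test_score', 'rank_test_score']])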
-  - **Rename of splitter classes which accepts group labels along with data**
+- **Rename of splitter classes which accept group labels along with data**

-    The cross-validation splitters ``LabelKFold``,
-    ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
-    been renamed to :class:`model_selection.GroupKFold`,
-    :class:`model_selection.GroupShuffleSplit`,
-    :class:`model_selection.LeaveOneGroupOut` and
-    :class:`model_selection.LeavePGroupsOut` respectively.
+  The cross-validation splitters ``LabelKFold``,
+  ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
+  been renamed to :class:`model_selection.GroupKFold`,
+  :class:`model_selection.GroupShuffleSplit`,
+  :class:`model_selection.LeaveOneGroupOut` and
+  :class:`model_selection.LeavePGroupsOut` respectively.

-    Note the change from singular to plural form in
-    :class:`model_selection.LeavePGroupsOut`.
+  Note the change from singular to plural form in
+  :class:`model_selection.LeavePGroupsOut`.

-  - **Fit parameter labels renamed to groups**
+- **Fit parameter labels renamed to groups**

-    The ``labels`` parameter in the :func:`split` method of the newly renamed
-    splitters :class:`model_selection.GroupKFold`,
-    :class:`model_selection.LeaveOneGroupOut`,
-    :class:`model_selection.LeavePGroupsOut`,
-    :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
-    following the new nomenclature of their class names.
+  The ``labels`` parameter in the :func:`split` method of the newly renamed
+  splitters :class:`model_selection.GroupKFold`,
+  :class:`model_selection.LeaveOneGroupOut`,
+  :class:`model_selection.LeavePGroupsOut`,
+  :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
+  following the new nomenclature of their class names.

-  - **Parameter n_labels renamed to n_groups**
+- **Parameter n_labels renamed to n_groups**

-    The parameter ``n_labels`` in the newly renamed
-    :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.
+  The parameter ``n_labels`` in the newly renamed
+  :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.

-  - Training scores and Timing information
+- **Training scores and timing information**

-    ``cv_results_`` also includes the training scores for each
-    cross-validation split (with keys such as ``'split0_train_score'``), as
-    well as their mean (``'mean_train_score'``) and standard deviation
-    (``'std_train_score'``). To avoid the cost of evaluating training score,
-    set ``return_train_score=False``.
+  ``cv_results_`` also includes the training scores for each
+  cross-validation split (with keys such as ``'split0_train_score'``), as
+  well as their mean (``'mean_train_score'``) and standard deviation
+  (``'std_train_score'``). To avoid the cost of evaluating training score,
+  set ``return_train_score=False``.

-    Additionally the mean and standard deviation of the times taken to split,
-    train and score the model across all the cross-validation splits is
-    available at the key ``'mean_time'`` and ``'std_time'`` respectively.
+  Additionally, the mean and standard deviation of the times taken to split,
+  train and score the model across all the cross-validation splits are
+  available at the keys ``'mean_time'`` and ``'std_time'`` respectively.
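A minimal sketch of the renamed group-aware API described above (the toy arrays are hypothetical)::

    import numpy as np
    from sklearn.model_selection import GroupKFold

    X = np.arange(12).reshape(6, 2)
    y = np.array([0, 0, 1, 1, 0, 1])
    groups = np.array([1, 1, 2, 2, 3, 3])  # formerly the ``labels`` argument

    # ``LabelKFold`` is now ``GroupKFold``; ``split`` takes ``groups``.
    for train_idx, test_idx in GroupKFold(n_splits=3).split(X, y, groups):
        print(train_idx, test_idx)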
 Changelog
 ---------

@@ -1120,399 +1125,399 @@ New features

 Classifiers and Regressors

-  - The Gaussian Process module has been reimplemented and now offers classification
-    and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
-    and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
-    implementation supports kernel engineering, gradient-based hyperparameter optimization or
-    sampling of functions from GP prior and GP posterior. Extensive documentation and
-    examples are provided. By `Jan Hendrik Metzen`_.
+- The Gaussian Process module has been reimplemented and now offers classification
+  and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
+  and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
+  implementation supports kernel engineering, gradient-based hyperparameter optimization and
+  sampling of functions from the GP prior and GP posterior. Extensive documentation and
+  examples are provided. By `Jan Hendrik Metzen`_.

-  - Added new supervised learning algorithm: :ref:`Multi-layer Perceptron `
-    :issue:`3204` by :user:`Issam H. Laradji `
+- Added new supervised learning algorithm: :ref:`Multi-layer Perceptron `
+  :issue:`3204` by :user:`Issam H. Laradji `

-  - Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers.
-    :issue:`5291` by `Manoj Kumar`_.
+- Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers.
+  :issue:`5291` by `Manoj Kumar`_.

-  - Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It
-    converts single output regressors to multi-output regressors by fitting
-    one regressor per output. By :user:`Tim Head `.
+- Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It
+  converts single output regressors to multi-output regressors by fitting
+  one regressor per output. By :user:`Tim Head `.

 Other estimators

-  - New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`
-    replace former mixture models, employing faster inference
-    for sounder results. :issue:`7295` by :user:`Wei Xue ` and
-    :user:`Thierry Guillemot `.
+- New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`
+  replace former mixture models, employing faster inference
+  for sounder results. :issue:`7295` by :user:`Wei Xue ` and
+  :user:`Thierry Guillemot `.

-  - Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA`
-    and it is available calling with parameter ``svd_solver='randomized'``.
-    The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old
-    behavior of PCA is recovered by ``svd_solver='full'``. An additional solver
-    calls ``arpack`` and performs truncated (non-randomized) SVD. By default,
-    the best solver is selected depending on the size of the input and the
-    number of components requested. :issue:`5299` by :user:`Giorgio Patrini `.
+- Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA`
+  and is available by calling it with the parameter ``svd_solver='randomized'``.
+  The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old
+  behavior of PCA is recovered by ``svd_solver='full'``. An additional solver
+  calls ``arpack`` and performs truncated (non-randomized) SVD. By default,
+  the best solver is selected depending on the size of the input and the
+  number of components requested. :issue:`5299` by :user:`Giorgio Patrini `.
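A short sketch of the solver selection described above (random data, purely illustrative)::

    import numpy as np
    from sklearn.decomposition import PCA

    X = np.random.RandomState(0).rand(100, 20)

    # Former RandomizedPCA behaviour, now a solver option of PCA.
    pca = PCA(n_components=5, svd_solver='randomized', random_state=0).fit(X)

    # 'full' recovers the exact LAPACK SVD; 'arpack' gives a truncated,
    # non-randomized SVD.
    exact = PCA(n_components=5, svd_solver='full').fit(X)
    print(pca.explained_variance_ratio_.round(3))
    print(exact.explained_variance_ratio_.round(3))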
-  - Added two functions for mutual information estimation:
-    :func:`feature_selection.mutual_info_classif` and
-    :func:`feature_selection.mutual_info_regression`. These functions can be
-    used in :class:`feature_selection.SelectKBest` and
-    :class:`feature_selection.SelectPercentile` as score functions.
-    By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `.
+- Added two functions for mutual information estimation:
+  :func:`feature_selection.mutual_info_classif` and
+  :func:`feature_selection.mutual_info_regression`. These functions can be
+  used in :class:`feature_selection.SelectKBest` and
+  :class:`feature_selection.SelectPercentile` as score functions.
+  By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `.

-  - Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
-    random forests. By `Nicolas Goix`_.
+- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
+  random forests. By `Nicolas Goix`_.

-  - Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
-    Elkan's fast K-Means algorithm. By `Andreas Müller`_.
+- Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
+  Elkan's fast K-Means algorithm. By `Andreas Müller`_.

 Model selection and evaluation

-  - Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes Mallows
-    Index which measures the similarity of two clusterings of a set of points
-    By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes-Mallows
+  Index, which measures the similarity of two clusterings of a set of points.
+  By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.

-  - Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
-    and Harabaz score to evaluate the resulting clustering of a set of points.
-    By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
+  and Harabaz score to evaluate the resulting clustering of a set of points.
+  By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.

-  - Added new cross-validation splitter
-    :class:`model_selection.TimeSeriesSplit` to handle time series data.
-    :issue:`6586` by :user:`YenChen Lin `
+- Added new cross-validation splitter
+  :class:`model_selection.TimeSeriesSplit` to handle time series data.
+  :issue:`6586` by :user:`YenChen Lin `

-  - The cross-validation iterators are replaced by cross-validation splitters
-    available from :mod:`sklearn.model_selection`, allowing for nested
-    cross-validation. See :ref:`model_selection_changes` for more information.
-    :issue:`4294` by `Raghav RV`_.
+- The cross-validation iterators are replaced by cross-validation splitters
+  available from :mod:`sklearn.model_selection`, allowing for nested
+  cross-validation. See :ref:`model_selection_changes` for more information.
+  :issue:`4294` by `Raghav RV`_.

 Enhancements
 ............

 Trees and ensembles

-  - Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`,
-    the mean absolute error. This criterion can also be used in
-    :class:`ensemble.ExtraTreesRegressor`,
-    :class:`ensemble.RandomForestRegressor`, and the gradient boosting
-    estimators. :issue:`6667` by :user:`Nelson Liu `.
+- Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`,
+  the mean absolute error. This criterion can also be used in
+  :class:`ensemble.ExtraTreesRegressor`,
+  :class:`ensemble.RandomForestRegressor`, and the gradient boosting
+  estimators.
:issue:`6667` by :user:`Nelson Liu `.

-  - Added weighted impurity-based early stopping criterion for decision tree
-    growth. :issue:`6954` by :user:`Nelson Liu `
+- Added weighted impurity-based early stopping criterion for decision tree
+  growth. :issue:`6954` by :user:`Nelson Liu `

-  - The random forest, extra tree and decision tree estimators now has a
-    method ``decision_path`` which returns the decision path of samples in
-    the tree. By `Arnaud Joly`_.
+- The random forest, extra tree and decision tree estimators now have a
+  method ``decision_path`` which returns the decision path of samples in
+  the tree. By `Arnaud Joly`_.

-  - A new example has been added unveiling the decision tree structure.
-    By `Arnaud Joly`_.
+- A new example has been added unveiling the decision tree structure.
+  By `Arnaud Joly`_.

-  - Random forest, extra trees, decision trees and gradient boosting estimator
-    accept the parameter ``min_samples_split`` and ``min_samples_leaf``
-    provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_.
+- Random forest, extra trees, decision trees and gradient boosting estimators
+  accept the parameters ``min_samples_split`` and ``min_samples_leaf``
+  provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_.

-  - Gradient boosting estimators accept the parameter ``criterion`` to specify
-    to splitting criterion used in built decision trees.
-    :issue:`6667` by :user:`Nelson Liu `.
+- Gradient boosting estimators accept the parameter ``criterion`` to specify
+  the splitting criterion used in the built decision trees.
+  :issue:`6667` by :user:`Nelson Liu `.

-  - The memory footprint is reduced (sometimes greatly) for
-    :class:`ensemble.bagging.BaseBagging` and classes that inherit from it,
-    i.e, :class:`ensemble.BaggingClassifier`,
-    :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`,
-    by dynamically generating attribute ``estimators_samples_`` only when it is
-    needed. By :user:`David Staub `.
+- The memory footprint is reduced (sometimes greatly) for
+  :class:`ensemble.bagging.BaseBagging` and classes that inherit from it,
+  i.e., :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`,
+  by dynamically generating the attribute ``estimators_samples_`` only when
+  it is needed. By :user:`David Staub `.

-  - Added ``n_jobs`` and ``sample_weight`` parameters for
-    :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel.
-    :issue:`5805` by :user:`Ibraim Ganiev `.
+- Added ``n_jobs`` and ``sample_weight`` parameters for
+  :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel.
+  :issue:`5805` by :user:`Ibraim Ganiev `.

 Linear, kernelized and related models

-  - In :class:`linear_model.LogisticRegression`, the SAG solver is now
-    available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.
+- In :class:`linear_model.LogisticRegression`, the SAG solver is now
+  available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.

-  - :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and
-    :class:`svm.LinearSVR` now support ``sample_weight``.
-    By :user:`Imaculate `.
+- :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and
+  :class:`svm.LinearSVR` now support ``sample_weight``.
+  By :user:`Imaculate `.

-  - Add parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the
-    error on the samples for every trial. By `Manoj Kumar`_.
+- Add parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the
+  error on the samples for every trial. By `Manoj Kumar`_.

-  - Prediction of out-of-sample events with Isotonic Regression
-    (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic
-    data). By :user:`Jonathan Arfa `.
+- Prediction of out-of-sample events with Isotonic Regression
+  (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic
+  data). By :user:`Jonathan Arfa `.

-  - Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid
-    `O(n^2)` behavior in pathological cases, and is also generally faster
-    (:issue:`#6691`). By `Antony Lee`_.
+- Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid
+  `O(n^2)` behavior in pathological cases, and is also generally faster
+  (:issue:`6691`). By `Antony Lee`_.

-  - :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors
-    through the parameter ``priors``. By :user:`Guillaume Lemaitre `.
+- :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors
+  through the parameter ``priors``. By :user:`Guillaume Lemaitre `.

-  - :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
-    now works with ``np.float32`` input data without converting it
-    into ``np.float64``. This allows to reduce the memory
-    consumption. :issue:`6913` by :user:`YenChen Lin `.
+- :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
+  now work with ``np.float32`` input data without converting it
+  into ``np.float64``. This allows reducing the memory
+  consumption. :issue:`6913` by :user:`YenChen Lin `.

-  - :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading`
-    now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``.
-    :issue:`5762` by :user:`Utkarsh Upadhyay `.
+- :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading`
+  now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``.
+  :issue:`5762` by :user:`Utkarsh Upadhyay `.

 Decomposition, manifold learning and clustering

-  - Added ``inverse_transform`` function to :class:`decomposition.NMF` to compute
-    data matrix of original shape. By :user:`Anish Shah `.
+- Added ``inverse_transform`` function to :class:`decomposition.NMF` to compute
+  the data matrix of original shape. By :user:`Anish Shah `.

-  - :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now works
-    with ``np.float32`` and ``np.float64`` input data without converting it.
-    This allows to reduce the memory consumption by using ``np.float32``.
-    :issue:`6846` by :user:`Sebastian Säger ` and
-    :user:`YenChen Lin `.
+- :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now work
+  with ``np.float32`` and ``np.float64`` input data without converting it.
+  This allows reducing the memory consumption by using ``np.float32``.
+  :issue:`6846` by :user:`Sebastian Säger ` and
+  :user:`YenChen Lin `.

 Preprocessing and feature selection

-  - :class:`preprocessing.RobustScaler` now accepts ``quantile_range`` parameter.
-    :issue:`5929` by :user:`Konstantin Podshumok `.
+- :class:`preprocessing.RobustScaler` now accepts a ``quantile_range`` parameter.
+  :issue:`5929` by :user:`Konstantin Podshumok `.

-  - :class:`feature_extraction.FeatureHasher` now accepts string values.
-    :issue:`6173` by :user:`Ryad Zenine ` and
-    :user:`Devashish Deshpande `.
+- :class:`feature_extraction.FeatureHasher` now accepts string values. + :issue:`6173` by :user:`Ryad Zenine ` and + :user:`Devashish Deshpande `. - - Keyword arguments can now be supplied to ``func`` in - :class:`preprocessing.FunctionTransformer` by means of the ``kw_args`` - parameter. By `Brian McFee`_. +- Keyword arguments can now be supplied to ``func`` in + :class:`preprocessing.FunctionTransformer` by means of the ``kw_args`` + parameter. By `Brian McFee`_. - - :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile` - now accept score functions that take X, y as input and return only the scores. - By :user:`Nikolay Mayorov `. +- :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile` + now accept score functions that take X, y as input and return only the scores. + By :user:`Nikolay Mayorov `. Model evaluation and meta-estimators - - :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier` - now support ``partial_fit``. By :user:`Asish Panda ` and - :user:`Philipp Dowling `. +- :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier` + now support ``partial_fit``. By :user:`Asish Panda ` and + :user:`Philipp Dowling `. - - Added support for substituting or disabling :class:`pipeline.Pipeline` - and :class:`pipeline.FeatureUnion` components using the ``set_params`` - interface that powers :mod:`sklearn.grid_search`. - See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py` - By `Joel Nothman`_ and :user:`Robert McGibbon `. +- Added support for substituting or disabling :class:`pipeline.Pipeline` + and :class:`pipeline.FeatureUnion` components using the ``set_params`` + interface that powers :mod:`sklearn.grid_search`. + See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py` + By `Joel Nothman`_ and :user:`Robert McGibbon `. - - The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV` - (and :class:`model_selection.RandomizedSearchCV`) can be easily imported - into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for - more information. :issue:`6697` by `Raghav RV`_. +- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV` + (and :class:`model_selection.RandomizedSearchCV`) can be easily imported + into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for + more information. :issue:`6697` by `Raghav RV`_. - - Generalization of :func:`model_selection.cross_val_predict`. - One can pass method names such as `predict_proba` to be used in the cross - validation framework instead of the default `predict`. - By :user:`Ori Ziv ` and :user:`Sears Merritt `. +- Generalization of :func:`model_selection.cross_val_predict`. + One can pass method names such as `predict_proba` to be used in the cross + validation framework instead of the default `predict`. + By :user:`Ori Ziv ` and :user:`Sears Merritt `. - - The training scores and time taken for training followed by scoring for - each search candidate are now available at the ``cv_results_`` dict. - See :ref:`model_selection_changes` for more information. - :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_. +- The training scores and time taken for training followed by scoring for + each search candidate are now available at the ``cv_results_`` dict. + See :ref:`model_selection_changes` for more information. + :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_. 
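The ``cross_val_predict`` generalization mentioned above, as a hedged sketch (iris and logistic regression are arbitrary illustrative choices)::

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_predict

    X, y = load_iris(return_X_y=True)

    # Out-of-fold class probabilities instead of the default hard predictions.
    proba = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_proba')
    print(proba.shape)  # one probability row per held-out sample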
Metrics - - Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide - the labels when the number of classes in ``y_true`` and ``y_pred`` differ. - :issue:`7239` by :user:`Hong Guangguo ` with help from - :user:`Mads Jensen ` and :user:`Nelson Liu `. +- Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide + the labels when the number of classes in ``y_true`` and ``y_pred`` differ. + :issue:`7239` by :user:`Hong Guangguo ` with help from + :user:`Mads Jensen ` and :user:`Nelson Liu `. - - Support sparse contingency matrices in cluster evaluation - (:mod:`metrics.cluster.supervised`) to scale to a large number of - clusters. - :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_. +- Support sparse contingency matrices in cluster evaluation + (:mod:`metrics.cluster.supervised`) to scale to a large number of + clusters. + :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_. - - Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`. - By :user:`Jatin Shah ` and `Raghav RV`_. +- Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`. + By :user:`Jatin Shah ` and `Raghav RV`_. - - Speed up :func:`metrics.silhouette_score` by using vectorized operations. - By `Manoj Kumar`_. +- Speed up :func:`metrics.silhouette_score` by using vectorized operations. + By `Manoj Kumar`_. - - Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`. - By :user:`Bernardo Stein `. +- Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`. + By :user:`Bernardo Stein `. Miscellaneous - - Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute - the score on the test folds in parallel. By `Manoj Kumar`_ - - - Codebase does not contain C/C++ cython generated files: they are - generated during build. Distribution packages will still contain generated - C/C++ files. By :user:`Arthur Mensch `. - - - Reduce the memory usage for 32-bit float input arrays of - :func:`utils.sparse_func.mean_variance_axis` and - :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython - fused types. By :user:`YenChen Lin `. - - - The :func:`ignore_warnings` now accept a category argument to ignore only - the warnings of a specified type. By :user:`Thierry Guillemot `. - - - Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to - :func:`load_iris` dataset - :issue:`7049`, - :func:`load_breast_cancer` dataset - :issue:`7152`, - :func:`load_digits` dataset, - :func:`load_diabetes` dataset, - :func:`load_linnerud` dataset, - :func:`load_boston` dataset - :issue:`7154` by - :user:`Manvendra Singh`. - - - Simplification of the ``clone`` function, deprecate support for estimators - that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_. - - - When unpickling a scikit-learn estimator in a different version than the one - the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation - on model persistence ` for more details. (:issue:`7248`) - By `Andreas Müller`_. +- Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute + the score on the test folds in parallel. By `Manoj Kumar`_ + +- Codebase does not contain C/C++ cython generated files: they are + generated during build. Distribution packages will still contain generated + C/C++ files. By :user:`Arthur Mensch `. 
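A compact, hedged illustration of the new ``sample_weight`` and ``labels`` arguments from the metrics entries above (toy values, assuming scikit-learn 0.18)::

    from sklearn.metrics import confusion_matrix, log_loss

    y_true = [0, 0, 1, 1]
    y_pred = [0, 1, 1, 1]

    # Per-sample weights now flow into the confusion matrix.
    print(confusion_matrix(y_true, y_pred, sample_weight=[1, 2, 1, 1]))

    # ``labels`` keeps log_loss well-defined when y_true misses a class.
    print(log_loss([0, 0, 1],
                   [[.9, .05, .05], [.8, .1, .1], [.2, .7, .1]],
                   labels=[0, 1, 2]))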
+
+- Reduce the memory usage for 32-bit float input arrays of
+  :func:`utils.sparse_func.mean_variance_axis` and
+  :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython
+  fused types. By :user:`YenChen Lin `.
+
+- The :func:`ignore_warnings` now accepts a category argument to ignore only
+  the warnings of a specified type. By :user:`Thierry Guillemot `.
+
+- Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to
+  :func:`load_iris` dataset
+  :issue:`7049`,
+  :func:`load_breast_cancer` dataset
+  :issue:`7152`,
+  :func:`load_digits` dataset,
+  :func:`load_diabetes` dataset,
+  :func:`load_linnerud` dataset,
+  :func:`load_boston` dataset
+  :issue:`7154` by
+  :user:`Manvendra Singh`.
+
+- Simplification of the ``clone`` function, deprecate support for estimators
+  that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_.
+
+- When unpickling a scikit-learn estimator in a different version than the one
+  the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation
+  on model persistence ` for more details. (:issue:`7248`)
+  By `Andreas Müller`_.

 Bug fixes
 .........

 Trees and ensembles

-  - Random forest, extra trees, decision trees and gradient boosting
-    won't accept anymore ``min_samples_split=1`` as at least 2 samples
-    are required to split a decision tree node. By `Arnaud Joly`_
+- Random forest, extra trees, decision trees and gradient boosting
+  no longer accept ``min_samples_split=1``, as at least 2 samples
+  are required to split a decision tree node. By `Arnaud Joly`_

-  - :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``,
-    ``transform`` or ``predict_proba`` are called on the non-fitted estimator.
-    by `Sebastian Raschka`_.
+- :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``,
+  ``transform`` or ``predict_proba`` are called on the non-fitted estimator.
+  By `Sebastian Raschka`_.

-  - Fix bug where :class:`ensemble.AdaBoostClassifier` and
-    :class:`ensemble.AdaBoostRegressor` would perform poorly if the
-    ``random_state`` was fixed
-    (:issue:`7411`). By `Joel Nothman`_.
+- Fix bug where :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor` would perform poorly if the
+  ``random_state`` was fixed
+  (:issue:`7411`). By `Joel Nothman`_.

-  - Fix bug in ensembles with randomization where the ensemble would not
-    set ``random_state`` on base estimators in a pipeline or similar nesting.
-    (:issue:`7411`). Note, results for :class:`ensemble.BaggingClassifier`
-    :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier`
-    and :class:`ensemble.AdaBoostRegressor` will now differ from previous
-    versions. By `Joel Nothman`_.
+- Fix bug in ensembles with randomization where the ensemble would not
+  set ``random_state`` on base estimators in a pipeline or similar nesting.
+  (:issue:`7411`). Note, results for :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier`
+  and :class:`ensemble.AdaBoostRegressor` will now differ from previous
+  versions. By `Joel Nothman`_.

 Linear, kernelized and related models

-  - Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in
-    :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor`
-    (:issue:`6764`). By :user:`Wenhua Yang `.
+- Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in
+  :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor`
+  (:issue:`6764`). By :user:`Wenhua Yang `.

-  - Fix bug in :class:`linear_model.LogisticRegressionCV` where
-    ``solver='liblinear'`` did not accept ``class_weights='balanced``.
-    (:issue:`6817`). By `Tom Dupre la Tour`_.
+- Fix bug in :class:`linear_model.LogisticRegressionCV` where
+  ``solver='liblinear'`` did not accept ``class_weights='balanced'``.
+  (:issue:`6817`). By `Tom Dupre la Tour`_.

-  - Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error
-    occurred when there were outliers being labelled and a weight function
-    specified (:issue:`6902`). By
-    `LeonieBorne `_.
+- Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error
+  occurred when there were outliers being labelled and a weight function
+  specified (:issue:`6902`). By
+  `LeonieBorne `_.

-  - Fix :class:`linear_model.ElasticNet` sparse decision function to match
-    output with dense in the multioutput case.
+- Fix :class:`linear_model.ElasticNet` sparse decision function to match
+  output with dense in the multioutput case.

 Decomposition, manifold learning and clustering

-  - :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3.
-    :issue:`5141` by :user:`Giorgio Patrini `.
+- :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3.
+  :issue:`5141` by :user:`Giorgio Patrini `.

-  - :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead or 0.
-    In practice this is enough for obtaining a good approximation of the
-    true eigenvalues/vectors in the presence of noise. When `n_components` is
-    small (``< .1 * min(X.shape)``) `n_iter` is set to 7, unless the user specifies
-    a higher number. This improves precision with few components.
-    :issue:`5299` by :user:`Giorgio Patrini`.
+- :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead of 0.
+  In practice this is enough for obtaining a good approximation of the
+  true eigenvalues/vectors in the presence of noise. When `n_components` is
+  small (``< .1 * min(X.shape)``) `n_iter` is set to 7, unless the user specifies
+  a higher number. This improves precision with few components.
+  :issue:`5299` by :user:`Giorgio Patrini`.

-  - Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA`
-    and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the
-    New features) is fixed. `components_` are stored with no whitening.
-    :issue:`5299` by :user:`Giorgio Patrini `.
+- Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA`
+  and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the
+  New features) is fixed. ``components_`` are stored with no whitening.
+  :issue:`5299` by :user:`Giorgio Patrini `.

-  - Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
-    Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `.
+- Fixed bug in :func:`manifold.spectral_embedding` where the diagonal of the unnormalized
+  Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `.

-  - Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all
-    occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`,
-    :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
-    and :class:`manifold.SpectralEmbedding` (:issue:`5012`).
By
-    :user:`Peter Fischer `.
+- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all
+  occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`,
+  :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
+  and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By
+  :user:`Peter Fischer `.

-  - Attribute ``explained_variance_ratio_`` calculated with the SVD solver
-    of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
-    correct results. By :user:`JPFrancoia `
+- Attribute ``explained_variance_ratio_`` calculated with the SVD solver
+  of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
+  correct results. By :user:`JPFrancoia `

 Preprocessing and feature selection

-  - :func:`preprocessing.data._transform_selected` now always passes a copy
-    of ``X`` to transform function when ``copy=True`` (:issue:`7194`). By `Caio
-    Oliveira `_.
+- :func:`preprocessing.data._transform_selected` now always passes a copy
+  of ``X`` to the transform function when ``copy=True`` (:issue:`7194`). By `Caio
+  Oliveira `_.

 Model evaluation and meta-estimators

-  - :class:`model_selection.StratifiedKFold` now raises error if all n_labels
-    for individual classes is less than n_folds.
-    :issue:`6182` by :user:`Devashish Deshpande `.
+- :class:`model_selection.StratifiedKFold` now raises an error if the number
+  of labels for any individual class is less than ``n_folds``.
+  :issue:`6182` by :user:`Devashish Deshpande `.

-  - Fixed bug in :class:`model_selection.StratifiedShuffleSplit`
-    where train and test sample could overlap in some edge cases,
-    see :issue:`6121` for
-    more details. By `Loic Esteve`_.
+- Fixed bug in :class:`model_selection.StratifiedShuffleSplit`
+  where train and test samples could overlap in some edge cases,
+  see :issue:`6121` for
+  more details. By `Loic Esteve`_.

-  - Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to
-    return splits of size ``train_size`` and ``test_size`` in all cases
-    (:issue:`6472`). By `Andreas Müller`_.
+- Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to
+  return splits of size ``train_size`` and ``test_size`` in all cases
+  (:issue:`6472`). By `Andreas Müller`_.

-  - Cross-validation of :class:`OneVsOneClassifier` and
-    :class:`OneVsRestClassifier` now works with precomputed kernels.
-    :issue:`7350` by :user:`Russell Smith `.
+- Cross-validation of :class:`OneVsOneClassifier` and
+  :class:`OneVsRestClassifier` now works with precomputed kernels.
+  :issue:`7350` by :user:`Russell Smith `.

-  - Fix incomplete ``predict_proba`` method delegation from
-    :class:`model_selection.GridSearchCV` to
-    :class:`linear_model.SGDClassifier` (:issue:`7159`)
-    by `Yichuan Liu `_.
+- Fix incomplete ``predict_proba`` method delegation from
+  :class:`model_selection.GridSearchCV` to
+  :class:`linear_model.SGDClassifier` (:issue:`7159`)
+  by `Yichuan Liu `_.

 Metrics

-  - Fix bug in :func:`metrics.silhouette_score` in which clusters of
-    size 1 were incorrectly scored. They should get a score of 0.
-    By `Joel Nothman`_.
+- Fix bug in :func:`metrics.silhouette_score` in which clusters of
+  size 1 were incorrectly scored. They should get a score of 0.
+  By `Joel Nothman`_.

-  - Fix bug in :func:`metrics.silhouette_samples` so that it now works with
-    arbitrary labels, not just those ranging from 0 to n_clusters - 1.
+- Fix bug in :func:`metrics.silhouette_samples` so that it now works with
+  arbitrary labels, not just those ranging from 0 to n_clusters - 1.
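The arbitrary-labels fix above can be checked with a tiny sketch (toy data, purely illustrative)::

    import numpy as np
    from sklearn.metrics import silhouette_samples, silhouette_score

    X = np.array([[0.0, 0.0], [0.1, 0.0], [4.0, 4.0], [4.1, 4.0]])
    labels = np.array([10, 10, 42, 42])  # arbitrary, non-contiguous labels

    print(silhouette_samples(X, labels))
    print(silhouette_score(X, labels))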
-  - Fix bug where expected and adjusted mutual information were incorrect if
-    cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.
+- Fix bug where expected and adjusted mutual information were incorrect if
+  cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.

-  - :func:`metrics.pairwise.pairwise_distances` now converts arrays to
-    boolean arrays when required in ``scipy.spatial.distance``.
-    :issue:`5460` by `Tom Dupre la Tour`_.
+- :func:`metrics.pairwise.pairwise_distances` now converts arrays to
+  boolean arrays when required in ``scipy.spatial.distance``.
+  :issue:`5460` by `Tom Dupre la Tour`_.

-  - Fix sparse input support in :func:`metrics.silhouette_score` as well as
-    example examples/text/document_clustering.py. By :user:`YenChen Lin `.
+- Fix sparse input support in :func:`metrics.silhouette_score` as well as
+  in the example examples/text/document_clustering.py. By :user:`YenChen Lin `.

-  - :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no
-    longer round ``y_score`` values when creating ROC curves; this was causing
-    problems for users with very small differences in scores (:issue:`7353`).
+- :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no
+  longer round ``y_score`` values when creating ROC curves; this was causing
+  problems for users with very small differences in scores (:issue:`7353`).

 Miscellaneous

-  - :func:`model_selection.tests._search._check_param_grid` now works correctly with all types
-    that extends/implements `Sequence` (except string), including range (Python 3.x) and xrange
-    (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi.
+- :func:`model_selection.tests._search._check_param_grid` now works correctly with all types
+  that extend/implement `Sequence` (except string), including range (Python 3.x) and xrange
+  (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi.

-  - :func:`utils.extmath.randomized_range_finder` is more numerically stable when many
-    power iterations are requested, since it applies LU normalization by default.
-    If ``n_iter<2`` numerical issues are unlikely, thus no normalization is applied.
-    Other normalization options are available: ``'none', 'LU'`` and ``'QR'``.
-    :issue:`5141` by :user:`Giorgio Patrini `.
+- :func:`utils.extmath.randomized_range_finder` is more numerically stable when many
+  power iterations are requested, since it applies LU normalization by default.
+  If ``n_iter<2`` numerical issues are unlikely, so no normalization is applied.
+  Other normalization options are available: ``'none'``, ``'LU'`` and ``'QR'``.
+  :issue:`5141` by :user:`Giorgio Patrini `.

-  - Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators
-    with them as parameters, could not be passed to :func:`base.clone`.
-    By `Loic Esteve`_.
+- Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators
+  with them as parameters, could not be passed to :func:`base.clone`.
+  By `Loic Esteve`_.

-  - :func:`datasets.load_svmlight_file` now is able to read long int QID values.
-    :issue:`7101` by :user:`Ibraim Ganiev `.
+- :func:`datasets.load_svmlight_file` is now able to read long int QID values.
+  :issue:`7101` by :user:`Ibraim Ganiev `.

 API changes summary
@@ -1520,74 +1525,74 @@ API changes summary

 Linear, kernelized and related models

-  - ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
-    Use ``loss`` instead. By `Manoj Kumar`_.
+- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
+  Use ``loss`` instead. By `Manoj Kumar`_.

-  - Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
-    :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `.
+- Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
+  :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `.

Decomposition, manifold learning and clustering

-  - The old :class:`mixture.DPGMM` is deprecated in favor of the new
-    :class:`mixture.BayesianGaussianMixture` (with the parameter
-    ``weight_concentration_prior_type='dirichlet_process'``).
-    The new class solves the computational
-    problems of the old class and computes the Gaussian mixture with a
-    Dirichlet process prior faster than before.
-    :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
-
-  - The old :class:`mixture.VBGMM` is deprecated in favor of the new
-    :class:`mixture.BayesianGaussianMixture` (with the parameter
-    ``weight_concentration_prior_type='dirichlet_distribution'``).
-    The new class solves the computational
-    problems of the old class and computes the Variational Bayesian Gaussian
-    mixture faster than before.
-    :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
-
-  - The old :class:`mixture.GMM` is deprecated in favor of the new
-    :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture
-    faster than before and some of computational problems have been solved.
-    :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+- The old :class:`mixture.DPGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_process'``).
+  The new class solves the computational
+  problems of the old class and computes the Gaussian mixture with a
+  Dirichlet process prior faster than before.
+  :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.VBGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_distribution'``).
+  The new class solves the computational
+  problems of the old class and computes the Variational Bayesian Gaussian
+  mixture faster than before.
+  :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.GMM` is deprecated in favor of the new
+  :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture
+  faster than before, and some of the computational problems have been solved.
+  :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.

Model evaluation and meta-estimators

-  - The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and
-    :mod:`sklearn.learning_curve` have been deprecated and the classes and
-    functions have been reorganized into the :mod:`sklearn.model_selection`
-    module. Ref :ref:`model_selection_changes` for more information.
-    :issue:`4294` by `Raghav RV`_.
-
-  - The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV`
-    and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of
-    the attribute ``cv_results_``.
-    Ref :ref:`model_selection_changes` for more information.
-    :issue:`6697` by `Raghav RV`_.
-
-  - The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced
-    by the new parameter ``n_splits`` since it can provide a consistent
-    and unambiguous interface to represent the number of train-test splits.
-    :issue:`7187` by :user:`YenChen Lin `.
- - - ``classes`` parameter was renamed to ``labels`` in - :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `. - - - The splitter classes ``LabelKFold``, ``LabelShuffleSplit``, - ``LeaveOneLabelOut`` and ``LeavePLabelsOut`` are renamed to - :class:`model_selection.GroupKFold`, - :class:`model_selection.GroupShuffleSplit`, - :class:`model_selection.LeaveOneGroupOut` - and :class:`model_selection.LeavePGroupsOut` respectively. - Also the parameter ``labels`` in the :func:`split` method of the newly - renamed splitters :class:`model_selection.LeaveOneGroupOut` and - :class:`model_selection.LeavePGroupsOut` is renamed to - ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`, - the parameter ``n_labels`` is renamed to ``n_groups``. - :issue:`6660` by `Raghav RV`_. - - - Error and loss names for ``scoring`` parameters are now prefixed by - ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions - are deprecated and will be removed in version 0.20. - :issue:`7261` by :user:`Tim Head `. +- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and + :mod:`sklearn.learning_curve` have been deprecated and the classes and + functions have been reorganized into the :mod:`sklearn.model_selection` + module. Ref :ref:`model_selection_changes` for more information. + :issue:`4294` by `Raghav RV`_. + +- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV` + and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of + the attribute ``cv_results_``. + Ref :ref:`model_selection_changes` for more information. + :issue:`6697` by `Raghav RV`_. + +- The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced + by the new parameter ``n_splits`` since it can provide a consistent + and unambiguous interface to represent the number of train-test splits. + :issue:`7187` by :user:`YenChen Lin `. + +- ``classes`` parameter was renamed to ``labels`` in + :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `. + +- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``, + ``LeaveOneLabelOut`` and ``LeavePLabelsOut`` are renamed to + :class:`model_selection.GroupKFold`, + :class:`model_selection.GroupShuffleSplit`, + :class:`model_selection.LeaveOneGroupOut` + and :class:`model_selection.LeavePGroupsOut` respectively. + Also the parameter ``labels`` in the :func:`split` method of the newly + renamed splitters :class:`model_selection.LeaveOneGroupOut` and + :class:`model_selection.LeavePGroupsOut` is renamed to + ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`, + the parameter ``n_labels`` is renamed to ``n_groups``. + :issue:`6660` by `Raghav RV`_. + +- Error and loss names for ``scoring`` parameters are now prefixed by + ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions + are deprecated and will be removed in version 0.20. + :issue:`7261` by :user:`Tim Head `. Code Contributors ----------------- @@ -1660,29 +1665,29 @@ Bug fixes ......... 
-  - Upgrade vendored joblib to version 0.9.4 that fixes an important bug in
-    ``joblib.Parallel`` that can silently yield to wrong results when working
-    on datasets larger than 1MB:
-    https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst
+- Upgrade vendored joblib to version 0.9.4, which fixes an important bug in
+  ``joblib.Parallel`` that can silently yield wrong results when working
+  on datasets larger than 1MB:
+  https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst

-  - Fixed reading of Bunch pickles generated with scikit-learn
-    version <= 0.16. This can affect users who have already
-    downloaded a dataset with scikit-learn 0.16 and are loading it
-    with scikit-learn 0.17. See :issue:`6196` for
-    how this affected :func:`datasets.fetch_20newsgroups`. By `Loic
-    Esteve`_.
+- Fixed reading of Bunch pickles generated with scikit-learn
+  version <= 0.16. This can affect users who have already
+  downloaded a dataset with scikit-learn 0.16 and are loading it
+  with scikit-learn 0.17. See :issue:`6196` for
+  how this affected :func:`datasets.fetch_20newsgroups`. By `Loic
+  Esteve`_.

-  - Fixed a bug that prevented using ROC AUC score to perform grid search on
-    several CPU / cores on large arrays. See :issue:`6147`
-    By `Olivier Grisel`_.
+- Fixed a bug that prevented using the ROC AUC score to perform grid search on
+  several CPUs / cores on large arrays. See :issue:`6147`.
+  By `Olivier Grisel`_.

-  - Fixed a bug that prevented to properly set the ``presort`` parameter
-    in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`
-    By Andrew McCulloh.
+- Fixed a bug that prevented the ``presort`` parameter from being set properly
+  in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`.
+  By Andrew McCulloh.

-  - Fixed a joblib error when evaluating the perplexity of a
-    :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`
-    By Chyi-Kwei Yau.
+- Fixed a joblib error when evaluating the perplexity of a
+  :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`.
+  By Chyi-Kwei Yau.

.. _changes_0_17:
@@ -1698,425 +1703,425 @@ Changelog

New features
............

-  - All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by
-    calling `partial_fit`. By :user:`Giorgio Patrini `.
-
-  - The new class :class:`ensemble.VotingClassifier` implements a
-    "majority rule" / "soft voting" ensemble classifier to combine
-    estimators for classification. By `Sebastian Raschka`_.
-
-  - The new class :class:`preprocessing.RobustScaler` provides an
-    alternative to :class:`preprocessing.StandardScaler` for feature-wise
-    centering and range normalization that is robust to outliers.
-    By :user:`Thomas Unterthiner `.
-
-  - The new class :class:`preprocessing.MaxAbsScaler` provides an
-    alternative to :class:`preprocessing.MinMaxScaler` for feature-wise
-    range normalization when the data is already centered or sparse.
-    By :user:`Thomas Unterthiner `.
-
-  - The new class :class:`preprocessing.FunctionTransformer` turns a Python
-    function into a ``Pipeline``-compatible transformer object.
-    By Joe Jevnik.
-
-  - The new classes :class:`cross_validation.LabelKFold` and
-    :class:`cross_validation.LabelShuffleSplit` generate train-test folds,
-    respectively similar to :class:`cross_validation.KFold` and
-    :class:`cross_validation.ShuffleSplit`, except that the folds are
-    conditioned on a label array. By `Brian McFee`_, :user:`Jean
-    Kossaifi ` and `Gilles Louppe`_.
- - - :class:`decomposition.LatentDirichletAllocation` implements the Latent - Dirichlet Allocation topic model with online variational - inference. By :user:`Chyi-Kwei Yau `, with code based on an implementation - by Matt Hoffman. (:issue:`3659`) - - - The new solver ``sag`` implements a Stochastic Average Gradient descent - and is available in both :class:`linear_model.LogisticRegression` and - :class:`linear_model.Ridge`. This solver is very efficient for large - datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_. - (:issue:`4738`) - - - The new solver ``cd`` implements a Coordinate Descent in - :class:`decomposition.NMF`. Previous solver based on Projected Gradient is - still available setting new parameter ``solver`` to ``pg``, but is - deprecated and will be removed in 0.19, along with - :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``, - ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and - ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a - shuffling step in the ``cd`` solver. - By `Tom Dupre la Tour`_ and `Mathieu Blondel`_. +- All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by + calling `partial_fit`. By :user:`Giorgio Patrini `. + +- The new class :class:`ensemble.VotingClassifier` implements a + "majority rule" / "soft voting" ensemble classifier to combine + estimators for classification. By `Sebastian Raschka`_. + +- The new class :class:`preprocessing.RobustScaler` provides an + alternative to :class:`preprocessing.StandardScaler` for feature-wise + centering and range normalization that is robust to outliers. + By :user:`Thomas Unterthiner `. + +- The new class :class:`preprocessing.MaxAbsScaler` provides an + alternative to :class:`preprocessing.MinMaxScaler` for feature-wise + range normalization when the data is already centered or sparse. + By :user:`Thomas Unterthiner `. + +- The new class :class:`preprocessing.FunctionTransformer` turns a Python + function into a ``Pipeline``-compatible transformer object. + By Joe Jevnik. + +- The new classes :class:`cross_validation.LabelKFold` and + :class:`cross_validation.LabelShuffleSplit` generate train-test folds, + respectively similar to :class:`cross_validation.KFold` and + :class:`cross_validation.ShuffleSplit`, except that the folds are + conditioned on a label array. By `Brian McFee`_, :user:`Jean + Kossaifi ` and `Gilles Louppe`_. + +- :class:`decomposition.LatentDirichletAllocation` implements the Latent + Dirichlet Allocation topic model with online variational + inference. By :user:`Chyi-Kwei Yau `, with code based on an implementation + by Matt Hoffman. (:issue:`3659`) + +- The new solver ``sag`` implements a Stochastic Average Gradient descent + and is available in both :class:`linear_model.LogisticRegression` and + :class:`linear_model.Ridge`. This solver is very efficient for large + datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_. + (:issue:`4738`) + +- The new solver ``cd`` implements a Coordinate Descent in + :class:`decomposition.NMF`. Previous solver based on Projected Gradient is + still available setting new parameter ``solver`` to ``pg``, but is + deprecated and will be removed in 0.19, along with + :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``, + ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and + ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a + shuffling step in the ``cd`` solver. 
+ By `Tom Dupre la Tour`_ and `Mathieu Blondel`_. Enhancements ............ - - :class:`manifold.TSNE` now supports approximate optimization via the - Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody. - (:issue:`4025`) +- :class:`manifold.TSNE` now supports approximate optimization via the + Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody. + (:issue:`4025`) - - :class:`cluster.mean_shift_.MeanShift` now supports parallel execution, - as implemented in the ``mean_shift`` function. By :user:`Martino - Sorbaro `. +- :class:`cluster.mean_shift_.MeanShift` now supports parallel execution, + as implemented in the ``mean_shift`` function. By :user:`Martino + Sorbaro `. - - :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``. - By `Jan Hendrik Metzen`_. +- :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``. + By `Jan Hendrik Metzen`_. - - :class:`dummy.DummyClassifier` now supports a prior fitting strategy. - By `Arnaud Joly`_. +- :class:`dummy.DummyClassifier` now supports a prior fitting strategy. + By `Arnaud Joly`_. - - Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses. - By :user:`Cory Lorenz `. +- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses. + By :user:`Cory Lorenz `. - - Added the :func:`metrics.label_ranking_loss` metric. - By `Arnaud Joly`_. +- Added the :func:`metrics.label_ranking_loss` metric. + By `Arnaud Joly`_. - - Added the :func:`metrics.cohen_kappa_score` metric. +- Added the :func:`metrics.cohen_kappa_score` metric. - - Added a ``warm_start`` constructor parameter to the bagging ensemble - models to increase the size of the ensemble. By :user:`Tim Head `. +- Added a ``warm_start`` constructor parameter to the bagging ensemble + models to increase the size of the ensemble. By :user:`Tim Head `. - - Added option to use multi-output regression metrics without averaging. - By Konstantin Shmelkov and :user:`Michael Eickenberg`. +- Added option to use multi-output regression metrics without averaging. + By Konstantin Shmelkov and :user:`Michael Eickenberg`. - - Added ``stratify`` option to :func:`cross_validation.train_test_split` - for stratified splitting. By Miroslav Batchkarov. +- Added ``stratify`` option to :func:`cross_validation.train_test_split` + for stratified splitting. By Miroslav Batchkarov. - - The :func:`tree.export_graphviz` function now supports aesthetic - improvements for :class:`tree.DecisionTreeClassifier` and - :class:`tree.DecisionTreeRegressor`, including options for coloring nodes - by their majority class or impurity, showing variable names, and using - node proportions instead of raw sample counts. By `Trevor Stephens`_. +- The :func:`tree.export_graphviz` function now supports aesthetic + improvements for :class:`tree.DecisionTreeClassifier` and + :class:`tree.DecisionTreeRegressor`, including options for coloring nodes + by their majority class or impurity, showing variable names, and using + node proportions instead of raw sample counts. By `Trevor Stephens`_. - - Improved speed of ``newton-cg`` solver in - :class:`linear_model.LogisticRegression`, by avoiding loss computation. - By `Mathieu Blondel`_ and `Tom Dupre la Tour`_. +- Improved speed of ``newton-cg`` solver in + :class:`linear_model.LogisticRegression`, by avoiding loss computation. + By `Mathieu Blondel`_ and `Tom Dupre la Tour`_. 
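The ``newton-cg`` solver mentioned in the entry just above is selected through the ``solver`` parameter of :class:`linear_model.LogisticRegression`; a minimal sketch (the dataset is an arbitrary choice)::

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    iris = load_iris()
    clf = LogisticRegression(solver='newton-cg').fit(iris.data, iris.target)
    print(clf.score(iris.data, iris.target))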
-  - The ``class_weight="auto"`` heuristic in classifiers supporting
-    ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"``
-    option, which has a simpler formula and interpretation.
-    By `Hanna Wallach`_ and `Andreas Müller`_.
+- The ``class_weight="auto"`` heuristic in classifiers supporting
+  ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"``
+  option, which has a simpler formula and interpretation.
+  By `Hanna Wallach`_ and `Andreas Müller`_.

-  - Add ``class_weight`` parameter to automatically weight samples by class
-    frequency for :class:`linear_model.PassiveAgressiveClassifier`. By
-    `Trevor Stephens`_.
+- Add ``class_weight`` parameter to automatically weight samples by class
+  frequency for :class:`linear_model.PassiveAggressiveClassifier`. By
+  `Trevor Stephens`_.

-  - Added backlinks from the API reference pages to the user guide. By
-    `Andreas Müller`_.
+- Added backlinks from the API reference pages to the user guide. By
+  `Andreas Müller`_.

-  - The ``labels`` parameter to :func:`sklearn.metrics.f1_score`,
-    :func:`sklearn.metrics.fbeta_score`,
-    :func:`sklearn.metrics.recall_score` and
-    :func:`sklearn.metrics.precision_score` has been extended.
-    It is now possible to ignore one or more labels, such as where
-    a multiclass problem has a majority class to ignore. By `Joel Nothman`_.
+- The ``labels`` parameter to :func:`sklearn.metrics.f1_score`,
+  :func:`sklearn.metrics.fbeta_score`,
+  :func:`sklearn.metrics.recall_score` and
+  :func:`sklearn.metrics.precision_score` has been extended.
+  It is now possible to ignore one or more labels, such as where
+  a multiclass problem has a majority class to ignore. By `Joel Nothman`_.

-  - Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`.
-    By `Trevor Stephens`_.
+- Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`.
+  By `Trevor Stephens`_.

-  - Provide an option for sparse output from
-    :func:`sklearn.metrics.pairwise.cosine_similarity`. By
-    :user:`Jaidev Deshpande `.
+- Provide an option for sparse output from
+  :func:`sklearn.metrics.pairwise.cosine_similarity`. By
+  :user:`Jaidev Deshpande `.

-  - Add :func:`minmax_scale` to provide a function interface for
-    :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `.
+- Add :func:`minmax_scale` to provide a function interface for
+  :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `.

-  - ``dump_svmlight_file`` now handles multi-label datasets.
-    By Chih-Wei Chang.
+- ``dump_svmlight_file`` now handles multi-label datasets.
+  By Chih-Wei Chang.

-  - RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`).
-    By `Tom Dupre la Tour`_.
+- RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`).
+  By `Tom Dupre la Tour`_.

-  - The "Wisconsin Breast Cancer" classical two-class classification dataset
-    is now included in scikit-learn, available with
-    :func:`sklearn.dataset.load_breast_cancer`.
+- The "Wisconsin Breast Cancer" classical two-class classification dataset
+  is now included in scikit-learn, available with
+  :func:`sklearn.datasets.load_breast_cancer`.

-  - Upgraded to joblib 0.9.3 to benefit from the new automatic batching of
-    short tasks. This makes it possible for scikit-learn to benefit from
-    parallelism when many very short tasks are executed in parallel, for
-    instance by the :class:`grid_search.GridSearchCV` meta-estimator
-    with ``n_jobs > 1`` used with a large grid of parameters on a small
-    dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_.
+- Upgraded to joblib 0.9.3 to benefit from the new automatic batching of + short tasks. This makes it possible for scikit-learn to benefit from + parallelism when many very short tasks are executed in parallel, for + instance by the :class:`grid_search.GridSearchCV` meta-estimator + with ``n_jobs > 1`` used with a large grid of parameters on a small + dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_. - - For more details about changes in joblib 0.9.3 see the release notes: - https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093 +- For more details about changes in joblib 0.9.3 see the release notes: + https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093 - - Improved speed (3 times per iteration) of - :class:`decomposition.DictLearning` with coordinate descent method - from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `. +- Improved speed (3 times per iteration) of + :class:`decomposition.DictLearning` with coordinate descent method + from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `. - - Parallel processing (threaded) for queries of nearest neighbors - (using the ball-tree) by Nikolay Mayorov. +- Parallel processing (threaded) for queries of nearest neighbors + (using the ball-tree) by Nikolay Mayorov. - - Allow :func:`datasets.make_multilabel_classification` to output - a sparse ``y``. By Kashif Rasul. +- Allow :func:`datasets.make_multilabel_classification` to output + a sparse ``y``. By Kashif Rasul. - - :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed - distances, allowing memory-efficient distance precomputation. By - `Joel Nothman`_. +- :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed + distances, allowing memory-efficient distance precomputation. By + `Joel Nothman`_. - - :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method - for retrieving the leaf indices samples are predicted as. By - :user:`Daniel Galvez ` and `Gilles Louppe`_. +- :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method + for retrieving the leaf indices samples are predicted as. By + :user:`Daniel Galvez ` and `Gilles Louppe`_. - - Speed up decision tree regressors, random forest regressors, extra trees - regressors and gradient boosting estimators by computing a proxy - of the impurity improvement during the tree growth. The proxy quantity is - such that the split that maximizes this value also maximizes the impurity - improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber ` - and `Gilles Louppe`_. +- Speed up decision tree regressors, random forest regressors, extra trees + regressors and gradient boosting estimators by computing a proxy + of the impurity improvement during the tree growth. The proxy quantity is + such that the split that maximizes this value also maximizes the impurity + improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber ` + and `Gilles Louppe`_. - - Speed up tree based methods by reducing the number of computations needed - when computing the impurity measure taking into account linear - relationship of the computed statistics. The effect is particularly - visible with extra trees and on datasets with categorical or sparse - features. By `Arnaud Joly`_. +- Speed up tree based methods by reducing the number of computations needed + when computing the impurity measure taking into account linear + relationship of the computed statistics. The effect is particularly + visible with extra trees and on datasets with categorical or sparse + features. 
By `Arnaud Joly`_.

-  - :class:`ensemble.GradientBoostingRegressor` and
-    :class:`ensemble.GradientBoostingClassifier` now expose an ``apply``
-    method for retrieving the leaf indices each sample ends up in under
-    each try. By :user:`Jacob Schreiber `.
+- :class:`ensemble.GradientBoostingRegressor` and
+  :class:`ensemble.GradientBoostingClassifier` now expose an ``apply``
+  method for retrieving the leaf indices each sample ends up in under
+  each tree. By :user:`Jacob Schreiber `.

-  - Add ``sample_weight`` support to :class:`linear_model.LinearRegression`.
-    By Sonny Hu. (:issue:`#4881`)
+- Add ``sample_weight`` support to :class:`linear_model.LinearRegression`.
+  By Sonny Hu. (:issue:`4881`)

-  - Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control
-    the stopping criterion. By Santi Villalba. (:issue:`5186`)
+- Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control
+  the stopping criterion. By Santi Villalba. (:issue:`5186`)

-  - Added optional parameter ``random_state`` in :class:`linear_model.Ridge`
-    , to set the seed of the pseudo random generator used in ``sag`` solver. By `Tom Dupre la Tour`_.
+- Added optional parameter ``random_state`` in :class:`linear_model.Ridge`,
+  to set the seed of the pseudo random generator used in the ``sag`` solver.
+  By `Tom Dupre la Tour`_.

-  - Added optional parameter ``warm_start`` in
-    :class:`linear_model.LogisticRegression`. If set to True, the solvers
-    ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the
-    coefficients computed in the previous fit. By `Tom Dupre la Tour`_.
+- Added optional parameter ``warm_start`` in
+  :class:`linear_model.LogisticRegression`. If set to True, the solvers
+  ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the
+  coefficients computed in the previous fit. By `Tom Dupre la Tour`_.

-  - Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for
-    the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_.
-    Support added to the ``liblinear`` solver. By `Manoj Kumar`_.
+- Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for
+  the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_.
+  Support added to the ``liblinear`` solver. By `Manoj Kumar`_.

-  - Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor`
-    and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior
-    the same. This allows gradient boosters to turn off presorting when building
-    deep trees or using sparse data. By :user:`Jacob Schreiber `.
+- Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor`
+  and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior
+  the same. This allows gradient boosters to turn off presorting when building
+  deep trees or using sparse data. By :user:`Jacob Schreiber `.

-  - Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
-    default. By :user:`Graham Clenaghan `.
+- Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
+  default. By :user:`Graham Clenaghan `.

-  - Added :class:`feature_selection.SelectFromModel` meta-transformer which can
-    be used along with estimators that have `coef_` or `feature_importances_`
-    attribute to select important features of the input data. By
-    :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and `Manoj Kumar`_.
+- Added :class:`feature_selection.SelectFromModel` meta-transformer which can + be used along with estimators that have `coef_` or `feature_importances_` + attribute to select important features of the input data. By + :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and `Manoj Kumar`_. - - Added :func:`metrics.pairwise.laplacian_kernel`. By `Clyde Fare `_. +- Added :func:`metrics.pairwise.laplacian_kernel`. By `Clyde Fare `_. - - :class:`covariance.GraphLasso` allows separate control of the convergence criterion - for the Elastic-Net subproblem via the ``enet_tol`` parameter. +- :class:`covariance.GraphLasso` allows separate control of the convergence criterion + for the Elastic-Net subproblem via the ``enet_tol`` parameter. - - Improved verbosity in :class:`decomposition.DictionaryLearning`. +- Improved verbosity in :class:`decomposition.DictionaryLearning`. - - :class:`ensemble.RandomForestClassifier` and - :class:`ensemble.RandomForestRegressor` no longer explicitly store the - samples used in bagging, resulting in a much reduced memory footprint for - storing random forest models. +- :class:`ensemble.RandomForestClassifier` and + :class:`ensemble.RandomForestRegressor` no longer explicitly store the + samples used in bagging, resulting in a much reduced memory footprint for + storing random forest models. - - Added ``positive`` option to :class:`linear_model.Lars` and - :func:`linear_model.lars_path` to force coefficients to be positive. - (:issue:`5131`) +- Added ``positive`` option to :class:`linear_model.Lars` and + :func:`linear_model.lars_path` to force coefficients to be positive. + (:issue:`5131`) - - Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances` - to provide precomputed squared norms for ``X``. +- Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances` + to provide precomputed squared norms for ``X``. - - Added the ``fit_predict`` method to :class:`pipeline.Pipeline`. +- Added the ``fit_predict`` method to :class:`pipeline.Pipeline`. - - Added the :func:`preprocessing.min_max_scale` function. +- Added the :func:`preprocessing.min_max_scale` function. Bug fixes ......... - - Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse - multi-label output. By `Andreas Müller`_. +- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse + multi-label output. By `Andreas Müller`_. - - Fixed the output shape of :class:`linear_model.RANSACRegressor` to - ``(n_samples, )``. By `Andreas Müller`_. +- Fixed the output shape of :class:`linear_model.RANSACRegressor` to + ``(n_samples, )``. By `Andreas Müller`_. - - Fixed bug in :class:`decomposition.DictLearning` when ``n_jobs < 0``. By - `Andreas Müller`_. +- Fixed bug in :class:`decomposition.DictLearning` when ``n_jobs < 0``. By + `Andreas Müller`_. - - Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a - lot of memory for large discrete grids. By `Joel Nothman`_. +- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a + lot of memory for large discrete grids. By `Joel Nothman`_. - - Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored - in the final fit. By `Manoj Kumar`_. +- Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored + in the final fit. By `Manoj Kumar`_. - - Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing - oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan `. 
+- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing + oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan `. - - All regressors now consistently handle and warn when given ``y`` that is of - shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin. - (:issue:`5431`) +- All regressors now consistently handle and warn when given ``y`` that is of + shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin. + (:issue:`5431`) - - Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by - `Lars Buitinck`_. +- Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by + `Lars Buitinck`_. - - Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance - matrices when using shrinkage. By `Martin Billinger`_. +- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance + matrices when using shrinkage. By `Martin Billinger`_. - - Fixed :func:`cross_validation.cross_val_predict` for estimators with - sparse predictions. By Buddha Prakash. +- Fixed :func:`cross_validation.cross_val_predict` for estimators with + sparse predictions. By Buddha Prakash. - - Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression` - to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_. - (:issue:`5182`) +- Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression` + to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_. + (:issue:`5182`) - - Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier` - when called with ``average=True``. By :user:`Andrew Lamb `. - (:issue:`5282`) +- Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier` + when called with ``average=True``. By :user:`Andrew Lamb `. + (:issue:`5282`) - - Dataset fetchers use different filenames under Python 2 and Python 3 to - avoid pickling compatibility issues. By `Olivier Grisel`_. - (:issue:`5355`) +- Dataset fetchers use different filenames under Python 2 and Python 3 to + avoid pickling compatibility issues. By `Olivier Grisel`_. + (:issue:`5355`) - - Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification - results to depend on scale. By `Jake Vanderplas`_. +- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification + results to depend on scale. By `Jake Vanderplas`_. - - Fixed temporarily :class:`linear_model.Ridge`, which was incorrect - when fitting the intercept in the case of sparse data. The fix - automatically changes the solver to 'sag' in this case. - :issue:`5360` by `Tom Dupre la Tour`_. +- Fixed temporarily :class:`linear_model.Ridge`, which was incorrect + when fitting the intercept in the case of sparse data. The fix + automatically changes the solver to 'sag' in this case. + :issue:`5360` by `Tom Dupre la Tour`_. - - Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data - with a large number of features and fewer samples. (:issue:`4478`) - By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `. +- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data + with a large number of features and fewer samples. (:issue:`4478`) + By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `. - - Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and - platform dependent output, and failed on `fit_transform`. - By :user:`Arthur Mensch `. 
+- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
+  platform dependent output, and failed on `fit_transform`.
+  By :user:`Arthur Mensch `.

-  - Fixes to the ``Bunch`` class used to store datasets.
+- Fixes to the ``Bunch`` class used to store datasets.

-  - Fixed :func:`ensemble.plot_partial_dependence` ignoring the
-    ``percentiles`` parameter.
+- Fixed :func:`ensemble.plot_partial_dependence` ignoring the
+  ``percentiles`` parameter.

-  - Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer
-    leads to inconsistent results when pickling.
+- Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer
+  leads to inconsistent results when pickling.

-  - Fixed the conditions on when a precomputed Gram matrix needs to
-    be recomputed in :class:`linear_model.LinearRegression`,
-    :class:`linear_model.OrthogonalMatchingPursuit`,
-    :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`.
+- Fixed the conditions on when a precomputed Gram matrix needs to
+  be recomputed in :class:`linear_model.LinearRegression`,
+  :class:`linear_model.OrthogonalMatchingPursuit`,
+  :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`.

-  - Fixed inconsistent memory layout in the coordinate descent solver
-    that affected :class:`linear_model.DictionaryLearning` and
-    :class:`covariance.GraphLasso`. (:issue:`5337`)
-    By `Olivier Grisel`_.
+- Fixed inconsistent memory layout in the coordinate descent solver
+  that affected :class:`linear_model.DictionaryLearning` and
+  :class:`covariance.GraphLasso`. (:issue:`5337`)
+  By `Olivier Grisel`_.

-  - :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg``
-    parameter.
+- :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg``
+  parameter.

-  - Nearest Neighbor estimators with custom distance metrics can now be pickled.
-    (:issue:`4362`)
+- Nearest Neighbor estimators with custom distance metrics can now be pickled.
+  (:issue:`4362`)

-  - Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights``
-    were not properly handled when performing grid-searches.
+- Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights``
+  were not properly handled when performing grid-searches.

-  - Fixed a bug in :class:`linear_model.LogisticRegression` and
-    :class:`linear_model.LogisticRegressionCV` when using
-    ``class_weight='balanced'```or ``class_weight='auto'``.
-    By `Tom Dupre la Tour`_.
+- Fixed a bug in :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.LogisticRegressionCV` when using
+  ``class_weight='balanced'`` or ``class_weight='auto'``.
+  By `Tom Dupre la Tour`_.

-  - Fixed bug :issue:`5495` when
-    doing OVR(SVC(decision_function_shape="ovr")). Fixed by
-    :user:`Elvis Dohmatob `.
+- Fixed bug :issue:`5495` when
+  doing OVR(SVC(decision_function_shape="ovr")). Fixed by
+  :user:`Elvis Dohmatob `.

API changes summary
-------------------

-  - Attribute `data_min`, `data_max` and `data_range` in
-    :class:`preprocessing.MinMaxScaler` are deprecated and won't be available
-    from 0.19. Instead, the class now exposes `data_min_`, `data_max_`
-    and `data_range_`. By :user:`Giorgio Patrini `.
+- Attributes `data_min`, `data_max` and `data_range` in
+  :class:`preprocessing.MinMaxScaler` are deprecated and won't be available
+  from 0.19. Instead, the class now exposes `data_min_`, `data_max_`
+  and `data_range_`. By :user:`Giorgio Patrini `.

-  - All Scaler classes now have an `scale_` attribute, the feature-wise
-    rescaling applied by their `transform` methods.
The old attribute `std_`
-    in :class:`preprocessing.StandardScaler` is deprecated and superseded
-    by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini `.
+- All Scaler classes now have a `scale_` attribute, the feature-wise
+  rescaling applied by their `transform` methods. The old attribute `std_`
+  in :class:`preprocessing.StandardScaler` is deprecated and superseded
+  by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini `.

-  - :class:`svm.SVC`` and :class:`svm.NuSVC` now have an ``decision_function_shape``
-    parameter to make their decision function of shape ``(n_samples, n_classes)``
-    by setting ``decision_function_shape='ovr'``. This will be the default behavior
-    starting in 0.19. By `Andreas Müller`_.
+- :class:`svm.SVC` and :class:`svm.NuSVC` now have a ``decision_function_shape``
+  parameter to make their decision function of shape ``(n_samples, n_classes)``
+  by setting ``decision_function_shape='ovr'``. This will be the default behavior
+  starting in 0.19. By `Andreas Müller`_.

-  - Passing 1D data arrays as input to estimators is now deprecated as it
-    caused confusion in how the array elements should be interpreted
-    as features or as samples. All data arrays are now expected
-    to be explicitly shaped ``(n_samples, n_features)``.
-    By :user:`Vighnesh Birodkar `.
+- Passing 1D data arrays as input to estimators is now deprecated as it
+  caused confusion in how the array elements should be interpreted
+  as features or as samples. All data arrays are now expected
+  to be explicitly shaped ``(n_samples, n_features)``.
+  By :user:`Vighnesh Birodkar `.

-  - :class:`lda.LDA` and :class:`qda.QDA` have been moved to
-    :class:`discriminant_analysis.LinearDiscriminantAnalysis` and
-    :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+- :class:`lda.LDA` and :class:`qda.QDA` have been moved to
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.

-  - The ``store_covariance`` and ``tol`` parameters have been moved from
-    the fit method to the constructor in
-    :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the
-    ``store_covariances`` and ``tol`` parameters have been moved from the
-    fit method to the constructor in
-    :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+- The ``store_covariance`` and ``tol`` parameters have been moved from
+  the fit method to the constructor in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the
+  ``store_covariances`` and ``tol`` parameters have been moved from the
+  fit method to the constructor in
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.

-  - Models inheriting from ``_LearntSelectorMixin`` will no longer support the
-    transform methods. (i.e, RandomForests, GradientBoosting, LogisticRegression,
-    DecisionTrees, SVMs and SGD related models). Wrap these models around the
-    metatransfomer :class:`feature_selection.SelectFromModel` to remove
-    features (according to `coefs_` or `feature_importances_`)
-    which are below a certain threshold value instead.
+- Models inheriting from ``_LearntSelectorMixin`` will no longer support the
+  transform methods (i.e., RandomForests, GradientBoosting, LogisticRegression,
+  DecisionTrees, SVMs and SGD related models). Instead, wrap these models in the
+  meta-transformer :class:`feature_selection.SelectFromModel` to remove
+  features (according to `coef_` or `feature_importances_`)
+  which are below a certain threshold value, as sketched below.
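A minimal sketch of the :class:`feature_selection.SelectFromModel` wrapping described in the entry above (the choice of estimator and threshold is arbitrary)::

    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_selection import SelectFromModel

    iris = load_iris()
    # Instead of calling the removed RandomForestClassifier.transform,
    # wrap the estimator in the meta-transformer:
    selector = SelectFromModel(RandomForestClassifier(n_estimators=50),
                               threshold='median')
    X_reduced = selector.fit_transform(iris.data, iris.target)
    print(X_reduced.shape)  # only features with importance >= median remain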
-
-  - :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence,
-    to ensure consistency of ``predict(X)`` and ``labels_``. By
-    :user:`Vighnesh Birodkar `.
+- :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence,
+  to ensure consistency of ``predict(X)`` and ``labels_``. By
+  :user:`Vighnesh Birodkar `.

-  - Classifier and Regressor models are now tagged as such using the
-    ``_estimator_type`` attribute.
+- Classifier and Regressor models are now tagged as such using the
+  ``_estimator_type`` attribute.

-  - Cross-validation iterators always provide indices into training and test set,
-    not boolean masks.
+- Cross-validation iterators always provide indices into the training and test sets,
+  not boolean masks.

-  - The ``decision_function`` on all regressors was deprecated and will be
-    removed in 0.19. Use ``predict`` instead.
+- The ``decision_function`` on all regressors was deprecated and will be
+  removed in 0.19. Use ``predict`` instead.

-  - :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19.
-    Use :func:`datasets.fetch_lfw_pairs` instead.
+- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19.
+  Use :func:`datasets.fetch_lfw_pairs` instead.

-  - The deprecated ``hmm`` module was removed.
+- The deprecated ``hmm`` module was removed.

-  - The deprecated ``Bootstrap`` cross-validation iterator was removed.
+- The deprecated ``Bootstrap`` cross-validation iterator was removed.

-  - The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed.
-    Use :class:`clustering.AgglomerativeClustering` instead.
+- The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed.
+  Use :class:`cluster.AgglomerativeClustering` instead.

-  - :func:`cross_validation.check_cv` is now a public function.
+- :func:`cross_validation.check_cv` is now a public function.

-  - The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated
-    and will be removed in 0.19.
+- The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated
+  and will be removed in 0.19.

-  - The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved
-    to the constructor.
+- The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved
+  to the constructor.

-  - Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit``
-    method. Use the construction parameter instead.
+- Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit``
+  method. Use the constructor parameter instead.

-  - The deprecated support for the sequence of sequences (or list of lists) multilabel
-    format was removed. To convert to and from the supported binary
-    indicator matrix format, use
-    :class:`MultiLabelBinarizer `.
+- The deprecated support for the sequence of sequences (or list of lists) multilabel
+  format was removed. To convert to and from the supported binary
+  indicator matrix format, use
+  :class:`MultiLabelBinarizer ` (see the sketch below).

-  - The behavior of calling the ``inverse_transform`` method of ``Pipeline.pipeline`` will
-    change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input.
+- The behavior of calling the ``inverse_transform`` method of :class:`pipeline.Pipeline` will
+  change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input.
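A minimal sketch of the :class:`MultiLabelBinarizer` conversion referenced two entries above (the label sets are hypothetical)::

    from sklearn.preprocessing import MultiLabelBinarizer

    # The sequence-of-sequences multilabel format is no longer accepted
    # by estimators; convert it to a binary indicator matrix first:
    mlb = MultiLabelBinarizer()
    Y = mlb.fit_transform([['news', 'sports'], ['politics'], ['news']])
    print(mlb.classes_)  # ['news' 'politics' 'sports']
    print(Y)             # rows are samples, columns are the classes above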
-
-  - The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of
-    :class:`preprocessing.LabelBinarizer` were removed.
+- The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of
+  :class:`preprocessing.LabelBinarizer` were removed.

-  - Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the
-    gamma to ``1. / n_features`` is deprecated and will be removed in 0.19.
-    Use ``gamma="auto"`` instead.
+- Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the
+  gamma to ``1. / n_features`` is deprecated and will be removed in 0.19.
+  Use ``gamma="auto"`` instead.

Code Contributors
-----------------
@@ -2166,26 +2171,26 @@ Changelog

Bug fixes
.........

-  - Allow input data larger than ``block_size`` in
-    :class:`covariance.LedoitWolf` by `Andreas Müller`_.
+- Allow input data larger than ``block_size`` in
+  :class:`covariance.LedoitWolf` by `Andreas Müller`_.

-  - Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
-    caused unstable result in :class:`calibration.CalibratedClassifierCV` by
-    `Jan Hendrik Metzen`_.
+- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
+  caused unstable results in :class:`calibration.CalibratedClassifierCV` by
+  `Jan Hendrik Metzen`_.

-  - Fix sorting of labels in func:`preprocessing.label_binarize` by Michael Heilman.
+- Fix sorting of labels in :func:`preprocessing.label_binarize` by Michael Heilman.

-  - Fix several stability and convergence issues in
-    :class:`cross_decomposition.CCA` and
-    :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_
+- Fix several stability and convergence issues in
+  :class:`cross_decomposition.CCA` and
+  :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_.

-  - Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
-    on fortran-ordered data.
+- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
+  on Fortran-ordered data.

-  - Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict``
-    and ``predict_proba`` by `Andreas Müller`_.
+- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict``
+  and ``predict_proba`` by `Andreas Müller`_.

-  - Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_
+- Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays.
+  By `Olivier Grisel`_.

.. _changes_0_16:
@@ -2197,25 +2202,25 @@ Version 0.16

Highlights
-----------

-  - Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory
-    requirements, bug-fixes and better default settings.
+- Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory
+  requirements, bug-fixes and better default settings.

-  - Multinomial Logistic regression and a path algorithm in
-    :class:`linear_model.LogisticRegressionCV`.
+- Multinomial logistic regression and a path algorithm in
+  :class:`linear_model.LogisticRegressionCV`.

-  - Out-of core learning of PCA via :class:`decomposition.IncrementalPCA`.
+- Out-of-core learning of PCA via :class:`decomposition.IncrementalPCA`.

-  - Probability callibration of classifiers using
-    :class:`calibration.CalibratedClassifierCV`.
+- Probability calibration of classifiers using
+  :class:`calibration.CalibratedClassifierCV`.

-  - :class:`cluster.Birch` clustering method for large-scale datasets.
+- :class:`cluster.Birch` clustering method for large-scale datasets.
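A minimal sketch of the :class:`cluster.Birch` highlight above (random data, purely illustrative)::

    import numpy as np
    from sklearn.cluster import Birch

    rng = np.random.RandomState(0)
    X = rng.rand(200, 2)
    brc = Birch(n_clusters=3)   # threshold and branching_factor at defaults
    labels = brc.fit_predict(X)
    print(labels[:10])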
-
-  - Scalable approximate nearest neighbors search with Locality-sensitive
-    hashing forests in :class:`neighbors.LSHForest`.
+- Scalable approximate nearest neighbors search with Locality-sensitive
+  hashing forests in :class:`neighbors.LSHForest`.

-  - Improved error messages and better validation when using malformed input data.
+- Improved error messages and better validation when using malformed input data.

-  - More robust integration with pandas dataframes.
+- More robust integration with pandas dataframes.

Changelog
---------
@@ -2223,438 +2228,438 @@ Changelog

New features
............

-  - The new :class:`neighbors.LSHForest` implements locality-sensitive hashing
-    for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`.
+- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing
+  for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`.

-  - Added :class:`svm.LinearSVR`. This class uses the liblinear implementation
-    of Support Vector Regression which is much faster for large
-    sample sizes than :class:`svm.SVR` with linear kernel. By
-    `Fabian Pedregosa`_ and Qiang Luo.
+- Added :class:`svm.LinearSVR`. This class uses the liblinear implementation
+  of Support Vector Regression, which is much faster for large
+  sample sizes than :class:`svm.SVR` with a linear kernel. By
+  `Fabian Pedregosa`_ and Qiang Luo.

-  - Incremental fit for :class:`GaussianNB `.
+- Incremental fit for :class:`GaussianNB `.

-  - Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and
-    :class:`dummy.DummyRegressor`. By `Arnaud Joly`_.
+- Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and
+  :class:`dummy.DummyRegressor`. By `Arnaud Joly`_.

-  - Added the :func:`metrics.label_ranking_average_precision_score` metrics.
-    By `Arnaud Joly`_.
+- Added the :func:`metrics.label_ranking_average_precision_score` metric.
+  By `Arnaud Joly`_.

-  - Add the :func:`metrics.coverage_error` metrics. By `Arnaud Joly`_.
+- Add the :func:`metrics.coverage_error` metric. By `Arnaud Joly`_.

-  - Added :class:`linear_model.LogisticRegressionCV`. By
-    `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_
-    and `Alexandre Gramfort`_.
+- Added :class:`linear_model.LogisticRegressionCV`. By
+  `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_
+  and `Alexandre Gramfort`_.

-  - Added ``warm_start`` constructor parameter to make it possible for any
-    trained forest model to grow additional trees incrementally. By
-    :user:`Laurent Direr`.
+- Added ``warm_start`` constructor parameter to make it possible for any
+  trained forest model to grow additional trees incrementally. By
+  :user:`Laurent Direr`.

-  - Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and
-    :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_.
+- Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_.

-  - Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA
-    algorithm that supports out-of-core learning with a ``partial_fit``
-    method. By `Kyle Kastner`_.
+- Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA
+  algorithm that supports out-of-core learning with a ``partial_fit``
+  method. By `Kyle Kastner`_.

-  - Averaged SGD for :class:`SGDClassifier `
-    and :class:`SGDRegressor ` By
-    :user:`Danny Sullivan `.
+- Averaged SGD for :class:`SGDClassifier `
+  and :class:`SGDRegressor `. By
+  :user:`Danny Sullivan `.

-  - Added :func:`cross_val_predict `
-    function which computes cross-validated estimates. By `Luis Pedro Coelho`_
+- Added the :func:`cross_val_predict `
+  function, which computes cross-validated estimates. By `Luis Pedro Coelho`_.

-  - Added :class:`linear_model.TheilSenRegressor`, a robust
-    generalized-median-based estimator. By :user:`Florian Wilhelm `.
+- Added :class:`linear_model.TheilSenRegressor`, a robust
+  generalized-median-based estimator. By :user:`Florian Wilhelm `.

-  - Added :func:`metrics.median_absolute_error`, a robust metric.
-    By `Gael Varoquaux`_ and :user:`Florian Wilhelm `.
+- Added :func:`metrics.median_absolute_error`, a robust metric.
+  By `Gael Varoquaux`_ and :user:`Florian Wilhelm `.

-  - Add :class:`cluster.Birch`, an online clustering algorithm. By
-    `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_.
+- Add :class:`cluster.Birch`, an online clustering algorithm. By
+  `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_.

-  - Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis`
-    using two new solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_.
+- Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  using two new solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_.

-  - Added :class:`kernel_ridge.KernelRidge`, an implementation of
-    kernelized ridge regression.
-    By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_.
+- Added :class:`kernel_ridge.KernelRidge`, an implementation of
+  kernelized ridge regression.
+  By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_.

-  - All solvers in :class:`linear_model.Ridge` now support `sample_weight`.
-    By `Mathieu Blondel`_.
+- All solvers in :class:`linear_model.Ridge` now support `sample_weight`.
+  By `Mathieu Blondel`_.

-  - Added :class:`cross_validation.PredefinedSplit` cross-validation
-    for fixed user-provided cross-validation folds.
-    By :user:`Thomas Unterthiner `.
+- Added :class:`cross_validation.PredefinedSplit` cross-validation
+  for fixed user-provided cross-validation folds.
+  By :user:`Thomas Unterthiner `.

-  - Added :class:`calibration.CalibratedClassifierCV`, an approach for
-    calibrating the predicted probabilities of a classifier.
-    By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_
-    and :user:`Balazs Kegl `.
+- Added :class:`calibration.CalibratedClassifierCV`, an approach for
+  calibrating the predicted probabilities of a classifier.
+  By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_
+  and :user:`Balazs Kegl `.

Enhancements
............

-  - Add option ``return_distance`` in :func:`hierarchical.ward_tree`
-    to return distances between nodes for both structured and unstructured
-    versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_.
-    The same option was added in :func:`hierarchical.linkage_tree`.
-    By `Manoj Kumar`_
+- Add option ``return_distance`` in :func:`hierarchical.ward_tree`
+  to return distances between nodes for both structured and unstructured
+  versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_.
+  The same option was added in :func:`hierarchical.linkage_tree`.
+  By `Manoj Kumar`_.

-  - Add support for sample weights in scorer objects. Metrics with sample
-    weight support will automatically benefit from it. By `Noel Dawe`_ and
-    `Vlad Niculae`_.
+- Add support for sample weights in scorer objects.
Metrics with sample + weight support will automatically benefit from it. By `Noel Dawe`_ and + `Vlad Niculae`_. - - Added ``newton-cg`` and `lbfgs` solver support in - :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_. +- Added ``newton-cg`` and `lbfgs` solver support in + :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_. - - Add ``selection="random"`` parameter to implement stochastic coordinate - descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet` - and related. By `Manoj Kumar`_. +- Add ``selection="random"`` parameter to implement stochastic coordinate + descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet` + and related. By `Manoj Kumar`_. - - Add ``sample_weight`` parameter to - :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`. - By :user:`Jatin Shah `. +- Add ``sample_weight`` parameter to + :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`. + By :user:`Jatin Shah `. - - Support sparse multilabel indicator representation in - :class:`preprocessing.LabelBinarizer` and - :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi ` with thanks - to Rohit Sivaprasad), as well as evaluation metrics (by - `Joel Nothman`_). +- Support sparse multilabel indicator representation in + :class:`preprocessing.LabelBinarizer` and + :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi ` with thanks + to Rohit Sivaprasad), as well as evaluation metrics (by + `Joel Nothman`_). - - Add ``sample_weight`` parameter to `metrics.jaccard_similarity_score`. - By `Jatin Shah`. +- Add ``sample_weight`` parameter to `metrics.jaccard_similarity_score`. + By `Jatin Shah`. - - Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None`` - as optional parameter. By `Saurabh Jha`. +- Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None`` + as optional parameter. By `Saurabh Jha`. - - Add ``sample_weight`` parameter to `metrics.hinge_loss`. - By `Saurabh Jha`. +- Add ``sample_weight`` parameter to `metrics.hinge_loss`. + By `Saurabh Jha`. - - Add ``multi_class="multinomial"`` option in - :class:`linear_model.LogisticRegression` to implement a Logistic - Regression solver that minimizes the cross-entropy or multinomial loss - instead of the default One-vs-Rest setting. Supports `lbfgs` and - `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option - `newton-cg` by Simon Wu. +- Add ``multi_class="multinomial"`` option in + :class:`linear_model.LogisticRegression` to implement a Logistic + Regression solver that minimizes the cross-entropy or multinomial loss + instead of the default One-vs-Rest setting. Supports `lbfgs` and + `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option + `newton-cg` by Simon Wu. - - ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a - single pass, when giving the option ``sort=False``. By :user:`Dan - Blanchard `. +- ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a + single pass, when giving the option ``sort=False``. By :user:`Dan + Blanchard `. - - :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be - configured to work with estimators that may fail and raise errors on - individual folds. This option is controlled by the `error_score` - parameter. This does not affect errors raised on re-fit. By - :user:`Michal Romaniuk `. 
+- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be
+  configured to work with estimators that may fail and raise errors on
+  individual folds. This option is controlled by the `error_score`
+  parameter. This does not affect errors raised on re-fit. By
+  :user:`Michal Romaniuk `.

-  - Add ``digits`` parameter to `metrics.classification_report` to allow
-    report to show different precision of floating point numbers. By
-    :user:`Ian Gilmore `.
+- Add ``digits`` parameter to `metrics.classification_report` to allow
+  report to show different precision of floating point numbers. By
+  :user:`Ian Gilmore `.

-  - Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`.
-    By :user:`Aaron Staple `.
+- Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`.
+  By :user:`Aaron Staple `.

-  - Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to
-    handle unknown categorical features more gracefully during transform.
-    By `Manoj Kumar`_.
+- Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to
+  handle unknown categorical features more gracefully during transform.
+  By `Manoj Kumar`_.

-  - Added support for sparse input data to decision trees and their ensembles.
-    By `Fares Hedyati`_ and `Arnaud Joly`_.
+- Added support for sparse input data to decision trees and their ensembles.
+  By `Fares Hedyati`_ and `Arnaud Joly`_.

-  - Optimized :class:`cluster.AffinityPropagation` by reducing the number of
-    memory allocations of large temporary data-structures. By `Antony Lee`_.
+- Optimized :class:`cluster.AffinityPropagation` by reducing the number of
+  memory allocations of large temporary data-structures. By `Antony Lee`_.

-  - Parellization of the computation of feature importances in random forest.
-    By `Olivier Grisel`_ and `Arnaud Joly`_.
+- Parallelization of the computation of feature importances in random
+  forests. By `Olivier Grisel`_ and `Arnaud Joly`_.

-  - Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute
-    in their constructor. By `Manoj Kumar`_.
+- Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute
+  in their constructor. By `Manoj Kumar`_.

-  - Added decision function for :class:`multiclass.OneVsOneClassifier`
-    By `Raghav RV`_ and :user:`Kyle Beauchamp `.
+- Added decision function for :class:`multiclass.OneVsOneClassifier`.
+  By `Raghav RV`_ and :user:`Kyle Beauchamp `.

-  - :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph`
-    support non-Euclidean metrics. By `Manoj Kumar`_
+- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph`
+  support non-Euclidean metrics. By `Manoj Kumar`_

-  - Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering`
-    and family now accept callables that return a connectivity matrix.
-    By `Manoj Kumar`_.
+- Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering`
+  and family now accepts callables that return a connectivity matrix.
+  By `Manoj Kumar`_.

-  - Sparse support for :func:`paired_distances`. By `Joel Nothman`_.
+- Sparse support for :func:`paired_distances`. By `Joel Nothman`_.

-  - :class:`cluster.DBSCAN` now supports sparse input and sample weights and
-    has been optimized: the inner loop has been rewritten in Cython and
-    radius neighbors queries are now computed in batch. By `Joel Nothman`_
-    and `Lars Buitinck`_.
+- :class:`cluster.DBSCAN` now supports sparse input and sample weights and + has been optimized: the inner loop has been rewritten in Cython and + radius neighbors queries are now computed in batch. By `Joel Nothman`_ + and `Lars Buitinck`_. - - Add ``class_weight`` parameter to automatically weight samples by class - frequency for :class:`ensemble.RandomForestClassifier`, - :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier` - and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_. +- Add ``class_weight`` parameter to automatically weight samples by class + frequency for :class:`ensemble.RandomForestClassifier`, + :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier` + and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_. - - :class:`grid_search.RandomizedSearchCV` now does sampling without - replacement if all parameters are given as lists. By `Andreas Müller`_. +- :class:`grid_search.RandomizedSearchCV` now does sampling without + replacement if all parameters are given as lists. By `Andreas Müller`_. - - Parallelized calculation of :func:`pairwise_distances` is now supported - for scipy metrics and custom callables. By `Joel Nothman`_. +- Parallelized calculation of :func:`pairwise_distances` is now supported + for scipy metrics and custom callables. By `Joel Nothman`_. - - Allow the fitting and scoring of all clustering algorithms in - :class:`pipeline.Pipeline`. By `Andreas Müller`_. +- Allow the fitting and scoring of all clustering algorithms in + :class:`pipeline.Pipeline`. By `Andreas Müller`_. - - More robust seeding and improved error messages in :class:`cluster.MeanShift` - by `Andreas Müller`_. +- More robust seeding and improved error messages in :class:`cluster.MeanShift` + by `Andreas Müller`_. - - Make the stopping criterion for :class:`mixture.GMM`, - :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the - number of samples by thresholding the average log-likelihood change - instead of its sum over all samples. By `Hervé Bredin`_. +- Make the stopping criterion for :class:`mixture.GMM`, + :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the + number of samples by thresholding the average log-likelihood change + instead of its sum over all samples. By `Hervé Bredin`_. - - The outcome of :func:`manifold.spectral_embedding` was made deterministic - by flipping the sign of eigenvectors. By :user:`Hasil Sharma `. +- The outcome of :func:`manifold.spectral_embedding` was made deterministic + by flipping the sign of eigenvectors. By :user:`Hasil Sharma `. - - Significant performance and memory usage improvements in - :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_. +- Significant performance and memory usage improvements in + :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_. - - Numerical stability improvements for :class:`preprocessing.StandardScaler` - and :func:`preprocessing.scale`. By `Nicolas Goix`_ +- Numerical stability improvements for :class:`preprocessing.StandardScaler` + and :func:`preprocessing.scale`. By `Nicolas Goix`_ - - :class:`svm.SVC` fitted on sparse input now implements ``decision_function``. - By `Rob Zinkov`_ and `Andreas Müller`_. +- :class:`svm.SVC` fitted on sparse input now implements ``decision_function``. + By `Rob Zinkov`_ and `Andreas Müller`_. - - :func:`cross_validation.train_test_split` now preserves the input type, - instead of converting to numpy arrays. 
+- :func:`cross_validation.train_test_split` now preserves the input type, + instead of converting to numpy arrays. Documentation improvements .......................... - - Added example of using :class:`FeatureUnion` for heterogeneous input. - By :user:`Matt Terry ` +- Added example of using :class:`FeatureUnion` for heterogeneous input. + By :user:`Matt Terry ` - - Documentation on scorers was improved, to highlight the handling of loss - functions. By :user:`Matt Pico `. +- Documentation on scorers was improved, to highlight the handling of loss + functions. By :user:`Matt Pico `. - - A discrepancy between liblinear output and scikit-learn's wrappers - is now noted. By `Manoj Kumar`_. +- A discrepancy between liblinear output and scikit-learn's wrappers + is now noted. By `Manoj Kumar`_. - - Improved documentation generation: examples referring to a class or - function are now shown in a gallery on the class/function's API reference - page. By `Joel Nothman`_. +- Improved documentation generation: examples referring to a class or + function are now shown in a gallery on the class/function's API reference + page. By `Joel Nothman`_. - - More explicit documentation of sample generators and of data - transformation. By `Joel Nothman`_. +- More explicit documentation of sample generators and of data + transformation. By `Joel Nothman`_. - - :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree` - used to point to empty pages stating that they are aliases of BinaryTree. - This has been fixed to show the correct class docs. By `Manoj Kumar`_. +- :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree` + used to point to empty pages stating that they are aliases of BinaryTree. + This has been fixed to show the correct class docs. By `Manoj Kumar`_. - - Added silhouette plots for analysis of KMeans clustering using - :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`. - See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` +- Added silhouette plots for analysis of KMeans clustering using + :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`. + See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` Bug fixes ......... - - Metaestimators now support ducktyping for the presence of ``decision_function``, - ``predict_proba`` and other methods. This fixes behavior of - :class:`grid_search.GridSearchCV`, - :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`, - :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested. - By `Joel Nothman`_ - - - The ``scoring`` attribute of grid-search and cross-validation methods is no longer - ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or - the base estimator doesn't have predict. - - - The function :func:`hierarchical.ward_tree` now returns the children in - the same order for both the structured and unstructured versions. By - `Matteo Visconti di Oleggio Castello`_. - - - :class:`feature_selection.RFECV` now correctly handles cases when - ``step`` is not equal to 1. By :user:`Nikolay Mayorov ` - - - The :class:`decomposition.PCA` now undoes whitening in its - ``inverse_transform``. Also, its ``components_`` now always have unit - length. By :user:`Michael Eickenberg `. - - - Fix incomplete download of the dataset when - :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_. 
- - - Various fixes to the Gaussian processes subpackage by Vincent Dubourg - and Jan Hendrik Metzen. - - - Calling ``partial_fit`` with ``class_weight=='auto'`` throws an - appropriate error message and suggests a work around. - By :user:`Danny Sullivan `. - - - :class:`RBFSampler ` with ``gamma=g`` - formerly approximated :func:`rbf_kernel ` - with ``gamma=g/2.``; the definition of ``gamma`` is now consistent, - which may substantially change your results if you use a fixed value. - (If you cross-validated over ``gamma``, it probably doesn't matter - too much.) By :user:`Dougal Sutherland `. - - - Pipeline object delegate the ``classes_`` attribute to the underlying - estimator. It allows, for instance, to make bagging of a pipeline object. - By `Arnaud Joly`_ - - - :class:`neighbors.NearestCentroid` now uses the median as the centroid - when metric is set to ``manhattan``. It was using the mean before. - By `Manoj Kumar`_ - - - Fix numerical stability issues in :class:`linear_model.SGDClassifier` - and :class:`linear_model.SGDRegressor` by clipping large gradients and - ensuring that weight decay rescaling is always positive (for large - l2 regularization and large learning rate values). - By `Olivier Grisel`_ - - - When `compute_full_tree` is set to "auto", the full tree is - built when n_clusters is high and is early stopped when n_clusters is - low, while the behavior should be vice-versa in - :class:`cluster.AgglomerativeClustering` (and friends). - This has been fixed By `Manoj Kumar`_ - - - Fix lazy centering of data in :func:`linear_model.enet_path` and - :func:`linear_model.lasso_path`. It was centered around one. It has - been changed to be centered around the origin. By `Manoj Kumar`_ - - - Fix handling of precomputed affinity matrices in - :class:`cluster.AgglomerativeClustering` when using connectivity - constraints. By :user:`Cathy Deng ` - - - Correct ``partial_fit`` handling of ``class_prior`` for - :class:`sklearn.naive_bayes.MultinomialNB` and - :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_. - - - Fixed a crash in :func:`metrics.precision_recall_fscore_support` - when using unsorted ``labels`` in the multi-label setting. - By `Andreas Müller`_. - - - Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``, - ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in - :class:`sklearn.neighbors.NearestNeighbors` and family, when the query - data is not the same as fit data. By `Manoj Kumar`_. - - - Fix log-density calculation in the :class:`mixture.GMM` with - tied covariance. By `Will Dawson`_ - - - Fixed a scaling error in :class:`feature_selection.SelectFdr` - where a factor ``n_features`` was missing. By `Andrew Tulloch`_ - - - Fix zero division in :class:`neighbors.KNeighborsRegressor` and related - classes when using distance weighting and having identical data points. - By `Garret-R `_. - - - Fixed round off errors with non positive-definite covariance matrices - in GMM. By :user:`Alexis Mignon `. - - - Fixed a error in the computation of conditional probabilities in - :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_. - - - Make the method ``radius_neighbors`` of - :class:`neighbors.NearestNeighbors` return the samples lying on the - boundary for ``algorithm='brute'``. By `Yan Yi`_. - - - Flip sign of ``dual_coef_`` of :class:`svm.SVC` - to make it consistent with the documentation and - ``decision_function``. By Artem Sobolev. 
+- Metaestimators now support ducktyping for the presence of ``decision_function``,
+  ``predict_proba`` and other methods. This fixes behavior of
+  :class:`grid_search.GridSearchCV`,
+  :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`,
+  :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested.
+  By `Joel Nothman`_
+
+- The ``scoring`` attribute of grid-search and cross-validation methods is no longer
+  ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or
+  the base estimator does not have a ``predict`` method.
+
+- The function :func:`hierarchical.ward_tree` now returns the children in
+  the same order for both the structured and unstructured versions. By
+  `Matteo Visconti di Oleggio Castello`_.
+
+- :class:`feature_selection.RFECV` now correctly handles cases when
+  ``step`` is not equal to 1. By :user:`Nikolay Mayorov `
+
+- The :class:`decomposition.PCA` now undoes whitening in its
+  ``inverse_transform``. Also, its ``components_`` now always have unit
+  length. By :user:`Michael Eickenberg `.
+
+- Fix incomplete download of the dataset when
+  :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_.
+
+- Various fixes to the Gaussian processes subpackage by Vincent Dubourg
+  and Jan Hendrik Metzen.
+
+- Calling ``partial_fit`` with ``class_weight=='auto'`` throws an
+  appropriate error message and suggests a work around.
+  By :user:`Danny Sullivan `.
+
+- :class:`RBFSampler ` with ``gamma=g``
+  formerly approximated :func:`rbf_kernel `
+  with ``gamma=g/2.``; the definition of ``gamma`` is now consistent,
+  which may substantially change your results if you use a fixed value.
+  (If you cross-validated over ``gamma``, it probably doesn't matter
+  too much.) By :user:`Dougal Sutherland `.
+
+- Pipeline objects delegate the ``classes_`` attribute to the underlying
+  estimator. This allows, for instance, bagging of a pipeline object
+  (see the sketch below). By `Arnaud Joly`_
+
+- :class:`neighbors.NearestCentroid` now uses the median as the centroid
+  when metric is set to ``manhattan``. It was using the mean before.
+  By `Manoj Kumar`_
+
+- Fix numerical stability issues in :class:`linear_model.SGDClassifier`
+  and :class:`linear_model.SGDRegressor` by clipping large gradients and
+  ensuring that weight decay rescaling is always positive (for large
+  l2 regularization and large learning rate values).
+  By `Olivier Grisel`_
+
+- When `compute_full_tree` is set to "auto", the full tree is
+  built when n_clusters is high and is early stopped when n_clusters is
+  low, while the behavior should be vice-versa in
+  :class:`cluster.AgglomerativeClustering` (and friends).
+  This has been fixed by `Manoj Kumar`_
+
+- Fix lazy centering of data in :func:`linear_model.enet_path` and
+  :func:`linear_model.lasso_path`. It was centered around one. It has
+  been changed to be centered around the origin. By `Manoj Kumar`_
+
+- Fix handling of precomputed affinity matrices in
+  :class:`cluster.AgglomerativeClustering` when using connectivity
+  constraints. By :user:`Cathy Deng `
+
+- Correct ``partial_fit`` handling of ``class_prior`` for
+  :class:`sklearn.naive_bayes.MultinomialNB` and
+  :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_.
+
+- Fixed a crash in :func:`metrics.precision_recall_fscore_support`
+  when using unsorted ``labels`` in the multi-label setting.
+  By `Andreas Müller`_.
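For illustration, a minimal sketch of what the ``classes_`` delegation above
enables, assuming the era-appropriate ``base_estimator`` constructor argument
of :class:`ensemble.BaggingClassifier` (renamed in much later releases)::

    from sklearn.datasets import make_classification
    from sklearn.ensemble import BaggingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X, y = make_classification(n_samples=100, random_state=0)

    # The pipeline forwards ``classes_`` to its final estimator, which is
    # exactly what the bagging meta-estimator needs from its base estimator.
    pipe = make_pipeline(StandardScaler(), LogisticRegression())
    bagging = BaggingClassifier(base_estimator=pipe, n_estimators=5,
                                random_state=0).fit(X, y)
    print(bagging.classes_)   # delegated from the pipeline's classifier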
+
+- Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``,
+  ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in
+  :class:`sklearn.neighbors.NearestNeighbors` and family, when the query
+  data is not the same as fit data. By `Manoj Kumar`_.
+
+- Fix log-density calculation in the :class:`mixture.GMM` with
+  tied covariance. By `Will Dawson`_
+
+- Fixed a scaling error in :class:`feature_selection.SelectFdr`
+  where a factor ``n_features`` was missing. By `Andrew Tulloch`_
+
+- Fix zero division in :class:`neighbors.KNeighborsRegressor` and related
+  classes when using distance weighting and having identical data points.
+  By `Garret-R `_.
+
+- Fixed round-off errors with non-positive-definite covariance matrices
+  in GMM. By :user:`Alexis Mignon `.
+
+- Fixed an error in the computation of conditional probabilities in
+  :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_.
+
+- Make the method ``radius_neighbors`` of
+  :class:`neighbors.NearestNeighbors` return the samples lying on the
+  boundary for ``algorithm='brute'``. By `Yan Yi`_.
+
+- Flip sign of ``dual_coef_`` of :class:`svm.SVC`
+  to make it consistent with the documentation and
+  ``decision_function``. By Artem Sobolev.

-  - Fixed handling of ties in :class:`isotonic.IsotonicRegression`.
-    We now use the weighted average of targets (secondary method). By
-    `Andreas Müller`_ and `Michael Bommarito `_.
+- Fixed handling of ties in :class:`isotonic.IsotonicRegression`.
+  We now use the weighted average of targets (secondary method). By
+  `Andreas Müller`_ and `Michael Bommarito `_.

API changes summary
-------------------

-  - :class:`GridSearchCV ` and
-    :func:`cross_val_score ` and other
-    meta-estimators don't convert pandas DataFrames into arrays any more,
-    allowing DataFrame specific operations in custom estimators.
+- :class:`GridSearchCV ` and
+  :func:`cross_val_score ` and other
+  meta-estimators don't convert pandas DataFrames into arrays any more,
+  allowing DataFrame specific operations in custom estimators.

-  - :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`,
-    :func:`predict_proba_ovr`,
-    :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`,
-    :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc`
-    are deprecated. Use the underlying estimators instead.
+- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`,
+  :func:`predict_proba_ovr`,
+  :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`,
+  :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc`
+  are deprecated. Use the underlying estimators instead.

-  - Nearest neighbors estimators used to take arbitrary keyword arguments
-    and pass these to their distance metric. This will no longer be supported
-    in scikit-learn 0.18; use the ``metric_params`` argument instead.
+- Nearest neighbors estimators used to take arbitrary keyword arguments
+  and pass these to their distance metric. This will no longer be supported
+  in scikit-learn 0.18; use the ``metric_params`` argument instead.

-  - `n_jobs` parameter of the fit method shifted to the constructor of the
+- `n_jobs` parameter of the fit method shifted to the constructor of the
   LinearRegression class.

-  - The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier`
-    now returns two probabilities per sample in the multiclass case; this
-    is consistent with other estimators and with the method's documentation,
-    but previous versions accidentally returned only the positive
-    probability.
Fixed by Will Lamond and `Lars Buitinck`_. - - - Change default value of precompute in :class:`ElasticNet` and :class:`Lasso` - to False. Setting precompute to "auto" was found to be slower when - n_samples > n_features since the computation of the Gram matrix is - computationally expensive and outweighs the benefit of fitting the Gram - for just one alpha. - ``precompute="auto"`` is now deprecated and will be removed in 0.18 - By `Manoj Kumar`_. - - - Expose ``positive`` option in :func:`linear_model.enet_path` and - :func:`linear_model.enet_path` which constrains coefficients to be - positive. By `Manoj Kumar`_. - - - Users should now supply an explicit ``average`` parameter to - :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`, - :func:`sklearn.metrics.recall_score` and - :func:`sklearn.metrics.precision_score` when performing multiclass - or multilabel (i.e. not binary) classification. By `Joel Nothman`_. - - - `scoring` parameter for cross validation now accepts `'f1_micro'`, - `'f1_macro'` or `'f1_weighted'`. `'f1'` is now for binary classification - only. Similar changes apply to `'precision'` and `'recall'`. - By `Joel Nothman`_. - - - The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in - :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have - been removed. They were deprecated since 0.14 - - - From now onwards, all estimators will uniformly raise ``NotFittedError`` - (:class:`utils.validation.NotFittedError`), when any of the ``predict`` - like methods are called before the model is fit. By `Raghav RV`_. - - - Input data validation was refactored for more consistent input - validation. The ``check_arrays`` function was replaced by ``check_array`` - and ``check_X_y``. By `Andreas Müller`_. - - - Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``, - ``kneighbors_graph`` and ``radius_neighbors_graph`` in - :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None, - then for every sample this avoids setting the sample itself as the - first nearest neighbor. By `Manoj Kumar`_. - - - Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph` - and :func:`neighbors.radius_neighbors_graph` which has to be explicitly - set by the user. If set to True, then the sample itself is considered - as the first nearest neighbor. - - - `thresh` parameter is deprecated in favor of new `tol` parameter in - :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements` - section for details. By `Hervé Bredin`_. - - - Estimators will treat input with dtype object as numeric when possible. - By `Andreas Müller`_ - - - Estimators now raise `ValueError` consistently when fitted on empty - data (less than 1 sample or less than 1 feature for 2D input). - By `Olivier Grisel`_. - - - - The ``shuffle`` option of :class:`.linear_model.SGDClassifier`, - :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`, - :class:`linear_model.PassiveAgressiveClassifier` and - :class:`linear_model.PassiveAgressiveRegressor` now defaults to ``True``. - - - :class:`cluster.DBSCAN` now uses a deterministic initialization. The - `random_state` parameter is deprecated. By :user:`Erich Schubert `. +- The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier` + now returns two probabilities per sample in the multiclass case; this + is consistent with other estimators and with the method's documentation, + but previous versions accidentally returned only the positive + probability. 
Fixed by Will Lamond and `Lars Buitinck`_.
+
+- Change default value of precompute in :class:`ElasticNet` and :class:`Lasso`
+  to False. Setting precompute to "auto" was found to be slower when
+  n_samples > n_features since the computation of the Gram matrix is
+  computationally expensive and outweighs the benefit of fitting the Gram
+  for just one alpha.
+  ``precompute="auto"`` is now deprecated and will be removed in 0.18.
+  By `Manoj Kumar`_.
+
+- Expose ``positive`` option in :func:`linear_model.enet_path` and
+  :func:`linear_model.lasso_path` which constrains coefficients to be
+  positive. By `Manoj Kumar`_.
+
+- Users should now supply an explicit ``average`` parameter to
+  :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`,
+  :func:`sklearn.metrics.recall_score` and
+  :func:`sklearn.metrics.precision_score` when performing multiclass
+  or multilabel (i.e. not binary) classification. By `Joel Nothman`_.
+
+- `scoring` parameter for cross validation now accepts `'f1_micro'`,
+  `'f1_macro'` or `'f1_weighted'`. `'f1'` is now for binary classification
+  only. Similar changes apply to `'precision'` and `'recall'`.
+  By `Joel Nothman`_.
+
+- The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in
+  :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have
+  been removed. They had been deprecated since 0.14.
+
+- From now onwards, all estimators will uniformly raise ``NotFittedError``
+  (:class:`utils.validation.NotFittedError`), when any of the ``predict``-like
+  methods are called before the model is fit. By `Raghav RV`_.
+
+- Input data validation was refactored for more consistent input
+  validation. The ``check_arrays`` function was replaced by ``check_array``
+  and ``check_X_y``. By `Andreas Müller`_.
+
+- Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``,
+  ``kneighbors_graph`` and ``radius_neighbors_graph`` in
+  :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None,
+  then for every sample this avoids setting the sample itself as the
+  first nearest neighbor. By `Manoj Kumar`_.
+
+- Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph`
+  and :func:`neighbors.radius_neighbors_graph` which has to be explicitly
+  set by the user. If set to True, then the sample itself is considered
+  as the first nearest neighbor.
+
+- `thresh` parameter is deprecated in favor of new `tol` parameter in
+  :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements`
+  section for details. By `Hervé Bredin`_.
+
+- Estimators will treat input with dtype object as numeric when possible.
+  By `Andreas Müller`_
+
+- Estimators now raise `ValueError` consistently when fitted on empty
+  data (less than 1 sample or less than 1 feature for 2D input).
+  By `Olivier Grisel`_.
+
+- The ``shuffle`` option of :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
+  :class:`linear_model.PassiveAggressiveClassifier` and
+  :class:`linear_model.PassiveAggressiveRegressor` now defaults to ``True``.
+
+- :class:`cluster.DBSCAN` now uses a deterministic initialization. The
+  `random_state` parameter is deprecated. By :user:`Erich Schubert `.

Code Contributors
-----------------
@@ -2700,41 +2705,41 @@ Version 0.15.2
Bug fixes
---------

-  - Fixed handling of the ``p`` parameter of the Minkowski distance that was
-    previously ignored in nearest neighbors models. By :user:`Nikolay
-    Mayorov `.
+- Fixed handling of the ``p`` parameter of the Minkowski distance that was
+  previously ignored in nearest neighbors models. By :user:`Nikolay
+  Mayorov `.

-  - Fixed duplicated alphas in :class:`linear_model.LassoLars` with early
-    stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_.
+- Fixed duplicated alphas in :class:`linear_model.LassoLars` with early
+  stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_.

-  - Fixed the build under Windows when scikit-learn is built with MSVC while
-    NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico
-    Vaggi `.
+- Fixed the build under Windows when scikit-learn is built with MSVC while
+  NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico
+  Vaggi `.

-  - Fixed an array index overflow bug in the coordinate descent solver. By
-    `Gael Varoquaux`_.
+- Fixed an array index overflow bug in the coordinate descent solver. By
+  `Gael Varoquaux`_.

-  - Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_.
+- Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_.

-  - Removed unnecessary data copy in :class:`cluster.KMeans`.
-    By `Gael Varoquaux`_.
+- Removed unnecessary data copy in :class:`cluster.KMeans`.
+  By `Gael Varoquaux`_.

-  - Explicitly close open files to avoid ``ResourceWarnings`` under Python 3.
-    By Calvin Giles.
+- Explicitly close open files to avoid ``ResourceWarnings`` under Python 3.
+  By Calvin Giles.

-  - The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis`
-    now projects the input on the most discriminant directions. By Martin Billinger.
+- The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  now projects the input on the most discriminant directions. By Martin Billinger.

-  - Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_.
+- Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_.

-  - Performance optimization in :class:`isotonic.IsotonicRegression`.
-    By Robert Bradshaw.
+- Performance optimization in :class:`isotonic.IsotonicRegression`.
+  By Robert Bradshaw.

-  - ``nose`` is non-longer a runtime dependency to import ``sklearn``, only for
-    running the tests. By `Joel Nothman`_.
+- ``nose`` is no longer a runtime dependency to import ``sklearn``, only for
+  running the tests. By `Joel Nothman`_.

-  - Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_
-    :user:`Matt Pico `, and others.
+- Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_,
+  :user:`Matt Pico `, and others.

.. _changes_0_15_1:

@@ -2746,35 +2751,35 @@ Version 0.15.1
Bug fixes
---------

-  - Made :func:`cross_validation.cross_val_score` use
-    :class:`cross_validation.KFold` instead of
-    :class:`cross_validation.StratifiedKFold` on multi-output classification
-    problems. By :user:`Nikolay Mayorov `.
+- Made :func:`cross_validation.cross_val_score` use
+  :class:`cross_validation.KFold` instead of
+  :class:`cross_validation.StratifiedKFold` on multi-output classification
+  problems. By :user:`Nikolay Mayorov `.

-  - Support unseen labels :class:`preprocessing.LabelBinarizer` to restore
-    the default behavior of 0.14.1 for backward compatibility. By
-    :user:`Hamzeh Alsalhi `.
+- Support unseen labels in :class:`preprocessing.LabelBinarizer` to restore
+  the default behavior of 0.14.1 for backward compatibility. By
+  :user:`Hamzeh Alsalhi `.

-  - Fixed the :class:`cluster.KMeans` stopping criterion that prevented early
-    convergence detection.
By Edward Raff and `Gael Varoquaux`_.
+- Fixed the :class:`cluster.KMeans` stopping criterion that prevented early
+  convergence detection. By Edward Raff and `Gael Varoquaux`_.

-  - Fixed the behavior of :class:`multiclass.OneVsOneClassifier`.
-    in case of ties at the per-class vote level by computing the correct
-    per-class sum of prediction scores. By `Andreas Müller`_.
+- Fixed the behavior of :class:`multiclass.OneVsOneClassifier`
+  in case of ties at the per-class vote level by computing the correct
+  per-class sum of prediction scores. By `Andreas Müller`_.

-  - Made :func:`cross_validation.cross_val_score` and
-    :class:`grid_search.GridSearchCV` accept Python lists as input data.
-    This is especially useful for cross-validation and model selection of
-    text processing pipelines. By `Andreas Müller`_.
+- Made :func:`cross_validation.cross_val_score` and
+  :class:`grid_search.GridSearchCV` accept Python lists as input data.
+  This is especially useful for cross-validation and model selection of
+  text processing pipelines. By `Andreas Müller`_.

-  - Fixed data input checks of most estimators to accept input data that
-    implements the NumPy ``__array__`` protocol. This is the case for
-    for ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of
-    pandas. By `Gael Varoquaux`_.
+- Fixed data input checks of most estimators to accept input data that
+  implements the NumPy ``__array__`` protocol. This is the case for
+  ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of
+  pandas. By `Gael Varoquaux`_.

-  - Fixed a regression for :class:`linear_model.SGDClassifier` with
-    ``class_weight="auto"`` on data with non-contiguous labels. By
-    `Olivier Grisel`_.
+- Fixed a regression for :class:`linear_model.SGDClassifier` with
+  ``class_weight="auto"`` on data with non-contiguous labels. By
+  `Olivier Grisel`_.

.. _changes_0_15:

@@ -2787,22 +2792,22 @@ Version 0.15
Highlights
-----------

-  - Many speed and memory improvements all across the code
+- Many speed and memory improvements all across the code

-  - Huge speed and memory improvements to random forests (and extra
-    trees) that also benefit better from parallel computing.
+- Huge speed and memory improvements to random forests (and extra
+  trees) that also benefit better from parallel computing.

-  - Incremental fit to :class:`BernoulliRBM `
+- Incremental fit to :class:`BernoulliRBM `

-  - Added :class:`cluster.AgglomerativeClustering` for hierarchical
-    agglomerative clustering with average linkage, complete linkage and
-    ward strategies.
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+  agglomerative clustering with average linkage, complete linkage and
+  ward strategies.

-  - Added :class:`linear_model.RANSACRegressor` for robust regression
-    models.
+- Added :class:`linear_model.RANSACRegressor` for robust regression
+  models.

-  - Added dimensionality reduction with :class:`manifold.TSNE` which can be
-    used to visualize high-dimensional data.
+- Added dimensionality reduction with :class:`manifold.TSNE` which can be
+  used to visualize high-dimensional data.

Changelog
@@ -2811,334 +2816,334 @@ Changelog
New features
............

-  - Added :class:`ensemble.BaggingClassifier` and
-    :class:`ensemble.BaggingRegressor` meta-estimators for ensembling
-    any kind of base estimator. See the :ref:`Bagging ` section of
-    the user guide for details and examples. By `Gilles Louppe`_.
+- Added :class:`ensemble.BaggingClassifier` and
+  :class:`ensemble.BaggingRegressor` meta-estimators for ensembling
+  any kind of base estimator. See the :ref:`Bagging ` section of
+  the user guide for details and examples. By `Gilles Louppe`_.

-  - New unsupervised feature selection algorithm
-    :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_.
+- New unsupervised feature selection algorithm
+  :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_.

-  - Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust
-    fitting of regression models. By :user:`Johannes Schönberger `.
+- Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust
+  fitting of regression models. By :user:`Johannes Schönberger `.

-  - Added :class:`cluster.AgglomerativeClustering` for hierarchical
-    agglomerative clustering with average linkage, complete linkage and
-    ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_.
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+  agglomerative clustering with average linkage, complete linkage and
+  ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_.

-  - Shorthand constructors :func:`pipeline.make_pipeline` and
-    :func:`pipeline.make_union` were added by `Lars Buitinck`_.
+- Shorthand constructors :func:`pipeline.make_pipeline` and
+  :func:`pipeline.make_union` were added by `Lars Buitinck`_.

-  - Shuffle option for :class:`cross_validation.StratifiedKFold`.
-    By :user:`Jeffrey Blackburne `.
+- Shuffle option for :class:`cross_validation.StratifiedKFold`.
+  By :user:`Jeffrey Blackburne `.

-  - Incremental learning (``partial_fit``) for Gaussian Naive Bayes by
-    Imran Haque.
+- Incremental learning (``partial_fit``) for Gaussian Naive Bayes by
+  Imran Haque.

-  - Added ``partial_fit`` to :class:`BernoulliRBM
-    `
-    By :user:`Danny Sullivan `.
+- Added ``partial_fit`` to :class:`BernoulliRBM
+  `.
+  By :user:`Danny Sullivan `.

-  - Added :func:`learning_curve ` utility to
-    chart performance with respect to training size. See
-    :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch.
+- Added :func:`learning_curve ` utility to
+  chart performance with respect to training size. See
+  :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch.

-  - Add positive option in :class:`LassoCV ` and
-    :class:`ElasticNetCV `.
-    By Brian Wignall and `Alexandre Gramfort`_.
+- Add ``positive`` option in :class:`LassoCV ` and
+  :class:`ElasticNetCV `.
+  By Brian Wignall and `Alexandre Gramfort`_.

-  - Added :class:`linear_model.MultiTaskElasticNetCV` and
-    :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_.
+- Added :class:`linear_model.MultiTaskElasticNetCV` and
+  :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_.

-  - Added :class:`manifold.TSNE`. By Alexander Fabisch.
+- Added :class:`manifold.TSNE`. By Alexander Fabisch.

Enhancements
............

-  - Add sparse input support to :class:`ensemble.AdaBoostClassifier` and
-    :class:`ensemble.AdaBoostRegressor` meta-estimators.
-    By :user:`Hamzeh Alsalhi `.
+- Add sparse input support to :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor` meta-estimators.
+  By :user:`Hamzeh Alsalhi `.

-  - Memory improvements of decision trees, by `Arnaud Joly`_.
+- Memory improvements of decision trees, by `Arnaud Joly`_.

-  - Decision trees can now be built in best-first manner by using ``max_leaf_nodes``
-    as the stopping criteria.
Refactored the tree code to use either a
-    stack or a priority queue for tree building.
-    By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+- Decision trees can now be built in best-first manner by using ``max_leaf_nodes``
+  as the stopping criterion. Refactored the tree code to use either a
+  stack or a priority queue for tree building.
+  By `Peter Prettenhofer`_ and `Gilles Louppe`_.

-  - Decision trees can now be fitted on fortran- and c-style arrays, and
-    non-continuous arrays without the need to make a copy.
-    If the input array has a different dtype than ``np.float32``, a fortran-
-    style copy will be made since fortran-style memory layout has speed
-    advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+- Decision trees can now be fitted on fortran- and c-style arrays, and
+  non-continuous arrays without the need to make a copy.
+  If the input array has a different dtype than ``np.float32``, a fortran-
+  style copy will be made since fortran-style memory layout has speed
+  advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_.

-  - Speed improvement of regression trees by optimizing the
-    the computation of the mean square error criterion. This lead
-    to speed improvement of the tree, forest and gradient boosting tree
-    modules. By `Arnaud Joly`_
+- Speed improvement of regression trees by optimizing
+  the computation of the mean square error criterion. This led
+  to speed improvement of the tree, forest and gradient boosting tree
+  modules. By `Arnaud Joly`_

-  - The ``img_to_graph`` and ``grid_tograph`` functions in
-    :mod:`sklearn.feature_extraction.image` now return ``np.ndarray``
-    instead of ``np.matrix`` when ``return_as=np.ndarray``. See the
-    Notes section for more information on compatibility.
-
-  - Changed the internal storage of decision trees to use a struct array.
-    This fixed some small bugs, while improving code and providing a small
-    speed gain. By `Joel Nothman`_.
-
-  - Reduce memory usage and overhead when fitting and predicting with forests
-    of randomized trees in parallel with ``n_jobs != 1`` by leveraging new
-    threading backend of joblib 0.8 and releasing the GIL in the tree fitting
-    Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_.
-
-  - Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module.
-    By `Gilles Louppe`_ and `Peter Prettenhofer`_.
-
-  - Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting`
-    module: a ``warm_start`` argument to fit additional trees,
-    a ``max_leaf_nodes`` argument to fit GBM style trees,
-    a ``monitor`` fit argument to inspect the estimator during training, and
-    refactoring of the verbose code. By `Peter Prettenhofer`_.
-
-  - Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values.
-    By `Arnaud Joly`_.
-
-  - Faster depth-based tree building algorithm such as decision tree,
-    random forest, extra trees or gradient tree boosting (with depth based
-    growing strategy) by avoiding trying to split on found constant features
-    in the sample subset. By `Arnaud Joly`_.
-
-  - Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based
-    methods: the minimum weighted fraction of the input samples required to be
-    at a leaf node. By `Noel Dawe`_.
-
-  - Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais.
-
-  - Added predict method to :class:`cluster.AffinityPropagation` and
-    :class:`cluster.MeanShift`, by `Mathieu Blondel`_.
- - - Vector and matrix multiplications have been optimised throughout the - library by `Denis Engemann`_, and `Alexandre Gramfort`_. - In particular, they should take less memory with older NumPy versions - (prior to 1.7.2). - - - Precision-recall and ROC examples now use train_test_split, and have more - explanation of why these metrics are useful. By `Kyle Kastner`_ - - - The training algorithm for :class:`decomposition.NMF` is faster for - sparse matrices and has much lower memory complexity, meaning it will - scale up gracefully to large datasets. By `Lars Buitinck`_. - - - Added svd_method option with default value to "randomized" to - :class:`decomposition.FactorAnalysis` to save memory and - significantly speedup computation by `Denis Engemann`_, and - `Alexandre Gramfort`_. - - - Changed :class:`cross_validation.StratifiedKFold` to try and - preserve as much of the original ordering of samples as possible so as - not to hide overfitting on datasets with a non-negligible level of - samples dependency. - By `Daniel Nouri`_ and `Olivier Grisel`_. - - - Add multi-output support to :class:`gaussian_process.GaussianProcess` - by John Novak. - - - Support for precomputed distance matrices in nearest neighbor estimators - by `Robert Layton`_ and `Joel Nothman`_. - - - Norm computations optimized for NumPy 1.6 and later versions by - `Lars Buitinck`_. In particular, the k-means algorithm no longer - needs a temporary data structure the size of its input. - - - :class:`dummy.DummyClassifier` can now be used to predict a constant - output value. By `Manoj Kumar`_. - - - :class:`dummy.DummyRegressor` has now a strategy parameter which allows - to predict the mean, the median of the training set or a constant - output value. By :user:`Maheshakya Wijewardena `. - - - Multi-label classification output in multilabel indicator format - is now supported by :func:`metrics.roc_auc_score` and - :func:`metrics.average_precision_score` by `Arnaud Joly`_. - - - Significant performance improvements (more than 100x speedup for - large problems) in :class:`isotonic.IsotonicRegression` by - `Andrew Tulloch`_. - - - Speed and memory usage improvements to the SGD algorithm for linear - models: it now uses threads, not separate processes, when ``n_jobs>1``. - By `Lars Buitinck`_. - - - Grid search and cross validation allow NaNs in the input arrays so that - preprocessors such as :class:`preprocessing.Imputer - ` can be trained within the cross validation loop, - avoiding potentially skewed results. - - - Ridge regression can now deal with sample weights in feature space - (only sample space until then). By :user:`Michael Eickenberg `. - Both solutions are provided by the Cholesky solver. - - - Several classification and regression metrics now support weighted - samples with the new ``sample_weight`` argument: - :func:`metrics.accuracy_score`, - :func:`metrics.zero_one_loss`, - :func:`metrics.precision_score`, - :func:`metrics.average_precision_score`, - :func:`metrics.f1_score`, - :func:`metrics.fbeta_score`, - :func:`metrics.recall_score`, - :func:`metrics.roc_auc_score`, - :func:`metrics.explained_variance_score`, - :func:`metrics.mean_squared_error`, - :func:`metrics.mean_absolute_error`, - :func:`metrics.r2_score`. - By `Noel Dawe`_. - - - Speed up of the sample generator - :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_. 
+- The ``img_to_graph`` and ``grid_to_graph`` functions in
+  :mod:`sklearn.feature_extraction.image` now return ``np.ndarray``
+  instead of ``np.matrix`` when ``return_as=np.ndarray``. See the
+  Notes section for more information on compatibility.
+
+- Changed the internal storage of decision trees to use a struct array.
+  This fixed some small bugs, while improving code and providing a small
+  speed gain. By `Joel Nothman`_.
+
+- Reduce memory usage and overhead when fitting and predicting with forests
+  of randomized trees in parallel with ``n_jobs != 1`` by leveraging new
+  threading backend of joblib 0.8 and releasing the GIL in the tree fitting
+  Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_.
+
+- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module.
+  By `Gilles Louppe`_ and `Peter Prettenhofer`_.
+
+- Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting`
+  module: a ``warm_start`` argument to fit additional trees,
+  a ``max_leaf_nodes`` argument to fit GBM style trees,
+  a ``monitor`` fit argument to inspect the estimator during training, and
+  refactoring of the verbose code. By `Peter Prettenhofer`_.
+
+- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values.
+  By `Arnaud Joly`_.
+
+- Faster depth-based tree building for algorithms such as decision trees,
+  random forests, extra trees and gradient tree boosting (with depth-based
+  growing strategy) by avoiding trying to split on found constant features
+  in the sample subset. By `Arnaud Joly`_.
+
+- Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based
+  methods: the minimum weighted fraction of the input samples required to be
+  at a leaf node. By `Noel Dawe`_.
+
+- Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais.
+
+- Added predict method to :class:`cluster.AffinityPropagation` and
+  :class:`cluster.MeanShift`, by `Mathieu Blondel`_.
+
+- Vector and matrix multiplications have been optimised throughout the
+  library by `Denis Engemann`_, and `Alexandre Gramfort`_.
+  In particular, they should take less memory with older NumPy versions
+  (prior to 1.7.2).
+
+- Precision-recall and ROC examples now use train_test_split, and have more
+  explanation of why these metrics are useful. By `Kyle Kastner`_
+
+- The training algorithm for :class:`decomposition.NMF` is faster for
+  sparse matrices and has much lower memory complexity, meaning it will
+  scale up gracefully to large datasets. By `Lars Buitinck`_.
+
+- Added ``svd_method`` option with default value "randomized" to
+  :class:`decomposition.FactorAnalysis` to save memory and
+  significantly speedup computation by `Denis Engemann`_, and
+  `Alexandre Gramfort`_.
+
+- Changed :class:`cross_validation.StratifiedKFold` to try and
+  preserve as much of the original ordering of samples as possible so as
+  not to hide overfitting on datasets with a non-negligible level of
+  sample dependency.
+  By `Daniel Nouri`_ and `Olivier Grisel`_.
+
+- Add multi-output support to :class:`gaussian_process.GaussianProcess`
+  by John Novak.
+
+- Support for precomputed distance matrices in nearest neighbor estimators
+  by `Robert Layton`_ and `Joel Nothman`_.
+
+- Norm computations optimized for NumPy 1.6 and later versions by
+  `Lars Buitinck`_. In particular, the k-means algorithm no longer
+  needs a temporary data structure the size of its input.
+
+- :class:`dummy.DummyClassifier` can now be used to predict a constant
+  output value. By `Manoj Kumar`_.
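As a quick illustration of the constant-prediction behaviour just mentioned,
a minimal sketch with arbitrary toy data (dummy estimators ignore the
features entirely)::

    import numpy as np
    from sklearn.dummy import DummyClassifier

    X = np.zeros((4, 1))            # features are ignored by dummy estimators
    y = np.array([0, 1, 1, 1])

    # Always predict class 1, whatever the input looks like.
    clf = DummyClassifier(strategy='constant', constant=1).fit(X, y)
    print(clf.predict(X))           # [1 1 1 1]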
+
+- :class:`dummy.DummyRegressor` now has a ``strategy`` parameter which allows
+  to predict the mean, the median of the training set or a constant
+  output value. By :user:`Maheshakya Wijewardena `.
+
+- Multi-label classification output in multilabel indicator format
+  is now supported by :func:`metrics.roc_auc_score` and
+  :func:`metrics.average_precision_score` by `Arnaud Joly`_.
+
+- Significant performance improvements (more than 100x speedup for
+  large problems) in :class:`isotonic.IsotonicRegression` by
+  `Andrew Tulloch`_.
+
+- Speed and memory usage improvements to the SGD algorithm for linear
+  models: it now uses threads, not separate processes, when ``n_jobs>1``.
+  By `Lars Buitinck`_.
+
+- Grid search and cross validation allow NaNs in the input arrays so that
+  preprocessors such as :class:`preprocessing.Imputer
+  ` can be trained within the cross validation loop,
+  avoiding potentially skewed results.
+
+- Ridge regression can now deal with sample weights in feature space
+  (previously only in sample space). By :user:`Michael Eickenberg `.
+  Both solutions are provided by the Cholesky solver.
+
+- Several classification and regression metrics now support weighted
+  samples with the new ``sample_weight`` argument, as illustrated in the
+  sketch below:
+  :func:`metrics.accuracy_score`,
+  :func:`metrics.zero_one_loss`,
+  :func:`metrics.precision_score`,
+  :func:`metrics.average_precision_score`,
+  :func:`metrics.f1_score`,
+  :func:`metrics.fbeta_score`,
+  :func:`metrics.recall_score`,
+  :func:`metrics.roc_auc_score`,
+  :func:`metrics.explained_variance_score`,
+  :func:`metrics.mean_squared_error`,
+  :func:`metrics.mean_absolute_error`,
+  :func:`metrics.r2_score`.
+  By `Noel Dawe`_.
+
+- Speed up of the sample generator
+  :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_.

Documentation improvements
..........................

-  - The :ref:`Working With Text Data ` tutorial
-    has now been worked in to the main documentation's tutorial section.
-    Includes exercises and skeletons for tutorial presentation.
-    Original tutorial created by several authors including
-    `Olivier Grisel`_, Lars Buitinck and many others.
-    Tutorial integration into the scikit-learn documentation
-    by `Jaques Grobler`_
-
-  - Added :ref:`Computational Performance `
-    documentation. Discussion and examples of prediction latency / throughput
-    and different factors that have influence over speed. Additional tips for
-    building faster models and choosing a relevant compromise between speed
-    and predictive power.
-    By :user:`Eustache Diemert `.
+- The :ref:`Working With Text Data ` tutorial
+  has now been worked into the main documentation's tutorial section.
+  Includes exercises and skeletons for tutorial presentation.
+  Original tutorial created by several authors including
+  `Olivier Grisel`_, Lars Buitinck and many others.
+  Tutorial integration into the scikit-learn documentation
+  by `Jaques Grobler`_
+
+- Added :ref:`Computational Performance `
+  documentation. Discussion and examples of prediction latency / throughput
+  and different factors that have influence over speed. Additional tips for
+  building faster models and choosing a relevant compromise between speed
+  and predictive power.
+  By :user:`Eustache Diemert `.

Bug fixes
.........

-  - Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` :
-    ``partial_fit`` was not working properly.
+- Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning`:
+  ``partial_fit`` was not working properly.
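A minimal sketch of the ``sample_weight`` argument referenced in the metrics
bullet above, using :func:`metrics.accuracy_score` with illustrative numbers::

    from sklearn.metrics import accuracy_score

    y_true = [0, 0, 1, 1]
    y_pred = [0, 1, 1, 1]

    # Unweighted: 3 of 4 predictions are correct.
    print(accuracy_score(y_true, y_pred))                               # 0.75

    # Up-weighting the misclassified sample pulls the score down to 3/13.
    print(accuracy_score(y_true, y_pred, sample_weight=[1, 10, 1, 1]))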
-  - Fixed bug in :class:`linear_model.stochastic_gradient` :
-    ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` .
+- Fixed bug in :class:`linear_model.stochastic_gradient`:
+  ``l1_ratio`` was used as ``(1.0 - l1_ratio)``.

-  - Fixed bug in :class:`multiclass.OneVsOneClassifier` with string
-    labels
+- Fixed bug in :class:`multiclass.OneVsOneClassifier` with string
+  labels.

-  - Fixed a bug in :class:`LassoCV ` and
-    :class:`ElasticNetCV `: they would not
-    pre-compute the Gram matrix with ``precompute=True`` or
-    ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_.
+- Fixed a bug in :class:`LassoCV ` and
+  :class:`ElasticNetCV `: they would not
+  pre-compute the Gram matrix with ``precompute=True`` or
+  ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_.

-  - Fixed incorrect estimation of the degrees of freedom in
-    :func:`feature_selection.f_regression` when variates are not centered.
-    By :user:`Virgile Fritsch `.
+- Fixed incorrect estimation of the degrees of freedom in
+  :func:`feature_selection.f_regression` when variates are not centered.
+  By :user:`Virgile Fritsch `.

-  - Fixed a race condition in parallel processing with
-    ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``).
-    By `Olivier Grisel`_.
+- Fixed a race condition in parallel processing with
+  ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``).
+  By `Olivier Grisel`_.

-  - Raise error in :class:`cluster.FeatureAgglomeration` and
-    :class:`cluster.WardAgglomeration` when no samples are given,
-    rather than returning meaningless clustering.
+- Raise error in :class:`cluster.FeatureAgglomeration` and
+  :class:`cluster.WardAgglomeration` when no samples are given,
+  rather than returning a meaningless clustering.

-  - Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with
-    ``loss='huber'``: ``gamma`` might have not been initialized.
+- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with
+  ``loss='huber'``: ``gamma`` might have not been initialized.

-  - Fixed feature importances as computed with a forest of randomized trees
-    when fit with ``sample_weight != None`` and/or with ``bootstrap=True``.
-    By `Gilles Louppe`_.
+- Fixed feature importances as computed with a forest of randomized trees
+  when fit with ``sample_weight != None`` and/or with ``bootstrap=True``.
+  By `Gilles Louppe`_.

API changes summary
-------------------

-  - :mod:`sklearn.hmm` is deprecated. Its removal is planned
-    for the 0.17 release.
-
-  - Use of :class:`covariance.EllipticEnvelop` has now been removed after
-    deprecation.
-    Please use :class:`covariance.EllipticEnvelope` instead.
-
-  - :class:`cluster.Ward` is deprecated. Use
-    :class:`cluster.AgglomerativeClustering` instead.
-
-  - :class:`cluster.WardClustering` is deprecated. Use
-  - :class:`cluster.AgglomerativeClustering` instead.
-
-  - :class:`cross_validation.Bootstrap` is deprecated.
-    :class:`cross_validation.KFold` or
-    :class:`cross_validation.ShuffleSplit` are recommended instead.
-
-  - Direct support for the sequence of sequences (or list of lists) multilabel
-    format is deprecated. To convert to and from the supported binary
-    indicator matrix format, use
-    :class:`MultiLabelBinarizer `.
-    By `Joel Nothman`_.
-
-  - Add score method to :class:`PCA ` following the model of
-    probabilistic PCA and deprecate
-    :class:`ProbabilisticPCA ` model whose
-    score implementation is not correct. The computation now also exploits the
-    matrix inversion lemma for faster computation.
By `Alexandre Gramfort`_.
-
-  - The score method of :class:`FactorAnalysis `
-    now returns the average log-likelihood of the samples. Use score_samples
-    to get log-likelihood of each sample. By `Alexandre Gramfort`_.
-
-  - Generating boolean masks (the setting ``indices=False``)
-    from cross-validation generators is deprecated.
-    Support for masks will be removed in 0.17.
-    The generators have produced arrays of indices by default since 0.10.
-    By `Joel Nothman`_.
-
-  - 1-d arrays containing strings with ``dtype=object`` (as used in Pandas)
-    are now considered valid classification targets. This fixes a regression
-    from version 0.13 in some classifiers. By `Joel Nothman`_.
-
-  - Fix wrong ``explained_variance_ratio_`` attribute in
-    :class:`RandomizedPCA `.
-    By `Alexandre Gramfort`_.
-
-  - Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in
-    :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`.
-    This changes the shape of ``alphas_`` from ``(n_alphas,)`` to
-    ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array like
-    object of length greater than one.
-    By `Manoj Kumar`_.
-
-  - Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`
-    when fitting intercept and input data is sparse. The automatic grid
-    of alphas was not computed correctly and the scaling with normalize
-    was wrong. By `Manoj Kumar`_.
-
-  - Fix wrong maximal number of features drawn (``max_features``) at each split
-    for decision trees, random forests and gradient tree boosting.
-    Previously, the count for the number of drawn features started only after
-    one non constant features in the split. This bug fix will affect
-    computational and generalization performance of those algorithms in the
-    presence of constant features. To get back previous generalization
-    performance, you should modify the value of ``max_features``.
-    By `Arnaud Joly`_.
-
-  - Fix wrong maximal number of features drawn (``max_features``) at each split
-    for :class:`ensemble.ExtraTreesClassifier` and
-    :class:`ensemble.ExtraTreesRegressor`. Previously, only non constant
-    features in the split was counted as drawn. Now constant features are
-    counted as drawn. Furthermore at least one feature must be non constant
-    in order to make a valid split. This bug fix will affect
-    computational and generalization performance of extra trees in the
-    presence of constant features. To get back previous generalization
-    performance, you should modify the value of ``max_features``.
-    By `Arnaud Joly`_.
-
-  - Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``.
-    Previously it was broken for input of non-integer ``dtype`` and the
-    weighted array that was returned was wrong. By `Manoj Kumar`_.
-
-  - Fix :class:`cross_validation.Bootstrap` to return ``ValueError``
-    when ``n_train + n_test > n``. By :user:`Ronald Phlypo `.
+- :mod:`sklearn.hmm` is deprecated. Its removal is planned
+  for the 0.17 release.
+
+- Use of :class:`covariance.EllipticEnvelop` has now been removed after
+  deprecation.
+  Please use :class:`covariance.EllipticEnvelope` instead.
+
+- :class:`cluster.Ward` is deprecated. Use
+  :class:`cluster.AgglomerativeClustering` instead (see the sketch below).
+
+- :class:`cluster.WardClustering` is deprecated. Use
+  :class:`cluster.AgglomerativeClustering` instead.
+
+- :class:`cross_validation.Bootstrap` is deprecated.
+  :class:`cross_validation.KFold` or
+  :class:`cross_validation.ShuffleSplit` are recommended instead.
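A minimal sketch of the suggested migration away from the deprecated
``Ward`` class, using arbitrary toy data::

    import numpy as np
    from sklearn.cluster import AgglomerativeClustering

    X = np.array([[0.0], [0.1], [5.0], [5.1]])

    # Roughly equivalent to the deprecated cluster.Ward(n_clusters=2):
    model = AgglomerativeClustering(n_clusters=2, linkage='ward').fit(X)
    print(model.labels_)            # two well-separated groups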
+
+- Direct support for the sequence of sequences (or list of lists) multilabel
+ format is deprecated. To convert to and from the supported binary
+ indicator matrix format, use
+ :class:`MultiLabelBinarizer `.
+ By `Joel Nothman`_ (see the sketch below).
+
+- Add score method to :class:`PCA ` following the model of
+ probabilistic PCA and deprecate
+ :class:`ProbabilisticPCA ` model whose
+ score implementation is not correct. The computation now also exploits the
+ matrix inversion lemma for faster computation. By `Alexandre Gramfort`_.
+
+- The score method of :class:`FactorAnalysis `
+ now returns the average log-likelihood of the samples. Use score_samples
+ to get log-likelihood of each sample. By `Alexandre Gramfort`_.
+
+- Generating boolean masks (the setting ``indices=False``)
+ from cross-validation generators is deprecated.
+ Support for masks will be removed in 0.17.
+ The generators have produced arrays of indices by default since 0.10.
+ By `Joel Nothman`_.
+
+- 1-d arrays containing strings with ``dtype=object`` (as used in Pandas)
+ are now considered valid classification targets. This fixes a regression
+ from version 0.13 in some classifiers. By `Joel Nothman`_.
+
+- Fix wrong ``explained_variance_ratio_`` attribute in
+ :class:`RandomizedPCA `.
+ By `Alexandre Gramfort`_.
+
+- Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in
+ :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`.
+ This changes the shape of ``alphas_`` from ``(n_alphas,)`` to
+ ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array-like
+ object of length greater than one.
+ By `Manoj Kumar`_.
+
+- Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`
+ when fitting intercept and input data is sparse. The automatic grid
+ of alphas was not computed correctly and the scaling with ``normalize``
+ was wrong. By `Manoj Kumar`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each split
+ for decision trees, random forests and gradient tree boosting.
+ Previously, the count for the number of drawn features started only after
+ one non-constant feature was found in the split. This bug fix will affect
+ computational and generalization performance of those algorithms in the
+ presence of constant features. To get back previous generalization
+ performance, you should modify the value of ``max_features``.
+ By `Arnaud Joly`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each split
+ for :class:`ensemble.ExtraTreesClassifier` and
+ :class:`ensemble.ExtraTreesRegressor`. Previously, only non-constant
+ features in the split were counted as drawn. Now constant features are
+ counted as drawn. Furthermore, at least one feature must be non-constant
+ in order to make a valid split. This bug fix will affect
+ computational and generalization performance of extra trees in the
+ presence of constant features. To get back previous generalization
+ performance, you should modify the value of ``max_features``.
+ By `Arnaud Joly`_.
+
+- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``.
+ Previously it was broken for input of non-integer ``dtype`` and the
+ weighted array that was returned was wrong. By `Manoj Kumar`_.
+
+- Fix :class:`cross_validation.Bootstrap` to return ``ValueError``
+ when ``n_train + n_test > n``. By :user:`Ronald Phlypo `.
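Editorial aside, not part of the original patch: an illustrative sketch of
the conversion recommended in the multilabel entry above, using
:class:`MultiLabelBinarizer`::

    from sklearn.preprocessing import MultiLabelBinarizer

    y_seq = [(1, 2), (3,), (1, 3)]      # deprecated sequence-of-sequences
    mlb = MultiLabelBinarizer()
    Y = mlb.fit_transform(y_seq)        # (3, 3) binary indicator matrix
    y_back = mlb.inverse_transform(Y)   # tuples of labels again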
People @@ -3320,287 +3325,287 @@ Version 0.14 Changelog --------- - - Missing values with sparse and dense matrices can be imputed with the - transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_. - - - The core implementation of decisions trees has been rewritten from - scratch, allowing for faster tree induction and lower memory - consumption in all tree-based estimators. By `Gilles Louppe`_. - - - Added :class:`ensemble.AdaBoostClassifier` and - :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and - `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user - guide for details and examples. - - - Added :class:`grid_search.RandomizedSearchCV` and - :class:`grid_search.ParameterSampler` for randomized hyperparameter - optimization. By `Andreas Müller`_. - - - Added :ref:`biclustering ` algorithms - (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and - :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data - generation methods (:func:`sklearn.datasets.make_biclusters` and - :func:`sklearn.datasets.make_checkerboard`), and scoring metrics - (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_. - - - Added :ref:`Restricted Boltzmann Machines` - (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_. - - - Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_, - :user:`Subhodeep Moitra ` and `Olivier Grisel`_. All tests now pass under - Python 3.3. - - - Ability to pass one penalty (alpha value) per target in - :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_. - - - Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization - issue (minor practical significance). - By :user:`Norbert Crombach ` and `Mathieu Blondel`_ . - - - Added an interactive version of `Andreas Müller`_'s - `Machine Learning Cheat Sheet (for scikit-learn) - `_ - to the documentation. See :ref:`Choosing the right estimator `. - By `Jaques Grobler`_. - - - :class:`grid_search.GridSearchCV` and - :func:`cross_validation.cross_val_score` now support the use of advanced - scoring function such as area under the ROC curve and f-beta scores. - See :ref:`scoring_parameter` for details. By `Andreas Müller`_ - and `Lars Buitinck`_. - Passing a function from :mod:`sklearn.metrics` as ``score_func`` is - deprecated. - - - Multi-label classification output is now supported by - :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`, - :func:`metrics.f1_score`, :func:`metrics.fbeta_score`, - :func:`metrics.classification_report`, - :func:`metrics.precision_score` and :func:`metrics.recall_score` - by `Arnaud Joly`_. - - - Two new metrics :func:`metrics.hamming_loss` and - :func:`metrics.jaccard_similarity_score` - are added with multi-label support by `Arnaud Joly`_. - - - Speed and memory usage improvements in - :class:`feature_extraction.text.CountVectorizer` and - :class:`feature_extraction.text.TfidfVectorizer`, - by Jochen Wersdörfer and Roman Sinayev. - - - The ``min_df`` parameter in - :class:`feature_extraction.text.CountVectorizer` and - :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2, - has been reset to 1 to avoid unpleasant surprises (empty vocabularies) - for novice users who try it out on tiny document collections. - A value of at least 2 is still recommended for practical use. 
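Editorial aside, not part of the original patch: a minimal sketch of the
``min_df`` behaviour described in the entry above::

    from sklearn.feature_extraction.text import CountVectorizer

    docs = ["cat sat", "cat ran", "dog ran"]
    X1 = CountVectorizer(min_df=1).fit_transform(docs)  # keeps all four terms
    X2 = CountVectorizer(min_df=2).fit_transform(docs)  # drops 'sat', 'dog'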
- - :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and
- :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that
- converts their ``coef_`` into a sparse matrix, meaning stored models
- trained using these estimators can be made much more compact.
-
- - :class:`linear_model.SGDClassifier` now produces multiclass probability
- estimates when trained under log loss or modified Huber loss.
-
- - Hyperlinks to documentation in example code on the website by
- :user:`Martin Luessi `.
-
- - Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling
- of the features for non-default ``feature_range`` settings. By `Andreas
- Müller`_.
-
- - ``max_features`` in :class:`tree.DecisionTreeClassifier`,
- :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
- now supports percentage values. By `Gilles Louppe`_.
-
- - Performance improvements in :class:`isotonic.IsotonicRegression` by
- `Nelle Varoquaux`_.
-
- - :func:`metrics.accuracy_score` has an option normalize to return
- the fraction or the number of correctly classified sample
- by `Arnaud Joly`_.
-
- - Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy
- loss. By Jochen Wersdörfer and `Lars Buitinck`_.
+- Missing values with sparse and dense matrices can be imputed with the
+ transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_.
+
+- The core implementation of decision trees has been rewritten from
+ scratch, allowing for faster tree induction and lower memory
+ consumption in all tree-based estimators. By `Gilles Louppe`_.
+
+- Added :class:`ensemble.AdaBoostClassifier` and
+ :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and
+ `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user
+ guide for details and examples.
+
+- Added :class:`grid_search.RandomizedSearchCV` and
+ :class:`grid_search.ParameterSampler` for randomized hyperparameter
+ optimization. By `Andreas Müller`_.
+
+- Added :ref:`biclustering ` algorithms
+ (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and
+ :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data
+ generation methods (:func:`sklearn.datasets.make_biclusters` and
+ :func:`sklearn.datasets.make_checkerboard`), and scoring metrics
+ (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_.
+
+- Added :ref:`Restricted Boltzmann Machines`
+ (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_.
+
+- Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_,
+ :user:`Subhodeep Moitra ` and `Olivier Grisel`_. All tests now pass under
+ Python 3.3.
+
+- Ability to pass one penalty (alpha value) per target in
+ :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_.
+
+- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization
+ issue (minor practical significance).
+ By :user:`Norbert Crombach ` and `Mathieu Blondel`_.
+
+- Added an interactive version of `Andreas Müller`_'s
+ `Machine Learning Cheat Sheet (for scikit-learn)
+ `_
+ to the documentation. See :ref:`Choosing the right estimator `.
+ By `Jaques Grobler`_.
+
+- :class:`grid_search.GridSearchCV` and
+ :func:`cross_validation.cross_val_score` now support the use of advanced
+ scoring functions such as area under the ROC curve and f-beta scores.
+ See :ref:`scoring_parameter` for details. By `Andreas Müller`_
+ and `Lars Buitinck`_.
+ Passing a function from :mod:`sklearn.metrics` as ``score_func`` is
+ deprecated.
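Editorial aside, not part of the original patch: a minimal sketch of the
scoring support described in the entry above, assuming the
pre-``model_selection`` API contemporary with this changelog::

    from sklearn.cross_validation import cross_val_score
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(n_samples=200, random_state=0)
    # a scorer name such as 'roc_auc' replaces passing a metric as score_func
    scores = cross_val_score(LogisticRegression(), X, y,
                             scoring='roc_auc', cv=5)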
+
+- Multi-label classification output is now supported by
+ :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`,
+ :func:`metrics.f1_score`, :func:`metrics.fbeta_score`,
+ :func:`metrics.classification_report`,
+ :func:`metrics.precision_score` and :func:`metrics.recall_score`
+ by `Arnaud Joly`_.
+
+- Two new metrics :func:`metrics.hamming_loss` and
+ :func:`metrics.jaccard_similarity_score`
+ are added with multi-label support by `Arnaud Joly`_.
+
+- Speed and memory usage improvements in
+ :class:`feature_extraction.text.CountVectorizer` and
+ :class:`feature_extraction.text.TfidfVectorizer`,
+ by Jochen Wersdörfer and Roman Sinayev.
+
+- The ``min_df`` parameter in
+ :class:`feature_extraction.text.CountVectorizer` and
+ :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2,
+ has been reset to 1 to avoid unpleasant surprises (empty vocabularies)
+ for novice users who try it out on tiny document collections.
+ A value of at least 2 is still recommended for practical use.
+
+- :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and
+ :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that
+ converts their ``coef_`` into a sparse matrix, meaning stored models
+ trained using these estimators can be made much more compact.
+
+- :class:`linear_model.SGDClassifier` now produces multiclass probability
+ estimates when trained under log loss or modified Huber loss.
+
+- Hyperlinks to documentation in example code on the website by
+ :user:`Martin Luessi `.
+
+- Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling
+ of the features for non-default ``feature_range`` settings. By `Andreas
+ Müller`_.
+
+- ``max_features`` in :class:`tree.DecisionTreeClassifier`,
+ :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+ now supports percentage values. By `Gilles Louppe`_.
+
+- Performance improvements in :class:`isotonic.IsotonicRegression` by
+ `Nelle Varoquaux`_.
+
+- :func:`metrics.accuracy_score` has a ``normalize`` option to return either
+ the fraction or the number of correctly classified samples
+ by `Arnaud Joly`_ (see the sketch below).
+
+- Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy
+ loss. By Jochen Wersdörfer and `Lars Buitinck`_.

- - A bug that caused :class:`ensemble.AdaBoostClassifier`'s to output
- incorrect probabilities has been fixed.
-
- - Feature selectors now share a mixin providing consistent ``transform``,
- ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
-
- - A fitted :class:`grid_search.GridSearchCV` or
- :class:`grid_search.RandomizedSearchCV` can now generally be pickled.
- By `Joel Nothman`_.
-
- - Refactored and vectorized implementation of :func:`metrics.roc_curve`
- and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_.
+- A bug that caused :class:`ensemble.AdaBoostClassifier` to output
+ incorrect probabilities has been fixed.
+
+- Feature selectors now share a mixin providing consistent ``transform``,
+ ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
+
+- A fitted :class:`grid_search.GridSearchCV` or
+ :class:`grid_search.RandomizedSearchCV` can now generally be pickled.
+ By `Joel Nothman`_.
+
+- Refactored and vectorized implementation of :func:`metrics.roc_curve`
+ and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_.

- - The new estimator :class:`sklearn.decomposition.TruncatedSVD`
- performs dimensionality reduction using SVD on sparse matrices,
- and can be used for latent semantic analysis (LSA).
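Editorial aside, not part of the original patch: the ``normalize`` option of
:func:`metrics.accuracy_score` mentioned above, in two lines::

    from sklearn.metrics import accuracy_score

    y_true = [0, 1, 1, 0]
    y_pred = [0, 1, 0, 0]
    accuracy_score(y_true, y_pred)                   # 0.75, the fraction
    accuracy_score(y_true, y_pred, normalize=False)  # 3, the raw count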
- By `Lars Buitinck`_. +- The new estimator :class:`sklearn.decomposition.TruncatedSVD` + performs dimensionality reduction using SVD on sparse matrices, + and can be used for latent semantic analysis (LSA). + By `Lars Buitinck`_. - - Added self-contained example of out-of-core learning on text data - :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. - By :user:`Eustache Diemert `. +- Added self-contained example of out-of-core learning on text data + :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. + By :user:`Eustache Diemert `. - - The default number of components for - :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented - to be ``n_features``. This was the default behavior, so programs using it - will continue to work as they did. +- The default number of components for + :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented + to be ``n_features``. This was the default behavior, so programs using it + will continue to work as they did. - - :class:`sklearn.cluster.KMeans` now fits several orders of magnitude - faster on sparse data (the speedup depends on the sparsity). By - `Lars Buitinck`_. - - - Reduce memory footprint of FastICA by `Denis Engemann`_ and - `Alexandre Gramfort`_. +- :class:`sklearn.cluster.KMeans` now fits several orders of magnitude + faster on sparse data (the speedup depends on the sparsity). By + `Lars Buitinck`_. + +- Reduce memory footprint of FastICA by `Denis Engemann`_ and + `Alexandre Gramfort`_. - - Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses - a column format and prints progress in decreasing frequency. - It also shows the remaining time. By `Peter Prettenhofer`_. +- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses + a column format and prints progress in decreasing frequency. + It also shows the remaining time. By `Peter Prettenhofer`_. - - :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement - :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_` - rather than the OOB score for model selection. An example that shows - how to use OOB estimates to select the number of trees was added. - By `Peter Prettenhofer`_. +- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement + :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_` + rather than the OOB score for model selection. An example that shows + how to use OOB estimates to select the number of trees was added. + By `Peter Prettenhofer`_. - - Most metrics now support string labels for multiclass classification - by `Arnaud Joly`_ and `Lars Buitinck`_. +- Most metrics now support string labels for multiclass classification + by `Arnaud Joly`_ and `Lars Buitinck`_. - - New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_ - and `Vlad Niculae`_. +- New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_ + and `Vlad Niculae`_. - - Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the - 'alphas' parameter now works as expected when given a list of - values. By Philippe Gervais. +- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the + 'alphas' parameter now works as expected when given a list of + values. By Philippe Gervais. - - Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV` - that prevented all folds provided by a CV object to be used (only - the first 3 were used). 
When providing a CV object, execution
- time may thus increase significantly compared to the previous
- version (bug results are correct now). By Philippe Gervais.
+- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV`
+ that prevented all folds provided by a CV object from being used (only
+ the first 3 were used). When providing a CV object, execution
+ time may thus increase significantly compared to the previous
+ version (but results are correct now). By Philippe Gervais.

- - :class:`cross_validation.cross_val_score` and the :mod:`grid_search`
- module is now tested with multi-output data by `Arnaud Joly`_.
+- :class:`cross_validation.cross_val_score` and the :mod:`grid_search`
+ module are now tested with multi-output data by `Arnaud Joly`_.

- - :func:`datasets.make_multilabel_classification` can now return
- the output in label indicator multilabel format by `Arnaud Joly`_.
+- :func:`datasets.make_multilabel_classification` can now return
+ the output in label indicator multilabel format by `Arnaud Joly`_.

- - K-nearest neighbors, :class:`neighbors.KNeighborsRegressor`
- and :class:`neighbors.RadiusNeighborsRegressor`,
- and radius neighbors, :class:`neighbors.RadiusNeighborsRegressor` and
- :class:`neighbors.RadiusNeighborsClassifier` support multioutput data
- by `Arnaud Joly`_.
+- K-nearest neighbors, :class:`neighbors.KNeighborsClassifier`
+ and :class:`neighbors.KNeighborsRegressor`,
+ and radius neighbors, :class:`neighbors.RadiusNeighborsClassifier` and
+ :class:`neighbors.RadiusNeighborsRegressor`, support multioutput data
+ by `Arnaud Joly`_.

- - Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`,
- :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be
- controlled. This is useful to ensure consistency in the probability
- estimates for the classifiers trained with ``probability=True``. By
- `Vlad Niculae`_.
+- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`svm.NuSVC`,
+ :class:`svm.OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be
+ controlled. This is useful to ensure consistency in the probability
+ estimates for the classifiers trained with ``probability=True``. By
+ `Vlad Niculae`_.

- - Out-of-core learning support for discrete naive Bayes classifiers
- :class:`sklearn.naive_bayes.MultinomialNB` and
- :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit``
- method by `Olivier Grisel`_.
+- Out-of-core learning support for discrete naive Bayes classifiers
+ :class:`sklearn.naive_bayes.MultinomialNB` and
+ :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit``
+ method by `Olivier Grisel`_ (see the sketch below).

- - New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_,
- Vincent Michel and `Andreas Müller`_.
+- New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_,
+ Vincent Michel and `Andreas Müller`_.

- - Improved documentation on :ref:`multi-class, multi-label and multi-output
- classification ` by `Yannick Schwartz`_ and `Arnaud Joly`_.
+- Improved documentation on :ref:`multi-class, multi-label and multi-output
+ classification ` by `Yannick Schwartz`_ and `Arnaud Joly`_.

- - Better input and error handling in the :mod:`metrics` module by
- `Arnaud Joly`_ and `Joel Nothman`_.
+- Better input and error handling in the :mod:`metrics` module by
+ `Arnaud Joly`_ and `Joel Nothman`_.
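Editorial aside, not part of the original patch: a minimal sketch of the
out-of-core ``partial_fit`` support described in the naive Bayes entry above;
the ``classes`` argument fixes the full label set on the first batch::

    import numpy as np
    from sklearn.naive_bayes import MultinomialNB

    clf = MultinomialNB()
    batches = [(np.array([[1, 0], [0, 2]]), np.array([0, 1])),
               (np.array([[3, 1], [0, 1]]), np.array([0, 1]))]
    for X_batch, y_batch in batches:
        # each call updates the model with one batch of count features
        clf.partial_fit(X_batch, y_batch, classes=np.array([0, 1]))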
- - Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov `
+- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov `.

- - Significant speed improvements for :class:`sklearn.cluster.DBSCAN`
- by `cleverless `_
+- Significant speed improvements for :class:`sklearn.cluster.DBSCAN`
+ by `cleverless `_.

API changes summary
-------------------

- - The :func:`auc_score` was renamed :func:`roc_auc_score`.
+- The :func:`auc_score` was renamed :func:`roc_auc_score`.

- - Testing scikit-learn with ``sklearn.test()`` is deprecated. Use
- ``nosetests sklearn`` from the command line.
+- Testing scikit-learn with ``sklearn.test()`` is deprecated. Use
+ ``nosetests sklearn`` from the command line.

- - Feature importances in :class:`tree.DecisionTreeClassifier`,
- :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
- are now computed on the fly when accessing the ``feature_importances_``
- attribute. Setting ``compute_importances=True`` is no longer required.
- By `Gilles Louppe`_.
+- Feature importances in :class:`tree.DecisionTreeClassifier`,
+ :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+ are now computed on the fly when accessing the ``feature_importances_``
+ attribute. Setting ``compute_importances=True`` is no longer required.
+ By `Gilles Louppe`_.

- - :class:`linear_model.lasso_path` and
- :class:`linear_model.enet_path` can return its results in the same
- format as that of :class:`linear_model.lars_path`. This is done by
- setting the ``return_models`` parameter to ``False``. By
- `Jaques Grobler`_ and `Alexandre Gramfort`_
+- :class:`linear_model.lasso_path` and
+ :class:`linear_model.enet_path` can return their results in the same
+ format as that of :class:`linear_model.lars_path`. This is done by
+ setting the ``return_models`` parameter to ``False``. By
+ `Jaques Grobler`_ and `Alexandre Gramfort`_.

- - :class:`grid_search.IterGrid` was renamed to
- :class:`grid_search.ParameterGrid`.
+- :class:`grid_search.IterGrid` was renamed to
+ :class:`grid_search.ParameterGrid`.

- - Fixed bug in :class:`KFold` causing imperfect class balance in some
- cases. By `Alexandre Gramfort`_ and Tadej Janež.
+- Fixed bug in :class:`KFold` causing imperfect class balance in some
+ cases. By `Alexandre Gramfort`_ and Tadej Janež.

- - :class:`sklearn.neighbors.BallTree` has been refactored, and a
- :class:`sklearn.neighbors.KDTree` has been
- added which shares the same interface. The Ball Tree now works with
- a wide variety of distance metrics. Both classes have many new
- methods, including single-tree and dual-tree queries, breadth-first
- and depth-first searching, and more advanced queries such as
- kernel density estimation and 2-point correlation functions.
- By `Jake Vanderplas`_
+- :class:`sklearn.neighbors.BallTree` has been refactored, and a
+ :class:`sklearn.neighbors.KDTree` has been
+ added which shares the same interface. The Ball Tree now works with
+ a wide variety of distance metrics. Both classes have many new
+ methods, including single-tree and dual-tree queries, breadth-first
+ and depth-first searching, and more advanced queries such as
+ kernel density estimation and 2-point correlation functions.
+ By `Jake Vanderplas`_.

- - Support for scipy.spatial.cKDTree within neighbors queries has been
- removed, and the functionality replaced with the new :class:`KDTree`
- class.
+- Support for scipy.spatial.cKDTree within neighbors queries has been
+ removed, and the functionality replaced with the new :class:`KDTree`
+ class.

- - :class:`sklearn.neighbors.KernelDensity` has been added, which performs
- efficient kernel density estimation with a variety of kernels.
+- :class:`sklearn.neighbors.KernelDensity` has been added, which performs
+ efficient kernel density estimation with a variety of kernels.

- - :class:`sklearn.decomposition.KernelPCA` now always returns output with
- ``n_components`` components, unless the new parameter ``remove_zero_eig``
- is set to ``True``. This new behavior is consistent with the way
- kernel PCA was always documented; previously, the removal of components
- with zero eigenvalues was tacitly performed on all data.
+- :class:`sklearn.decomposition.KernelPCA` now always returns output with
+ ``n_components`` components, unless the new parameter ``remove_zero_eig``
+ is set to ``True``. This new behavior is consistent with the way
+ kernel PCA was always documented; previously, the removal of components
+ with zero eigenvalues was tacitly performed on all data.

- - ``gcv_mode="auto"`` no longer tries to perform SVD on a densified
- sparse matrix in :class:`sklearn.linear_model.RidgeCV`.
+- ``gcv_mode="auto"`` no longer tries to perform SVD on a densified
+ sparse matrix in :class:`sklearn.linear_model.RidgeCV`.

- - Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA`
- is now deprecated in favor of the new ``TruncatedSVD``.
+- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA`
+ is now deprecated in favor of the new ``TruncatedSVD``.

- - :class:`cross_validation.KFold` and
- :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2`
- otherwise a ``ValueError`` is raised. By `Olivier Grisel`_.
+- :class:`cross_validation.KFold` and
+ :class:`cross_validation.StratifiedKFold` now enforce ``n_folds >= 2``,
+ otherwise a ``ValueError`` is raised. By `Olivier Grisel`_.

- - :func:`datasets.load_files`'s ``charset`` and ``charset_errors``
- parameters were renamed ``encoding`` and ``decode_errors``.
+- :func:`datasets.load_files`'s ``charset`` and ``charset_errors``
+ parameters were renamed ``encoding`` and ``decode_errors``.

- - Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor`
- and :class:`sklearn.ensemble.GradientBoostingClassifier`
- is deprecated and has been replaced by ``oob_improvement_`` .
+- Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor`
+ and :class:`sklearn.ensemble.GradientBoostingClassifier`
+ is deprecated and has been replaced by ``oob_improvement_``.

- - Attributes in OrthogonalMatchingPursuit have been deprecated
- (copy_X, Gram, ...) and precompute_gram renamed precompute
- for consistency. See #2224.
+- Attributes in OrthogonalMatchingPursuit have been deprecated
+ (``copy_X``, ``Gram``, ...) and ``precompute_gram`` was renamed to
+ ``precompute`` for consistency. See #2224.

- - :class:`sklearn.preprocessing.StandardScaler` now converts integer input
- to float, and raises a warning. Previously it rounded for dense integer
- input.
+- :class:`sklearn.preprocessing.StandardScaler` now converts integer input
+ to float, and raises a warning. Previously it rounded for dense integer
+ input.

- - :class:`sklearn.multiclass.OneVsRestClassifier` now has a
- ``decision_function`` method.
This will return the distance of each - sample from the decision boundary for each class, as long as the - underlying estimators implement the ``decision_function`` method. - By `Kyle Kastner`_. +- :class:`sklearn.multiclass.OneVsRestClassifier` now has a + ``decision_function`` method. This will return the distance of each + sample from the decision boundary for each class, as long as the + underlying estimators implement the ``decision_function`` method. + By `Kyle Kastner`_. - - Better input validation, warning on unexpected shapes for y. +- Better input validation, warning on unexpected shapes for y. People ------ @@ -3707,21 +3712,21 @@ The 0.13.1 release only fixes some bugs and does not add any new functionality. Changelog --------- - - Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being - interpreted as a test by `Yaroslav Halchenko`_. +- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being + interpreted as a test by `Yaroslav Halchenko`_. - - Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` - by `Gael Varoquaux`_. +- Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` + by `Gael Varoquaux`_. - - Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_. +- Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_. - - Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_. +- Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_. - - Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_. +- Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_. - - Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_. +- Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_. - - Other small improvements to tests and documentation. +- Other small improvements to tests and documentation. People ------ @@ -3753,263 +3758,263 @@ Version 0.13 New Estimator Classes --------------------- - - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two - data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check - your estimators. See :ref:`dummy_estimators` in the user guide. - Multioutput support added by `Arnaud Joly`_. +- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two + data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check + your estimators. See :ref:`dummy_estimators` in the user guide. + Multioutput support added by `Arnaud Joly`_. - - :class:`decomposition.FactorAnalysis`, a transformer implementing the - classical factor analysis, by `Christian Osendorfer`_ and `Alexandre - Gramfort`_. See :ref:`FA` in the user guide. +- :class:`decomposition.FactorAnalysis`, a transformer implementing the + classical factor analysis, by `Christian Osendorfer`_ and `Alexandre + Gramfort`_. See :ref:`FA` in the user guide. - - :class:`feature_extraction.FeatureHasher`, a transformer implementing the - "hashing trick" for fast, low-memory feature extraction from string fields - by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer` - for text documents by `Olivier Grisel`_ See :ref:`feature_hashing` and - :ref:`hashing_vectorizer` for the documentation and sample usage. 
+- :class:`feature_extraction.FeatureHasher`, a transformer implementing the
+ "hashing trick" for fast, low-memory feature extraction from string fields
+ by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer`
+ for text documents by `Olivier Grisel`_. See :ref:`feature_hashing` and
+ :ref:`hashing_vectorizer` for the documentation and sample usage.

- - :class:`pipeline.FeatureUnion`, a transformer that concatenates
- results of several other transformers by `Andreas Müller`_. See
- :ref:`feature_union` in the user guide.
+- :class:`pipeline.FeatureUnion`, a transformer that concatenates
+ results of several other transformers by `Andreas Müller`_. See
+ :ref:`feature_union` in the user guide.

- - :class:`random_projection.GaussianRandomProjection`,
- :class:`random_projection.SparseRandomProjection` and the function
- :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are
- transformers implementing Gaussian and sparse random projection matrix
- by `Olivier Grisel`_ and `Arnaud Joly`_.
- See :ref:`random_projection` in the user guide.
+- :class:`random_projection.GaussianRandomProjection`,
+ :class:`random_projection.SparseRandomProjection` and the function
+ :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are
+ transformers implementing Gaussian and sparse random projection matrices
+ by `Olivier Grisel`_ and `Arnaud Joly`_.
+ See :ref:`random_projection` in the user guide.

- - :class:`kernel_approximation.Nystroem`, a transformer for approximating
- arbitrary kernels by `Andreas Müller`_. See
- :ref:`nystroem_kernel_approx` in the user guide.
+- :class:`kernel_approximation.Nystroem`, a transformer for approximating
+ arbitrary kernels by `Andreas Müller`_. See
+ :ref:`nystroem_kernel_approx` in the user guide.

- - :class:`preprocessing.OneHotEncoder`, a transformer that computes binary
- encodings of categorical features by `Andreas Müller`_. See
- :ref:`preprocessing_categorical_features` in the user guide.
+- :class:`preprocessing.OneHotEncoder`, a transformer that computes binary
+ encodings of categorical features by `Andreas Müller`_. See
+ :ref:`preprocessing_categorical_features` in the user guide.

- - :class:`linear_model.PassiveAggressiveClassifier` and
- :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing
- an efficient stochastic optimization for linear models by `Rob Zinkov`_ and
- `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user
- guide.
+- :class:`linear_model.PassiveAggressiveClassifier` and
+ :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing
+ an efficient stochastic optimization for linear models by `Rob Zinkov`_ and
+ `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user
+ guide.

- - :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional
- sparse representations using ensembles of totally random trees by `Andreas Müller`_.
- See :ref:`random_trees_embedding` in the user guide.
+- :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional
+ sparse representations using ensembles of totally random trees by `Andreas Müller`_.
+ See :ref:`random_trees_embedding` in the user guide.

- - :class:`manifold.SpectralEmbedding` and function
- :func:`manifold.spectral_embedding`, implementing the "laplacian
- eigenmaps" transformation for non-linear dimensionality reduction by Wei
- Li. See :ref:`spectral_embedding` in the user guide.
+- :class:`manifold.SpectralEmbedding` and function
+ :func:`manifold.spectral_embedding`, implementing the "Laplacian
+ eigenmaps" transformation for non-linear dimensionality reduction by Wei
+ Li. See :ref:`spectral_embedding` in the user guide.

- - :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_
- and `Nelle Varoquaux`_,
+- :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_
+ and `Nelle Varoquaux`_.

Changelog
---------

- - :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has
- option for normalized output that reports the fraction of
- misclassifications, rather than the raw number of misclassifications. By
- Kyle Beauchamp.
+- :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has
+ an option for normalized output that reports the fraction of
+ misclassifications, rather than the raw number of misclassifications. By
+ Kyle Beauchamp.

- - :class:`tree.DecisionTreeClassifier` and all derived ensemble models now
- support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_.
+- :class:`tree.DecisionTreeClassifier` and all derived ensemble models now
+ support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_.

- - Speedup improvement when using bootstrap samples in forests of randomized
- trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_.
+- Speedup improvement when using bootstrap samples in forests of randomized
+ trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_.

- - Partial dependence plots for :ref:`gradient_boosting` in
- :func:`ensemble.partial_dependence.partial_dependence` by `Peter
- Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an
- example.
+- Partial dependence plots for :ref:`gradient_boosting` in
+ :func:`ensemble.partial_dependence.partial_dependence` by `Peter
+ Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an
+ example.

- - The table of contents on the website has now been made expandable by
- `Jaques Grobler`_.
+- The table of contents on the website has now been made expandable by
+ `Jaques Grobler`_.

- - :class:`feature_selection.SelectPercentile` now breaks ties
- deterministically instead of returning all equally ranked features.
+- :class:`feature_selection.SelectPercentile` now breaks ties
+ deterministically instead of returning all equally ranked features.

- - :class:`feature_selection.SelectKBest` and
- :class:`feature_selection.SelectPercentile` are more numerically stable
- since they use scores, rather than p-values, to rank results. This means
- that they might sometimes select different features than they did
- previously.
+- :class:`feature_selection.SelectKBest` and
+ :class:`feature_selection.SelectPercentile` are more numerically stable
+ since they use scores, rather than p-values, to rank results. This means
+ that they might sometimes select different features than they did
+ previously.

- - Ridge regression and ridge classification fitting with ``sparse_cg`` solver
- no longer has quadratic memory complexity, by `Lars Buitinck`_ and
- `Fabian Pedregosa`_.
+- Ridge regression and ridge classification fitting with ``sparse_cg`` solver
+ no longer has quadratic memory complexity, by `Lars Buitinck`_ and
+ `Fabian Pedregosa`_.

- - Ridge regression and ridge classification now support a new fast solver
- called ``lsqr``, by `Mathieu Blondel`_.
+- Ridge regression and ridge classification now support a new fast solver
+ called ``lsqr``, by `Mathieu Blondel`_.
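Editorial aside, not part of the original patch: the new ``lsqr`` solver from
the entry above is selected through the ``solver`` parameter, e.g.::

    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge

    X, y = make_regression(n_samples=100, n_features=20, random_state=0)
    reg = Ridge(alpha=1.0, solver='lsqr').fit(X, y)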
- - Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee.
+- Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee.

- - Added support for reading/writing svmlight files with pairwise
- preference attribute (qid in svmlight file format) in
- :func:`datasets.dump_svmlight_file` and
- :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_.
+- Added support for reading/writing svmlight files with pairwise
+ preference attribute (qid in svmlight file format) in
+ :func:`datasets.dump_svmlight_file` and
+ :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_.

- - Faster and more robust :func:`metrics.confusion_matrix` and
- :ref:`clustering_evaluation` by Wei Li.
+- Faster and more robust :func:`metrics.confusion_matrix` and
+ :ref:`clustering_evaluation` by Wei Li.

- - :func:`cross_validation.cross_val_score` now works with precomputed kernels
- and affinity matrices, by `Andreas Müller`_.
+- :func:`cross_validation.cross_val_score` now works with precomputed kernels
+ and affinity matrices, by `Andreas Müller`_.

- - LARS algorithm made more numerically stable with heuristics to drop
- regressors too correlated as well as to stop the path when
- numerical noise becomes predominant, by `Gael Varoquaux`_.
+- LARS algorithm made more numerically stable with heuristics to drop
+ regressors that are too correlated, as well as to stop the path when
+ numerical noise becomes predominant, by `Gael Varoquaux`_.

- - Faster implementation of :func:`metrics.precision_recall_curve` by
- Conrad Lee.
+- Faster implementation of :func:`metrics.precision_recall_curve` by
+ Conrad Lee.

- - New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used
- in computer vision applications.
+- New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used
+ in computer vision applications.

- - Fix of longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by
- Shaun Jackman.
+- Longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by
+ Shaun Jackman.

- - Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`,
- by Andrew Winterman.
+- Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`,
+ by Andrew Winterman.

- - Improve consistency in gradient boosting: estimators
- :class:`ensemble.GradientBoostingRegressor` and
- :class:`ensemble.GradientBoostingClassifier` use the estimator
- :class:`tree.DecisionTreeRegressor` instead of the
- :class:`tree._tree.Tree` data structure by `Arnaud Joly`_.
+- Improve consistency in gradient boosting: estimators
+ :class:`ensemble.GradientBoostingRegressor` and
+ :class:`ensemble.GradientBoostingClassifier` use the estimator
+ :class:`tree.DecisionTreeRegressor` instead of the
+ :class:`tree._tree.Tree` data structure by `Arnaud Joly`_.

- - Fixed a floating point exception in the :ref:`decision trees `
- module, by Seberg.
+- Fixed a floating point exception in the :ref:`decision trees `
+ module, by Seberg.

- - Fix :func:`metrics.roc_curve` fails when y_true has only one class
- by Wei Li.
+- Fix :func:`metrics.roc_curve` failing when ``y_true`` has only one class,
+ by Wei Li.

- - Add the :func:`metrics.mean_absolute_error` function which computes the
- mean absolute error.
The :func:`metrics.mean_squared_error`,
+ :func:`metrics.mean_absolute_error` and
+ :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_.

- - Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
- :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning
- of ``class_weight`` was reversed as erroneously higher weight meant less
- positives of a given class in earlier releases.
+- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
+ :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning
+ of ``class_weight`` was reversed, as in earlier releases a higher weight
+ erroneously meant fewer positives of a given class.

- - Improve narrative documentation and consistency in
- :mod:`sklearn.metrics` for regression and classification metrics
- by `Arnaud Joly`_.
+- Improve narrative documentation and consistency in
+ :mod:`sklearn.metrics` for regression and classification metrics
+ by `Arnaud Joly`_.

- - Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with
- unsorted indices by Xinfan Meng and `Andreas Müller`_.
+- Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with
+ unsorted indices by Xinfan Meng and `Andreas Müller`_.

- - :class:`MiniBatchKMeans`: Add random reassignment of cluster centers
- with little observations attached to them, by `Gael Varoquaux`_.
+- :class:`MiniBatchKMeans`: Add random reassignment of cluster centers
+ with few observations attached to them, by `Gael Varoquaux`_.

API changes summary
-------------------

- - Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
- This applies to :class:`decomposition.DictionaryLearning`,
- :class:`decomposition.MiniBatchDictionaryLearning`,
- :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`.
+- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
+ This applies to :class:`decomposition.DictionaryLearning`,
+ :class:`decomposition.MiniBatchDictionaryLearning`,
+ :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`.

- - Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency.
- This applies to :class:`semi_supervised.LabelPropagation` and
- :class:`semi_supervised.label_propagation.LabelSpreading`.
+- Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency.
+ This applies to :class:`semi_supervised.LabelPropagation` and
+ :class:`semi_supervised.label_propagation.LabelSpreading`.

- - Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for
- consistency in :class:`ensemble.BaseGradientBoosting` and
- :class:`ensemble.GradientBoostingRegressor`.
+- Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for
+ consistency in :class:`ensemble.BaseGradientBoosting` and
+ :class:`ensemble.GradientBoostingRegressor`.

- - The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support
- was already integrated into the "regular" linear models.
+- The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support
+ was already integrated into the "regular" linear models.

- - :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the
- accumulated error, was removed. Use ``mean_squared_error`` instead.
+- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the
+ accumulated error, was removed. Use ``mean_squared_error`` instead.

- - Passing ``class_weight`` parameters to ``fit`` methods is no longer
- supported.
Pass them to estimator constructors instead. +- Passing ``class_weight`` parameters to ``fit`` methods is no longer + supported. Pass them to estimator constructors instead. - - GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``, - ``predict`` or ``sample`` methods instead. +- GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``, + ``predict`` or ``sample`` methods instead. - - The ``solver`` fit option in Ridge regression and classification is now - deprecated and will be removed in v0.14. Use the constructor option - instead. +- The ``solver`` fit option in Ridge regression and classification is now + deprecated and will be removed in v0.14. Use the constructor option + instead. - - :class:`feature_extraction.text.DictVectorizer` now returns sparse - matrices in the CSR format, instead of COO. +- :class:`feature_extraction.text.DictVectorizer` now returns sparse + matrices in the CSR format, instead of COO. - - Renamed ``k`` in :class:`cross_validation.KFold` and - :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed - ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``. +- Renamed ``k`` in :class:`cross_validation.KFold` and + :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed + ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``. - - Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency. - This applies to :class:`cross_validation.ShuffleSplit`, - :class:`cross_validation.StratifiedShuffleSplit`, - :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`. +- Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency. + This applies to :class:`cross_validation.ShuffleSplit`, + :class:`cross_validation.StratifiedShuffleSplit`, + :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`. - - Replaced ``rho`` in :class:`linear_model.ElasticNet` and - :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter - had different meanings; ``l1_ratio`` was introduced to avoid confusion. - It has the same meaning as previously ``rho`` in - :class:`linear_model.ElasticNet` and ``(1-rho)`` in - :class:`linear_model.SGDClassifier`. +- Replaced ``rho`` in :class:`linear_model.ElasticNet` and + :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter + had different meanings; ``l1_ratio`` was introduced to avoid confusion. + It has the same meaning as previously ``rho`` in + :class:`linear_model.ElasticNet` and ``(1-rho)`` in + :class:`linear_model.SGDClassifier`. - - :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now - store a list of paths in the case of multiple targets, rather than - an array of paths. +- :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now + store a list of paths in the case of multiple targets, rather than + an array of paths. - - The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_`` - to adhere more strictly with the API. +- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_`` + to adhere more strictly with the API. - - :func:`cluster.spectral_embedding` was moved to - :func:`manifold.spectral_embedding`. +- :func:`cluster.spectral_embedding` was moved to + :func:`manifold.spectral_embedding`. - - Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`, - :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode`` - to ``eigen_solver``. 
+- Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`,
+ :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode``
+ to ``eigen_solver``.

- - Renamed ``mode`` in :func:`manifold.spectral_embedding` and
- :class:`cluster.SpectralClustering` to ``eigen_solver``.
+- Renamed ``mode`` in :func:`manifold.spectral_embedding` and
+ :class:`cluster.SpectralClustering` to ``eigen_solver``.

- - ``classes_`` and ``n_classes_`` attributes of
- :class:`tree.DecisionTreeClassifier` and all derived ensemble models are
- now flat in case of single output problems and nested in case of
- multi-output problems.
+- ``classes_`` and ``n_classes_`` attributes of
+ :class:`tree.DecisionTreeClassifier` and all derived ensemble models are
+ now flat in case of single output problems and nested in case of
+ multi-output problems.

- - The ``estimators_`` attribute of
- :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and
- :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an
- array of :class:'tree.DecisionTreeRegressor'.
+- The ``estimators_`` attribute of
+ :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and
+ :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an
+ array of :class:`tree.DecisionTreeRegressor`.

- - Renamed ``chunk_size`` to ``batch_size`` in
- :class:`decomposition.MiniBatchDictionaryLearning` and
- :class:`decomposition.MiniBatchSparsePCA` for consistency.
+- Renamed ``chunk_size`` to ``batch_size`` in
+ :class:`decomposition.MiniBatchDictionaryLearning` and
+ :class:`decomposition.MiniBatchSparsePCA` for consistency.

- - :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_``
- attribute and support arbitrary dtypes for labels ``y``.
- Also, the dtype returned by ``predict`` now reflects the dtype of
- ``y`` during ``fit`` (used to be ``np.float``).
+- :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_``
+ attribute and support arbitrary dtypes for labels ``y``.
+ Also, the dtype returned by ``predict`` now reflects the dtype of
+ ``y`` during ``fit`` (used to be ``np.float``).

- - Changed default test_size in :func:`cross_validation.train_test_split`
- to None, added possibility to infer ``test_size`` from ``train_size`` in
- :class:`cross_validation.ShuffleSplit` and
- :class:`cross_validation.StratifiedShuffleSplit`.
+- Changed default ``test_size`` in :func:`cross_validation.train_test_split`
+ to None, added possibility to infer ``test_size`` from ``train_size`` in
+ :class:`cross_validation.ShuffleSplit` and
+ :class:`cross_validation.StratifiedShuffleSplit`.

- - Renamed function :func:`sklearn.metrics.zero_one` to
- :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior
- in :func:`sklearn.metrics.zero_one_loss` is different from
- :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to
- ``normalize=True``.
+- Renamed function :func:`sklearn.metrics.zero_one` to
+ :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior
+ in :func:`sklearn.metrics.zero_one_loss` is different from
+ :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to
+ ``normalize=True``.

- - Renamed function :func:`metrics.zero_one_score` to
- :func:`metrics.accuracy_score`.
+- Renamed function :func:`metrics.zero_one_score` to
+ :func:`metrics.accuracy_score`.

- - :func:`datasets.make_circles` now has the same number of inner and outer points.
- - In the Naive Bayes classifiers, the ``class_prior`` parameter was moved - from ``fit`` to ``__init__``. +- In the Naive Bayes classifiers, the ``class_prior`` parameter was moved + from ``fit`` to ``__init__``. People ------ @@ -4096,27 +4101,27 @@ instead a set of bug fixes Changelog ---------- - - Improved numerical stability in spectral embedding by `Gael - Varoquaux`_ +- Improved numerical stability in spectral embedding by `Gael + Varoquaux`_ - - Doctest under windows 64bit by `Gael Varoquaux`_ +- Doctest under windows 64bit by `Gael Varoquaux`_ - - Documentation fixes for elastic net by `Andreas Müller`_ and - `Alexandre Gramfort`_ +- Documentation fixes for elastic net by `Andreas Müller`_ and + `Alexandre Gramfort`_ - - Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_ +- Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_ - - Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_ +- Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_ - - Fix parallel computing in MDS by `Gael Varoquaux`_ +- Fix parallel computing in MDS by `Gael Varoquaux`_ - - Fix Unicode support in count vectorizer by `Andreas Müller`_ +- Fix Unicode support in count vectorizer by `Andreas Müller`_ - - Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch ` +- Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch ` - - Fix clone of SGD objects by `Peter Prettenhofer`_ +- Fix clone of SGD objects by `Peter Prettenhofer`_ - - Stabilize GMM by :user:`Virgile Fritsch ` +- Stabilize GMM by :user:`Virgile Fritsch ` People ------ @@ -4140,137 +4145,137 @@ Version 0.12 Changelog --------- - - Various speed improvements of the :ref:`decision trees ` module, by - `Gilles Louppe`_. +- Various speed improvements of the :ref:`decision trees ` module, by + `Gilles Louppe`_. - - :class:`ensemble.GradientBoostingRegressor` and - :class:`ensemble.GradientBoostingClassifier` now support feature subsampling - via the ``max_features`` argument, by `Peter Prettenhofer`_. +- :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier` now support feature subsampling + via the ``max_features`` argument, by `Peter Prettenhofer`_. - - Added Huber and Quantile loss functions to - :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_. +- Added Huber and Quantile loss functions to + :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_. - - :ref:`Decision trees ` and :ref:`forests of randomized trees ` - now support multi-output classification and regression problems, by - `Gilles Louppe`_. +- :ref:`Decision trees ` and :ref:`forests of randomized trees ` + now support multi-output classification and regression problems, by + `Gilles Louppe`_. - - Added :class:`preprocessing.LabelEncoder`, a simple utility class to - normalize labels or transform non-numerical labels, by `Mathieu Blondel`_. +- Added :class:`preprocessing.LabelEncoder`, a simple utility class to + normalize labels or transform non-numerical labels, by `Mathieu Blondel`_. - - Added the epsilon-insensitive loss and the ability to make probabilistic - predictions with the modified huber loss in :ref:`sgd`, by - `Mathieu Blondel`_. +- Added the epsilon-insensitive loss and the ability to make probabilistic + predictions with the modified huber loss in :ref:`sgd`, by + `Mathieu Blondel`_. - - Added :ref:`multidimensional_scaling`, by Nelle Varoquaux. 
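Editorial aside, not part of the original patch: a minimal sketch of the
:class:`preprocessing.LabelEncoder` utility introduced in the entry above::

    from sklearn.preprocessing import LabelEncoder

    le = LabelEncoder()
    codes = le.fit_transform(["paris", "tokyo", "paris"])  # array([0, 1, 0])
    cities = le.inverse_transform(codes)                   # back to strings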
+- Added :ref:`multidimensional_scaling`, by Nelle Varoquaux. - - SVMlight file format loader now detects compressed (gzip/bzip2) files and - decompresses them on the fly, by `Lars Buitinck`_. +- SVMlight file format loader now detects compressed (gzip/bzip2) files and + decompresses them on the fly, by `Lars Buitinck`_. - - SVMlight file format serializer now preserves double precision floating - point values, by `Olivier Grisel`_. +- SVMlight file format serializer now preserves double precision floating + point values, by `Olivier Grisel`_. - - A common testing framework for all estimators was added, by `Andreas Müller`_. +- A common testing framework for all estimators was added, by `Andreas Müller`_. - - Understandable error messages for estimators that do not accept - sparse input by `Gael Varoquaux`_ +- Understandable error messages for estimators that do not accept + sparse input by `Gael Varoquaux`_ - - Speedups in hierarchical clustering by `Gael Varoquaux`_. In - particular building the tree now supports early stopping. This is - useful when the number of clusters is not small compared to the - number of samples. +- Speedups in hierarchical clustering by `Gael Varoquaux`_. In + particular building the tree now supports early stopping. This is + useful when the number of clusters is not small compared to the + number of samples. - - Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection, - by `Alexandre Gramfort`_. +- Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection, + by `Alexandre Gramfort`_. - - Added :func:`metrics.auc_score` and - :func:`metrics.average_precision_score` convenience functions by `Andreas - Müller`_. +- Added :func:`metrics.auc_score` and + :func:`metrics.average_precision_score` convenience functions by `Andreas + Müller`_. - - Improved sparse matrix support in the :ref:`feature_selection` - module by `Andreas Müller`_. +- Improved sparse matrix support in the :ref:`feature_selection` + module by `Andreas Müller`_. - - New word boundaries-aware character n-gram analyzer for the - :ref:`text_feature_extraction` module by :user:`@kernc `. +- New word boundaries-aware character n-gram analyzer for the + :ref:`text_feature_extraction` module by :user:`@kernc `. - - Fixed bug in spectral clustering that led to single point clusters - by `Andreas Müller`_. +- Fixed bug in spectral clustering that led to single point clusters + by `Andreas Müller`_. - - In :class:`feature_extraction.text.CountVectorizer`, added an option to - ignore infrequent words, ``min_df`` by `Andreas Müller`_. +- In :class:`feature_extraction.text.CountVectorizer`, added an option to + ignore infrequent words, ``min_df`` by `Andreas Müller`_. - - Add support for multiple targets in some linear models (ElasticNet, Lasso - and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and - `Alexandre Gramfort`_. +- Add support for multiple targets in some linear models (ElasticNet, Lasso + and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and + `Alexandre Gramfort`_. - - Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li. +- Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li. - - Fixed feature importance computation in - :ref:`gradient_boosting`. +- Fixed feature importance computation in + :ref:`gradient_boosting`. API changes summary ------------------- - - The old ``scikits.learn`` package has disappeared; all code should import - from ``sklearn`` instead, which was introduced in 0.9. 
+- The old ``scikits.learn`` package has disappeared; all code should import
+  from ``sklearn`` instead, which was introduced in 0.9.

-  - In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned
-    with it's order reversed, in order to keep it consistent with the order
-    of the returned ``fpr`` and ``tpr``.
+- In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned
+  with its order reversed, in order to keep it consistent with the order
+  of the returned ``fpr`` and ``tpr``.

-  - In :class:`hmm` objects, like :class:`hmm.GaussianHMM`,
-    :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the
-    object when initialising it and not through ``fit``. Now ``fit`` will
-    only accept the data as an input parameter.
+- In :class:`hmm` objects, like :class:`hmm.GaussianHMM`,
+  :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the
+  object when initialising it and not through ``fit``. Now ``fit`` will
+  only accept the data as an input parameter.

-  - For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously,
-    the default gamma value was only computed the first time ``fit`` was called
-    and then stored. It is now recalculated on every call to ``fit``.
+- For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously,
+  the default gamma value was only computed the first time ``fit`` was called
+  and then stored. It is now recalculated on every call to ``fit``.

-  - All ``Base`` classes are now abstract meta classes so that they can not be
-    instantiated.
+- All ``Base`` classes are now abstract metaclasses so that they cannot be
+  instantiated.

-  - :func:`cluster.ward_tree` now also returns the parent array. This is
-    necessary for early-stopping in which case the tree is not
-    completely built.
+- :func:`cluster.ward_tree` now also returns the parent array. This is
+  necessary for early-stopping in which case the tree is not
+  completely built.

-  - In :class:`feature_extraction.text.CountVectorizer` the parameters
-    ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to
-    enable grid-searching both at once.
+- In :class:`feature_extraction.text.CountVectorizer` the parameters
+  ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to
+  enable grid-searching both at once.

-  - In :class:`feature_extraction.text.CountVectorizer`, words that appear
-    only in one document are now ignored by default. To reproduce
-    the previous behavior, set ``min_df=1``.
+- In :class:`feature_extraction.text.CountVectorizer`, words that appear
+  only in one document are now ignored by default. To reproduce
+  the previous behavior, set ``min_df=1``.

-  - Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now
-    returns 2d array when fit on two classes.
+- Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now
+  returns a 2d array when fit on two classes.

-  - Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function`
-    and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays
-    when fit on two classes.
+- Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function`
+  and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays
+  when fit on two classes, as illustrated in the sketch below.
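
A minimal sketch of the new binary-class shape (the toy data and the use of
the ``sklearn.discriminant_analysis`` import path are illustrative
assumptions, not part of this changelog)::

    import numpy as np
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    X = np.array([[0.0], [0.2], [1.0], [1.2]])  # toy two-class problem
    y = np.array([0, 0, 1, 1])
    clf = LinearDiscriminantAnalysis().fit(X, y)
    # decision_function is now 1d for two classes: shape (n_samples,)
    print(clf.decision_function(X).shape)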
-  - Grid of alphas used for fitting :class:`linear_model.LassoCV` and
-    :class:`linear_model.ElasticNetCV` is now stored
-    in the attribute ``alphas_`` rather than overriding the init parameter
-    ``alphas``.
+- Grid of alphas used for fitting :class:`linear_model.LassoCV` and
+  :class:`linear_model.ElasticNetCV` is now stored
+  in the attribute ``alphas_`` rather than overriding the init parameter
+  ``alphas``.

-  - Linear models when alpha is estimated by cross-validation store
-    the estimated value in the ``alpha_`` attribute rather than just
-    ``alpha`` or ``best_alpha``.
+- Linear models when alpha is estimated by cross-validation store
+  the estimated value in the ``alpha_`` attribute rather than just
+  ``alpha`` or ``best_alpha``.

-  - :class:`ensemble.GradientBoostingClassifier` now supports
-    :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and
-    :meth:`ensemble.GradientBoostingClassifier.staged_predict`.
+- :class:`ensemble.GradientBoostingClassifier` now supports
+  :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and
+  :meth:`ensemble.GradientBoostingClassifier.staged_predict`.

-  - :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated.
-    The all classes in the :ref:`svm` module now automatically select the
-    sparse or dense representation base on the input.
+- :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated.
+  All classes in the :ref:`svm` module now automatically select the
+  sparse or dense representation based on the input.

-  - All clustering algorithms now interpret the array ``X`` given to ``fit`` as
-    input data, in particular :class:`cluster.SpectralClustering` and
-    :class:`cluster.AffinityPropagation` which previously expected affinity matrices.
+- All clustering algorithms now interpret the array ``X`` given to ``fit`` as
+  input data, in particular :class:`cluster.SpectralClustering` and
+  :class:`cluster.AffinityPropagation` which previously expected affinity matrices.

-  - For clustering algorithms that take the desired number of clusters as a parameter,
-    this parameter is now called ``n_clusters``.
+- For clustering algorithms that take the desired number of clusters as a parameter,
+  this parameter is now called ``n_clusters``.


 People
@@ -4338,176 +4343,176 @@ Changelog

 Highlights
 .............

-  - Gradient boosted regression trees (:ref:`gradient_boosting`)
-    for classification and regression by `Peter Prettenhofer`_
-    and `Scott White`_ .
+- Gradient boosted regression trees (:ref:`gradient_boosting`)
+  for classification and regression by `Peter Prettenhofer`_
+  and `Scott White`_.

-  - Simple dict-based feature loader with support for categorical variables
-    (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_.
+- Simple dict-based feature loader with support for categorical variables
+  (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_.

-  - Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`)
-    and added macro and micro average options to
-    :func:`metrics.precision_score`, :func:`metrics.recall_score` and
-    :func:`metrics.f1_score` by `Satrajit Ghosh`_.
+- Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`)
+  and added macro and micro average options to
+  :func:`metrics.precision_score`, :func:`metrics.recall_score` and
+  :func:`metrics.f1_score` by `Satrajit Ghosh`_.

-  - :ref:`out_of_bag` of generalization error for :ref:`ensemble`
-    by `Andreas Müller`_.
+- :ref:`out_of_bag` of generalization error for :ref:`ensemble` + by `Andreas Müller`_. - - Randomized sparse linear models for feature - selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ +- Randomized sparse linear models for feature + selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ - - :ref:`label_propagation` for semi-supervised learning, by Clay - Woolam. **Note** the semi-supervised API is still work in progress, - and may change. +- :ref:`label_propagation` for semi-supervised learning, by Clay + Woolam. **Note** the semi-supervised API is still work in progress, + and may change. - - Added BIC/AIC model selection to classical :ref:`gmm` and unified - the API with the remainder of scikit-learn, by `Bertrand Thirion`_ +- Added BIC/AIC model selection to classical :ref:`gmm` and unified + the API with the remainder of scikit-learn, by `Bertrand Thirion`_ - - Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is - a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits, - by Yannick Schwartz. +- Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is + a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits, + by Yannick Schwartz. - - :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a - ``shrink_threshold`` parameter, which implements **shrunken centroid - classification**, by `Robert Layton`_. +- :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a + ``shrink_threshold`` parameter, which implements **shrunken centroid + classification**, by `Robert Layton`_. Other changes .............. - - Merged dense and sparse implementations of :ref:`sgd` module and - exposed utility extension types for sequential - datasets ``seq_dataset`` and weight vectors ``weight_vector`` - by `Peter Prettenhofer`_. +- Merged dense and sparse implementations of :ref:`sgd` module and + exposed utility extension types for sequential + datasets ``seq_dataset`` and weight vectors ``weight_vector`` + by `Peter Prettenhofer`_. - - Added ``partial_fit`` (support for online/minibatch learning) and - warm_start to the :ref:`sgd` module by `Mathieu Blondel`_. +- Added ``partial_fit`` (support for online/minibatch learning) and + warm_start to the :ref:`sgd` module by `Mathieu Blondel`_. - - Dense and sparse implementations of :ref:`svm` classes and - :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_. +- Dense and sparse implementations of :ref:`svm` classes and + :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_. - - Regressors can now be used as base estimator in the :ref:`multiclass` - module by `Mathieu Blondel`_. +- Regressors can now be used as base estimator in the :ref:`multiclass` + module by `Mathieu Blondel`_. - - Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` - and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, - by `Mathieu Blondel`_. +- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` + and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, + by `Mathieu Blondel`_. - - :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument - to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. +- :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument + to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. 
- - Improved :ref:`cross_validation` and :ref:`grid_search` documentation - and introduced the new :func:`cross_validation.train_test_split` - helper function by `Olivier Grisel`_ +- Improved :ref:`cross_validation` and :ref:`grid_search` documentation + and introduced the new :func:`cross_validation.train_test_split` + helper function by `Olivier Grisel`_ - - :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for - consistency with ``decision_function``; for ``kernel==linear``, - ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_. +- :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for + consistency with ``decision_function``; for ``kernel==linear``, + ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_. - - Performance improvements to efficient leave-one-out cross-validated - Ridge regression, esp. for the ``n_samples > n_features`` case, in - :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin. +- Performance improvements to efficient leave-one-out cross-validated + Ridge regression, esp. for the ``n_samples > n_features`` case, in + :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin. - - Refactoring and simplification of the :ref:`text_feature_extraction` - API and fixed a bug that caused possible negative IDF, - by `Olivier Grisel`_. +- Refactoring and simplification of the :ref:`text_feature_extraction` + API and fixed a bug that caused possible negative IDF, + by `Olivier Grisel`_. - - Beam pruning option in :class:`_BaseHMM` module has been removed since it - is difficult to Cythonize. If you are interested in contributing a Cython - version, you can use the python version in the git history as a reference. +- Beam pruning option in :class:`_BaseHMM` module has been removed since it + is difficult to Cythonize. If you are interested in contributing a Cython + version, you can use the python version in the git history as a reference. - - Classes in :ref:`neighbors` now support arbitrary Minkowski metric for - nearest neighbors searches. The metric can be specified by argument ``p``. +- Classes in :ref:`neighbors` now support arbitrary Minkowski metric for + nearest neighbors searches. The metric can be specified by argument ``p``. API changes summary ------------------- - - :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope` - instead. +- :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope` + instead. - - ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module - :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, - :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` - and/or :class:`RadiusNeighborsRegressor` instead. +- ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module + :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, + :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` + and/or :class:`RadiusNeighborsRegressor` instead. - - Sparse classes in the :ref:`sgd` module are now deprecated. +- Sparse classes in the :ref:`sgd` module are now deprecated. - - In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`, - parameters must be passed to an object when initialising it and not through - ``fit``. Now ``fit`` will only accept the data as an input parameter. 
+- In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`,
+  parameters must be passed to an object when initialising it and not through
+  ``fit``. Now ``fit`` will only accept the data as an input parameter.

-  - methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated.
-    ``sample`` and ``score`` or ``predict`` should be used instead.
+- Methods ``rvs`` and ``decode`` in the :class:`GMM` module are now deprecated.
+  ``sample`` and ``score`` or ``predict`` should be used instead.

-  - attribute ``_scores`` and ``_pvalues`` in univariate feature selection
-    objects are now deprecated.
-    ``scores_`` or ``pvalues_`` should be used instead.
+- Attributes ``_scores`` and ``_pvalues`` in univariate feature selection
+  objects are now deprecated.
+  ``scores_`` or ``pvalues_`` should be used instead.

-  - In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and
-    :class:`NuSVC`, the ``class_weight`` parameter is now an initialization
-    parameter, not a parameter to fit. This makes grid searches
-    over this parameter possible.
+- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and
+  :class:`NuSVC`, the ``class_weight`` parameter is now an initialization
+  parameter, not a parameter to fit. This makes grid searches
+  over this parameter possible.

-  - LFW ``data`` is now always shape ``(n_samples, n_features)`` to be
-    consistent with the Olivetti faces dataset. Use ``images`` and
-    ``pairs`` attribute to access the natural images shapes instead.
+- LFW ``data`` is now always shape ``(n_samples, n_features)`` to be
+  consistent with the Olivetti faces dataset. Use the ``images`` and
+  ``pairs`` attributes to access the natural image shapes instead.

-  - In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter
-    changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with
-    ``'ovr'`` being the default. This does not change the default behavior
-    but hopefully is less confusing.
+- In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter
+  changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with
+  ``'ovr'`` being the default. This does not change the default behavior
+  but hopefully is less confusing.

-  - Class :class:`feature_selection.text.Vectorizer` is deprecated and
-    replaced by :class:`feature_selection.text.TfidfVectorizer`.
+- Class :class:`feature_selection.text.Vectorizer` is deprecated and
+  replaced by :class:`feature_selection.text.TfidfVectorizer`.

-  - The preprocessor / analyzer nested structure for text feature
-    extraction has been removed. All those features are
-    now directly passed as flat constructor arguments
-    to :class:`feature_selection.text.TfidfVectorizer` and
-    :class:`feature_selection.text.CountVectorizer`, in particular the
-    following parameters are now used:
+- The preprocessor / analyzer nested structure for text feature
+  extraction has been removed. All those features are
+  now directly passed as flat constructor arguments
+  to :class:`feature_selection.text.TfidfVectorizer` and
+  :class:`feature_selection.text.CountVectorizer`; in particular the
+  following parameters are now used:

-  - ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default
-    analysis scheme, or use a specific python callable (as previously).
+- ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default
+  analysis scheme, or use a specific python callable (as previously).
-  - ``tokenizer`` and ``preprocessor`` have been introduced to make it
-    still possible to customize those steps with the new API.
+- ``tokenizer`` and ``preprocessor`` have been introduced to make it
+  still possible to customize those steps with the new API.

-  - ``input`` explicitly control how to interpret the sequence passed to
-    ``fit`` and ``predict``: filenames, file objects or direct (byte or
-    Unicode) strings.
+- ``input`` explicitly controls how to interpret the sequence passed to
+  ``fit`` and ``predict``: filenames, file objects or direct (byte or
+  Unicode) strings.

-  - charset decoding is explicit and strict by default.
+- charset decoding is explicit and strict by default.

-  - the ``vocabulary``, fitted or not is now stored in the
-    ``vocabulary_`` attribute to be consistent with the project
-    conventions.
+- the ``vocabulary``, fitted or not, is now stored in the
+  ``vocabulary_`` attribute to be consistent with the project
+  conventions.

-  - Class :class:`feature_selection.text.TfidfVectorizer` now derives directly
-    from :class:`feature_selection.text.CountVectorizer` to make grid
-    search trivial.
+- Class :class:`feature_selection.text.TfidfVectorizer` now derives directly
+  from :class:`feature_selection.text.CountVectorizer` to make grid
+  search trivial.

-  - methods ``rvs`` in :class:`_BaseHMM` module are now deprecated.
-    ``sample`` should be used instead.
+- The method ``rvs`` in the :class:`_BaseHMM` module is now deprecated.
+  ``sample`` should be used instead.

-  - Beam pruning option in :class:`_BaseHMM` module is removed since it is
-    difficult to be Cythonized. If you are interested, you can look in the
-    history codes by git.
+- Beam pruning option in :class:`_BaseHMM` module is removed since it is
+  difficult to Cythonize. If you are interested, you can look at the code
+  history in git.

-  - The SVMlight format loader now supports files with both zero-based and
-    one-based column indices, since both occur "in the wild".
+- The SVMlight format loader now supports files with both zero-based and
+  one-based column indices, since both occur "in the wild".

-  - Arguments in class :class:`ShuffleSplit` are now consistent with
-    :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and
-    ``train_fraction`` are deprecated and renamed to ``test_size`` and
-    ``train_size`` and can accept both ``float`` and ``int``.
+- Arguments in class :class:`ShuffleSplit` are now consistent with
+  :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and
+  ``train_fraction`` are deprecated and renamed to ``test_size`` and
+  ``train_size`` and can accept both ``float`` and ``int``.

-  - Arguments in class :class:`Bootstrap` are now consistent with
-    :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and
-    ``n_train`` are deprecated and renamed to ``test_size`` and
-    ``train_size`` and can accept both ``float`` and ``int``.
+- Arguments in class :class:`Bootstrap` are now consistent with
+  :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and
+  ``n_train`` are deprecated and renamed to ``test_size`` and
+  ``train_size`` and can accept both ``float`` and ``int``.

-  - Argument ``p`` added to classes in :ref:`neighbors` to specify an
-    arbitrary Minkowski metric for nearest neighbors searches.
+- Argument ``p`` added to classes in :ref:`neighbors` to specify an
+  arbitrary Minkowski metric for nearest neighbors searches, as in the
+  short sketch below.
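
A minimal, hedged illustration of the new ``p`` argument (the data is made
up; ``NearestNeighbors`` is shown, but the neighbors-based classifiers and
regressors accept the same argument)::

    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    X = np.array([[0, 0], [1, 0], [0, 2], [3, 3]], dtype=float)
    # p=1 selects the Manhattan (l1) metric; p=2, the default, is Euclidean
    nn = NearestNeighbors(n_neighbors=2, p=1).fit(X)
    distances, indices = nn.kneighbors(X)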

 People
@@ -4572,85 +4577,85 @@ Version 0.10
 Changelog
 ---------

-  - Python 2.5 compatibility was dropped; the minimum Python version needed
-    to use scikit-learn is now 2.6.
+- Python 2.5 compatibility was dropped; the minimum Python version needed
+  to use scikit-learn is now 2.6.

-  - :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with
-    associated cross-validated estimator, by `Gael Varoquaux`_
+- :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with
+  associated cross-validated estimator, by `Gael Varoquaux`_

-  - New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_,
-    `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete
-    documentation and examples.
+- New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_,
+  `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete
+  documentation and examples.

-  - Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378).
+- Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378).

-  - Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367).
+- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367).

-  - Faster tests by `Fabian Pedregosa`_ and others.
+- Faster tests by `Fabian Pedregosa`_ and others.

-  - Silhouette Coefficient cluster analysis evaluation metric added as
-    :func:`sklearn.metrics.silhouette_score` by Robert Layton.
+- Silhouette Coefficient cluster analysis evaluation metric added as
+  :func:`sklearn.metrics.silhouette_score` by Robert Layton.

-  - Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter:
-    the clustering algorithm used to be run ``n_init`` times but the last
-    solution was retained instead of the best solution by `Olivier Grisel`_.
+- Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter:
+  the clustering algorithm used to be run ``n_init`` times but the last
+  solution was retained instead of the best solution by `Olivier Grisel`_.

-  - Minor refactoring in :ref:`sgd` module; consolidated dense and sparse
-    predict methods; Enhanced test time performance by converting model
-    parameters to fortran-style arrays after fitting (only multi-class).
+- Minor refactoring in :ref:`sgd` module; consolidated dense and sparse
+  predict methods; enhanced test time performance by converting model
+  parameters to fortran-style arrays after fitting (only multi-class).

-  - Adjusted Mutual Information metric added as
-    :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton.
+- Adjusted Mutual Information metric added as
+  :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton.

-  - Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear
-    now support scaling of C regularization parameter by the number of
-    samples by `Alexandre Gramfort`_.
+- Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear
+  now support scaling of the C regularization parameter by the number of
+  samples by `Alexandre Gramfort`_.

-  - New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and
-    `Brian Holt`_. The module comes with the random forest algorithm and the
-    extra-trees method, along with documentation and examples.
+- New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and
+  `Brian Holt`_. The module comes with the random forest algorithm and the
+  extra-trees method, along with documentation and examples.

-  - :ref:`outlier_detection`: outlier and novelty detection, by
-    :user:`Virgile Fritsch `.
+- :ref:`outlier_detection`: outlier and novelty detection, by + :user:`Virgile Fritsch `. - - :ref:`kernel_approximation`: a transform implementing kernel - approximation for fast SGD on non-linear kernels by - `Andreas Müller`_. +- :ref:`kernel_approximation`: a transform implementing kernel + approximation for fast SGD on non-linear kernels by + `Andreas Müller`_. - - Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_. +- Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_. - - :ref:`SparseCoder` by `Vlad Niculae`_. +- :ref:`SparseCoder` by `Vlad Niculae`_. - - :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_. +- :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_. - - :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_. +- :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_. - - Improved documentation for developers and for the :mod:`sklearn.utils` - module, by `Jake Vanderplas`_. +- Improved documentation for developers and for the :mod:`sklearn.utils` + module, by `Jake Vanderplas`_. - - Vectorized 20newsgroups dataset loader - (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by - `Mathieu Blondel`_. +- Vectorized 20newsgroups dataset loader + (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by + `Mathieu Blondel`_. - - :ref:`multiclass` by `Lars Buitinck`_. +- :ref:`multiclass` by `Lars Buitinck`_. - - Utilities for fast computation of mean and variance for sparse matrices - by `Mathieu Blondel`_. +- Utilities for fast computation of mean and variance for sparse matrices + by `Mathieu Blondel`_. - - Make :func:`sklearn.preprocessing.scale` and - :class:`sklearn.preprocessing.Scaler` work on sparse matrices by - `Olivier Grisel`_ +- Make :func:`sklearn.preprocessing.scale` and + :class:`sklearn.preprocessing.Scaler` work on sparse matrices by + `Olivier Grisel`_ - - Feature importances using decision trees and/or forest of trees, - by `Gilles Louppe`_. +- Feature importances using decision trees and/or forest of trees, + by `Gilles Louppe`_. - - Parallel implementation of forests of randomized trees by - `Gilles Louppe`_. +- Parallel implementation of forests of randomized trees by + `Gilles Louppe`_. - - :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train - sets as well as the test sets by `Olivier Grisel`_. +- :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train + sets as well as the test sets by `Olivier Grisel`_. - - Errors in the build of the documentation fixed by `Andreas Müller`_. +- Errors in the build of the documentation fixed by `Andreas Müller`_. API changes summary @@ -4659,55 +4664,55 @@ API changes summary Here are the code migration instructions when upgrading from scikit-learn version 0.9: - - Some estimators that may overwrite their inputs to save memory previously - had ``overwrite_`` parameters; these have been replaced with ``copy_`` - parameters with exactly the opposite meaning. +- Some estimators that may overwrite their inputs to save memory previously + had ``overwrite_`` parameters; these have been replaced with ``copy_`` + parameters with exactly the opposite meaning. - This particularly affects some of the estimators in :mod:`linear_model`. - The default behavior is still to copy everything passed in. + This particularly affects some of the estimators in :mod:`linear_model`. + The default behavior is still to copy everything passed in. 
-  - The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no
-    longer supports loading two files at once; use ``load_svmlight_files``
-    instead. Also, the (unused) ``buffer_mb`` parameter is gone.
+- The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no
+  longer supports loading two files at once; use ``load_svmlight_files``
+  instead (see the sketch below). Also, the (unused) ``buffer_mb``
+  parameter is gone.

-  - Sparse estimators in the :ref:`sgd` module use dense parameter vector
-    ``coef_`` instead of ``sparse_coef_``. This significantly improves
-    test time performance.
+- Sparse estimators in the :ref:`sgd` module use dense parameter vector
+  ``coef_`` instead of ``sparse_coef_``. This significantly improves
+  test time performance.

-  - The :ref:`covariance` module now has a robust estimator of
-    covariance, the Minimum Covariance Determinant estimator.
+- The :ref:`covariance` module now has a robust estimator of
+  covariance, the Minimum Covariance Determinant estimator.

-  - Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored
-    but the changes are backwards compatible. They have been moved to the
-    :mod:`metrics.cluster.supervised`, along with
-    :mod:`metrics.cluster.unsupervised` which contains the Silhouette
-    Coefficient.
+- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored
+  but the changes are backwards compatible. They have been moved to
+  :mod:`metrics.cluster.supervised`, along with
+  :mod:`metrics.cluster.unsupervised` which contains the Silhouette
+  Coefficient.

-  - The ``permutation_test_score`` function now behaves the same way as
-    ``cross_val_score`` (i.e. uses the mean score across the folds.)
+- The ``permutation_test_score`` function now behaves the same way as
+  ``cross_val_score`` (i.e. uses the mean score across the folds).

-  - Cross Validation generators now use integer indices (``indices=True``)
-    by default instead of boolean masks. This make it more intuitive to
-    use with sparse matrix data.
+- Cross Validation generators now use integer indices (``indices=True``)
+  by default instead of boolean masks. This makes it more intuitive to
+  use with sparse matrix data.

-  - The functions used for sparse coding, ``sparse_encode`` and
-    ``sparse_encode_parallel`` have been combined into
-    :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays
-    have been transposed for consistency with the matrix factorization setting,
-    as opposed to the regression setting.
+- The functions used for sparse coding, ``sparse_encode`` and
+  ``sparse_encode_parallel`` have been combined into
+  :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays
+  have been transposed for consistency with the matrix factorization setting,
+  as opposed to the regression setting.

-  - Fixed an off-by-one error in the SVMlight/LibSVM file format handling;
-    files generated using :func:`sklearn.datasets.dump_svmlight_file` should be
-    re-generated. (They should continue to work, but accidentally had one
-    extra column of zeros prepended.)
+- Fixed an off-by-one error in the SVMlight/LibSVM file format handling;
+  files generated using :func:`sklearn.datasets.dump_svmlight_file` should be
+  re-generated. (They should continue to work, but accidentally had one
+  extra column of zeros prepended.)

-  - ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``.
+- ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``.
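
A minimal sketch of the ``load_svmlight_files`` replacement mentioned above
(the file names are placeholders for files of your own, not data shipped
with scikit-learn)::

    from sklearn.datasets import load_svmlight_files

    # Both files are parsed with a common feature space; the X/y pairs are
    # returned in the order the file names are given.
    X_train, y_train, X_test, y_test = load_svmlight_files(
        ("train.svmlight", "test.svmlight"))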
- - :func:`sklearn.utils.extmath.fast_svd` has been renamed - :func:`sklearn.utils.extmath.randomized_svd` and the default - oversampling is now fixed to 10 additional random vectors instead - of doubling the number of components to extract. The new behavior - follows the reference paper. +- :func:`sklearn.utils.extmath.fast_svd` has been renamed + :func:`sklearn.utils.extmath.randomized_svd` and the default + oversampling is now fixed to 10 additional random vectors instead + of doubling the number of components to extract. The new behavior + follows the reference paper. People @@ -4789,84 +4794,84 @@ This release also includes the dictionary-learning work developed by Changelog --------- - - New :ref:`manifold` module by `Jake Vanderplas`_ and - `Fabian Pedregosa`_. +- New :ref:`manifold` module by `Jake Vanderplas`_ and + `Fabian Pedregosa`_. - - New :ref:`Dirichlet Process ` Gaussian Mixture - Model by `Alexandre Passos`_ +- New :ref:`Dirichlet Process ` Gaussian Mixture + Model by `Alexandre Passos`_ - - :ref:`neighbors` module refactoring by `Jake Vanderplas`_ : - general refactoring, support for sparse matrices in input, speed and - documentation improvements. See the next section for a full list of API - changes. +- :ref:`neighbors` module refactoring by `Jake Vanderplas`_ : + general refactoring, support for sparse matrices in input, speed and + documentation improvements. See the next section for a full list of API + changes. - - Improvements on the :ref:`feature_selection` module by - `Gilles Louppe`_ : refactoring of the RFE classes, documentation - rewrite, increased efficiency and minor API changes. +- Improvements on the :ref:`feature_selection` module by + `Gilles Louppe`_ : refactoring of the RFE classes, documentation + rewrite, increased efficiency and minor API changes. - - :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and - `Alexandre Gramfort`_ +- :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and + `Alexandre Gramfort`_ - - Printing an estimator now behaves independently of architectures - and Python version thanks to :user:`Jean Kossaifi `. +- Printing an estimator now behaves independently of architectures + and Python version thanks to :user:`Jean Kossaifi `. - - :ref:`Loader for libsvm/svmlight format ` by - `Mathieu Blondel`_ and `Lars Buitinck`_ +- :ref:`Loader for libsvm/svmlight format ` by + `Mathieu Blondel`_ and `Lars Buitinck`_ - - Documentation improvements: thumbnails in - example gallery by `Fabian Pedregosa`_. +- Documentation improvements: thumbnails in + example gallery by `Fabian Pedregosa`_. - - Important bugfixes in :ref:`svm` module (segfaults, bad - performance) by `Fabian Pedregosa`_. +- Important bugfixes in :ref:`svm` module (segfaults, bad + performance) by `Fabian Pedregosa`_. - - Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes` - by `Lars Buitinck`_ +- Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes` + by `Lars Buitinck`_ - - Text feature extraction optimizations by Lars Buitinck +- Text feature extraction optimizations by Lars Buitinck - - Chi-Square feature selection - (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_. +- Chi-Square feature selection + (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_. 
- - :ref:`sample_generators` module refactoring by `Gilles Louppe`_ +- :ref:`sample_generators` module refactoring by `Gilles Louppe`_ - - :ref:`multiclass` by `Mathieu Blondel`_ +- :ref:`multiclass` by `Mathieu Blondel`_ - - Ball tree rewrite by `Jake Vanderplas`_ +- Ball tree rewrite by `Jake Vanderplas`_ - - Implementation of :ref:`dbscan` algorithm by Robert Layton +- Implementation of :ref:`dbscan` algorithm by Robert Layton - - Kmeans predict and transform by Robert Layton +- Kmeans predict and transform by Robert Layton - - Preprocessing module refactoring by `Olivier Grisel`_ +- Preprocessing module refactoring by `Olivier Grisel`_ - - Faster mean shift by Conrad Lee +- Faster mean shift by Conrad Lee - - New ``Bootstrap``, :ref:`ShuffleSplit` and various other - improvements in cross validation schemes by `Olivier Grisel`_ and - `Gael Varoquaux`_ +- New ``Bootstrap``, :ref:`ShuffleSplit` and various other + improvements in cross validation schemes by `Olivier Grisel`_ and + `Gael Varoquaux`_ - - Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_ +- Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_ - - Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_ +- Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_ - - Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_ +- Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_ - - Implementation of :class:`linear_model.LassoLarsCV` - (cross-validated Lasso solver using the Lars algorithm) and - :class:`linear_model.LassoLarsIC` (BIC/AIC model - selection in Lars) by `Gael Varoquaux`_ - and `Alexandre Gramfort`_ +- Implementation of :class:`linear_model.LassoLarsCV` + (cross-validated Lasso solver using the Lars algorithm) and + :class:`linear_model.LassoLarsIC` (BIC/AIC model + selection in Lars) by `Gael Varoquaux`_ + and `Alexandre Gramfort`_ - - Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu +- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu - - Distance helper functions :func:`metrics.pairwise.pairwise_distances` - and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton +- Distance helper functions :func:`metrics.pairwise.pairwise_distances` + and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton - - :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer. +- :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer. - - :ref:`mldata` utilities by Pietro Berkes. +- :ref:`mldata` utilities by Pietro Berkes. - - :ref:`olivetti_faces` by `David Warde-Farley`_. +- :ref:`olivetti_faces` by `David Warde-Farley`_. API changes summary @@ -4875,71 +4880,71 @@ API changes summary Here are the code migration instructions when upgrading from scikit-learn version 0.8: - - The ``scikits.learn`` package was renamed ``sklearn``. There is - still a ``scikits.learn`` package alias for backward compatibility. +- The ``scikits.learn`` package was renamed ``sklearn``. There is + still a ``scikits.learn`` package alias for backward compatibility. - Third-party projects with a dependency on scikit-learn 0.9+ should - upgrade their codebase. For instance, under Linux / MacOSX just run - (make a backup first!):: + Third-party projects with a dependency on scikit-learn 0.9+ should + upgrade their codebase. 
For instance, under Linux / MacOSX just run
-    (make a backup first!)::
+  Third-party projects with a dependency on scikit-learn 0.9+ should
+  upgrade their codebase. For instance, under Linux / MacOSX just run
+  (make a backup first!)::

     find -name "*.py" | xargs sed -i 's/\bscikits.learn\b/sklearn/g'

-  - Estimators no longer accept model parameters as ``fit`` arguments:
-    instead all parameters must be only be passed as constructor
-    arguments or using the now public ``set_params`` method inherited
-    from :class:`base.BaseEstimator`.
+- Estimators no longer accept model parameters as ``fit`` arguments:
+  instead all parameters must only be passed as constructor
+  arguments or using the now public ``set_params`` method inherited
+  from :class:`base.BaseEstimator`.

-    Some estimators can still accept keyword arguments on the ``fit``
-    but this is restricted to data-dependent values (e.g. a Gram matrix
-    or an affinity matrix that are precomputed from the ``X`` data matrix.
+  Some estimators can still accept keyword arguments on ``fit``,
+  but this is restricted to data-dependent values (e.g. a Gram matrix
+  or an affinity matrix that are precomputed from the ``X`` data matrix).

-  - The ``cross_val`` package has been renamed to ``cross_validation``
-    although there is also a ``cross_val`` package alias in place for
-    backward compatibility.
+- The ``cross_val`` package has been renamed to ``cross_validation``
+  although there is also a ``cross_val`` package alias in place for
+  backward compatibility.

-    Third-party projects with a dependency on scikit-learn 0.9+ should
-    upgrade their codebase. For instance, under Linux / MacOSX just run
-    (make a backup first!)::
+  Third-party projects with a dependency on scikit-learn 0.9+ should
+  upgrade their codebase. For instance, under Linux / MacOSX just run
+  (make a backup first!)::

     find -name "*.py" | xargs sed -i 's/\bcross_val\b/cross_validation/g'

-  - The ``score_func`` argument of the
-    ``sklearn.cross_validation.cross_val_score`` function is now expected
-    to accept ``y_test`` and ``y_predicted`` as only arguments for
-    classification and regression tasks or ``X_test`` for unsupervised
-    estimators.
+- The ``score_func`` argument of the
+  ``sklearn.cross_validation.cross_val_score`` function is now expected
+  to accept ``y_test`` and ``y_predicted`` as its only arguments for
+  classification and regression tasks, or ``X_test`` for unsupervised
+  estimators.

-  - ``gamma`` parameter for support vector machine algorithms is set
-    to ``1 / n_features`` by default, instead of ``1 / n_samples``.
+- The ``gamma`` parameter for support vector machine algorithms is set
+  to ``1 / n_features`` by default, instead of ``1 / n_samples``.

-  - The ``sklearn.hmm`` has been marked as orphaned: it will be removed
-    from scikit-learn in version 0.11 unless someone steps up to
-    contribute documentation, examples and fix lurking numerical
-    stability issues.
+- The ``sklearn.hmm`` module has been marked as orphaned: it will be removed
+  from scikit-learn in version 0.11 unless someone steps up to
+  contribute documentation, examples and fix lurking numerical
+  stability issues.

-  - ``sklearn.neighbors`` has been made into a submodule. The two previously
-    available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``
-    have been marked as deprecated. Their functionality has been divided
-    among five new classes: ``NearestNeighbors`` for unsupervised neighbors
-    searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
-    for supervised classification problems, and ``KNeighborsRegressor``
-    & ``RadiusNeighborsRegressor`` for supervised regression problems.
+- ``sklearn.neighbors`` has been made into a submodule. The two previously
+  available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``,
+  have been marked as deprecated. Their functionality has been divided
+  among five new classes: ``NearestNeighbors`` for unsupervised neighbors
+  searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
+  for supervised classification problems, and ``KNeighborsRegressor``
+  & ``RadiusNeighborsRegressor`` for supervised regression problems.

-  - ``sklearn.ball_tree.BallTree`` has been moved to
-    ``sklearn.neighbors.BallTree``. Using the former will generate a warning.
+- ``sklearn.ball_tree.BallTree`` has been moved to
+  ``sklearn.neighbors.BallTree``. Using the former will generate a warning.

-  - ``sklearn.linear_model.LARS()`` and related classes (LassoLARS,
-    LassoLARSCV, etc.) have been renamed to
-    ``sklearn.linear_model.Lars()``.
+- ``sklearn.linear_model.LARS()`` and related classes (LassoLARS,
+  LassoLARSCV, etc.) have been renamed to
+  ``sklearn.linear_model.Lars()``.

-  - All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y
-    parameter, which by default is None. If not given, the result is the distance
-    (or kernel similarity) between each sample in Y. If given, the result is the
-    pairwise distance (or kernel similarity) between samples in X to Y.
+- All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y
+  parameter, which by default is None. If not given, the result is the distance
+  (or kernel similarity) between the samples in X. If given, the result is the
+  pairwise distance (or kernel similarity) between the samples in X and Y.

-  - ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``,
-    and by default returns the pairwise distance. For the component wise distance,
-    set the parameter ``sum_over_features`` to ``False``.
+- ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distances``,
+  and by default returns the pairwise distance. For the componentwise distance,
+  set the parameter ``sum_over_features`` to ``False``.

 Backward compatibility package aliases and other deprecated classes and
 functions will be removed in version 0.11.
@@ -4950,42 +4955,42 @@ People
 ------

 38 people contributed to this release.
-  - 387 `Vlad Niculae`_
-  - 320 `Olivier Grisel`_
-  - 192 `Lars Buitinck`_
-  - 179 `Gael Varoquaux`_
-  - 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
-  - 127 `Jake Vanderplas`_
-  - 120 `Mathieu Blondel`_
-  - 85 `Alexandre Passos`_
-  - 67 `Alexandre Gramfort`_
-  - 57 `Peter Prettenhofer`_
-  - 56 `Gilles Louppe`_
-  - 42 Robert Layton
-  - 38 Nelle Varoquaux
-  - 32 :user:`Jean Kossaifi `
-  - 30 Conrad Lee
-  - 22 Pietro Berkes
-  - 18 andy
-  - 17 David Warde-Farley
-  - 12 Brian Holt
-  - 11 Robert
-  - 8 Amit Aides
-  - 8 :user:`Virgile Fritsch `
-  - 7 `Yaroslav Halchenko`_
-  - 6 Salvatore Masecchia
-  - 5 Paolo Losi
-  - 4 Vincent Schut
-  - 3 Alexis Metaireau
-  - 3 Bryan Silverthorn
-  - 3 `Andreas Müller`_
-  - 2 Minwoo Jake Lee
-  - 1 Emmanuelle Gouillart
-  - 1 Keith Goodman
-  - 1 Lucas Wiman
-  - 1 `Nicolas Pinto`_
-  - 1 Thouis (Ray) Jones
-  - 1 Tim Sheerman-Chase
+- 387 `Vlad Niculae`_
+- 320 `Olivier Grisel`_
+- 192 `Lars Buitinck`_
+- 179 `Gael Varoquaux`_
+- 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
+- 127 `Jake Vanderplas`_
+- 120 `Mathieu Blondel`_
+- 85 `Alexandre Passos`_
+- 67 `Alexandre Gramfort`_
+- 57 `Peter Prettenhofer`_
+- 56 `Gilles Louppe`_
+- 42 Robert Layton
+- 38 Nelle Varoquaux
+- 32 :user:`Jean Kossaifi `
+- 30 Conrad Lee
+- 22 Pietro Berkes
+- 18 andy
+- 17 David Warde-Farley
+- 12 Brian Holt
+- 11 Robert
+- 8 Amit Aides
+- 8 :user:`Virgile Fritsch `
+- 7 `Yaroslav Halchenko`_
+- 6 Salvatore Masecchia
+- 5 Paolo Losi
+- 4 Vincent Schut
+- 3 Alexis Metaireau
+- 3 Bryan Silverthorn
+- 3 `Andreas Müller`_
+- 2 Minwoo Jake Lee
+- 1 Emmanuelle Gouillart
+- 1 Keith Goodman
+- 1 Lucas Wiman
+- 1 `Nicolas Pinto`_
+- 1 Thouis (Ray) Jones
+- 1 Tim Sheerman-Chase


 .. _changes_0_8:
@@ -5008,53 +5013,53 @@ Changelog

 Several new modules were introduced during this release:

-  - New :ref:`hierarchical_clustering` module by Vincent Michel,
-    `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.
+- New :ref:`hierarchical_clustering` module by Vincent Michel,
+  `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.

-  - :ref:`kernel_pca` implementation by `Mathieu Blondel`_
+- :ref:`kernel_pca` implementation by `Mathieu Blondel`_

-  - :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.
+- :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.

-  - New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.
+- New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.

-  - :ref:`NMF` module `Vlad Niculae`_
+- :ref:`NMF` module by `Vlad Niculae`_

-  - Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
-    :user:`Virgile Fritsch ` in the :ref:`covariance` module.
+- Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
+  :user:`Virgile Fritsch ` in the :ref:`covariance` module.

 Some other modules benefited from significant improvements or cleanups.

-  - Initial support for Python 3: builds and imports cleanly,
-    some modules are usable while others have failing tests by `Fabian Pedregosa`_.
+- Initial support for Python 3: builds and imports cleanly;
+  some modules are usable while others have failing tests, by `Fabian Pedregosa`_.

-  - :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_.
+- :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_.

-  - Guide :ref:`performance-howto` by `Olivier Grisel`_.
+- Guide :ref:`performance-howto` by `Olivier Grisel`_.

-  - Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck.
+- Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck.

-  - bug and style fixing in :ref:`k_means` algorithm by Jan Schlüter.
+- Bug and style fixes in the :ref:`k_means` algorithm by Jan Schlüter.

-  - Add attribute converged to Gaussian Mixture Models by Vincent Schut.
+- Add attribute converged to Gaussian Mixture Models by Vincent Schut.

-  - Implemented ``transform``, ``predict_log_proba`` in
-    :class:`discriminant_analysis.LinearDiscriminantAnalysis` By `Mathieu Blondel`_.
+- Implemented ``transform`` and ``predict_log_proba`` in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` by `Mathieu Blondel`_.

-  - Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
-    `Gael Varoquaux`_ and Amit Aides.
+- Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
+  `Gael Varoquaux`_ and Amit Aides.

-  - Refactored SGD module (removed code duplication, better variable naming),
-    added interface for sample weight by `Peter Prettenhofer`_.
+- Refactored SGD module (removed code duplication, better variable naming),
+  added interface for sample weight by `Peter Prettenhofer`_.

-  - Wrapped BallTree with Cython by Thouis (Ray) Jones.
+- Wrapped BallTree with Cython by Thouis (Ray) Jones.

-  - Added function :func:`svm.l1_min_c` by Paolo Losi.
+- Added function :func:`svm.l1_min_c` by Paolo Losi.

-  - Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
-    `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
-    `Fabian Pedregosa`_.
+- Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
+  `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
+  `Fabian Pedregosa`_.


 People
 ------

 People that made this release possible preceded by number of commits:

-  - 159 `Olivier Grisel`_
-  - 96 `Gael Varoquaux`_
-  - 96 `Vlad Niculae`_
-  - 94 `Fabian Pedregosa`_
-  - 36 `Alexandre Gramfort`_
-  - 32 Paolo Losi
-  - 31 `Edouard Duchesnay`_
-  - 30 `Mathieu Blondel`_
-  - 25 `Peter Prettenhofer`_
-  - 22 `Nicolas Pinto`_
-  - 11 :user:`Virgile Fritsch `
+- 159 `Olivier Grisel`_
+- 96 `Gael Varoquaux`_
+- 96 `Vlad Niculae`_
+- 94 `Fabian Pedregosa`_
+- 36 `Alexandre Gramfort`_
+- 32 Paolo Losi
+- 31 `Edouard Duchesnay`_
+- 30 `Mathieu Blondel`_
+- 25 `Peter Prettenhofer`_
+- 22 `Nicolas Pinto`_
+- 11 :user:`Virgile Fritsch `
 - 7 Lars Buitinck
 - 6 Vincent Michel
 - 5 `Bertrand Thirion`_
@@ -5107,56 +5112,56 @@ preceding release, no new modules were added to this release.

 Changelog
 ---------

-  - Performance improvements for Gaussian Mixture Model sampling [Jan
-    Schlüter].
+- Performance improvements for Gaussian Mixture Model sampling [Jan
+  Schlüter].

-  - Implementation of efficient leave-one-out cross-validated Ridge in
-    :class:`linear_model.RidgeCV` [`Mathieu Blondel`_]
+- Implementation of efficient leave-one-out cross-validated Ridge in
+  :class:`linear_model.RidgeCV` [`Mathieu Blondel`_]; a short sketch
+  follows below.

-  - Better handling of collinearity and early stopping in
-    :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
-    Pedregosa`_].
+- Better handling of collinearity and early stopping in
+  :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
+  Pedregosa`_].

-  - Fixes for liblinear ordering of labels and sign of coefficients
-    [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].
+- Fixes for liblinear ordering of labels and sign of coefficients
+  [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].
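
A short, hedged sketch of the leave-one-out ``RidgeCV`` mentioned above
(the synthetic data and the ``alphas`` grid are arbitrary illustrative
choices)::

    import numpy as np
    from sklearn.linear_model import RidgeCV

    rng = np.random.RandomState(0)
    X = rng.randn(50, 3)
    y = X.dot([1.0, -2.0, 0.5]) + 0.1 * rng.randn(50)
    # with the default cv=None, the candidate alphas are scored by
    # efficient leave-one-out cross-validation
    reg = RidgeCV(alphas=[0.1, 1.0, 10.0]).fit(X, y)
    print(reg.alpha_)  # the selected regularization strength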
-  - Performance improvements for Nearest Neighbors algorithm in
-    high-dimensional spaces [`Fabian Pedregosa`_].
+- Performance improvements for the Nearest Neighbors algorithm in
+  high-dimensional spaces [`Fabian Pedregosa`_].

-  - Performance improvements for :class:`cluster.KMeans` [`Gael
-    Varoquaux`_ and `James Bergstra`_].
+- Performance improvements for :class:`cluster.KMeans` [`Gael
+  Varoquaux`_ and `James Bergstra`_].

-  - Sanity checks for SVM-based classes [`Mathieu Blondel`_].
+- Sanity checks for SVM-based classes [`Mathieu Blondel`_].

-  - Refactoring of :class:`neighbors.NeighborsClassifier` and
-    :func:`neighbors.kneighbors_graph`: added different algorithms for
-    the k-Nearest Neighbor Search and implemented a more stable
-    algorithm for finding barycenter weights. Also added some
-    developer documentation for this module, see
-    `notes_neighbors
-    `_ for more information [`Fabian Pedregosa`_].
+- Refactoring of :class:`neighbors.NeighborsClassifier` and
+  :func:`neighbors.kneighbors_graph`: added different algorithms for
+  the k-Nearest Neighbor Search and implemented a more stable
+  algorithm for finding barycenter weights. Also added some
+  developer documentation for this module, see
+  `notes_neighbors
+  `_ for more information [`Fabian Pedregosa`_].
+  (A short usage sketch of ``kneighbors_graph`` follows below.)

-  - Documentation improvements: Added :class:`pca.RandomizedPCA` and
-    :class:`linear_model.LogisticRegression` to the class
-    reference. Also added references of matrices used for clustering
-    and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
-    Blondel`_, `Olivier Grisel`_, Virgile Fritsch , Emmanuelle
-    Gouillart]
+- Documentation improvements: added :class:`pca.RandomizedPCA` and
+  :class:`linear_model.LogisticRegression` to the class
+  reference. Also added references of matrices used for clustering
+  and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
+  Blondel`_, `Olivier Grisel`_, Virgile Fritsch, Emmanuelle
+  Gouillart]

-  - Binded decision_function in classes that make use of liblinear_,
-    dense and sparse variants, like :class:`svm.LinearSVC` or
-    :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].
+- Bound ``decision_function`` in classes that make use of liblinear_,
+  dense and sparse variants, like :class:`svm.LinearSVC` or
+  :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].

-  - Performance and API improvements to
-    :func:`metrics.euclidean_distances` and to
-    :class:`pca.RandomizedPCA` [`James Bergstra`_].
+- Performance and API improvements to
+  :func:`metrics.euclidean_distances` and to
+  :class:`pca.RandomizedPCA` [`James Bergstra`_].

-  - Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche]
+- Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche]

-  - Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
-    [`Ron Weiss`_].
+- Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
+  [`Ron Weiss`_].
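
A hedged sketch of :func:`neighbors.kneighbors_graph` on made-up points
(``mode='connectivity'`` yields a sparse 0/1 adjacency matrix;
``mode='distance'`` would store the distances instead)::

    import numpy as np
    from sklearn.neighbors import kneighbors_graph

    X = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [5.0, 5.0]])
    A = kneighbors_graph(X, n_neighbors=2, mode='connectivity')
    print(A.toarray())  # row i marks the 2 nearest neighbors of sample i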
-  - Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng]
+- Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng]


 People
 ------

 People that made this release possible preceded by number of commits:

-  - 85 `Fabian Pedregosa`_
-  - 67 `Mathieu Blondel`_
-  - 20 `Alexandre Gramfort`_
-  - 19 `James Bergstra`_
-  - 14 Dan Yamins
-  - 13 `Olivier Grisel`_
-  - 12 `Gael Varoquaux`_
-  - 4 `Edouard Duchesnay`_
-  - 4 `Ron Weiss`_
-  - 2 Satrajit Ghosh
-  - 2 Vincent Dubourg
-  - 1 Emmanuelle Gouillart
-  - 1 Kamel Ibn Hassen Derouiche
-  - 1 Paolo Losi
-  - 1 VirgileFritsch
-  - 1 `Yaroslav Halchenko`_
-  - 1 Xinfan Meng
+- 85 `Fabian Pedregosa`_
+- 67 `Mathieu Blondel`_
+- 20 `Alexandre Gramfort`_
+- 19 `James Bergstra`_
+- 14 Dan Yamins
+- 13 `Olivier Grisel`_
+- 12 `Gael Varoquaux`_
+- 4 `Edouard Duchesnay`_
+- 4 `Ron Weiss`_
+- 2 Satrajit Ghosh
+- 2 Vincent Dubourg
+- 1 Emmanuelle Gouillart
+- 1 Kamel Ibn Hassen Derouiche
+- 1 Paolo Losi
+- 1 VirgileFritsch
+- 1 `Yaroslav Halchenko`_
+- 1 Xinfan Meng


 .. _changes_0_6:
@@ -5199,56 +5204,56 @@ applications to real-world datasets.

 Changelog
 ---------

-  - New `stochastic gradient
-    `_ descent
-    module by Peter Prettenhofer. The module comes with complete
-    documentation and examples.
+- New `stochastic gradient
+  `_ descent
+  module by Peter Prettenhofer. The module comes with complete
+  documentation and examples.

-  - Improved svm module: memory consumption has been reduced by 50%,
-    heuristic to automatically set class weights, possibility to
-    assign weights to samples (see
-    :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).
+- Improved svm module: memory consumption has been reduced by 50%,
+  a heuristic to automatically set class weights, and the possibility to
+  assign weights to samples (see
+  :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).

-  - New :ref:`gaussian_process` module by Vincent Dubourg. This module
-    also has great documentation and some very neat examples. See
-    example_gaussian_process_plot_gp_regression.py or
-    example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
-    for a taste of what can be done.
+- New :ref:`gaussian_process` module by Vincent Dubourg. This module
+  also has great documentation and some very neat examples. See
+  example_gaussian_process_plot_gp_regression.py or
+  example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
+  for a taste of what can be done.

-  - It is now possible to use liblinear’s Multi-class SVC (option
-    multi_class in :class:`svm.LinearSVC`)
+- It is now possible to use liblinear’s Multi-class SVC (option
+  ``multi_class`` in :class:`svm.LinearSVC`)

-  - New features and performance improvements of text feature
-    extraction.
+- New features and performance improvements of text feature
+  extraction.

-  - Improved sparse matrix support, both in main classes
-    (:class:`grid_search.GridSearchCV`) as in modules
-    sklearn.svm.sparse and sklearn.linear_model.sparse.
+- Improved sparse matrix support, both in main classes
+  (:class:`grid_search.GridSearchCV`) and in modules
+  sklearn.svm.sparse and sklearn.linear_model.sparse.

-  - Lots of cool new examples and a new section that uses real-world
-    datasets was created. These include:
These include: - :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`, - :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`, - :ref:`sphx_glr_auto_examples_applications_svm_gui.py`, - :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and - others. +- Lots of cool new examples and a new section that uses real-world + datasets was created. These include: + :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`, + :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`, + :ref:`sphx_glr_auto_examples_applications_svm_gui.py`, + :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and + others. - - Faster :ref:`least_angle_regression` algorithm. It is now 2x - faster than the R version on worst case and up to 10x times faster - on some cases. +- Faster :ref:`least_angle_regression` algorithm. It is now 2x + faster than the R version on worst case and up to 10x times faster + on some cases. - - Faster coordinate descent algorithm. In particular, the full path - version of lasso (:func:`linear_model.lasso_path`) is more than - 200x times faster than before. +- Faster coordinate descent algorithm. In particular, the full path + version of lasso (:func:`linear_model.lasso_path`) is more than + 200x times faster than before. - - It is now possible to get probability estimates from a - :class:`linear_model.LogisticRegression` model. +- It is now possible to get probability estimates from a + :class:`linear_model.LogisticRegression` model. - - module renaming: the glm module has been renamed to linear_model, - the gmm module has been included into the more general mixture - model and the sgd module has been included in linear_model. +- module renaming: the glm module has been renamed to linear_model, + the gmm module has been included into the more general mixture + model and the sgd module has been included in linear_model. - - Lots of bug fixes and documentation improvements. +- Lots of bug fixes and documentation improvements. People @@ -5298,86 +5303,86 @@ Changelog New classes ----------- - - Support for sparse matrices in some classifiers of modules - ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`, - :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`, - :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`) +- Support for sparse matrices in some classifiers of modules + ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`, + :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`, + :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`) - - New :class:`pipeline.Pipeline` object to compose different estimators. +- New :class:`pipeline.Pipeline` object to compose different estimators. - - Recursive Feature Elimination routines in module - :ref:`feature_selection`. +- Recursive Feature Elimination routines in module + :ref:`feature_selection`. - - Addition of various classes capable of cross validation in the - linear_model module (:class:`linear_model.LassoCV`, :class:`linear_model.ElasticNetCV`, - etc.). +- Addition of various classes capable of cross validation in the + linear_model module (:class:`linear_model.LassoCV`, :class:`linear_model.ElasticNetCV`, + etc.). - - New, more efficient LARS algorithm implementation. The Lasso - variant of the algorithm is also implemented. See - :class:`linear_model.lars_path`, :class:`linear_model.Lars` and - :class:`linear_model.LassoLars`. 
+- New, more efficient LARS algorithm implementation. The Lasso + variant of the algorithm is also implemented. See + :class:`linear_model.lars_path`, :class:`linear_model.Lars` and + :class:`linear_model.LassoLars`. - - New Hidden Markov Models module (see classes - :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`, - :class:`hmm.GMMHMM`) +- New Hidden Markov Models module (see classes + :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`, + :class:`hmm.GMMHMM`) - - New module feature_extraction (see :ref:`class reference - `) +- New module feature_extraction (see :ref:`class reference + `) - - New FastICA algorithm in module sklearn.fastica +- New FastICA algorithm in module sklearn.fastica Documentation ------------- - - Improved documentation for many modules, now separating - narrative documentation from the class reference. As an example, - see `documentation for the SVM module - `_ and the - complete `class reference - `_. +- Improved documentation for many modules, now separating + narrative documentation from the class reference. As an example, + see `documentation for the SVM module + `_ and the + complete `class reference + `_. Fixes ----- - - API changes: adhere variable names to PEP-8, give more - meaningful names. +- API changes: adhere variable names to PEP-8, give more + meaningful names. - - Fixes for svm module to run on a shared memory context - (multiprocessing). +- Fixes for svm module to run on a shared memory context + (multiprocessing). - - It is again possible to generate latex (and thus PDF) from the - sphinx docs. +- It is again possible to generate latex (and thus PDF) from the + sphinx docs. Examples -------- - - new examples using some of the mlcomp datasets: - ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and - :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py` +- new examples using some of the mlcomp datasets: + ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and + :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py` - - Many more examples. `See here - `_ - the full list of examples. +- Many more examples. `See here + `_ + the full list of examples. External dependencies --------------------- - - Joblib is now a dependency of this package, although it is - shipped with (sklearn.externals.joblib). +- Joblib is now a dependency of this package, although it is + shipped with (sklearn.externals.joblib). Removed modules --------------- - - Module ann (Artificial Neural Networks) has been removed from - the distribution. Users wanting this sort of algorithms should - take a look into pybrain. +- Module ann (Artificial Neural Networks) has been removed from + the distribution. Users wanting this sort of algorithms should + take a look into pybrain. Misc ---- - - New sphinx theme for the web page. +- New sphinx theme for the web page. Authors @@ -5411,37 +5416,37 @@ Changelog Major changes in this release include: - - Coordinate Descent algorithm (Lasso, ElasticNet) refactoring & - speed improvements (roughly 100x times faster). +- Coordinate Descent algorithm (Lasso, ElasticNet) refactoring & + speed improvements (roughly 100x times faster). - - Coordinate Descent Refactoring (and bug fixing) for consistency - with R's package GLMNET. +- Coordinate Descent Refactoring (and bug fixing) for consistency + with R's package GLMNET. - - New metrics module. +- New metrics module. - - New GMM module contributed by Ron Weiss. 
+- New GMM module contributed by Ron Weiss.
 
-  - Implementation of the LARS algorithm (without Lasso variant for now).
+- Implementation of the LARS algorithm (without Lasso variant for now).
 
-  - feature_selection module redesign.
+- feature_selection module redesign.
 
-  - Migration to GIT as version control system.
+- Migration to GIT as version control system.
 
-  - Removal of obsolete attrselect module.
+- Removal of obsolete attrselect module.
 
-  - Rename of private compiled extensions (added underscore).
+- Rename of private compiled extensions (added underscore).
 
-  - Removal of legacy unmaintained code.
+- Removal of legacy unmaintained code.
 
-  - Documentation improvements (both docstring and rst).
+- Documentation improvements (both docstring and rst).
 
-  - Improvement of the build system to (optionally) link with MKL.
-    Also, provide a lite BLAS implementation in case no system-wide BLAS is
-    found.
+- Improvement of the build system to (optionally) link with MKL.
+  Also, provide a lite BLAS implementation in case no system-wide BLAS is
+  found.
 
-  - Lots of new examples.
+- Lots of new examples.
 
-  - Many, many bug fixes ...
+- Many, many bug fixes ...
 
 
 Authors
 
From 7b7cc61cf0234afe1a721c39a396a270ce15caa0 Mon Sep 17 00:00:00 2001
From: Fang-Chieh Chou
Date: Mon, 24 Jul 2017 08:36:18 -0700
Subject: [PATCH 44/86] Update partial_dependence.py (#9434)

Minor fix on the _grid_from_X function. The emp_percentiles variable is
computed in the loop but does not actually change, so it should be
pulled out of the loop
---
 sklearn/ensemble/partial_dependence.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py
index d4ed3233f44e7..e8bfc2110bb90 100644
--- a/sklearn/ensemble/partial_dependence.py
+++ b/sklearn/ensemble/partial_dependence.py
@@ -53,13 +53,13 @@ def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
         raise ValueError('percentile values must be in [0, 1]')
 
     axes = []
+    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
     for col in range(X.shape[1]):
         uniques = np.unique(X[:, col])
         if uniques.shape[0] < grid_resolution:
             # feature has low resolution use unique vals
             axis = uniques
         else:
-            emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
             # create axis based on percentiles and grid resolution
             axis = np.linspace(emp_percentiles[0, col],
                                emp_percentiles[1, col],

From 3f7095fc8e9210b9819afe012d3b4f9acb52f635 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Mon, 24 Jul 2017 19:03:22 -0400
Subject: [PATCH 45/86] remove deprecated "plt.hold" that defaults to "on".
(#9444) --- examples/plot_kernel_ridge_regression.py | 1 - examples/svm/plot_svm_regression.py | 1 - 2 files changed, 2 deletions(-) diff --git a/examples/plot_kernel_ridge_regression.py b/examples/plot_kernel_ridge_regression.py index cb91908ed5f89..59e22ea3e6969 100644 --- a/examples/plot_kernel_ridge_regression.py +++ b/examples/plot_kernel_ridge_regression.py @@ -104,7 +104,6 @@ zorder=2, edgecolors=(0, 0, 0)) plt.scatter(X[:100], y[:100], c='k', label='data', zorder=1, edgecolors=(0, 0, 0)) -plt.hold('on') plt.plot(X_plot, y_svr, c='r', label='SVR (fit: %.3fs, predict: %.3fs)' % (svr_fit, svr_predict)) plt.plot(X_plot, y_kr, c='g', diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py index e46675eb0e069..54d2c0b54337b 100644 --- a/examples/svm/plot_svm_regression.py +++ b/examples/svm/plot_svm_regression.py @@ -34,7 +34,6 @@ # Look at the results lw = 2 plt.scatter(X, y, color='darkorange', label='data') -plt.hold('on') plt.plot(X, y_rbf, color='navy', lw=lw, label='RBF model') plt.plot(X, y_lin, color='c', lw=lw, label='Linear model') plt.plot(X, y_poly, color='cornflowerblue', lw=lw, label='Polynomial model') From 26032938391c20deee98de55b23aa84dda3ea9c9 Mon Sep 17 00:00:00 2001 From: Aarshay Jain Date: Tue, 25 Jul 2017 04:26:05 -0400 Subject: [PATCH 46/86] [MRG + 1] Multiclass Documentation update (#9419) --- doc/modules/multiclass.rst | 149 ++++++++++++++++++++++++------------- 1 file changed, 99 insertions(+), 50 deletions(-) diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst index 983fd416b5a05..5ae785400782d 100644 --- a/doc/modules/multiclass.rst +++ b/doc/modules/multiclass.rst @@ -17,42 +17,42 @@ The :mod:`sklearn.multiclass` module implements *meta-estimators* to solve by decomposing such problems into binary classification problems. Multitarget regression is also supported. - - **Multiclass classification** means a classification task with more than - two classes; e.g., classify a set of images of fruits which may be oranges, - apples, or pears. Multiclass classification makes the assumption that each - sample is assigned to one and only one label: a fruit can be either an - apple or a pear but not both at the same time. - - - **Multilabel classification** assigns to each sample a set of target - labels. This can be thought as predicting properties of a data-point - that are not mutually exclusive, such as topics that are relevant for a - document. A text might be about any of religion, politics, finance or - education at the same time or none of these. - - - **Multioutput regression** assigns each sample a set of target - values. This can be thought of as predicting several properties - for each data-point, such as wind direction and magnitude at a - certain location. - - - **Multioutput-multiclass classification** and **multi-task classification** - means that a single estimator has to handle several joint classification - tasks. This is both a generalization of the multi-label classification - task, which only considers binary classification, as well as a - generalization of the multi-class classification task. *The output format - is a 2d numpy array or sparse matrix.* - - The set of labels can be different for each output variable. 
- For instance, a sample could be assigned "pear" for an output variable that - takes possible values in a finite set of species such as "pear", "apple"; - and "blue" or "green" for a second output variable that takes possible values - in a finite set of colors such as "green", "red", "blue", "yellow"... - - This means that any classifiers handling multi-output - multiclass or multi-task classification tasks, - support the multi-label classification task as a special case. - Multi-task classification is similar to the multi-output - classification task with different model formulations. For - more information, see the relevant estimator documentation. +- **Multiclass classification** means a classification task with more than + two classes; e.g., classify a set of images of fruits which may be oranges, + apples, or pears. Multiclass classification makes the assumption that each + sample is assigned to one and only one label: a fruit can be either an + apple or a pear but not both at the same time. + +- **Multilabel classification** assigns to each sample a set of target + labels. This can be thought as predicting properties of a data-point + that are not mutually exclusive, such as topics that are relevant for a + document. A text might be about any of religion, politics, finance or + education at the same time or none of these. + +- **Multioutput regression** assigns each sample a set of target + values. This can be thought of as predicting several properties + for each data-point, such as wind direction and magnitude at a + certain location. + +- **Multioutput-multiclass classification** and **multi-task classification** + means that a single estimator has to handle several joint classification + tasks. This is both a generalization of the multi-label classification + task, which only considers binary classification, as well as a + generalization of the multi-class classification task. *The output format + is a 2d numpy array or sparse matrix.* + + The set of labels can be different for each output variable. + For instance, a sample could be assigned "pear" for an output variable that + takes possible values in a finite set of species such as "pear", "apple"; + and "blue" or "green" for a second output variable that takes possible values + in a finite set of colors such as "green", "red", "blue", "yellow"... + + This means that any classifiers handling multi-output + multiclass or multi-task classification tasks, + support the multi-label classification task as a special case. + Multi-task classification is similar to the multi-output + classification task with different model formulations. For + more information, see the relevant estimator documentation. All scikit-learn classifiers are capable of multiclass classification, but the meta-estimators offered by :mod:`sklearn.multiclass` @@ -64,20 +64,69 @@ Below is a summary of the classifiers supported by scikit-learn grouped by strategy; you don't need the meta-estimators in this class if you're using one of these, unless you want custom multiclass behavior: - - Inherently multiclass: :ref:`Naive Bayes `, - :ref:`LDA and QDA `, - :ref:`Decision Trees `, :ref:`Random Forests `, - :ref:`Nearest Neighbors `, - setting ``multi_class='multinomial'`` in - :class:`sklearn.linear_model.LogisticRegression`. - - Support multilabel: :ref:`Decision Trees `, - :ref:`Random Forests `, :ref:`Nearest Neighbors `. - - One-Vs-One: :class:`sklearn.svm.SVC`. - - One-Vs-All: all linear models except :class:`sklearn.svm.SVC`. 
-
-Some estimators also support multioutput-multiclass classification
-tasks :ref:`Decision Trees `, :ref:`Random Forests `,
-:ref:`Nearest Neighbors `.
+- **Inherently multiclass:**
+
+  - :class:`sklearn.naive_bayes.BernoulliNB`
+  - :class:`sklearn.tree.DecisionTreeClassifier`
+  - :class:`sklearn.tree.ExtraTreeClassifier`
+  - :class:`sklearn.ensemble.ExtraTreesClassifier`
+  - :class:`sklearn.naive_bayes.GaussianNB`
+  - :class:`sklearn.neighbors.KNeighborsClassifier`
+  - :class:`sklearn.semi_supervised.LabelPropagation`
+  - :class:`sklearn.semi_supervised.LabelSpreading`
+  - :class:`sklearn.discriminant_analysis.LinearDiscriminantAnalysis`
+  - :class:`sklearn.svm.LinearSVC` (setting multi_class="crammer_singer")
+  - :class:`sklearn.linear_model.LogisticRegression` (setting multi_class="multinomial")
+  - :class:`sklearn.linear_model.LogisticRegressionCV` (setting multi_class="multinomial")
+  - :class:`sklearn.neural_network.MLPClassifier`
+  - :class:`sklearn.neighbors.NearestCentroid`
+  - :class:`sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`
+  - :class:`sklearn.neighbors.RadiusNeighborsClassifier`
+  - :class:`sklearn.ensemble.RandomForestClassifier`
+  - :class:`sklearn.linear_model.RidgeClassifier`
+  - :class:`sklearn.linear_model.RidgeClassifierCV`
+
+
+- **Multiclass as One-Vs-One:**
+
+  - :class:`sklearn.svm.NuSVC`
+  - :class:`sklearn.svm.SVC`
+  - :class:`sklearn.gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_one")
+
+
+- **Multiclass as One-Vs-All:**
+
+  - :class:`sklearn.ensemble.GradientBoostingClassifier`
+  - :class:`sklearn.gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_rest")
+  - :class:`sklearn.svm.LinearSVC` (setting multi_class="ovr")
+  - :class:`sklearn.linear_model.LogisticRegression` (setting multi_class="ovr")
+  - :class:`sklearn.linear_model.LogisticRegressionCV` (setting multi_class="ovr")
+  - :class:`sklearn.linear_model.SGDClassifier`
+  - :class:`sklearn.linear_model.Perceptron`
+  - :class:`sklearn.linear_model.PassiveAggressiveClassifier`
+
+
+- **Support multilabel:**
+
+  - :class:`sklearn.tree.DecisionTreeClassifier`
+  - :class:`sklearn.tree.ExtraTreeClassifier`
+  - :class:`sklearn.ensemble.ExtraTreesClassifier`
+  - :class:`sklearn.neighbors.KNeighborsClassifier`
+  - :class:`sklearn.neural_network.MLPClassifier`
+  - :class:`sklearn.neighbors.RadiusNeighborsClassifier`
+  - :class:`sklearn.ensemble.RandomForestClassifier`
+  - :class:`sklearn.linear_model.RidgeClassifierCV`
+
+
+- **Support multiclass-multioutput:**
+
+  - :class:`sklearn.tree.DecisionTreeClassifier`
+  - :class:`sklearn.tree.ExtraTreeClassifier`
+  - :class:`sklearn.ensemble.ExtraTreesClassifier`
+  - :class:`sklearn.neighbors.KNeighborsClassifier`
+  - :class:`sklearn.neighbors.RadiusNeighborsClassifier`
+  - :class:`sklearn.ensemble.RandomForestClassifier`
+

 .. warning::

From 70cb5a7e1500935ef02208554378646840c80bc7 Mon Sep 17 00:00:00 2001
From: Adam Kleczewski
Date: Tue, 25 Jul 2017 04:28:01 -0400
Subject: [PATCH 47/86] [MRG+1] Classifier chain example fix (#9408)

---
 .../plot_classifier_chain_yeast.py            | 35 ++++++++++---------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py
index 4fcdaaf150512..6a90e14dfc379 100644
--- a/examples/multioutput/plot_classifier_chain_yeast.py
+++ b/examples/multioutput/plot_classifier_chain_yeast.py
@@ -5,12 +5,12 @@
 Example of using classifier chain on a multilabel dataset.
For this example we will use the `yeast -`_ dataset which -contains 2417 datapoints each with 103 features and 14 possible labels. Each -datapoint has at least one label. As a baseline we first train a logistic -regression classifier for each of the 14 labels. To evaluate the performance -of these classifiers we predict on a held-out test set and calculate the -:ref:`User Guide `. +`_ dataset which contains +2417 datapoints each with 103 features and 14 possible labels. Each +data point has at least one label. As a baseline we first train a logistic +regression classifier for each of the 14 labels. To evaluate the performance of +these classifiers we predict on a held-out test set and calculate the +:ref:`jaccard similarity score `. Next we create 10 classifier chains. Each classifier chain contains a logistic regression model for each of the 14 labels. The models in each @@ -79,7 +79,7 @@ model_scores = [ovr_jaccard_score] + chain_jaccard_scores model_scores.append(ensemble_jaccard_score) -model_names = ('Independent Models', +model_names = ('Independent', 'Chain 1', 'Chain 2', 'Chain 3', @@ -90,21 +90,22 @@ 'Chain 8', 'Chain 9', 'Chain 10', - 'Ensemble Average') + 'Ensemble') -y_pos = np.arange(len(model_names)) -y_pos[1:] += 1 -y_pos[-1] += 1 +x_pos = np.arange(len(model_names)) # Plot the Jaccard similarity scores for the independent model, each of the # chains, and the ensemble (note that the vertical axis on this plot does # not begin at 0). -fig = plt.figure(figsize=(7, 4)) -plt.title('Classifier Chain Ensemble') -plt.xticks(y_pos, model_names, rotation='vertical') -plt.ylabel('Jaccard Similarity Score') -plt.ylim([min(model_scores) * .9, max(model_scores) * 1.1]) +fig, ax = plt.subplots(figsize=(7, 4)) +ax.grid(True) +ax.set_title('Classifier Chain Ensemble Performance Comparison') +ax.set_xticks(x_pos) +ax.set_xticklabels(model_names, rotation='vertical') +ax.set_ylabel('Jaccard Similarity Score') +ax.set_ylim([min(model_scores) * .9, max(model_scores) * 1.1]) colors = ['r'] + ['b'] * len(chain_jaccard_scores) + ['g'] -plt.bar(y_pos, model_scores, align='center', alpha=0.5, color=colors) +ax.bar(x_pos, model_scores, alpha=0.5, color=colors) +plt.tight_layout() plt.show() From 19c3ad73ac58a855a404a7fa83a6639bde8fd206 Mon Sep 17 00:00:00 2001 From: Vilhelm von Ehrenheim Date: Tue, 25 Jul 2017 11:46:17 +0200 Subject: [PATCH 48/86] Fixed incorrect docstring (#9446) --- sklearn/neighbors/approximate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py index ac59305e12378..2f297ce68cc56 100644 --- a/sklearn/neighbors/approximate.py +++ b/sklearn/neighbors/approximate.py @@ -132,9 +132,9 @@ class LSHForest(BaseEstimator, KNeighborsMixin, RadiusNeighborsMixin): radius : float, optinal (default = 1.0) Radius from the data point to its neighbors. This is the parameter - space to use by default for the :meth`radius_neighbors` queries. + space to use by default for the :meth:`radius_neighbors` queries. - n_candidates : int (default = 10) + n_candidates : int (default = 50) Minimum number of candidates evaluated per estimator, assuming enough items meet the `min_hash_match` constraint. 
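
A quick way to double-check the corrected defaults above against the actual
constructor (a minimal sketch, assuming scikit-learn 0.19, where the
deprecated ``LSHForest`` is still importable and instantiating it only emits
a ``DeprecationWarning``)::

    import warnings

    from sklearn.neighbors import LSHForest

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", DeprecationWarning)
        lshf = LSHForest()

    # The documented defaults should agree with the actual keyword defaults.
    assert lshf.n_candidates == 50
    assert lshf.radius == 1.0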
From d4bbaddf4578714040a90969408a711fc9101d35 Mon Sep 17 00:00:00 2001 From: hakaa1 Date: Tue, 25 Jul 2017 13:48:57 +0200 Subject: [PATCH 49/86] [MRG+1] retry mechanism for plot_stock_market.py (#9437) --- examples/applications/plot_stock_market.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index f7ad4dcb526b5..8a85b0645cb8c 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -77,6 +77,17 @@ # ############################################################################# # Retrieve the data from Internet +def retry(f, n_attempts=3): + "Wrapper function to retry function calls in case of exceptions" + def wrapper(*args, **kwargs): + for i in range(n_attempts): + try: + return f(*args, **kwargs) + except Exception as e: + if i == n_attempts - 1: + raise + return wrapper + def quotes_historical_google(symbol, date1, date2): """Get the historical data from Google finance. @@ -179,8 +190,10 @@ def quotes_historical_google(symbol, date1, date2): symbols, names = np.array(list(symbol_dict.items())).T +# retry is used because quotes_historical_google can temporarily fail +# for various reasons (e.g. empty result from Google API). quotes = [ - quotes_historical_google(symbol, d1, d2) for symbol in symbols + retry(quotes_historical_google)(symbol, d1, d2) for symbol in symbols ] close_prices = np.vstack([q['close'] for q in quotes]) From 6deb8449f5d7c86f3e1298ccb9cdf61308e2635f Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 26 Jul 2017 02:45:21 +0800 Subject: [PATCH 50/86] [MRG+1] BUG Fix the shrinkage implementation in NearestCentroid (#9219) * fix the shrinkage implementation * update function name * update what's new * update what's new * spelling * confict fix * conflict fix --- doc/whats_new.rst | 4 ++++ sklearn/neighbors/nearest_centroid.py | 2 +- sklearn/neighbors/tests/test_nearest_centroid.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 8d7728ccbcd39..b4b611a6f84ea 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -61,6 +61,7 @@ random sampling procedures. - :class:`linear_model.LassoLars` (bug fix) - :class:`linear_model.LassoLarsIC` (bug fix) - :class:`manifold.TSNE` (bug fix) +- :class:`neighbors.NearestCentroid` (bug fix) - :class:`semi_supervised.LabelSpreading` (bug fix) - :class:`semi_supervised.LabelPropagation` (bug fix) - tree based models where ``min_weight_fraction_leaf`` is used (enhancement) @@ -534,6 +535,9 @@ Decomposition, manifold learning and clustering - Fix bug where :mod:`mixture` ``sample`` methods did not return as many samples as requested. :issue:`7702` by :user:`Levi John Wolf `. +- Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`. + :issue:`9219` by `Hanmin Qin `_. + Preprocessing and feature selection - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` diff --git a/sklearn/neighbors/nearest_centroid.py b/sklearn/neighbors/nearest_centroid.py index d15013a1e299a..ec00ec87aeabf 100644 --- a/sklearn/neighbors/nearest_centroid.py +++ b/sklearn/neighbors/nearest_centroid.py @@ -147,7 +147,7 @@ def fit(self, X, y): dataset_centroid_ = np.mean(X, axis=0) # m parameter for determining deviation - m = np.sqrt((1. / nk) + (1. / n_samples)) + m = np.sqrt((1. / nk) - (1. / n_samples)) # Calculate deviation using the standard deviation of centroids. 
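         # (annotation, not part of the original patch) The minus sign is the
         # correct standard-error factor here: Var(mean_k - mean) equals
         # sigma^2 * (1/n_k - 1/n), because each class centroid is itself a
         # component of the overall centroid. This also matches the authors'
         # reference implementation in the R package "pamr".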
variance = (X - self.centroids_[y_ind]) ** 2 variance = variance.sum(axis=0) diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index 65a0f7d64e249..e50a2e6f07445 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -97,6 +97,20 @@ def test_pickle(): " after pickling (classification).") +def test_shrinkage_correct(): + # Ensure that the shrinking is correct. + # The expected result is calculated by R (pamr), + # which is implemented by the author of the original paper. + # (One need to modify the code to output the new centroid in pamr.predict) + + X = np.array([[0, 1], [1, 0], [1, 1], [2, 0], [6, 8]]) + y = np.array([1, 1, 2, 2, 2]) + clf = NearestCentroid(shrink_threshold=0.1) + clf.fit(X, y) + expected_result = np.array([[0.7787310, 0.8545292], [2.814179, 2.763647]]) + np.testing.assert_array_almost_equal(clf.centroids_, expected_result) + + def test_shrinkage_threshold_decoded_y(): clf = NearestCentroid(shrink_threshold=0.01) y_ind = np.asarray(y) From b050a2cdc93a6820f16fd25308ea279cab2683f4 Mon Sep 17 00:00:00 2001 From: "(Venkat) Raghav, Rajagopalan" Date: Wed, 26 Jul 2017 09:32:35 +0200 Subject: [PATCH 51/86] [MRG] DOC use def instead of lambda in the multimetric example at model_evaluation.rst (#9442) --- doc/modules/model_evaluation.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index d20716c528697..125a48d46b713 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -242,14 +242,14 @@ permitted and will require a wrapper to return a single metric:: >>> # A sample toy binary classification dataset >>> X, y = datasets.make_classification(n_classes=2, random_state=0) >>> svm = LinearSVC(random_state=0) - >>> tp = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[0, 0] - >>> tn = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[0, 0] - >>> fp = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[1, 0] - >>> fn = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[0, 1] + >>> def tp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 0] + >>> def tn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 0] + >>> def fp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[1, 0] + >>> def fn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 1] >>> scoring = {'tp' : make_scorer(tp), 'tn' : make_scorer(tn), ... 'fp' : make_scorer(fp), 'fn' : make_scorer(fn)} >>> cv_results = cross_validate(svm.fit(X, y), X, y, scoring=scoring) - >>> # Getting the test set false positive scores + >>> # Getting the test set true positive scores >>> print(cv_results['test_tp']) # doctest: +NORMALIZE_WHITESPACE [12 13 15] >>> # Getting the test set false negative scores From 2cc6c52f74d1981ac2a2c6325894d28e3e802860 Mon Sep 17 00:00:00 2001 From: Balakumaran Manoharan Date: Thu, 27 Jul 2017 06:16:01 -0500 Subject: [PATCH 52/86] [MRG+1] Rearrange modules in alphabetical order (#9449) --- doc/modules/classes.rst | 455 ++++++++++++++++++++-------------------- 1 file changed, 225 insertions(+), 230 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index b41de5c108b5c..128f1c85f13e2 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -41,9 +41,34 @@ Functions base.clone config_context - set_config get_config + set_config + +.. 
_calibration_ref: +:mod:`sklearn.calibration`: Probability Calibration +=================================================== + +.. automodule:: sklearn.calibration + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`calibration` section for further details. + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + calibration.CalibratedClassifierCV + + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + calibration.calibration_curve .. _cluster_ref: @@ -80,13 +105,13 @@ Functions :toctree: generated/ :template: function.rst - cluster.estimate_bandwidth - cluster.k_means - cluster.ward_tree cluster.affinity_propagation cluster.dbscan + cluster.estimate_bandwidth + cluster.k_means cluster.mean_shift cluster.spectral_clustering + cluster.ward_tree .. _bicluster_ref: @@ -141,60 +166,21 @@ Classes :template: function.rst covariance.empirical_covariance + covariance.graph_lasso covariance.ledoit_wolf - covariance.shrunk_covariance covariance.oas - covariance.graph_lasso + covariance.shrunk_covariance +.. _cross_decomposition_ref: -:mod:`sklearn.model_selection`: Model Selection -=============================================== +:mod:`sklearn.cross_decomposition`: Cross decomposition +======================================================= -.. automodule:: sklearn.model_selection +.. automodule:: sklearn.cross_decomposition :no-members: :no-inherited-members: -**User guide:** See the :ref:`cross_validation`, :ref:`grid_search` and -:ref:`learning_curve` sections for further details. - -Splitter Classes ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.KFold - model_selection.GroupKFold - model_selection.StratifiedKFold - model_selection.LeaveOneGroupOut - model_selection.LeavePGroupsOut - model_selection.LeaveOneOut - model_selection.LeavePOut - model_selection.RepeatedKFold - model_selection.RepeatedStratifiedKFold - model_selection.ShuffleSplit - model_selection.GroupShuffleSplit - model_selection.StratifiedShuffleSplit - model_selection.PredefinedSplit - model_selection.TimeSeriesSplit - -Splitter Functions ------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.train_test_split - model_selection.check_cv - -Hyper-parameter optimizers --------------------------- +**User guide:** See the :ref:`cross_decomposition` section for further details. .. currentmodule:: sklearn @@ -202,33 +188,10 @@ Hyper-parameter optimizers :toctree: generated/ :template: class.rst - model_selection.GridSearchCV - model_selection.RandomizedSearchCV - model_selection.ParameterGrid - model_selection.ParameterSampler - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.fit_grid_point - -Model validation ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.cross_validate - model_selection.cross_val_score - model_selection.cross_val_predict - model_selection.permutation_test_score - model_selection.learning_curve - model_selection.validation_curve + cross_decomposition.CCA + cross_decomposition.PLSCanonical + cross_decomposition.PLSRegression + cross_decomposition.PLSSVD .. 
_datasets_ref: @@ -251,33 +214,33 @@ Loaders :template: function.rst datasets.clear_data_home - datasets.get_data_home + datasets.dump_svmlight_file datasets.fetch_20newsgroups datasets.fetch_20newsgroups_vectorized + datasets.fetch_california_housing + datasets.fetch_covtype + datasets.fetch_kddcup99 + datasets.fetch_lfw_pairs + datasets.fetch_lfw_people + datasets.fetch_mldata + datasets.fetch_olivetti_faces + datasets.fetch_rcv1 + datasets.fetch_species_distributions + datasets.get_data_home datasets.load_boston datasets.load_breast_cancer datasets.load_diabetes datasets.load_digits datasets.load_files datasets.load_iris - datasets.load_wine - datasets.fetch_lfw_pairs - datasets.fetch_lfw_people datasets.load_linnerud - datasets.mldata_filename - datasets.fetch_mldata - datasets.fetch_olivetti_faces - datasets.fetch_california_housing - datasets.fetch_covtype - datasets.fetch_kddcup99 - datasets.fetch_rcv1 datasets.load_mlcomp datasets.load_sample_image datasets.load_sample_images - datasets.fetch_species_distributions datasets.load_svmlight_file datasets.load_svmlight_files - datasets.dump_svmlight_file + datasets.load_wine + datasets.mldata_filename Samples generator ----------------- @@ -288,9 +251,11 @@ Samples generator :toctree: generated/ :template: function.rst + datasets.make_biclusters datasets.make_blobs - datasets.make_classification + datasets.make_checkerboard datasets.make_circles + datasets.make_classification datasets.make_friedman1 datasets.make_friedman2 datasets.make_friedman3 @@ -306,8 +271,6 @@ Samples generator datasets.make_sparse_uncorrelated datasets.make_spd_matrix datasets.make_swiss_roll - datasets.make_biclusters - datasets.make_checkerboard .. _decomposition_ref: @@ -327,29 +290,49 @@ Samples generator :toctree: generated/ :template: class.rst - decomposition.PCA - decomposition.IncrementalPCA - decomposition.KernelPCA + decomposition.DictionaryLearning decomposition.FactorAnalysis decomposition.FastICA - decomposition.TruncatedSVD + decomposition.IncrementalPCA + decomposition.KernelPCA + decomposition.LatentDirichletAllocation + decomposition.MiniBatchDictionaryLearning + decomposition.MiniBatchSparsePCA decomposition.NMF + decomposition.PCA decomposition.SparsePCA - decomposition.MiniBatchSparsePCA decomposition.SparseCoder - decomposition.DictionaryLearning - decomposition.MiniBatchDictionaryLearning - decomposition.LatentDirichletAllocation + decomposition.TruncatedSVD .. autosummary:: :toctree: generated/ :template: function.rst - decomposition.fastica decomposition.dict_learning decomposition.dict_learning_online + decomposition.fastica decomposition.sparse_encode +.. _lda_ref: + +:mod:`sklearn.discriminant_analysis`: Discriminant Analysis +=========================================================== + +.. automodule:: sklearn.discriminant_analysis + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`lda_qda` section for further details. + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated + :template: class.rst + + discriminant_analysis.LinearDiscriminantAnalysis + discriminant_analysis.QuadraticDiscriminantAnalysis + .. _dummy_ref: :mod:`sklearn.dummy`: Dummy estimators @@ -401,8 +384,8 @@ Samples generator ensemble.GradientBoostingRegressor ensemble.IsolationForest ensemble.RandomForestClassifier - ensemble.RandomTreesEmbedding ensemble.RandomForestRegressor + ensemble.RandomTreesEmbedding ensemble.VotingClassifier .. 
autosummary:: @@ -442,13 +425,13 @@ partial dependence :toctree: generated/ :template: class_without_init.rst - exceptions.NotFittedError exceptions.ChangedBehaviorWarning exceptions.ConvergenceWarning exceptions.DataConversionWarning exceptions.DataDimensionalityWarning exceptions.EfficiencyWarning exceptions.FitFailedWarning + exceptions.NotFittedError exceptions.NonBLASDotWarning exceptions.UndefinedMetricWarning @@ -485,9 +468,9 @@ From images :toctree: generated/ :template: function.rst - feature_extraction.image.img_to_graph - feature_extraction.image.grid_to_graph feature_extraction.image.extract_patches_2d + feature_extraction.image.grid_to_graph + feature_extraction.image.img_to_graph feature_extraction.image.reconstruct_from_patches_2d :template: class.rst @@ -571,8 +554,8 @@ From text :toctree: generated/ :template: class.rst - gaussian_process.GaussianProcessRegressor gaussian_process.GaussianProcessClassifier + gaussian_process.GaussianProcessRegressor Kernels: @@ -580,20 +563,20 @@ Kernels: :toctree: generated/ :template: class_with_call.rst + gaussian_process.kernels.CompoundKernel + gaussian_process.kernels.ConstantKernel + gaussian_process.kernels.DotProduct + gaussian_process.kernels.ExpSineSquared + gaussian_process.kernels.Exponentiation + gaussian_process.kernels.Hyperparameter gaussian_process.kernels.Kernel - gaussian_process.kernels.Sum + gaussian_process.kernels.Matern + gaussian_process.kernels.PairwiseKernel gaussian_process.kernels.Product - gaussian_process.kernels.Exponentiation - gaussian_process.kernels.ConstantKernel - gaussian_process.kernels.WhiteKernel gaussian_process.kernels.RBF - gaussian_process.kernels.Matern gaussian_process.kernels.RationalQuadratic - gaussian_process.kernels.ExpSineSquared - gaussian_process.kernels.DotProduct - gaussian_process.kernels.PairwiseKernel - gaussian_process.kernels.CompoundKernel - gaussian_process.kernels.Hyperparameter + gaussian_process.kernels.Sum + gaussian_process.kernels.WhiteKernel .. _isotonic_ref: @@ -618,8 +601,8 @@ Kernels: :toctree: generated :template: function.rst - isotonic.isotonic_regression isotonic.check_increasing + isotonic.isotonic_regression .. _kernel_approximation_ref: @@ -662,27 +645,6 @@ Kernels: kernel_ridge.KernelRidge -.. _lda_ref: - -:mod:`sklearn.discriminant_analysis`: Discriminant Analysis -=========================================================== - -.. automodule:: sklearn.discriminant_analysis - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`lda_qda` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - discriminant_analysis.LinearDiscriminantAnalysis - discriminant_analysis.QuadraticDiscriminantAnalysis - - .. _linear_model_ref: :mod:`sklearn.linear_model`: Generalized Linear Models @@ -763,8 +725,8 @@ Kernels: :toctree: generated :template: class.rst - manifold.LocallyLinearEmbedding manifold.Isomap + manifold.LocallyLinearEmbedding manifold.MDS manifold.SpectralEmbedding manifold.TSNE @@ -774,8 +736,8 @@ Kernels: :template: function.rst manifold.locally_linear_embedding - manifold.spectral_embedding manifold.smacof + manifold.spectral_embedding .. _metrics_ref: @@ -801,8 +763,8 @@ details. :toctree: generated/ :template: function.rst - metrics.make_scorer metrics.get_scorer + metrics.make_scorer Classification metrics ---------------------- @@ -930,9 +892,12 @@ See the :ref:`metrics` section of the user guide for further details. 
metrics.pairwise.additive_chi2_kernel metrics.pairwise.chi2_kernel + metrics.pairwise.cosine_similarity + metrics.pairwise.cosine_distances metrics.pairwise.distance_metrics metrics.pairwise.euclidean_distances metrics.pairwise.kernel_metrics + metrics.pairwise.laplacian_kernel metrics.pairwise.linear_kernel metrics.pairwise.manhattan_distances metrics.pairwise.pairwise_distances @@ -940,16 +905,13 @@ See the :ref:`metrics` section of the user guide for further details. metrics.pairwise.polynomial_kernel metrics.pairwise.rbf_kernel metrics.pairwise.sigmoid_kernel - metrics.pairwise.cosine_similarity - metrics.pairwise.cosine_distances - metrics.pairwise.laplacian_kernel - metrics.pairwise_distances - metrics.pairwise_distances_argmin - metrics.pairwise_distances_argmin_min metrics.pairwise.paired_euclidean_distances metrics.pairwise.paired_manhattan_distances metrics.pairwise.paired_cosine_distances metrics.pairwise.paired_distances + metrics.pairwise_distances + metrics.pairwise_distances_argmin + metrics.pairwise_distances_argmin_min .. _mixture_ref: @@ -969,9 +931,93 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated/ :template: class.rst - mixture.GaussianMixture mixture.BayesianGaussianMixture + mixture.GaussianMixture + +.. _modelselection_ref: + +:mod:`sklearn.model_selection`: Model Selection +=============================================== + +.. automodule:: sklearn.model_selection + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`cross_validation`, :ref:`grid_search` and +:ref:`learning_curve` sections for further details. + +Splitter Classes +---------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + model_selection.GroupKFold + model_selection.GroupShuffleSplit + model_selection.KFold + model_selection.LeaveOneGroupOut + model_selection.LeavePGroupsOut + model_selection.LeaveOneOut + model_selection.LeavePOut + model_selection.PredefinedSplit + model_selection.RepeatedKFold + model_selection.RepeatedStratifiedKFold + model_selection.ShuffleSplit + model_selection.StratifiedKFold + model_selection.StratifiedShuffleSplit + model_selection.TimeSeriesSplit +Splitter Functions +------------------ + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + model_selection.check_cv + model_selection.train_test_split + +Hyper-parameter optimizers +-------------------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + model_selection.GridSearchCV + model_selection.ParameterGrid + model_selection.ParameterSampler + model_selection.RandomizedSearchCV + + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + model_selection.fit_grid_point + +Model validation +---------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + model_selection.cross_validate + model_selection.cross_val_predict + model_selection.cross_val_score + model_selection.learning_curve + model_selection.permutation_test_score + model_selection.validation_curve .. _multiclass_ref: @@ -1011,9 +1057,9 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated :template: class.rst + multioutput.ClassifierChain multioutput.MultiOutputRegressor multioutput.MultiOutputClassifier - multioutput.ClassifierChain .. 
_naive_bayes_ref: @@ -1032,9 +1078,9 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated/ :template: class.rst + naive_bayes.BernoulliNB naive_bayes.GaussianNB naive_bayes.MultinomialNB - naive_bayes.BernoulliNB .. _neighbors_ref: @@ -1054,17 +1100,17 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated/ :template: class.rst - neighbors.NearestNeighbors - neighbors.KNeighborsClassifier - neighbors.RadiusNeighborsClassifier - neighbors.KNeighborsRegressor - neighbors.RadiusNeighborsRegressor - neighbors.NearestCentroid neighbors.BallTree - neighbors.KDTree neighbors.DistanceMetric + neighbors.KDTree neighbors.KernelDensity + neighbors.KNeighborsClassifier + neighbors.KNeighborsRegressor neighbors.LocalOutlierFactor + neighbors.RadiusNeighborsClassifier + neighbors.RadiusNeighborsRegressor + neighbors.NearestCentroid + neighbors.NearestNeighbors .. autosummary:: :toctree: generated/ @@ -1094,57 +1140,6 @@ See the :ref:`metrics` section of the user guide for further details. neural_network.MLPClassifier neural_network.MLPRegressor - -.. _calibration_ref: - -:mod:`sklearn.calibration`: Probability Calibration -=================================================== - -.. automodule:: sklearn.calibration - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`calibration` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - calibration.CalibratedClassifierCV - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - calibration.calibration_curve - - -.. _cross_decomposition_ref: - -:mod:`sklearn.cross_decomposition`: Cross decomposition -======================================================= - -.. automodule:: sklearn.cross_decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_decomposition` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - cross_decomposition.PLSRegression - cross_decomposition.PLSCanonical - cross_decomposition.CCA - cross_decomposition.PLSSVD - - .. _pipeline_ref: :mod:`sklearn.pipeline`: Pipeline @@ -1160,8 +1155,8 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated/ :template: class.rst - pipeline.Pipeline pipeline.FeatureUnion + pipeline.Pipeline .. autosummary:: :toctree: generated/ @@ -1287,13 +1282,13 @@ Estimators :toctree: generated/ :template: class.rst - svm.SVC svm.LinearSVC - svm.NuSVC - svm.SVR svm.LinearSVR + svm.NuSVC svm.NuSVR svm.OneClassSVM + svm.SVC + svm.SVR .. autosummary:: :toctree: generated/ @@ -1308,11 +1303,11 @@ Low-level methods :toctree: generated :template: function.rst - svm.libsvm.fit + svm.libsvm.cross_validation svm.libsvm.decision_function + svm.libsvm.fit svm.libsvm.predict svm.libsvm.predict_proba - svm.libsvm.cross_validation .. 
_tree_ref: @@ -1361,26 +1356,26 @@ Low-level methods :toctree: generated/ :template: function.rst - utils.assert_all_finite utils.as_float_array + utils.assert_all_finite utils.check_X_y utils.check_array utils.check_consistent_length utils.check_random_state - utils.indexable utils.class_weight.compute_class_weight utils.class_weight.compute_sample_weight utils.estimator_checks.check_estimator utils.extmath.safe_sparse_dot + utils.indexable utils.resample utils.safe_indexing utils.shuffle - utils.sparsefuncs.mean_variance_axis utils.sparsefuncs.incr_mean_variance_axis utils.sparsefuncs.inplace_column_scale utils.sparsefuncs.inplace_row_scale utils.sparsefuncs.inplace_swap_row utils.sparsefuncs.inplace_swap_column + utils.sparsefuncs.mean_variance_axis utils.validation.check_is_fitted utils.validation.check_symmetric utils.validation.column_or_1d @@ -1409,25 +1404,25 @@ To be removed in 0.20 :toctree: generated/ :template: deprecated_class.rst - grid_search.ParameterGrid - grid_search.ParameterSampler - grid_search.GridSearchCV - grid_search.RandomizedSearchCV - cross_validation.LeaveOneOut - cross_validation.LeavePOut cross_validation.KFold cross_validation.LabelKFold cross_validation.LeaveOneLabelOut + cross_validation.LeaveOneOut + cross_validation.LeavePOut cross_validation.LeavePLabelOut cross_validation.LabelShuffleSplit - cross_validation.StratifiedKFold cross_validation.ShuffleSplit + cross_validation.StratifiedKFold cross_validation.StratifiedShuffleSplit cross_validation.PredefinedSplit decomposition.RandomizedPCA gaussian_process.GaussianProcess - mixture.GMM + grid_search.ParameterGrid + grid_search.ParameterSampler + grid_search.GridSearchCV + grid_search.RandomizedSearchCV mixture.DPGMM + mixture.GMM mixture.VBGMM @@ -1435,11 +1430,11 @@ To be removed in 0.20 :toctree: generated/ :template: deprecated_function.rst - grid_search.fit_grid_point - learning_curve.learning_curve - learning_curve.validation_curve + cross_validation.check_cv cross_validation.cross_val_predict cross_validation.cross_val_score - cross_validation.check_cv cross_validation.permutation_test_score cross_validation.train_test_split + grid_search.fit_grid_point + learning_curve.learning_curve + learning_curve.validation_curve From bacd7a50eb525287c373e190a4a37ed155e14191 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Thu, 27 Jul 2017 20:39:42 +0800 Subject: [PATCH 53/86] [MRG+1] DOC improve RFE/RFECV estimator docstring (#9233) --- doc/modules/feature_selection.rst | 9 +++++---- sklearn/feature_selection/rfe.py | 25 +++++++++---------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst index 0f0adecdd3cf3..f9b767bd2ae89 100644 --- a/doc/modules/feature_selection.rst +++ b/doc/modules/feature_selection.rst @@ -123,10 +123,11 @@ Given an external estimator that assigns weights to features (e.g., the coefficients of a linear model), recursive feature elimination (:class:`RFE`) is to select features by recursively considering smaller and smaller sets of features. First, the estimator is trained on the initial set of features and -weights are assigned to each one of them. Then, features whose absolute weights -are the smallest are pruned from the current set features. That procedure is -recursively repeated on the pruned set until the desired number of features to -select is eventually reached. +the importance of each feature is obtained either through a ``coef_`` attribute +or through a ``feature_importances_`` attribute. 
Then, the least important
+features are pruned from the current set of features. That procedure is
+recursively repeated on the pruned set until the desired number of features
+to select is eventually reached.
 
 :class:`RFECV` performs RFE in a cross-validation loop to find the optimal
 number of features.
diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py
index dc7e9e8e206be..d505099cc6a88 100644
--- a/sklearn/feature_selection/rfe.py
+++ b/sklearn/feature_selection/rfe.py
@@ -39,8 +39,9 @@ class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin):
     coefficients of a linear model), the goal of recursive feature elimination
     (RFE) is to select features by recursively considering smaller and smaller
     sets of features. First, the estimator is trained on the initial set of
-    features and weights are assigned to each one of them. Then, features whose
-    absolute weights are the smallest are pruned from the current set features.
+    features and the importance of each feature is obtained either through a
+    ``coef_`` attribute or through a ``feature_importances_`` attribute.
+    Then, the least important features are pruned from the current set of features.
     That procedure is recursively repeated on the pruned set until the desired
     number of features to select is eventually reached.
 
@@ -49,13 +50,9 @@ class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin):
     Parameters
     ----------
     estimator : object
-        A supervised learning estimator with a `fit` method that updates a
-        `coef_` attribute that holds the fitted parameters. Important features
-        must correspond to high absolute values in the `coef_` array.
-
-        For instance, this is the case for most supervised learning
-        algorithms such as Support Vector Classifiers and Generalized
-        Linear Models from the `svm` and `linear_model` modules.
+        A supervised learning estimator with a ``fit`` method that provides
+        information about feature importance either through a ``coef_``
+        attribute or through a ``feature_importances_`` attribute.
 
     n_features_to_select : int or None (default=None)
         The number of features to select. If `None`, half of the features
@@ -282,13 +279,9 @@ class RFECV(RFE, MetaEstimatorMixin):
     Parameters
     ----------
     estimator : object
-        A supervised learning estimator with a `fit` method that updates a
-        `coef_` attribute that holds the fitted parameters. Important features
-        must correspond to high absolute values in the `coef_` array.
-
-        For instance, this is the case for most supervised learning
-        algorithms such as Support Vector Classifiers and Generalized
-        Linear Models from the `svm` and `linear_model` modules.
+        A supervised learning estimator with a ``fit`` method that provides
+        information about feature importance either through a ``coef_``
+        attribute or through a ``feature_importances_`` attribute.
 
     step : int or float, optional (default=1)
         If greater than or equal to 1, then `step` corresponds to the (integer)

From 12cd3f7415e654c81a898e4ac1fbc6486a9049dd Mon Sep 17 00:00:00 2001
From: Utkarsh Upadhyay
Date: Thu, 27 Jul 2017 15:47:13 +0200
Subject: [PATCH 54/86] Increase the max_iter for LabelPropagation. (#9441)

LabelPropagation converges much more slowly than LabelSpreading. The
default of max_iter=30 works well for LabelSpreading but not for
LabelPropagation.

This was extracted from #5893.
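
A minimal convergence check after this change (a sketch, not part of the
patch; it assumes the ``n_iter_`` attribute that this module sets during
``fit``)::

    import numpy as np
    from sklearn.semi_supervised import LabelPropagation

    rng = np.random.RandomState(0)
    X = rng.rand(200, 2)
    y = -np.ones(200, dtype=int)   # -1 marks unlabeled samples
    y[:5] = rng.randint(0, 2, 5)   # a handful of labeled seeds

    lp = LabelPropagation(max_iter=1000).fit(X, y)
    # If n_iter_ reached max_iter, the solver stopped without converging.
    print(lp.n_iter_, lp.n_iter_ < lp.max_iter)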
--- sklearn/semi_supervised/label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 5e35efe82f914..c690ac1f151f4 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -380,7 +380,7 @@ class LabelPropagation(BaseLabelPropagation): _variant = 'propagation' def __init__(self, kernel='rbf', gamma=20, n_neighbors=7, - alpha=None, max_iter=30, tol=1e-3, n_jobs=1): + alpha=None, max_iter=1000, tol=1e-3, n_jobs=1): super(LabelPropagation, self).__init__( kernel=kernel, gamma=gamma, n_neighbors=n_neighbors, alpha=alpha, max_iter=max_iter, tol=tol, n_jobs=n_jobs) From 6dbaa51e674ce3ca74726ec94fe65fb56073358b Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Sat, 29 Jul 2017 05:13:38 -0700 Subject: [PATCH 55/86] DOC Explicitly use https in index.rst links (#9462) --- doc/datasets/index.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 8168434e697e8..f91163fc235c5 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -252,7 +252,7 @@ features:: .. topic:: Related links: - _`Public datasets in svmlight / libsvm format`: http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ + _`Public datasets in svmlight / libsvm format`: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader @@ -268,15 +268,15 @@ DataFrame are also acceptable. Here are some recommended ways to load standard columnar data into a format usable by scikit-learn: -* `pandas.io `_ +* `pandas.io `_ provides tools to read data from common formats including CSV, Excel, JSON and SQL. DataFrames may also be constructed from lists of tuples or dicts. Pandas handles heterogeneous data smoothly and provides tools for manipulation and conversion into a numeric array suitable for scikit-learn. -* `scipy.io `_ +* `scipy.io `_ specializes in binary formats often used in scientific computing context such as .mat and .arff -* `numpy/routines.io `_ +* `numpy/routines.io `_ for standard loading of columnar data into numpy arrays * scikit-learn's :func:`datasets.load_svmlight_file` for the svmlight or libSVM sparse format @@ -288,14 +288,14 @@ For some miscellaneous data such as images, videos, and audio, you may wish to refer to: * `skimage.io `_ or - `Imageio `_ + `Imageio `_ for loading images and videos to numpy arrays -* `scipy.misc.imread `_ (requires the `Pillow `_ package) to load pixel intensities data from various image file formats * `scipy.io.wavfile.read - `_ + `_ for reading WAV files into a numpy array Categorical (or nominal) features stored as strings (common in pandas DataFrames) From 86893effeb5338ed90f12c30ad2e955244f188c6 Mon Sep 17 00:00:00 2001 From: Naoya Kanai Date: Sat, 29 Jul 2017 05:23:46 -0700 Subject: [PATCH 56/86] DOC Clarify RobustScaler behavior with sparse input (#8858) --- doc/modules/preprocessing.rst | 2 +- sklearn/preprocessing/data.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index a4e1364a85ae6..18ef7e004c8de 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -199,7 +199,7 @@ matrices as input, as long as ``with_mean=False`` is explicitly passed to the constructor. 
Otherwise a ``ValueError`` will be raised as silently centering would break the sparsity and would often crash the execution by allocating excessive amounts of memory unintentionally. -:class:`RobustScaler` cannot be fited to sparse inputs, but you can use +:class:`RobustScaler` cannot be fitted to sparse inputs, but you can use the ``transform`` method on sparse inputs. Note that the scalers accept both Compressed Sparse Rows and Compressed diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index b1c767eedb364..aec1ec7c045de 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -945,9 +945,9 @@ class RobustScaler(BaseEstimator, TransformerMixin): and the 3rd quartile (75th quantile). Centering and scaling happen independently on each feature (or each - sample, depending on the `axis` argument) by computing the relevant + sample, depending on the ``axis`` argument) by computing the relevant statistics on the samples in the training set. Median and interquartile - range are then stored to be used on later data using the `transform` + range are then stored to be used on later data using the ``transform`` method. Standardization of a dataset is a common requirement for many @@ -964,7 +964,7 @@ class RobustScaler(BaseEstimator, TransformerMixin): ---------- with_centering : boolean, True by default If True, center the data before scaling. - This does not work (and will raise an exception) when attempted on + This will cause ``transform`` to raise an exception when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory. @@ -1059,11 +1059,14 @@ def fit(self, X, y=None): return self def transform(self, X): - """Center and scale the data + """Center and scale the data. + + Can be called on sparse input, provided that ``RobustScaler`` has been + fitted to dense input and ``with_centering=False``. Parameters ---------- - X : array-like + X : {array-like, sparse matrix} The data used to scale along the specified axis. """ if self.with_centering: From 5146e88e0ce61837900cd405cca572084ae3b091 Mon Sep 17 00:00:00 2001 From: Balakumaran Manoharan Date: Sun, 30 Jul 2017 00:22:10 -0500 Subject: [PATCH 57/86] [MRG + 1] DOC Fix Sphinx errors (#9420) * Fix Rouseeuw1984 broken link * Change label vbgmm to bgmm Previously modified with PR #6651 * Change tag name Old refers to new tag added with PR #7388 * Remove prefix underscore to match tag * Realign to fit 80 chars * Link to metrics.rst. pairwise metrics yet to be documented * Remove tag as LSHForest is deprecated * Remove all references to randomized_l1 and sphx_glr_auto_examples_linear_model_plot_sparse_recovery.py. It is deprecated. * Fix few Sphinx warnings * Realign to 80 chars * Changes based on PR review * Remove unused ref in calibration * Fix link ref in covariance.rst * Fix linking issues * Differentiate Rouseeuw1999 tag within file. 
* Change all duplicate Rouseeuw1999 tags
* Remove numbers from tag Rousseeuw
---
 doc/modules/calibration.rst                   | 24 ++++++-------
 doc/modules/clustering.rst                    |  2 +-
 doc/modules/covariance.rst                    | 35 +++++++++++--------
 doc/modules/ensemble.rst                      |  2 +-
 doc/modules/linear_model.rst                  |  2 +-
 doc/modules/multiclass.rst                    | 18 +++++-----
 doc/modules/outlier_detection.rst             | 20 +++++------
 .../putting_together.rst                      |  2 +-
 .../ensemble/plot_adaboost_hastie_10_2.py     | 10 +++---
 examples/ensemble/plot_adaboost_multiclass.py |  4 +--
 examples/ensemble/plot_adaboost_regression.py |  2 +-
 examples/ensemble/plot_ensemble_oob.py        |  2 +-
 .../plot_gradient_boosting_regularization.py  |  2 +-
 sklearn/covariance/robust_covariance.py       | 31 +++++++++++-----
 sklearn/datasets/lfw.py                       |  1 +
 sklearn/linear_model/randomized_l1.py         | 25 ++-----------
 sklearn/metrics/scorer.py                     |  2 +-
 sklearn/mixture/dpgmm.py                      |  2 +-
 sklearn/model_selection/_search.py            |  4 +--
 sklearn/model_selection/_validation.py        |  6 ++--
 sklearn/neighbors/approximate.py              |  2 --
 sklearn/neighbors/lof.py                      |  4 +--
 22 files changed, 100 insertions(+), 102 deletions(-)

diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst
index 0c0af594398a0..9762414ac8cc0 100644
--- a/doc/modules/calibration.rst
+++ b/doc/modules/calibration.rst
@@ -44,7 +44,7 @@ with different biases per method:
 * :class:`RandomForestClassifier` shows the opposite behavior: the histograms
   show peaks at approximately 0.2 and 0.9 probability, while probabilities close to
   0 or 1 are very rare. An explanation for this is given by Niculescu-Mizil
-  and Caruana [4]: "Methods such as bagging and random forests that average
+  and Caruana [4]_: "Methods such as bagging and random forests that average
   predictions from a base set of models can have difficulty making predictions
   near 0 and 1 because variance in the underlying base models will bias
   predictions that should be near zero or one away from these values. Because
@@ -57,7 +57,7 @@ with different biases per method:
   ensemble away from 0. We observe this effect most strongly with random
   forests because the base-level trees trained with random forests have
   relatively high variance due to feature subsetting." As a result, the
-  calibration curve also referred to as the reliability diagram (Wilks 1995[5]) shows a
+  calibration curve, also referred to as the reliability diagram (Wilks 1995 [5]_), shows a
   characteristic sigmoid shape, indicating that the classifier could trust its
   "intuition" more and return probabilities closer to 0 or 1 typically.
@@ -65,7 +65,7 @@ with different biases per method:
 * Linear Support Vector Classification (:class:`LinearSVC`) shows an even more sigmoid curve
   than the RandomForestClassifier, which is typical for maximum-margin methods
-  (compare Niculescu-Mizil and Caruana [4]), which focus on hard samples
+  (compare Niculescu-Mizil and Caruana [4]_), which focus on hard samples
   that are close to the decision boundary (the support vectors).
 
 .. currentmodule:: sklearn.calibration
@@ -190,18 +190,18 @@ a similar decrease in log-loss.
 .. topic:: References:
 
-    .. [1] Obtaining calibrated probability estimates from decision trees
-           and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001
+    * Obtaining calibrated probability estimates from decision trees
+      and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001
 
-    .. [2] Transforming Classifier Scores into Accurate Multiclass
-           Probability Estimates, B. Zadrozny & C.
Elkan, (KDD 2002) + * Transforming Classifier Scores into Accurate Multiclass + Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) - .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to - Regularized Likelihood Methods, J. Platt, (1999) + * Probabilistic Outputs for Support Vector Machines and Comparisons to + Regularized Likelihood Methods, J. Platt, (1999) .. [4] Predicting Good Probabilities with Supervised Learning, - A. Niculescu-Mizil & R. Caruana, ICML 2005 + A. Niculescu-Mizil & R. Caruana, ICML 2005 .. [5] On the combination of forecast probabilities for - consecutive precipitation periods. Wea. Forecasting, 5, 640– - 650., Wilks, D. S., 1990a + consecutive precipitation periods. Wea. Forecasting, 5, 640–650., + Wilks, D. S., 1990a diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 7189474752005..b18cb3a6adcf7 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -1343,7 +1343,7 @@ mean of homogeneity and completeness**: .. topic:: References - .. [RH2007] `V-Measure: A conditional entropy-based external cluster evaluation + * `V-Measure: A conditional entropy-based external cluster evaluation measure `_ Andrew Rosenberg and Julia Hirschberg, 2007 diff --git a/doc/modules/covariance.rst b/doc/modules/covariance.rst index 88f40f3896190..2f95051ac9ea3 100644 --- a/doc/modules/covariance.rst +++ b/doc/modules/covariance.rst @@ -95,7 +95,7 @@ bias/variance trade-off, and is discussed below. Ledoit-Wolf shrinkage --------------------- -In their 2004 paper [1], O. Ledoit and M. Wolf propose a formula so as +In their 2004 paper [1]_, O. Ledoit and M. Wolf propose a formula so as to compute the optimal shrinkage coefficient :math:`\alpha` that minimizes the Mean Squared Error between the estimated and the real covariance matrix. @@ -112,10 +112,11 @@ fitting a :class:`LedoitWolf` object to the same sample. for visualizing the performances of the Ledoit-Wolf estimator in terms of likelihood. +.. topic:: References: -[1] O. Ledoit and M. Wolf, "A Well-Conditioned Estimator for Large-Dimensional - Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, - February 2004, pages 365-411. + .. [1] O. Ledoit and M. Wolf, "A Well-Conditioned Estimator for Large-Dimensional + Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, + February 2004, pages 365-411. .. _oracle_approximating_shrinkage: @@ -123,7 +124,7 @@ Oracle Approximating Shrinkage ------------------------------ Under the assumption that the data are Gaussian distributed, Chen et -al. [2] derived a formula aimed at choosing a shrinkage coefficient that +al. [2]_ derived a formula aimed at choosing a shrinkage coefficient that yields a smaller Mean Squared Error than the one given by Ledoit and Wolf's formula. The resulting estimator is known as the Oracle Shrinkage Approximating estimator of the covariance. @@ -141,8 +142,10 @@ object to the same sample. Bias-variance trade-off when setting the shrinkage: comparing the choices of Ledoit-Wolf and OAS estimators -[2] Chen et al., "Shrinkage Algorithms for MMSE Covariance Estimation", - IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. +.. topic:: References: + + .. [2] Chen et al., "Shrinkage Algorithms for MMSE Covariance Estimation", + IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. .. topic:: Examples: @@ -266,14 +269,14 @@ perform outlier detection and discard/downweight some observations according to further processing of the data. 
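As an illustrative aside (not part of the patch), a minimal hedged sketch of
the Minimum Covariance Determinant estimator that the next paragraphs
document, using scikit-learn's public covariance API on synthetic data::

    import numpy as np
    from sklearn.covariance import MinCovDet

    rng = np.random.RandomState(0)
    X = rng.multivariate_normal([0, 0], [[1.0, 0.5], [0.5, 1.0]], size=200)
    X[:10] += 8                      # a few gross outliers

    mcd = MinCovDet(random_state=0).fit(X)
    print(mcd.location_)             # robust location estimate
    print(mcd.covariance_)           # reweighted covariance estimate
    print(mcd.raw_covariance_)       # raw FastMCD estimate, before reweighting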
The ``sklearn.covariance`` package implements a robust estimator of covariance, -the Minimum Covariance Determinant [3]. +the Minimum Covariance Determinant [3]_. Minimum Covariance Determinant ------------------------------ The Minimum Covariance Determinant estimator is a robust estimator of -a data set's covariance introduced by P.J. Rousseeuw in [3]. The idea +a data set's covariance introduced by P.J. Rousseeuw in [3]_. The idea is to find a given proportion (h) of "good" observations which are not outliers and compute their empirical covariance matrix. This empirical covariance matrix is then rescaled to compensate the @@ -283,7 +286,7 @@ weights to observations according to their Mahalanobis distance, leading to a reweighted estimate of the covariance matrix of the data set ("reweighting step"). -Rousseeuw and Van Driessen [4] developed the FastMCD algorithm in order +Rousseeuw and Van Driessen [4]_ developed the FastMCD algorithm in order to compute the Minimum Covariance Determinant. This algorithm is used in scikit-learn when fitting an MCD object to data. The FastMCD algorithm also computes a robust estimate of the data set location at @@ -292,11 +295,13 @@ the same time. Raw estimates can be accessed as ``raw_location_`` and ``raw_covariance_`` attributes of a :class:`MinCovDet` robust covariance estimator object. -[3] P. J. Rousseeuw. Least median of squares regression. - J. Am Stat Ass, 79:871, 1984. -[4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, - 1999, American Statistical Association and the American Society - for Quality, TECHNOMETRICS. +.. topic:: References: + + .. [3] P. J. Rousseeuw. Least median of squares regression. + J. Am Stat Ass, 79:871, 1984. + .. [4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, + 1999, American Statistical Association and the American Society + for Quality, TECHNOMETRICS. .. topic:: Examples: diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 12a0ff6a74ba0..40a3e834e22c9 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -246,7 +246,7 @@ amount of time (e.g., on large datasets). .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. - .. [GEW2006] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized + * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees", Machine Learning, 63(1), 3-42, 2006. .. _random_forest_feature_importance: diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e6d0ea882f6d3..018ff884c4ae2 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -1141,7 +1141,7 @@ in the following ways. .. topic:: References: - .. [#f1] Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale estimates, pg 172 + * Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale estimates, pg 172 Also, this estimator is different from the R implementation of Robust Regression (http://www.ats.ucla.edu/stat/r/dae/rreg.htm) because the R implementation does a weighted least diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst index 5ae785400782d..2eec94f76b1c2 100644 --- a/doc/modules/multiclass.rst +++ b/doc/modules/multiclass.rst @@ -251,8 +251,8 @@ Below is an example of multiclass learning using OvO:: .. topic:: References: - .. [1] "Pattern Recognition and Machine Learning. Springer", - Christopher M. Bishop, page 183, (First Edition) + * "Pattern Recognition and Machine Learning. Springer", + Christopher M. 
Bishop, page 183, (First Edition) .. _ecoc: @@ -315,19 +315,19 @@ Below is an example of multiclass learning using Output-Codes:: .. topic:: References: - .. [2] "Solving multiclass learning problems via error-correcting output codes", - Dietterich T., Bakiri G., - Journal of Artificial Intelligence Research 2, - 1995. + * "Solving multiclass learning problems via error-correcting output codes", + Dietterich T., Bakiri G., + Journal of Artificial Intelligence Research 2, + 1995. .. [3] "The error coding method and PICTs", James G., Hastie T., Journal of Computational and Graphical statistics 7, 1998. - .. [4] "The Elements of Statistical Learning", - Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) - 2008. + * "The Elements of Statistical Learning", + Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) + 2008. Multioutput regression ====================== diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index 011bb6ea07889..db130403f9023 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -126,8 +126,8 @@ This strategy is illustrated below. .. topic:: References: - .. [RD1999] Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum - covariance determinant estimator" Technometrics 41(3), 212 (1999) + * Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum + covariance determinant estimator" Technometrics 41(3), 212 (1999) .. _isolation_forest: @@ -172,8 +172,8 @@ This strategy is illustrated below. .. topic:: References: - .. [LTZ2008] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." - Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. + * Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." + Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. Local Outlier Factor @@ -228,7 +228,7 @@ This strategy is illustrated below. .. topic:: References: - .. [BKNS2000] Breunig, Kriegel, Ng, and Sander (2000) + * Breunig, Kriegel, Ng, and Sander (2000) `LOF: identifying density-based local outliers. `_ Proc. ACM SIGMOD @@ -272,16 +272,16 @@ multiple modes and :class:`ensemble.IsolationForest` and opposite, the decision rule based on fitting an :class:`covariance.EllipticEnvelope` learns an ellipse, which fits well the inlier distribution. The :class:`ensemble.IsolationForest` - and :class:`neighbors.LocalOutlierFactor` perform as well. + and :class:`neighbors.LocalOutlierFactor` perform as well. - |outlier1| * - As the inlier distribution becomes bimodal, the :class:`covariance.EllipticEnvelope` does not fit well the inliers. However, we can see that :class:`ensemble.IsolationForest`, - :class:`svm.OneClassSVM` and :class:`neighbors.LocalOutlierFactor` - have difficulties to detect the two modes, - and that the :class:`svm.OneClassSVM` + :class:`svm.OneClassSVM` and :class:`neighbors.LocalOutlierFactor` + have difficulties to detect the two modes, + and that the :class:`svm.OneClassSVM` tends to overfit: because it has no model of inliers, it interprets a region where, by chance some outliers are clustered, as inliers. @@ -292,7 +292,7 @@ multiple modes and :class:`ensemble.IsolationForest` and :class:`svm.OneClassSVM` is able to recover a reasonable approximation as well as :class:`ensemble.IsolationForest` and :class:`neighbors.LocalOutlierFactor`, - whereas the :class:`covariance.EllipticEnvelope` completely fails. + whereas the :class:`covariance.EllipticEnvelope` completely fails. - |outlier3| .. 
topic:: Examples: diff --git a/doc/tutorial/statistical_inference/putting_together.rst b/doc/tutorial/statistical_inference/putting_together.rst index acac7c03d1d06..556b6b8df0894 100644 --- a/doc/tutorial/statistical_inference/putting_together.rst +++ b/doc/tutorial/statistical_inference/putting_together.rst @@ -17,7 +17,7 @@ can predict variables. We can also create combined estimators: :align: right .. literalinclude:: ../../auto_examples/plot_digits_pipe.py - :lines: 26-66 + :lines: 23-63 diff --git a/examples/ensemble/plot_adaboost_hastie_10_2.py b/examples/ensemble/plot_adaboost_hastie_10_2.py index b27636956ef26..4d48d13dd24f2 100644 --- a/examples/ensemble/plot_adaboost_hastie_10_2.py +++ b/examples/ensemble/plot_adaboost_hastie_10_2.py @@ -3,11 +3,11 @@ Discrete versus Real AdaBoost ============================= -This example is based on Figure 10.2 from Hastie et al 2009 [1] and illustrates -the difference in performance between the discrete SAMME [2] boosting -algorithm and real SAMME.R boosting algorithm. Both algorithms are evaluated -on a binary classification task where the target Y is a non-linear function -of 10 input features. +This example is based on Figure 10.2 from Hastie et al 2009 [1]_ and +illustrates the difference in performance between the discrete SAMME [2]_ +boosting algorithm and real SAMME.R boosting algorithm. Both algorithms are +evaluated on a binary classification task where the target Y is a non-linear +function of 10 input features. Discrete SAMME AdaBoost adapts based on errors in predicted class labels whereas real SAMME.R uses the predicted class probabilities. diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 39e7cdcb8ef4d..906df85ccf645 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -3,14 +3,14 @@ Multi-class AdaBoosted Decision Trees ===================================== -This example reproduces Figure 1 of Zhu et al [1] and shows how boosting can +This example reproduces Figure 1 of Zhu et al [1]_ and shows how boosting can improve prediction accuracy on a multi-class problem. The classification dataset is constructed by taking a ten-dimensional standard normal distribution and defining three classes separated by nested concentric ten-dimensional spheres such that roughly equal numbers of samples are in each class (quantiles of the :math:`\chi^2` distribution). -The performance of the SAMME and SAMME.R [1] algorithms are compared. SAMME.R +The performance of the SAMME and SAMME.R [1]_ algorithms are compared. SAMME.R uses the probability estimates to update the additive model, while SAMME uses the classifications only. As the example illustrates, the SAMME.R algorithm typically converges faster than SAMME, achieving a lower test error with fewer diff --git a/examples/ensemble/plot_adaboost_regression.py b/examples/ensemble/plot_adaboost_regression.py index b5b98d140da1b..0c76ac6af3ae9 100644 --- a/examples/ensemble/plot_adaboost_regression.py +++ b/examples/ensemble/plot_adaboost_regression.py @@ -3,7 +3,7 @@ Decision Tree Regression with AdaBoost ====================================== -A decision tree is boosted using the AdaBoost.R2 [1] algorithm on a 1D +A decision tree is boosted using the AdaBoost.R2 [1]_ algorithm on a 1D sinusoidal dataset with a small amount of Gaussian noise. 299 boosts (300 decision trees) is compared with a single decision tree regressor. 
As the number of boosts is increased the regressor can fit more diff --git a/examples/ensemble/plot_ensemble_oob.py b/examples/ensemble/plot_ensemble_oob.py index 811cec13b24be..19b01772d5c24 100644 --- a/examples/ensemble/plot_ensemble_oob.py +++ b/examples/ensemble/plot_ensemble_oob.py @@ -8,7 +8,7 @@ :math:`z_i = (x_i, y_i)`. The *out-of-bag* (OOB) error is the average error for each :math:`z_i` calculated using predictions from the trees that do not contain :math:`z_i` in their respective bootstrap sample. This allows the -``RandomForestClassifier`` to be fit and validated whilst being trained [1]. +``RandomForestClassifier`` to be fit and validated whilst being trained [1]_. The example below demonstrates how the OOB error can be measured at the addition of each new tree during training. The resulting plot allows a diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py index e5a01240ccdb0..592dd40ca47cb 100644 --- a/examples/ensemble/plot_gradient_boosting_regularization.py +++ b/examples/ensemble/plot_gradient_boosting_regularization.py @@ -4,7 +4,7 @@ ================================ Illustration of the effect of different regularization strategies -for Gradient Boosting. The example is taken from Hastie et al 2009. +for Gradient Boosting. The example is taken from Hastie et al 2009 [1]_. The loss function used is binomial deviance. Regularization via shrinkage (``learning_rate < 1.0``) improves performance considerably. diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index 985dda92f990c..de5ee308764bb 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -190,7 +190,7 @@ def select_candidates(X, n_support, n_trials, select=1, n_iter=30, Starting from a random support, the pure data set is found by the c_step procedure introduced by Rousseeuw and Van Driessen in - [Rouseeuw1999]_. + [RV]_. Parameters ---------- @@ -250,7 +250,7 @@ def select_candidates(X, n_support, n_trials, select=1, n_iter=30, References ---------- - .. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance Determinant + .. [RV] A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS @@ -339,13 +339,13 @@ def fast_mcd(X, support_fraction=None, such computation levels. Note that only raw estimates are returned. If one is interested in - the correction and reweighting steps described in [Rouseeuw1999]_, + the correction and reweighting steps described in [RouseeuwVan]_, see the MinCovDet object. References ---------- - .. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance + .. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS @@ -580,10 +580,10 @@ class MinCovDet(EmpiricalCovariance): .. [Rouseeuw1984] `P. J. Rousseeuw. Least median of squares regression. J. Am Stat Ass, 79:871, 1984.` - .. [Rouseeuw1999] `A Fast Algorithm for the Minimum Covariance Determinant + .. [Rousseeuw] `A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS` - .. [Butler1993] `R. W. Butler, P. L. Davies and M. Jhun, + .. [ButlerDavies] `R. W. Butler, P. L. Davies and M. 
Jhun, Asymptotics For The Minimum Covariance Determinant Estimator, The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400` @@ -650,7 +650,7 @@ def correct_covariance(self, data): """Apply a correction to raw Minimum Covariance Determinant estimates. Correction using the empirical correction factor suggested - by Rousseeuw and Van Driessen in [Rouseeuw1984]_. + by Rousseeuw and Van Driessen in [RVD]_. Parameters ---------- @@ -659,6 +659,13 @@ def correct_covariance(self, data): The data set must be the one which was used to compute the raw estimates. + References + ---------- + + .. [RVD] `A Fast Algorithm for the Minimum Covariance + Determinant Estimator, 1999, American Statistical Association + and the American Society for Quality, TECHNOMETRICS` + Returns ------- covariance_corrected : array-like, shape (n_features, n_features) @@ -675,7 +682,8 @@ def reweight_covariance(self, data): Re-weight observations using Rousseeuw's method (equivalent to deleting outlying observations from the data set before - computing location and covariance estimates). [Rouseeuw1984]_ + computing location and covariance estimates) described + in [RVDriessen]_. Parameters ---------- @@ -684,6 +692,13 @@ def reweight_covariance(self, data): The data set must be the one which was used to compute the raw estimates. + References + ---------- + + .. [RVDriessen] `A Fast Algorithm for the Minimum Covariance + Determinant Estimator, 1999, American Statistical Association + and the American Society for Quality, TECHNOMETRICS` + Returns ------- location_reweighted : array-like, shape (n_features, ) diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 50834f7705ef6..4d188f00bcffa 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -68,6 +68,7 @@ def scale_face(face): def check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): """Helper function to download any missing LFW data""" + data_home = get_data_home(data_home=data_home) lfw_home = join(data_home, "lfw_home") diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index a84558823146e..8f3692dc8675b 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -195,8 +195,6 @@ class RandomizedLasso(BaseRandomizedLinearModel): is known as stability selection. In short, features selected more often are considered good features. - Read more in the :ref:`User Guide `. - Parameters ---------- alpha : float, 'aic', or 'bic', optional @@ -206,7 +204,7 @@ class RandomizedLasso(BaseRandomizedLinearModel): scaling : float, optional The s parameter used to randomly scale the penalty of different - features (See :ref:`User Guide ` for details ). + features. Should be between 0 and 1. sample_fraction : float, optional @@ -300,11 +298,6 @@ class RandomizedLasso(BaseRandomizedLinearModel): >>> from sklearn.linear_model import RandomizedLasso >>> randomized_lasso = RandomizedLasso() - Notes - ----- - For an example, see :ref:`examples/linear_model/plot_sparse_recovery.py - `. - References ---------- Stability selection @@ -407,8 +400,6 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): randomizations. This is known as stability selection. In short, features selected more often are considered good features. - Read more in the :ref:`User Guide `. 
- Parameters ---------- C : float or array-like of shape [n_reg_parameter], optional, default=1 @@ -420,7 +411,7 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): scaling : float, optional, default=0.5 The s parameter used to randomly scale the penalty of different - features (See :ref:`User Guide ` for details ). + features. Should be between 0 and 1. sample_fraction : float, optional, default=0.75 @@ -501,11 +492,6 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): >>> from sklearn.linear_model import RandomizedLogisticRegression >>> randomized_logistic = RandomizedLogisticRegression() - Notes - ----- - For an example, see :ref:`examples/linear_model/plot_sparse_recovery.py - `. - References ---------- Stability selection @@ -590,8 +576,6 @@ def lasso_stability_path(X, y, scaling=0.5, random_state=None, verbose=False): """Stability path based on randomized Lasso estimates - Read more in the :ref:`User Guide `. - Parameters ---------- X : array-like, shape = [n_samples, n_features] @@ -638,11 +622,6 @@ def lasso_stability_path(X, y, scaling=0.5, random_state=None, scores_path : array, shape = [n_features, n_grid] The scores for each feature along the path. - - Notes - ----- - For an example, see :ref:`examples/linear_model/plot_sparse_recovery.py - `. """ X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo']) rng = check_random_state(random_state) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 7d213ae39aaed..f13068d477b09 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -320,7 +320,7 @@ def _check_multimetric_scoring(estimator, scoring=None): value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. - See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None the estimator's default scorer (if available) is used. The return value in that case will be ``{'score': }``. diff --git a/sklearn/mixture/dpgmm.py b/sklearn/mixture/dpgmm.py index 3d1858c513b2a..75b0b88e9b4cf 100644 --- a/sklearn/mixture/dpgmm.py +++ b/sklearn/mixture/dpgmm.py @@ -672,7 +672,7 @@ class VBGMM(_DPGMMBase): Initialization is with normally-distributed means and identity covariance, for proper convergence. - Read more in the :ref:`User Guide `. + Read more in the :ref:`User Guide `. Parameters ---------- diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index db41c19218fa7..ebfa1e9bd3e18 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -801,7 +801,7 @@ class GridSearchCV(BaseSearchCV): value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. - See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. @@ -1111,7 +1111,7 @@ class RandomizedSearchCV(BaseSearchCV): value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. - See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. 
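As an illustrative aside (not part of the patch), a minimal sketch of the
multimetric scoring that the corrected ``multimetric_grid_search`` references
point to, assuming the 0.19-era search API; when several metrics are given,
``refit`` must name the one used to pick the best parameters::

    from sklearn.datasets import make_classification
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    X, y = make_classification(random_state=0)
    search = GridSearchCV(SVC(), param_grid={'C': [0.1, 1, 10]},
                          scoring={'acc': 'accuracy', 'prec': 'precision'},
                          refit='acc')
    search.fit(X, y)
    # One results column per named scorer:
    print(search.cv_results_['mean_test_acc'])
    print(search.cv_results_['mean_test_prec'])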
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 1e5ea29740c00..147d741b500b9 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -69,7 +69,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None, value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. - See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. @@ -803,8 +803,8 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, the dataset into train/test set. scoring : string, callable or None, optional, default: None - A single string (see :ref:`_scoring_parameter`) or a callable - (see :ref:`_scoring`) to evaluate the predictions on the test set. + A single string (see :ref:`scoring_parameter`) or a callable + (see :ref:`scoring`) to evaluate the predictions on the test set. If None the estimator's default scorer, if available, is used. diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py index 2f297ce68cc56..907b379731a2f 100644 --- a/sklearn/neighbors/approximate.py +++ b/sklearn/neighbors/approximate.py @@ -122,8 +122,6 @@ class LSHForest(BaseEstimator, KNeighborsMixin, RadiusNeighborsMixin): points. Its value does not depend on the norm of the vector points but only on their relative angles. - Read more in the :ref:`User Guide `. - Parameters ---------- diff --git a/sklearn/neighbors/lof.py b/sklearn/neighbors/lof.py index 3559d76cf898a..b3686d69d771b 100644 --- a/sklearn/neighbors/lof.py +++ b/sklearn/neighbors/lof.py @@ -85,8 +85,8 @@ class LocalOutlierFactor(NeighborsBase, KNeighborsMixin, UnsupervisedMixin): p : integer, optional (default=2) Parameter for the Minkowski metric from - :ref:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this is - equivalent to using manhattan_distance (l1), and euclidean_distance + :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this + is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. metric_params : dict, optional (default=None) From dd898b19cef6a860a595a1ea13e868e430930f66 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 30 Jul 2017 20:36:20 +1000 Subject: [PATCH 58/86] DOC Use :class: for first VotingClassifier reference --- doc/modules/ensemble.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 40a3e834e22c9..b766f4dfd4d0c 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -915,10 +915,10 @@ averaged. .. _voting_classifier: -VotingClassifier +Voting Classifier ======================== -The idea behind the voting classifier implementation is to combine +The idea behind the :class:`VotingClassifier` is to combine conceptually different machine learning classifiers and use a majority vote or the average predicted probabilities (soft vote) to predict the class labels. 
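As an illustrative aside (not part of the patch), a minimal hard-voting
sketch of the combination just described, assuming scikit-learn's public
ensemble API on synthetic data::

    from sklearn.datasets import make_classification
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB
    from sklearn.tree import DecisionTreeClassifier

    X, y = make_classification(random_state=0)
    eclf = VotingClassifier(estimators=[('lr', LogisticRegression()),
                                        ('dt', DecisionTreeClassifier()),
                                        ('nb', GaussianNB())],
                            voting='hard')   # majority vote over labels
    eclf.fit(X, y)
    print(eclf.predict(X[:5]))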
Such a classifier can be useful for a set of equally well performing model
From 81b7badceb81a4ad83c920524de5869a888873ac Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Tue, 1 Aug 2017 19:07:02 +1000
Subject: [PATCH 59/86] Credit University of Sydney sponsorship (#9466)

---
 doc/about.rst                                    | 7 +++++++
 doc/index.rst                                    | 3 ++-
 .../scikit-learn/static/img/sydney-primary.jpeg  | Bin 0 -> 38356 bytes
 .../scikit-learn/static/img/sydney-stacked.jpeg  | Bin 0 -> 3356 bytes
 4 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 doc/themes/scikit-learn/static/img/sydney-primary.jpeg
 create mode 100644 doc/themes/scikit-learn/static/img/sydney-stacked.jpeg

diff --git a/doc/about.rst b/doc/about.rst
index 9f15362dadd6d..d85e2cef387d3 100644
--- a/doc/about.rst
+++ b/doc/about.rst
@@ -118,6 +118,13 @@ Andreas Müller also received a grant to improve scikit-learn from the `Alfred P
    :align: center
    :target: https://sloan.org/
 
+`The University of Sydney `_ has funded Joel Nothman since July 2017.
+
+.. image:: themes/scikit-learn/static/img/sydney-primary.jpeg
+   :width: 200pt
+   :align: center
+   :target: http://www.sydney.edu.au/
+
 The following students were sponsored by `Google `_
 to work on scikit-learn through the
 `Google Summer of Code `_
diff --git a/doc/index.rst b/doc/index.rst
index a04d529121de3..e835de46a660e 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -323,7 +323,7 @@ Funding provided by INRIA and others.

diff --git a/doc/themes/scikit-learn/static/img/sydney-primary.jpeg b/doc/themes/scikit-learn/static/img/sydney-primary.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..292e217402f2fbb1d0df12fe2fe0a2d7501a987e GIT binary patch literal 38356 zcmeEtXIN9)wrJ=gV(176L5iT1NSBsK6A=+?prAyghGLW&B_s#}0wSUt*y@I$2#5+u zFCq~_QBgraI;3Esvto%$Qg{pZIs2URzWeUG_r9O+tnW+KOftts1BEQBk*ckzp zzs8{gx__G^2+c8qLGLg%=oKko{A(-+jjyL+VZwidy${{jhtLy$Vf}Tt3&5QTRQCwM zMdPAEaOcz=j+m;u`T1kdu78FZ@7iT)Wn^q+WTp-cjCWg^?uNmHuM7W$f4oTGA9NN8 z{{2}3*NcSyK89Ugf9ju@jI4GUs~eeHne4JMHi7uwY!eb4ea;GrJR4=`w@!(nKQ01^ z_dAC)GTeoP*`V>~{QScLqt(v@24m0KYOFk^XsBZYY&E>h+;+L0a|jH2 zqy87e{4E1C&?t2qycIs;Ttr~BpE^Dw{A`pJ-d5umaw}-O{xwoV{nwP}Fk6k|ZXW6m zxX3_tGec9uT^bN{WI&MBVaL6HgM~iXYW$7JxVSjOI1@u$WH8dm($W&S%NS{FYyizL zh`Mk#+7EAVHcIpF7#stm{3EgFqOrKM>gyQ&&fsFAZ8e}L|4CBBxxdi6*IzzwEMnxYE{0F`NA>yc`7tRGD4+lo!Vj}$mq3N3c0CtLnNc_)MSJy4A>iqi%|ci-d5OulFcvvjO^^qE%ut5JD59|uJdPUZ)9(8 zzt_ph(r&leUK40>{{#&9-vwi4Z@J6V*v{U>ez(&v!ISVmjOau8B z8T+tNkg{I*n|(MEc>Zsra4duz+Ba5FxS;4bzsNwlU`X`;*H!)##y`aGY;YiC&Y+F) z=QnX7fsmQ|Pt^3!Y5y#O|GIko3G3&-Zj@{_qWmDS{^Ah1ZlL~T3Hhhc|G_P^u+iA) z@W8*ZYZo5<55^&zV-*f@0I8@!KwywxOn9^g>e!JJm?&&;l(Ee(wBcB2{;$!WpvZqh z`0ML`GL(Ox30dFu;a_GPrT*{h-xBz@1pY07e@o!s68N_S{{NG}KLWwPvyi(K2ZedC zwIx{OKjKys!(Fi5_AYM1>(MR@X0=to8z!Wp?YtF=iUnbUTLn?D4eM@|k{4c-d7Z&YbSft_;eQ2VSzt=792vmx!9;2^4he>aMT@|=4 zBp?kFlok+@7Fg?qA)t6#MBvx;3k-S_5EK#?5f$4YzEJ|2P%i}&6c7>;6c!Q@Stn2+ z0s0*#EG;6l&B#tv_K=^L#(6p8OF4HpXxdk|%O8FRXqlXeOcLLyptwazS$n&V?v9VEv+0Jo%TBKbJ>67=rK>P<0riRA;&8?1REL^9TOXezi=`6a!P92mGq1oH-F2` z%m4k>?R&-dOG?Wgls~Mgt*dWnY-(=lpmuh3_q=%dYG82a{qV@>*f@hZ^Wo#C&$Dy$ ztS?``eP{mwf3B|c1;zG%vIV{W$=UzL7bFA$L1AGbVX<|-1O(&O8J89o*=8gvV|Pf* z@4T#r@udxN_BnT}+r>3a4g>OMBHwLP&@yFcv({Pr#o2$2v84YIXMZvF556W~4p8dl z|9lDl`IF$kTtfePtwBish5?!`CLMBv2Qam?=c)>8uujZY;%;i0jKZIx2 zi9rGN)3{sTaO_?*U_{qo0ndVv^`o~^>*-`rr>t{I$gWX_FlSJePDJ4=r(b#L72Hwn z?w3&4l93rWb&%?g0d10*ZOxx}wM-KA4bnp(py~Y}8wR2o1>3dL9Z1EdDCF?_QJJM3 z_C?9Bq~t1OR0n-YBj0)03HVqTfAhm!n-v-UbfGK|P$PAihT&DBMCX>aX(fN`DdK83 zXTn&CJa?tVt}lE_m0b?Mmjpy{%fivX;Wb#wVAJWsljsIaM`>gy(1Z}X$nnrL@!}Ow z=*!ug#{0_g?(CcIQJT*lxD?d^Leqxw*?Q0eY9$Fw^IXW16GG4yap4g zSFtYl-1B^wv}HnZZ~F^%nn7;1;Ru@kp${}vT;!=X6tfoS!PKSAuOuFwf8ayF^U^3G z?w8Idu6!`d{haxEM1N7=75(zFjZ7jJc2P=Zjv*f*mPiFM&Xj*c_qZ`k6m1g5ks@0;|hUM#StCX_nmq8 zJd(1o%5QDq-{;Cf+Wiw!R1LnhZApGADWdQ4G<>8A$sE)G8oSHQD3?&$%9p=fF<68& zxy`_PPQOpD=8C|nPt5ZbM@%VF zb8+J8li8B|9Q}7SIsK`h60JP!W)s>9yAu@>H^OvbMS}nR8+xt(XBB_58ASah{zTy> z=$=dLN7T9}kwPig+5|y)dLNuC{oWLwO4>|_i2_FKQ*jDhlgwiynr*`W7VXRb^wyB)%skL+B7Wl%PnLYgRzKf?HNCHk2) zGj2(Y<3c!D53uv^fjvk1mu#F$edc|&M+{o+m|6BAbqkr+jXClf`#z*wGUI&JpbFtSgpwk)+YpuASPot_tpk}^p*eY$D*95Y{=_hNVwq2QQ4+<$k=n)hg17FlV<0l^xz4-?1edMF2dY^V zuAJCR>&3j(@bIAP+bdD336yTP5X0TS`BbGXY2VbndEdu5PfWiMvJ6bu2c-6wm z5vBd4z^bt|7<)4>i>nzNA)l6JN=nAmCgE=$8L05PqG65C?DW2t6ZUM+Q73o!>4>)n z?TgIBpsiFpgV{=I4@T1~(JKB4VyAemYcQT~LMG2*bkJ}Gsv%J^|U^4?D4%T`;>F1(Z5;MpSOuDZM0IlwKp6~c6=L66%rPZC4 z+C4fyG9uM={=9tQ?ZFrR*IZ&Ri@aE83NB(tG?%jrCt!9i%SF3s%&t!~k_xdMp zb!X+1q1huEmL=2 z3EFTn&vu4E=5FDs)ZQz~Kla8W}>{UuiIM*nC z=j*ghUofwf&K=5+GD(Tl!}5sekE?3UQ4osPpb_h3_!^Ay6gr!(t-%0y#CX`@6Vrnp ztPyI3Acv0ZOnXk!xj38T`ZPW}?H%~VJ&7XE?>#Sj5}S9Cl2U0%a4&rm z{Vu|V-dr=N8F}E6-$~~^QEikF1}_dRyV&Z$*-PwNgDDWzLA{P?BuM6%68yoi8Qb9J zWGU0W6ogDYE$ucrjSxkD^wsa-TuTnZRKRM}Lrhf%Dqjq|d4bU<=@S|i;+eP0 zU0>mPclNmm>Wj!_7t5lTvx`JVzxw~gIzm=eXzd;M!5S<%5BT*ENEvAwO5)g}JeuJ8!j#;(eI6t5V^eZ_tb%?9#H} z$6%^LO8vWtkS^0Eg~?R6De^lnu5@E3tqyYZ&|}`{^fg#rS|?48UG5^H7;o~v(sXg4KKx4&gU6mA4%9+*fsLsWYDoc6umR4>?Cn= 
zE~0)ohvUnIypp6QP=aj_el@FFvf!BV2ikPr5!DIWpxFbwKYeo*N5O~xk_1=Jr_$57j(jXQzcOgcLJX%THYzkCD;hIwZxWX1(Q|C_W`&4^kPx?lBqsu zaA#}|0B1mq$lx{EDlA&Z$`;}voF#PlDmcP)@5mO$OLSf&rP*8n99*BVw7hcWSIO+u zs*=Xk?+Jc}0dB_eIdSq!Z|}(EZ#%h6gM6{*xvxDPaBwGrE0xTa2WuIm3`+GHj4Nv~ zgXpf3^0wXlcP*+_S8>4&Pnv)sqCvkYS}AP&4mLd@%(PLVC4x^bbCLSp zMLVmcKVXmkG-ZTIyxOw{6LTQGLT!h3%^25=`3A`r9%?;Q1@?CN?gXkjNGa)@gT(hV zxq2S4K~KG`KCklA2P;;BFxF*6_Q;|+?0hz$l{XSnej}W zbPB_Y^`jH5c47_Ikx((mH({;8vI!BmN-()SKf5xtp-rC^vyy(qizPQLG)Q_vsx$Qb zyri7d&z_@Tv2ID)Wxr)8SJ)1wH53lBEw>M@)baLj@zg1FGx4Y$a;kaT<;OWQ_Owpq z#Wu%Rm#^;;6N7Cp>@3PX{c8Uc%T2paJF6$@Dy`-~s(SQj3$*$Tq;?dpvN(0Y!+jjn z!7hI=NtIi@;Z5ogV_wL*Juj2IJMH45pT0qFUR5;i+@pr^>F1L@i%IS3Gr3@ z*~xQjF#rAiM`QU<5wo^yFwxpI*eh)t#Wh&4B?OvlBzB{v2xvCSH^EW$f?u8JkqlT#HjQb^*qIihafmjMG`=CW#>i+PJv{r8ma zf^ur01g)EJC$}j{iFrcE(xo)v5_EV>K#xTd5YT%d@iqC$=S;pDnDmKsg(7WZ%|s`X z>KZD4SWUKV1~XX?;g_?SyZL&c^MT-mNnEhYcrV{iG9NnXN4Q%;U-P3YA!nv9jZ%wAqXZ!)zCwCl z`{~XMJj;6x_8dMQ#ow^Rxj2WGAX;-xLCoV&bbTW=TMoprNg#42TN-cCeX$|`lS@3q z9gZ(##5{7-w|#cgr{S(yMSWIn`6EeE)k5>?5s2W&ygN454BUhdTaTA#BZ0US{YHl` zj^v2?a8L3c){|fa`vx#8Y6b)9dU53x_q*vZMKhTQIeWvbu4jehd-JRJH<-aLon6}2 z*x=(6c1G$V*^W3_s16CbiQkWDM5eTvLw^2Ft{CtNA(6il6zLk-K31$=C|nN zai30%^y5~M8A`g91SqLL9{Y~F<9u0r@%9phir@sbW)XTy-t;h2f^G^~s)es#5OM@b z2zv_H)Aa)(YZDJC@eHY5*CvLAJK7jG%-6(lgFvsSNAvm1A3wA_z2gb|F>>JT8mtu+ z_#x}%>*no=6#rC~Y9DuNdC4h#v;dm^8yVQYl4P~fLD*=HZH!mxjAKchmJv`Wi(n=w zt7aIR*JnzPUc0>hc)mc%H_IH>BmU-3wZB7)vdN93fyunEDM%a0ZogYHwZSJyvjByp zqHlmdReH17*Y}sx5s(oy8n^8$<6ht(@Hg3CAnNhNcfKg-KFIH+)aoprV11=iE^#B6 z$TUnHx&tAPH*0lZL|E7ZK42b)x#{*Y+4ko^G`LccH{sDAV|#YB?@;6Bo1I6n=X}$C z)^!ZHMEKjh+hAx|_r~zc{Mg~+!vkM_xMeJVdE@^_?SoV=eA0`q)JY?Qn|3v$JU8gM z+o5fLoI)p)*iqBa=6_FO-ri8&&q9DZdbsx?bD<3xgldjGY5s7Xyeg&g#Tv|R1vDe` z5AC2+IJi(`(p z!Yc?zMot59m%o?W$mAJ=$?j{g%hcmvzP&(7n@I#W);71eR6742G2p)QR3ZYs2BUtY z&~G;NacxNKozQuD4aAUpInJg66L`o+hfY>u9+*Z)rehRrwsDSgm4;E$TrJiTnTk*) zoXYOC*{=|U zg4~?<$)}U-ynAj=@ux}ExoKTxphP_9AVIDk9M7L|!(Fb$a~!jo$nk59Yp@OdLhVaB z6R%0*h*{rri9Mf*E5v18)mxb?9sWiGjyPc}Yigup4VH?&)ACqWuS#bsh- zR%}F=xbz&zItQqg#5GN@NG%spFVDHn$m3FnlAi0 zadG(bJB)POcDxuv@$z@LD)#{EY#N;;hBZFnR1W5M#=Ve zMjWKo(iT757N)47j0x7DlUT-CVi%(~`wB{DkZTlCfib@ucHharbTafnPOnXVTtjTN zeMOr~P=={zoX4Ksm~>KO@M07YiWGEbg|UAJk-z{VeKj`aA=fIu$Jh3q4HA*H$UQiS zOjGITd)%t=eI*MDUISAF2VANfiX(Q51veMWtbn?RMOUEm$8&F7XWuv(O7-wunT0p+ zhLI(qw15)9v7ca>FvCN34yusgCqeT;&l@|;kPR<3#l8%wT<+98d3}SP$?Q{Px|-+^ zK|ry~8ZH;|gS7cG(2IYC*kTv0)n5WU94$Do@NT+FbYhYI=955H>x`Vh*Js&FYcP)7 zJV}ypjCJ!MuN)7i#b!q+5DVQS#*eL|oZ)4AJy9Kpb}(}cNhKwkDt zwlJ8+?8$hH+BzEMt2|P%RB8J3TYgtX=r*j#$C{tAi@Cl|d-rfXtmROOq;@;BA1#ZB zt^tn#`Rqns0e>6!;Lx|Zm;@8X=M zp*g#91tU!m0q(S;w85J!hwefJ@B+YTzwI_Xxm)E*EAIU(y7pG3fb*}w`zjEWfVq5sI@?;!ZlkW}}yRrr{sy`zvP4VU;$5DHHL%~t28_%UegkH5qLDcv{ z)STt&gO_IdQqb~iu!!t2D#N0alIp8N?ocxWbh?lkzFWWx%x8+;X>RrJhfwLvmrKEU z<33@%7a~^toRd5{oi*jo6>W_zfzaKD&>g^fC-6(oL!Xp*S!KxwYo1aHB~502P~Z4? 
z+*Z+xZO$`5yAI+?npz5S_oa?cTjY7^Z4G_fzE7+kEaPw7#*xCqeSxyegapnZ!WKX= z%O;AEpwdm01}w6;5hdV}9yN7fn!=cE^(vkB##}ZlP4l?r<6dZeOGYm?X*5J`t}U5+>i z%^ACz+^4w+twRAdC4K!h9CqpS4A;#zQXp$0Mrz=+cAsE($LOjbp^v-O`*eirN zWcm~&y4;PdsBzpK9w4)8pO^@1hJ76r;TVEyxO7Y%rrSN0CQJ|!dF|=kMV96I-@{ut z74l=nMehX0RJbXQpS`cOVX1=pqLXp7uxKm#x%)&gdq3|%Jy{kEW)pdL`LbLumRHif z>LylNGFN*>()cT+e0A@oUY7cTY3+li7YhoWy?SK!tdpI&HBsi86U<^Jd+8}>8}aq{ zrve6LT!noDGy>dD4tu#rAl$&>8Jmv4d#KTA{9_(BP0w$0cK=P}>NHSqIuA0gKBv;xj66#juuP3XW0W+HK*v z2J`E0Whd#@O=)^G=grRq?9>>?#NT^jZQHjt`2Q9V9Hh|mDZo*scWjBi$IHiaHOGsmT6ZCq zHrz}16EL(G<#$0j01|K<@~?p-5Q7TnAjt7C0xel+rqd!u5v7CArjD0Ae~K0-zyR7J zh0b8C!QehL?&0@@_y_Y=n?{bzuMWLC;S^yrB0X*@3OeXw12#=YKv_vBo0v9kK?Cep zK%JFrqAHk);U)8pxk0>Zb+{`iTR^OPpN%U=f!{ygNfWI{q$4&W(RDUr%rVrK`TX0I z%gg?Qr6RWn#-}&jN{sDfIp_zkCjf!Nv2JmY13C|FckhwKE{?wiS8k+p9JyG`!4SLA z4V0yY8B&^>80f+Ygi?SMdO@LB8Y8#^ku2lwJ2$chQ$b^^(%}{JEusZ1A)gG9MwB8j zJu!tfh$wD*;F@1LwaOZ%gMKhd%Db5rw(JAeN=eJ)hP^ zu3mxs6iI%!H$1h?0?h7F_=0zz$?sl+ZFM=%jHgJW(<+w@qIp6G-g{4UtRB1V zS$u7HW<;k{E)aZ&nZDoHtQMsG43J2TM= zT%VCWgZwSTHze`GJ|=ugf)(GDf3uW)7w`Iyj$wk)rHmfxbF)(aDFO!$D1`BgfMKtIv<^r1RD*(DP zdjq)I#}uO?H_T7#3`e^b?0gw+6K#6@G;2ETame*oOC_}9^SyBqaiwq48gn)U7Zo{E z(U7)++ACQkdId%drNXra<2k-lTq%H*Y~%JgjIRxx0740FpcCujj~~*rwfF^RV7dOT z@0*ySlM^;A$0PQK?F-6txnF%_S;aN;G&>vM zfzPf$4|luoE&u_Y@pPVwBdfbP0#)*uYu!57>RcYSX*KUAmUk!af$QcH4c(#B>BSkx zKWtLo3k4SIWf_h$SK5MqxC?UAp=4_<8KCeVt-<8_cNV~D7J;{T_xLkEqk( zm+09Ynpj=T{-y?H4WTs4N0~wi2q=|Tj{Y7wZ>nTF##8iAFM?~aES|5VPVWMnfti`K zE73dZ;a4c)t=cTTb~VxW7_VW?!Dz~6?#br<<<_{c7{-G#<%kcB?u<7&FI!scZ^Q>) zb8NVw)!cky4_Y3jJU0#7ML3zA@|~yovVt$813N+Y zyYmefPks2}gY7Voup3xXmbB76lXKTXM)RbrX^qge*`E`9aXZdMeh(1}JCgiK?PwcS z=E}GtqdSF3+C=O@)V3uvB^B0SM_MvCF2kmjl+~EL;?n8825-mwa+NJ$In4*fw`fV& z_DKTk`|k1@3^I@O59pM_?I0R5@l3_Ex;{_~@`l!t086ibn8#Yxk?IOBUD!@Gd9#(& zD%HE`vMyTB@E{%*UKzIrYY6Tlr;;FY=uc@+6thrzV2=ZUzI>OLL<+Yxcf)ncv|2^d z=G9QZ?J@6s$D44&o{5~~B$IwA=UHfJGBGvzOIVH<*vAa+xC7-2u3&1)I>L}CR=93S z#ZD7+vJO-wb$EE3YtJHIs(T|k@KMHR^ejP~vG8=Fz`+eJ(<%{Yd19p)#&d5bgrk|+ ztV1hM&@@e1Pg%*Nw|Ho>O!$)bwJG!ra`lcgE*MF|-b01Y3BDtHl-OH@R^vqbN>STP zNYsz$0Px3?EDHS&rLovpFgDM99IC<`JM&Cj;2J!feB`?GM>tpR2OYVj$IJ$I(5^rm zv~M);2b5FmVTl+Z`^X3DtHOKGwv}rSa#$9Q;1VFw!{TKG|4su*)N1nP zyz5o$Oy{oFYM;q&_}%ZMB^6EpL7VrCM8AcoMJ(9@q01i$NR|+L7Fy&}=8qT^Cziar z&~QRnDWpbDC?#)v52Gs7HitocNP!rdyop9_V*NinB$ z$eD?A6X@nW=>-JUT)uMwV z_D^?w_88J+tvtW;mFb4Dt81ujYPezdc-H^Opt7{#)>n6O6UXf|-`hAts1*9mD4=#> zHiyL37^vVLt0Ei&Rq4-g^t41nGYMPsnJ!u#vKvtiN-!+i$B;V4&LSUIu9;b@ih7=N zG(7P~#9sAZ2@)amWl}Iyem}w&KgN=!J2jLWK6}7L^R5$jO@Mvf7Khr^%4Sy5FVd(> zKAkjqqIOtB_?4PalqO4cxx8XtHsW=@qOs-6Gy9Y$xzMqd6h)*_s(Yw#P%~}Oj*d~` zx&S@tTrZtPD#wPaI!sguX=$}0UHxtl#|&@5bff_9{}}0*-o5NL{N&1LvnOZzgkf(J z9I880=oc|u#a^nK6zBw*rV@ah^i8!80Vq=8;|R7O&~Vkv?I81$txAWQIPV@;uLRrs zAk3=nhO5TJ2_Kz1Q89Yf!p8d+UCxM>y<8-ICAjG(9-tSA>5LGVPJ?R zP5;0{F^Bm+5SiBDKAzl7sYNfoXL->TsVdpy#uIE8-V^>t{t!ux5;V0l;S`Q1J$=%firuX_47FWkPe|~^XP`w+qJWaD zrwj~Kwgl%m2yn&|yiF129uMcjStIo9EUqgs*ohV)xHBl}MA0UhlW)qL0;@uYaoShE z%neLCZ7R2Z)&djwOshNB!&O}2i~n)U+Ii8P6APzu{5v#PH^O*9wV*Q{4hzA^SJ{{q z+2NvCik&DEFqiqVDyZo_&4Wd{Qn%%COoaXR_nS(mEizt>DU3^2t7CFSKfMrxs)Z_t z=v9)0JOVO<5n3yzGqk zsp7ZQ6pQ^n{TV7I1BYp#P5@MZX??V zpFM~GE^*;ZM5>xFh%9$t(!ibhm+C95J1<6i(o~7^;Ehva9q1>#YmWi%3EEib^v;20 zf*+G1|GI>LEWW=|dGY0i;^(tzCQ4udKL*T$xE_RS;2iNe?`!-6ATEi6*@;)lACv*BAGTD>Ga=9TX5nBY}c6en#-Mmu~< zrqh;=L6+<_FURHt6EkYfj!k1AJ0vlTwCAdD_H_W=u_ENaKx_d0XT&bGp2AgxQjdb} zl}_iX@ny%5_j|WLI{(SlDC|Mdg+NI?_3MbwFCi153(>bs9LE4h3PcB=Pa6-}paFMw zT=S4AM(H>d1ML5?pDWVeZ&sU&9&hOb`W_*^ppXzJRVfJm$+xf0-t}5NBN*<5W|}3L5L!{+O`g%LnV>ySk~t&L@8uy+opx>D;@bn 
zYnH+@#uCPUFoNInzy%w%6{(4tmm2qV@E4i9{!&@+hAVEpi-__Vv6<9fOB1J@S((5= zJ}fzfL@zo_>_gX*F7tQHs%fm(5Nm<%EY?!1AG7xn89v4}tl<@z^-f4?IKB@a(jEw} zcjd`&PBzs&YpIbno9qCEPmT99LT9r-g+7zz%H9aRELqp+n!ZG$BH<*M!vMK~WrVZ! zU?)X^poyj;q$aSdhPA-Qx;N;9gMOJlgEn?$^8=v@%5CHXgOcZUO^NPutbG}hdsiHO zuCIIukxJW0p+iODy6mM>4+*DuPnf(MN=SC5@3s-5<}6v15)3)Q!-boHs;+Xgh@IAb za^UR=C;s5g{rN8MmOoq%MXBC=JERbH%M4rf-t4GwTX9@EFNLcPRHZ_`+XNic%57&Y zHFmm>Qw{=%uB@molm!_|4u#ujmd<`9V8O)0(<^TMtx=WkSjLLP?DdzyxVQ@!9@bo# zSPuBoxu_{VxdyXFOpLJqc*d)ygjHohv>uyU$wpS7)bWLrc)vDXD$tD5!16ctfBRMg ze7eNsWGCN_y;XE_a@2a-PUQV#88>b6nCF-I`FTt2ZI6%TuSc+?cO<}(_S037E4u#w2kkaj5gZ-9EYIme8ve6;#y#1a@#f{&`{%;k^O*b4fpbXOSnJi@& z)wk~*ohBDM*+N?VVqeif4?7!#oOR$_Tu)WTBUV0xN-$w%b7ddVEjq~JTzj=0gYHQL zHx>$U?RHK+V>*plL_6I)JalUOW=WT26K|flq63paJU{?m7!}Z=0|U5KO?Xnf)U*an zRiGvx;%hX5hyXR+hYVYvId`jBvBWx{KhGgVLCnD`X5Dum(oD}b+)nr-(%nQ*bFb$0 zFz2Y-(E1zTDII(bXmFC)QK;6>b-F*_@`H*QI|3E%F0dq=s-LAMcs=La_M>Fyc2we# zUAKvZ1oXfqXc+`W>w-)56WH69#4yz z$KS<7>R8A=9m3s9BbX6qIKG%~!F6oES@SRYrea^+bVI;k@u^3c67Ph9Xr zyNHBf+yk?Z_uD<^w|pSLNKgqcVTxo=30g@vCpDtEaz<;ga8mLrA_K#A=M_={kyjA4 zBq};gFK zGckjPI@#`dUg)ZYRY)^T9G*js!jhPEGt`EfHHWf>(ov7+Y1P4tRG@hO;JuM0<(VVk zT{CAdrsoG*G z4p?D+UhFNM3*&5p8lTo+xNK0Tt%P%uyR)|8eNi>IjdK+0FnI~&Un8h;?C{2K-<3Xj zQp@_relm;FFdB6X9Ljpo)TA*#?J8yKUIQ5%dfyeb4NwE@q5wTUTRxfBS*;qS@*AglJ=kw zBV4Gfyu%xJg=_L?mS_xkWq!kdH$Zs5TV67&g5z&e$2&#nt%EBSx9H9F&6nQ3l+LjF zBgsxU34OB<)X{;W@odHjR}Rjs;>vvOL~U)V;ax@33&`~r8EE=Fn&|g1pBJXjbUIN= zy!!;(avx$ZNmL8yx*z2fQw+v+-p;;8*k9wOZ=g_JG}!$8XYVwAq3bsMa)pgE;LprY`X-Sr73zU~Sph!nB*VY!L4|k!{a>#wWZCl% zef0LWS3W`h=%jzn4v|9w4!V!`vLy!4^c+$(`8j%AXQr|JlyxTx&dY0)#L}dQaIlA| zR=~h0&iNVwn{$Ji8*I!fW^4O!30P7@`Ej35<4#^*<@EIQL{6IpZ-bPa%qwj}5c?^B z%gWg2u6I6@pk^1PF0Kgn*cWaSLoYzzGCs>9W6@Y=!> zeo{;gJ;oi`c=QR{xM%78eaa$^v*%ZteVR{QJw@(I%)WEi)F3xbzR2U@2_p8J-Vm}Q^ z)P2jOb{n zBI;D(-69_>U(=%6P9YO(J6!z06^ARElox_ps@csgylhH12HZjB%DT?rxH9=nu?})N zr6D+(g3Urey$pO4{%h|Hd{4W#`{Ic(Mu9ykolUDFXQ5Otv}~LT%@%3PW;}@LB{xjA zj!4^WtuB6i8sWX(JioPVGZer>e)Fh~?|UdWyq)R@VmL_tC`OW~75c-Qch$-(5h^&Y z90RVlZ2-`j-!%wX*2msoPaO<5oVKjoyJ_D{sWKIU`!Ae$JiDRL|pn!cTK9Z^Ek|=Wd7G0t_L7A%O<9 zkx*AOuy>`i#fjXF6q%>hd8ON6fCm1DIf_&(lCdI99Q4&4BA_dvHl>yu)CXhUG@q#n zcdXg_8gc(zrKqRLzq-C8=O@s2d2uJ-aZT>cPiwF?m;2ufvD#d4;qij^+wvI6ygMaW ztB9w*p~`u8FP!t-U6iDCK|5!3=L4X6@6O@YsWl6FWqAGo#a*Vg^9zBz*Hz&3?;)o z0j30sWOuD3lGJRR!?iG?`7QOy9>CkGN6w--adN4Xz7@+6qpH{P`5P@co_JTN!uEwO0ZABjbp-i5?4WakWN`-1wtt8cm;%QOgH$!0jRAR)U{Yl zdmM(^+IaKrdm@4$4xp)1@z2>LSsVZRb7A_ogD!1-azD=M*?@Df#97+Tot7TN;BOcF z>9orv8JknU1Ur2&oPcK1WM|c2th1K@D%11UMce=)jigvv=Pm`7PV8x`y*}~A|1l-) zn_k&9SSpSaBrDfSAsRE=N3lm&_#Q$oD&42*7t`e1v_CmrMykf;v4* zVhoHJe>8_1RY%Eo0|y_!WWuGnCxOjn5Ag#N-E8HGA8ACDS?|>3hm~2Ru+R7)6};(@ zAkS&tbXV;gdom5La!a1@@<>qnA-g67N}}{6gt-Uh`KT)NutPvrCDw`%)oVN6bARqU z-jgX2;Mvrat1ztP@#W3V#IO6`$}dmw1xB2q(#Lq5QW$P@h27Ss^J#k@+l^PEqob35 zhA)3me5KF!0m5oM+4Xvr!*{eKZF*8x^J?R&@w1XFc>o2$nm`hsW~-E;B5g=Ml~J- z!yHHWy&^rSZAxJ+1|J%&RI{EvyYCVa7I*%^liX#Uyc6Ct;irF7;6XlA0CA6RW)&(v zbM!+-SbAyG?{8%7ZUtR`xnmscK3=G6iuOR$)9;h0qa->x*oH)P$NS{;aUnY|*B^Z3 zE)xoTvwylky>71jK7TfpSRX10*=bEWmBYV`0K)cLk=nJmo8G24(&`XPu37=#q7lkw zx|wiL<_TBVIG+425Ne0g6)YRm9}#-d_~K`x0&ug(5@ty1`*1-(KYi0f;RlZ&#=*^Z zy(%ju;hV&!6E_s6N_pzeWF7zT6RRw$b2SHnd6lSXJ{+IG8FGG@mdCF(RD zZrZ876(+U^CiFAWQN0fSX~RI*iDH1cQXuae>l@M0(iaylv zoF4;MNMKc8LIBob6JICVF!u`aF=>pL|r5!r78 z{JtIUg88k*Xs=$@tNni@Il=5oXUENNXuIhg7>xc{ko(!qs{8HBpa6z%VBOKly$2Aoq4CV zp)*21U1sY=nIaOJ-z0jrkqL)GO@U`y9V$Xr&3^mB%QZAB*iUt<&kMTxk{y%B-}mnK z2SKCayM@2KERx)Cug>FUM;_f+@J7#8P2Cyw`x%zP{&(AdEDHEvT!ST$%!)RgRj+SN zbq)KS-kcO|!*Xrv1*D2Rc0GK$2Nruw6_u2T 
zUK{3qh1`>3?p7NMXj5MRs~%s1@<^9I({YP~%ubo@T_I4yvekv5j+k>bxS zy}$~6^XE%)d1+i72uc<*1xKKu#|}zZLrxduT_ZNzm_b?V@fmi)2$JhVpF-OYah=Ot zfDuMtB42Bcg&l6w&R}-_{9$#{W8YbW4~JBYzKpW$kS>TyvIL1fhyk}FlTPdO6mH{{ z6HVr2_XoB#vC!QM6xiqii)yLLOyF=IHI}7*xco}4%S6+S#l@aq#dg_| z+TYj+b9)s~#cWJ%4db9^R~57B^x&IAy$f^8z`>cGv{n`7@rrQn3I7M93QdE!$_lDm z;PyA1e$iv<%I|;i`2*UTk)-xG=&w{iMmoA_?C;=$?N_*^fqEu=tHs{Y$%Nm0RkpN> zx9-v6O}N$edj513dM@$t6yiGsdFt9i4-N1ng9e}5PoWt>jVuZFTV65Ox|DkX+`+(& zn=lHdOUk(lLlIm%AiHZkuefB+cYF4duXwIOtb5r>UX4gcyUK)%=2C%R(nq&3^a6nD zheYlQ=|TX7UW!?-Y!;)d%Q_Gl=muOD;tFXaUz>Y=fU7fu6JKbFYmc8&>Ex?61MXMM z8XNe7z{usgh0Y^Ur4z=zVT8NCS$$Da7kAQ*Cf$F{&KucR9wThAv3Xl?>W<%Jhf;En zd)R&Uh92dfT&SOQkLA@tN-f-;J~ma%f+uqI9(VVtk1xM`;Jr^=Gbz4btov8jE~KLE zhHW}ieGkAHHjH;8regoRS;ovU_e;~F1N9dUow|R_sv$Eh;@qy+JsJl;4?{f;tRAk4 z9^@cKvr@i)D!I>pQ>X==h$j>splv`|5`qD>!pGWJ$7*~e)h9xK_vDed(U+t07kAQC z$7V1vG<_5^CdT&RJtB(pp;Cqj--b948wa7|Z^x2kqO;R{ZQi5Qz^@&v1s_v$FBGG0 zoo9;IxuT`CaFV?4;hV>P(^Ty+sXr(3tr1diU&`}*uH0KqBv+2g->9^aH%E!+%YIkI z834=W;II3`ppM_3RedPCs-{foN7J)twVR6{_&A3luW{{IUztviUHOe1ogTCr*QqkZ zJ^CE_V)*@Jq6G}^83?=HMSk`T<{<`YW@FhB1;FveKXXjL1@;|q0(BRiM$nI*QlS>g z_8(S)x(ij9V(DkQbLo_{Y5CNTH;@gXUe_Ls$hNeH3BHMoWKFc|C5G^!EOIdUD= zgUHXBC=IZOfn2gKBep}^0GUc~y+B`>!Z1#Hja#JD_0H;$kEm?~hMUVAa5LE|V1|TB z=9YS%CsGQGbE}Iibd;LOcLSEnLaYI%sTqzD;qMBnq3q>mEKuC{CUF2&-^x);V zOke?O{2=P$V<$RudenjjqRCQ(K&YOzB&zcjeoNSETu|$&r&1+z(_o?-jkNI9<||)v z34F%E5Dq~K`~uPv2l!GwIwKmLy&Am8V+4R@z{rskHkL)8b2r=9WYAP7rj%ekP@mxA zI2bS#eaFD?=f{D{fTRy@><6d(n@0sq=AeJ%pcgsNnosibP)fuBbc1`lJGk@nHPm+Q zE+FnI_Yg}BJyz9$u5Ii>DE_M~>G!W>=&?p)!@BWBQ0+!ox601bjW5o2Y{%3JhdFd6!T;sKV$ zA_H)pQOIP)Q>{+2#=*CU&0*uHd`4!xt+SKK*tBh??V#`Rsh+s-Sf8n%d&XVYBTOo1 zikAy@s|12hfDogRS~U)z$?aRxVP$u$I!%$Uwe3Cyy1D^cYcQ$$|JB*MM?=-V|Kn4U zB26elOhqIiIrTVArAQK`a*C-ue#>d>7?%uf~9SLcGb(7u%-gMkdi7*1_rjzi# zL}a&X6G2I2D$c}LZbfp1J|pNyyp(at%GMWt85ON}MjIOmYm1FtLLk8sM()!jYklf2 z(vuxM6CP;=`P2TF(w}mMoz#8Cb}-WGz?-M?#_1=pd>(Fjr*L z0##O%rQwv)n`05?gPwJae&Z_SaLBccpciLZXFZ1UhGT!mRJWyHI3NKj;$RRIhs#hI z6R_PI#d$z9XoO^=(z$$>vDl~Fl9;;HNcPRK60Arak%4Xxu1NShuX}Kn$fK5A!do9Sa*7RqqQ*da%M{si8 zlqV9-wX)$*o4taSGDLt8^dfhNxO*^YC1gsV^=?PYZ$*@rqBU_ zI|UCh*8tWaBSl?DoLkNwY6oiu>3&W=0RtA|q81HK8gr%UiLl%eJ8o68-u_l17cVt6 z%jwXzeQmCtbdsm5dh^HDy>5YW!%WQ{t>>GoR`+<60Qo+B5ef&{$Rh57dGrD8$0BdI z^RCDrF622yqg#+4<6I(D2N zz19Jct-^Hf+Hwk2R^Xa^MH_!Wh#mmW2Qq1hu&kW7vLSf~<_3w31+RmpJ+Y_oS*o5iEc#|qktgHOy3wewt)EnS3{ zz!P;2bsDBIRoGFP2jKk3V1_S2QnWd`!Fngks;VO@K>^5x#UBo>LSq*XJ$mMeLwDJR zE!clq8ydeBQ0ccqpbBAnT4NC(LAUrVWi79xyMIr;H}D z)RAzGW`D3~Z9dtZui@>uC4Ip(%tcS!^6<(l)tD2+?ajYdb)jU4lL4R~yhw?LN~iN7 zo-30W1;MAh!nlB`sU)&2MIIGSF%(2HFyz@i@2Dbdet?);85}Qz8Mn_r(m&ipx%$9v zr^cAS&poj3=idKy5@i5f$?2zXNu@b@zZhq;SWJ zffn`^s%M5n-t1_9c=Bpw;0JUH&~q8l1fczehJxF<N zK1s(UykxsYlFTpdRPy#n9bH;il}H}qf_n`SL|2M4@WHaKs0^_f?IXNqC)D-x@JeW> zdH(csxVPTC<*B8)<3b$B6$@Lk4B?rH>fU*1ec;PilNNdkr1`6@8Zzk-)Qdr-S@T^0+wCT2XXE{Azv{(QkRnWyFCrX@liS#tPtZkpx^pxT`aZ z1FJjlRt(Sha69V(RgMrp&e)aV{r5{&3L`x4Smk0w%s_UiO0();Qu!cBcr-^4$xU&> zM2gY89}j`*mc_0Bn##D=*Fdq8Fi7@)j={_Z`8#t4Sr*A__%1;4_5(AQ~5nI7V&mXvqQ50 z0GY9KcDp3p^sN+*<7v@&$?M;L3nduL$mAom(-|XbNq;}!-V1VHBfFIn3vT_fd<*+E zd*(6b+{Gk8I=tnz_8)Fv0TOAn<(Sj)W5MQv#Xq&2&OjG7%Kt0r#C4{N)Ud{;Y}l5- zGUxH!I8oSm==F!*4qb`F%@@S^^qoK3HL_N|h_biBJPEu`0WJ}%z!CX?tw`AeD59>7 z3}mPFv~FG??$r{e)oBeWgqPk3vO)X)I^1`>JRQe>|9;eI9g8%`sbxo1$do8fG@gM! 
zknA>qN+N*ckSn}O>Aw})+UTX!#DFW8O{Z2(cnmhi7mF^6pXX9#)CjI=pY5N!sp3o* zM@Mq}e6mKq$c)==!T!t1hm7&C4VLA(+?R=R04^#@by)H#@b!M+Hbc>w1e}Q~Tys*8 zD|ejv{b_kc&m23-TM3D{nyY5y@yMW9Ke;uoSUW*a?~A{@+x-n4e}7V~aspQd)du(_ zJJ}mJitC!MdA#@6;Wq=u5eBDdc73jwp5{6he6qW#pGp3EbH>O&%{%LV|J9{+`TjjS zN^935zV zd0vOKz+R)awpTBwUs3hZWls7+*V+Addz!aDu^_noeg+<(O&}h2!6G|cTd7Gjl^L*v z?hz^>U_j>qNnZ!2D;`ccOTslV*5h9dG`7kk?*bhPwH_78JV1SilHXQQF*l7j(SlJxwo7O+rCfOvP z-E6#f=bxdw>Ngx-X&v(f+u}4lg-W}Ei3!(``(xOlg?cvZXfR9_S;8!xZ5{Q@Z;P*-oEVWqA z*dpBBhkxpZB32U`S|D29XT2|Ge$ZzoKGfV)ZruR7)qSAW0Nm}b1av*94xlPYJrWT=5XF!Wsn#(XGz`w?7p%c zDMi^G&;5#1AI&^-_7g9kr+^5;JZZfdV%mdMui1YwiK#9ter}2N&-sk@fKXtmA)#VR zD@17SKw*cBaWd6rtY%}$r@BmyqB@h)7(OtJ! zccX)10b6t$16kAH>$^)qgMg_F`3>bi+8SjLPka5L%+a`E=Y>Q(E4+cKS!TbD_2S9J zgsqspx30(%m%%c&2u$3QU~vBOHUlDIBYrBkb!JwGL5hh(QoMzjAnTnX72v+=kBbGo z+vC7-JlZy6un`x4m=sM^$5f?c_mbV8Mx=f7k9pNgpV9gAp{D18t@K~TbNIp%79L&m z8w^tMfXAoji%ElmixtSA!89J$^#HHOO0<#AU+Fh0!4Hd@d#vF|YoM&+e~kR~X~LsP zH|&{Pl5RwR)AaXCY3^SWQlur8v&6Yhu8m+lA`52BtOHMWiqf1)`9v#QNW}Zn5*eHD zf%bJ8qofdL_%St>Ec5*ABpWMo*Dnf$AB;6Fu(GHjPyC*FdToENX5{>rr$=)@*$ewU zzS6052!KvPMj!M)4>oGm|9ly_?M=pL&sxw^a|jP7Za1vZF_ zP@3|A4Z1P_&5piNMS$Z(dtuQHg{j5z8xWj|%r=`9hhjGaFz<2y-K%z2Uj}B}O;<@4 zwX2jo(t6jgaq93|-~N9!C(F~sGVSuEG}KZe?3_RvRJ>y28F>F@n~hgjTl{@k#zp^egfK=D~-@Akl}t=7M`_)smlZ-GXaI{9R~bWeqem{(+o2q_ki_pmR;EEdx7DQ`t?-DVN+2Ru6 z1fx|X6ktOCGw4i~Wd&f4bNJm$n4`cP9b7K&|8u!a5mbaxwG$E+dLjNP%p}l%m{Oto zd-$zTE=Fs4TgfI=XzMNI^4gw*Bl!87fnc!BTh*<5MUsIPChXYJ?Ynxw%WG(l zK5uQ3xowTRemR^Yq=8$8?F|t9^l1QVNE*RnTI9U{b#@f3B8{3e>_y7tKoJCKGvI>&-PB<>LWp*A{){BQOpqbYqu@!yHa(Vl z+2q~GTA>W0-7;#0MB|&V$^q#(uwJOVazIu2v&YX{rN_7cDzmi6ouU@F4b^^L%yH4A z2Kq)mC;kb!vnv3Rp&c|N47syI)JB?|C4nE(kfk85#$WVu80(Cs>?Lb<6win!5mWP9PUNmNx_@(r%dZ;2oRa~pJrKO|Q^f~!Z3Dtqg;lI%|Lt@EEJMOb)O zZU`uKXVC*MUot&;=mBeQWIp_P33Dc3%|1pvBnM!xbf)!AFd_!u;;oECz(S@usHNiR zgfYVAsas)jmU`UBGRGo6vnQ*+y?xQPskU5hds^7T`g+mMRS00cMS+*k^rcuqOt(61 za3;WxY-GnJ>R5yWSE)&Sa;(9651$sxsfA~ou&k)Iu?>@prkMqwZFj(74O1j7PljCJ zV3ca@!LIyA0!MSdp!ruh)n$vfz=bU#rWR#0xVm25isY4A-Il(>!(_Wx-3Hd?B9B+S z@;z+l>R!FJ9_uZnmCC0a&U5u7b2F^=ez>nfXts7NuQfro1xy$N-Pt z(G9;Jbw6~;_igv=+;CdPs7S{mm@Fd%;?ZtKrQLK<2#(1fj;_zhpK;>MN=^16W~ce< zU+bZYXn)P+khPo;S#5rd>Zct0}k=*0&Oh9_quI}U3gF-Nrrdk zK7X&Qil7aGyDpGcew0+!nzg&e2|H!IIn}1p^Rd~X*Od=b3RFz(OVsU^&Yf=AvaPMn9faDr z`QHHT|HOuG5W&o=q~m?p2}4Ru%06+i=yFN8uP^V><#}8*W94*kk;$gs48d4SopFVi zuY0A`$22qeV|4h&Hk{HL&4f=X(Z0X5LqT6n`WL9|R482=M1OeMofx!K;EtRK4eylp zJCnhVgnS4?@B3rkc)7Ip3>&vUpW3iNyO%c*Y=VgPKXBLPH@(>9(|oOradED{9UjZmSB1DDyTRBlf-eS{S{-Z4C}K| zo6oZYS9L8?L!glG2+hHh5}(DF{?DT||7Q;k{&x>aV+=DeA^-|Ur9w=3#svnV6Pi<} ziBP#o>=+uZ==yMk!9j73wZiasmQz#ww4Q;YKo;9lc`Lbm(Kqq>v-i%QrqWW&kA55o z@o=iVxcYGg{!YnoAu9V3Tr!2ys#i2g*@9ey0*O(3))rHnutQTvY6mMUBCJ0Tmu9vy z%0|nsUgfF&aUjiZ=ovo#g6^t6NHTVEGA_%&3|CAc#N3huBrlx8-O-}pQ+m;ts+)jvs(n>Ym{Nt zIm8e+fru$|CAIW6;$E}s3F_o4eD^+CdNr+qxT+-aLJzTc_ZNEBSTjR0_os1?9!ss$ zvU)Z_?{QUeSoymxn|PJ@5&(R)id39<$;SbE7v@ox`!b^NWm1iJtVmUyg(`x^ z{XzrqrQcBx+|xLEt6Liv)m{#!R*XJ{j1>cluHFnaqA!G1>1qCY-?96(MX9Ey!2JI> zw;sq<|2yw~)l%30tRC<^iLJPj5~w~AgUkv5A2$jD^C&B(PuPI=R69X}V&r98R#aRA zJZ2YVXA?mcSpFo@x7X5Z-HCu3ANJ#n%>qbM`*CZ@M$73TgOkFL4i$DPOaOvLTr@rG z<{yX4dMQBG#ZzhlPSaO3JGKvf2jds=3j?aXa`2|TTh2soI_2{4ko5X#aRLlQ0qxcj z=HP3%^|qxpmmx_CJf5(vS7gPt!aBe|5`3h$9!eZ^=;p;rIAbsp6Z=#0mUX!Qy|MrM JAK0a#{|B?hQvm<~ literal 0 HcmV?d00001 diff --git a/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg b/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..d35e8c724f435713b4af7c06e7a5fc38bc898ec5 GIT binary patch literal 3356 
zcmbW3c{J4BAIHD5*~Tz~#`ctTA|6XwV;*CP42e{-dytsKC)w0rx$O7sDFA61(1l3e+$E+liQNI9csSEpKtQqp7zKi$KJ1PbHg=Hcaoa~v9w02l;;fT0i=YZ5`didL~Cq&5oMmEl!>~ZFk1r!O`=q z*Ew$=!ui0U%U6O!La)YLkBy5@NKDGix|w||C--(Sj4UJ8Yn_HfB zbwBTU(fjh%;Lz~MDCPYal{P&y`|0!C{K6vR>)N;P>l@6?tz9n8e*T5UY5yYo4=xmk z3k-!qpxnD$AaL+5I0_1rImjhsbb{OcqOk0tC>{~xjQh2nymA`0^gSMz-tnR3HE9^e zF4|AB{|+qr|04SZ_7B%MU;;qE;N8kGD5t`Bc8>+d$IZ>f!_UXh59fo!5duO;gaAqa z4o9MqC}EL3d-m`Pii)8{#Dqllh_E?HKoCw06vhjK@roee2$BEU*b|&<31d?Meh7$j znII?t2W;CEe=8|oF=&t`IkOD6n@KV$8r{^!rs-;HlOdHbmoldBu|byLgE13q^m~oa ztPdYIJG?G#Ab%_W4z6v3hq*J;epoNI+-*VB=bg)W5T5Jin~!0bm6jig3*D!i?KgET zq!^PPr(|64@?{T|u9iaIE~T|6cu?(e&#i?Hj8pCeFSxmo`X}oR;u%M2@rkW&Gs`B= zNZcbq-PVF`U(k73j*s3eE01e9xOYdLyZu)CbQ8K0lu!XqOJ>0tb6u{|ruA7JS94-B zsQT3rOL|FF{HZr&tMEZ<=GM6-+Gn*TG3q)O-Gtf>gqbqsYWf1U)Dl}wi=3#K*KLaQ z2jm)p-3cz0Y#&vLY0OzZUV?hNIyBXnvLitnBLR$@B&;RP4MCvpFDedk9k%kDp4{iV zzHaw5O|!bsM50K0BH^sF=hXs!PZ;m}L7dOzbtxg14#Q)w>YQ~<7xj;l{_M%b`p?lX z>{{%u-*S=+NcKv3xF{u)Dj+g-5ct|2H`%z_jnn85-NN0_QbYPUQc|kIFy(}N-JuJ! zEirHGXAc<#dVPx4HsJcEaPj4oN4rfW{n1o_GEFeHqig*l8-SM{85dao=(wGVsLW@I ziLWM8R_>8Uvp_zDN$EKDTh0M%)}I2=ET@<2urW=JSrH z!JU22s56y5*_S8qWW7S7vwI|U54(a+KHl?C`eA)b)>KYBNkK3r-E|;zTHn(9ZT`^l zYYI`{MPl#6{K8&uULajq7Kb*zd%Rsc?G}xLV9zdYQKDOUEA-CFeOOpe?aP0x+xv#w z+G4Xg;{0k%Z9n?4-(^1vVqbnsB;93hRqD+&GAyk%xG-us6Bw{qBgVH0(!n9^GX}LY z;8!r6dB*k(F``xxpVOqz|sZ&uYGlDz|`TF#c zo!BblwS|q^rF2teQ(2_ZnN(Amq6~YkGq&OmMo=Uoqa)L|BN$eQ}8VF<7yQNEy2ke5>7P2otL+8!~Go@2_2VXjc83+*5$$*vS}N zCJFlB1Ro5O(2mSY1KrvigCW^ihx4?8q;1tidu-M1T;NE9l7%h5zPjkS&Me5 z!C#O=KWs0ZduoO9Fg;?ERK!>CCush_2mS$gh^H0ZJHCEf*t`*) z=a+0O=D}|tssBjG^59`iqI@P^K5wLmj>{H_RQ$PRAok!kY*f?ARlK37G~ztb@(b!* zf1vXHg!*%rxOUF0&M}^wjk?Mh$W8<_4fw8)*Y)6Z`VQWcdqssX29J412A}7~!#Yw3TheP_45lsRPWz~|8C2TpH7*1I~j#$OW+mdxR+q_CVu zw8+u-L|Do*l@q?UPncfe(t`~@@T+%?3O|@%B$mNe;fGP?RaYK-OZVg3G`w0eERn;K zr{3JJJlD{iKk9=?8Ms+nwmsjAQ&YOs{`|2gb_QZS#IokCYkacvt|j{L?@6w=6Ee;7 zjvgw0#j{x?W;@wkVr@fcgAVy)1FM+v&xw`i8O-1l{SVT-_7%x?zpXL(SSF_AMQo0V zl8{q{H-ru>tv#Kg}Nn3rsccx4eG~9NNJTbp@H`DR_l_rAw1(o|vEjQD2Uf(o7sd6{_eZYrE z!^5&I2|@Z#a1!GQ-fenvPT~{t*%Nj_GADx3Z^T^^Pj)Jnewl3-t?}kO<7(HbD2Cmm zLT2!pha++KMXpww{I*Q|jSKwdxCwaWk{EiR|L#Q!p$+zkVURMT93jYDa|JUp-~3@V zX`4x?A1^DCuD3nx<^H4FBUjPlZcJ|h_hO@~lAreJ*vo3m$DFqtO*2}{HKf0W-9@Dy zOnLk)4)L<4atEbi_>~AK)D7rLMNYa3PT>-^Hg1rlaL=|a*4K_P``pX4Gb*s6gW~R| zH{aAGkL~|xZqb``Dc`e7;Xb*r8mj^;DOQK~TM!TI!M<$Zy*&vL_sqwy5J2pqE${W% zaJ8A7X9(Lq8)o?Q5oXDSCOqxd!@(7mm1S}!9z8k@r$3QNOp?nXi1-n6`KO=Ct*5?g zcxt(Rlu;W0sp&BVd%BEi7p~>;uqiZe@%wTT>%LLX+)4m<8QIuvQzD4GH8t5^8GMEf zq=mG9DcGrx?4_qx!kW64m6oSl;v}!}jGE^Sp3b(Aw%cbnu?h7A0;#PE>qN%$$u8^2 zEmBIv)PSfnGjsHdZAMT?5dIdv>EnRBp~Vk(m&INaY!cUMZVe(FAIXJ? z$Zy%4-~HxX*QCJrf%uQ`_EFYpKdl$ZEUy_~obY0!9#+V;tF+L{zYrF5tgZ{HoFnc3 zXE#3*c8@AWLqGMNz7vtqF0eF{x9XH-Js9RzA~wJ}run_uWG|Hrb$0Ui%x63*hsM5m z>&ONsZtQTgDkdnb7|EUHv<1U`yo`zoe;fgKeJl23O!;}LK$>S&irMtu{Z3Cp_JvF# zxu~U>#y0Jk Date: Tue, 1 Aug 2017 02:15:05 -0700 Subject: [PATCH 60/86] [MRG+1] Added examples to docstrings of ElasticNet and ElasticNetCV (#9383) --- sklearn/linear_model/coordinate_descent.py | 39 ++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index a1a034cb9eb72..e03aece7f2762 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -614,6 +614,25 @@ class ElasticNet(LinearModel, RegressorMixin): number of iterations run by the coordinate descent solver to reach the specified tolerance. 
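The doctest added in the hunk below also runs as a standalone script; a
minimal sketch, not part of the original patch, assuming the 0.19-era
ElasticNet defaults shown in the doctest repr (expected values copied from
its output):

    from sklearn.linear_model import ElasticNet
    from sklearn.datasets import make_regression

    # same data and estimator settings as the doctest below
    X, y = make_regression(n_features=2, random_state=0)
    regr = ElasticNet(random_state=0).fit(X, y)
    print(regr.coef_)              # approx. [ 18.838  64.560]
    print(regr.intercept_)         # approx. 1.451
    print(regr.predict([[0, 0]]))  # approx. [ 1.451]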
+ Examples + -------- + >>> from sklearn.linear_model import ElasticNet + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=2, random_state=0) + >>> regr = ElasticNet(random_state=0) + >>> regr.fit(X, y) + ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5, + max_iter=1000, normalize=False, positive=False, precompute=False, + random_state=0, selection='cyclic', tol=0.0001, warm_start=False) + >>> print(regr.coef_) # doctest: +ELLIPSIS + [ 18.83816048 64.55968825] + >>> print(regr.intercept_) # doctest: +ELLIPSIS + 1.45126075617 + >>> print(regr.predict([[0, 0]])) # doctest: +ELLIPSIS + [ 1.45126076] + + Notes ----- To avoid unnecessary memory duplication the X argument of the fit method @@ -1486,6 +1505,26 @@ class ElasticNetCV(LinearModelCV, RegressorMixin): number of iterations run by the coordinate descent solver to reach the specified tolerance for the optimal alpha. + Examples + -------- + >>> from sklearn.linear_model import ElasticNetCV + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=2, random_state=0) + >>> regr = ElasticNetCV(cv=5, random_state=0) + >>> regr.fit(X, y) + ElasticNetCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=True, + l1_ratio=0.5, max_iter=1000, n_alphas=100, n_jobs=1, + normalize=False, positive=False, precompute='auto', random_state=0, + selection='cyclic', tol=0.0001, verbose=0) + >>> print(regr.alpha_) # doctest: +ELLIPSIS + 0.19947279427 + >>> print(regr.intercept_) # doctest: +ELLIPSIS + 0.398882965428 + >>> print(regr.predict([[0, 0]])) # doctest: +ELLIPSIS + [ 0.39888297] + + Notes ----- For an example, see From d012f05145191a616efa54a8a2796ca776c102d9 Mon Sep 17 00:00:00 2001 From: Vathsala Achar Date: Tue, 1 Aug 2017 20:11:48 +0100 Subject: [PATCH 61/86] [MRG+1] DOC Simplifying margin plotting in SVM examples (#8501) (#8875) * Simplifying margin plotting in SVM examples (#8501) * updated to use contour levels on decision function * separating unbalanced class now uses a red line to show the change in the decision boundary when the classes are weighted * corrected the target variable from Y to y * DOC Updates to SVM examples * Fixing flake8 issues * Altered make_blobs to move clusters to corners and be more compact * Reverted changes converting Y to y * Fixes for flake8 errors --- examples/svm/plot_separating_hyperplane.py | 54 +++++++++---------- .../plot_separating_hyperplane_unbalanced.py | 42 +++++++++------ 2 files changed, 51 insertions(+), 45 deletions(-) diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index ff6f3fc8f31ad..fafadb2d381d0 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -12,37 +12,33 @@ import numpy as np import matplotlib.pyplot as plt from sklearn import svm +from sklearn.datasets import make_blobs + # we create 40 separable points -np.random.seed(0) -X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]] -Y = [0] * 20 + [1] * 20 +X, y = make_blobs(n_samples=40, centers=2, random_state=12, cluster_std=0.35) # fit the model clf = svm.SVC(kernel='linear') -clf.fit(X, Y) - -# get the separating hyperplane -w = clf.coef_[0] -a = -w[0] / w[1] -xx = np.linspace(-5, 5) -yy = a * xx - (clf.intercept_[0]) / w[1] - -# plot the parallels to the separating hyperplane that pass through the -# support vectors -b = clf.support_vectors_[0] -yy_down = a * xx + (b[1] - a * b[0]) -b = 
clf.support_vectors_[-1] -yy_up = a * xx + (b[1] - a * b[0]) - -# plot the line, the points, and the nearest vectors to the plane -plt.plot(xx, yy, 'k-') -plt.plot(xx, yy_down, 'k--') -plt.plot(xx, yy_up, 'k--') - -plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], - s=80, facecolors='none') -plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired) - -plt.axis('tight') -plt.show() +clf.fit(X, y) + +plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) + +# plot the decision function +ax = plt.gca() +xlim = ax.get_xlim() +ylim = ax.get_ylim() + +# create grid to evaluate model +xx = np.linspace(xlim[0], xlim[1], 30) +yy = np.linspace(ylim[0], ylim[1], 30) +YY, XX = np.meshgrid(yy, xx) +xy = np.vstack([XX.ravel(), YY.ravel()]).T +Z = clf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins +ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, + linestyles=['--', '-', '--']) +# plot support vectors +ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, + linewidth=1, facecolors='none') diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index 438291dc5538d..cf3130a6ae5c5 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -29,7 +29,6 @@ import numpy as np import matplotlib.pyplot as plt from sklearn import svm -#from sklearn.linear_model import SGDClassifier # we create 40 separable points rng = np.random.RandomState(0) @@ -43,25 +42,36 @@ clf = svm.SVC(kernel='linear', C=1.0) clf.fit(X, y) -w = clf.coef_[0] -a = -w[0] / w[1] -xx = np.linspace(-5, 5) -yy = a * xx - clf.intercept_[0] / w[1] - - -# get the separating hyperplane using weighted classes +# fit the model and get the separating hyperplane using weighted classes wclf = svm.SVC(kernel='linear', class_weight={1: 10}) wclf.fit(X, y) -ww = wclf.coef_[0] -wa = -ww[0] / ww[1] -wyy = wa * xx - wclf.intercept_[0] / ww[1] - # plot separating hyperplanes and samples -h0 = plt.plot(xx, yy, 'k-', label='no weights') -h1 = plt.plot(xx, wyy, 'k--', label='with weights') plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') plt.legend() -plt.axis('tight') -plt.show() +# plot the decision functions for both classifiers +ax = plt.gca() +xlim = ax.get_xlim() +ylim = ax.get_ylim() + +# create grid to evaluate model +xx = np.linspace(xlim[0], xlim[1], 30) +yy = np.linspace(ylim[0], ylim[1], 30) +YY, XX = np.meshgrid(yy, xx) +xy = np.vstack([XX.ravel(), YY.ravel()]).T + +# get the separating hyperplane +Z = clf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins +a = ax.contour(XX, YY, Z, colors='k', levels=[0], alpha=0.5, linestyles=['-']) + +# get the separating hyperplane for weighted classes +Z = wclf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins for weighted classes +b = ax.contour(XX, YY, Z, colors='r', levels=[0], alpha=0.5, linestyles=['-']) + +plt.legend([a.collections[0], b.collections[0]], ["non weighted", "weighted"], + loc="upper right") From 2edb5cfc37ba3165f9987d45bc3e405d46cbb09c Mon Sep 17 00:00:00 2001 From: JC Liu Date: Wed, 2 Aug 2017 04:42:15 +0800 Subject: [PATCH 62/86] [MRG+1] Issue#7998 : Consistent parameters between QDA and LDA (#8130) * for #7998 * Fix some style error and add test * Add local variable store_covariance * better deprecation * fix bug * Style check * fix covariance_ * style check * Update * modify test * 
Formating * update * Update * Add whats_new.rst * Revert "Add whats_new.rst" This reverts commit 4e5977d5cdb20fca7ed683e2bf093037cba75005. * whats_new * Update for FutureWarning * Remove warning from the setter * add fit in test * drop back * Quick fix * Small fix * Fix * update new * Fix space * Fix docstring * fix style * Fix * fix assert --- doc/whats_new.rst | 7 +++ sklearn/discriminant_analysis.py | 41 +++++++++----- sklearn/tests/test_discriminant_analysis.py | 60 ++++++++++++++++++--- 3 files changed, 90 insertions(+), 18 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index b4b611a6f84ea..489b1040b37dc 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -796,6 +796,13 @@ Miscellaneous :mod:`utils` have been removed or deprecated accordingly. :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai ` +- The ``store_covariances`` and ``covariances_`` parameters of + :class:`discriminant_analysis.QuadraticDiscriminantAnalysis` + has been renamed to ``store_covariance`` and ``covariance_`` to be + consistent with the corresponding parameter names of the + :class:`discriminant_analysis.LinearDiscriminantAnalysis`. They will be + removed in version 0.21. :issue:`7998` by :user:`Jiacheng ` + Removed in 0.19: - ``utils.fixes.argpartition`` diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 8506d35a76c9a..e26ca771eb512 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -11,8 +11,8 @@ from __future__ import print_function import warnings - import numpy as np +from .utils import deprecated from scipy import linalg from .externals.six import string_types from .externals.six.moves import xrange @@ -170,7 +170,8 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, Number of components (< n_classes - 1) for dimensionality reduction. store_covariance : bool, optional - Additionally compute class covariance matrix (default False). + Additionally compute class covariance matrix (default False), used + only in 'svd' solver. .. versionadded:: 0.17 @@ -245,6 +246,7 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, >>> print(clf.predict([[-0.8, -1]])) [1] """ + def __init__(self, solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=1e-4): self.solver = solver @@ -554,7 +556,7 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Regularizes the covariance estimate as ``(1-reg_param)*Sigma + reg_param*np.eye(n_features)`` - store_covariances : boolean + store_covariance : boolean If True the covariance matrices are computed and stored in the `self.covariances_` attribute. @@ -567,7 +569,7 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Attributes ---------- - covariances_ : list of array-like, shape = [n_features, n_features] + covariance_ : list of array-like, shape = [n_features, n_features] Covariance matrices of each class. means_ : array-like, shape = [n_classes, n_features] @@ -597,7 +599,8 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): >>> clf.fit(X, y) ... 
# doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0, - store_covariances=False, tol=0.0001) + store_covariance=False, + store_covariances=None, tol=0.0001) >>> print(clf.predict([[-0.8, -1]])) [1] @@ -607,21 +610,30 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Discriminant Analysis """ - def __init__(self, priors=None, reg_param=0., store_covariances=False, - tol=1.0e-4): + def __init__(self, priors=None, reg_param=0., store_covariance=False, + tol=1.0e-4, store_covariances=None): self.priors = np.asarray(priors) if priors is not None else None self.reg_param = reg_param self.store_covariances = store_covariances + self.store_covariance = store_covariance self.tol = tol + @property + @deprecated("Attribute covariances_ was deprecated in version" + " 0.19 and will be removed in 0.21. Use " + "covariance_ instead") + def covariances_(self): + return self.covariance_ + def fit(self, X, y): """Fit the model according to the given training data and parameters. .. versionchanged:: 0.19 - *store_covariance* has been moved to main constructor. + ``store_covariances`` has been moved to main constructor as + ``store_covariance`` .. versionchanged:: 0.19 - *tol* has been moved to main constructor. + ``tol`` has been moved to main constructor. Parameters ---------- @@ -645,7 +657,12 @@ def fit(self, X, y): self.priors_ = self.priors cov = None + store_covariance = self.store_covariance or self.store_covariances if self.store_covariances: + warnings.warn("'store_covariances' was renamed to store_covariance" + " in version 0.19 and will be removed in 0.21.", + DeprecationWarning) + if store_covariance: cov = [] means = [] scalings = [] @@ -665,13 +682,13 @@ def fit(self, X, y): warnings.warn("Variables are collinear") S2 = (S ** 2) / (len(Xg) - 1) S2 = ((1 - self.reg_param) * S2) + self.reg_param - if self.store_covariances: + if self.store_covariance or store_covariance: # cov = V * (S^2 / (n-1)) * V.T cov.append(np.dot(S2 * Vt.T, Vt)) scalings.append(S2) rotations.append(Vt.T) - if self.store_covariances: - self.covariances_ = cov + if self.store_covariance or store_covariance: + self.covariance_ = cov self.means_ = np.asarray(means) self.scalings_ = scalings self.rotations_ = rotations diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index a7a878a73160e..8eb5da1908ba7 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -5,9 +5,11 @@ from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_true +from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns +from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings @@ -223,6 +225,38 @@ def test_lda_scaling(): 'using covariance: %s' % solver) +def test_lda_store_covariance(): + # Test for slover 'lsqr' and 'eigen' + # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers + for solver in ('lsqr', 'eigen'): + clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + # Test the actual attribute: + clf = LinearDiscriminantAnalysis(solver=solver, + store_covariance=True).fit(X6, y6) + 
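As added commentary, not part of the original patch: a minimal sketch of the
rename in use, assuming the toy data from the QDA docstring example above:

    import numpy as np
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    y = np.array([1, 1, 1, 2, 2, 2])

    # new spelling: one covariance matrix per class under covariance_
    clf = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X, y)
    print(clf.covariance_)

    # old spelling still fits in 0.19 but emits a DeprecationWarning
    QuadraticDiscriminantAnalysis(store_covariances=True).fit(X, y)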
assert_true(hasattr(clf, 'covariance_')) + + assert_array_almost_equal( + clf.covariance_, + np.array([[0.422222, 0.088889], [0.088889, 0.533333]]) + ) + + # Test for SVD slover, the default is to not set the covariances_ attribute + clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6) + assert_false(hasattr(clf, 'covariance_')) + + # Test the actual attribute: + clf = LinearDiscriminantAnalysis(solver=solver, + store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + assert_array_almost_equal( + clf.covariance_, + np.array([[0.422222, 0.088889], [0.088889, 0.533333]]) + ) + + def test_qda(): # QDA classification. # This checks that QDA implements fit and predict and returns @@ -262,26 +296,40 @@ def test_qda_priors(): assert_greater(n_pos2, n_pos) -def test_qda_store_covariances(): +def test_qda_store_covariance(): # The default is to not set the covariances_ attribute clf = QuadraticDiscriminantAnalysis().fit(X6, y6) - assert_true(not hasattr(clf, 'covariances_')) + assert_false(hasattr(clf, 'covariance_')) # Test the actual attribute: - clf = QuadraticDiscriminantAnalysis(store_covariances=True).fit(X6, y6) - assert_true(hasattr(clf, 'covariances_')) + clf = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) assert_array_almost_equal( - clf.covariances_[0], + clf.covariance_[0], np.array([[0.7, 0.45], [0.45, 0.7]]) ) assert_array_almost_equal( - clf.covariances_[1], + clf.covariance_[1], np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]]) ) +def test_qda_deprecation(): + # Test the deprecation + clf = QuadraticDiscriminantAnalysis(store_covariances=True) + assert_warns_message(DeprecationWarning, "'store_covariances' was renamed" + " to store_covariance in version 0.19 and will be " + "removed in 0.21.", clf.fit, X, y) + + # check that covariance_ (and covariances_ with warning) is stored + assert_warns_message(DeprecationWarning, "Attribute covariances_ was " + "deprecated in version 0.19 and will be removed " + "in 0.21. Use covariance_ instead", getattr, clf, + 'covariances_') + + def test_qda_regularization(): # the default is reg_param=0. and will cause issues # when there is a constant variable From e14e313a69fd03af555421cc764ce952749612ad Mon Sep 17 00:00:00 2001 From: Taehoon Lee Date: Wed, 2 Aug 2017 13:02:59 +0900 Subject: [PATCH 63/86] Fix typos (#9476) --- sklearn/ensemble/gradient_boosting.py | 2 +- sklearn/ensemble/tests/test_base.py | 2 +- sklearn/linear_model/tests/test_logistic.py | 4 ++-- sklearn/metrics/ranking.py | 2 +- sklearn/mixture/dpgmm.py | 2 +- sklearn/multioutput.py | 2 +- sklearn/utils/random.py | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index e725d2e6ebe81..a37377fe7bde8 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -448,7 +448,7 @@ class ClassificationLossFunction(six.with_metaclass(ABCMeta, LossFunction)): def _score_to_proba(self, score): """Template method to convert scores to probabilities. - the does not support probabilites raises AttributeError. + the does not support probabilities raises AttributeError. 
""" raise TypeError('%s does not support predict_proba' % type(self).__name__) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 65ea8b62a2927..f2a87d8fb559f 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -109,7 +109,7 @@ def make_steps(): assert_not_equal(est1.get_params()['sel__estimator__random_state'], est1.get_params()['clf__random_state']) - # ensure multiple random_state paramaters are invariant to get_params() + # ensure multiple random_state parameters are invariant to get_params() # iteration order class AlphaParamPipeline(Pipeline): diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 6a7f717946481..031520362a528 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -986,7 +986,7 @@ def test_logreg_predict_proba_multinomial(): X, y = make_classification(n_samples=10, n_features=20, random_state=0, n_classes=3, n_informative=10) - # Predicted probabilites using the true-entropy loss should give a + # Predicted probabilities using the true-entropy loss should give a # smaller loss than those using the ovr method. clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs") clf_multi.fit(X, y) @@ -996,7 +996,7 @@ def test_logreg_predict_proba_multinomial(): clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X)) assert_greater(clf_ovr_loss, clf_multi_loss) - # Predicted probabilites using the soft-max function should give a + # Predicted probabilities using the soft-max function should give a # smaller loss than those using the logistic function. clf_multi_loss = log_loss(y, clf_multi.predict_proba(X)) clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X)) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 2b54896781929..2003ed8b314c8 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -840,7 +840,7 @@ def ndcg_score(y_true, y_score, k=5): """ y_score, y_true = check_X_y(y_score, y_true) - # Make sure we use all the labels (max between the lenght and the higher + # Make sure we use all the labels (max between the length and the higher # number in the array) lb = LabelBinarizer() lb.fit(np.arange(max(np.max(y_true) + 1, len(y_true)))) diff --git a/sklearn/mixture/dpgmm.py b/sklearn/mixture/dpgmm.py index 75b0b88e9b4cf..c2fd42ab45842 100644 --- a/sklearn/mixture/dpgmm.py +++ b/sklearn/mixture/dpgmm.py @@ -47,7 +47,7 @@ def gammaln(x): @deprecated("The function log_normalize is deprecated in 0.18 and " "will be removed in 0.20.") def log_normalize(v, axis=0): - """Normalized probabilities from unnormalized log-probabilites""" + """Normalized probabilities from unnormalized log-probabilities""" v = np.rollaxis(v, axis) v = v.copy() v -= v.max(axis=0) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index a84a6ce36b218..d350b1bd6dc26 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -316,7 +316,7 @@ def __init__(self, estimator, n_jobs=1): def predict_proba(self, X): """Probability estimates. - Returns prediction probabilites for each class of each output. + Returns prediction probabilities for each class of each output. 
Parameters ---------- diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py index 93235f07b467e..044b8c70d8b71 100644 --- a/sklearn/utils/random.py +++ b/sklearn/utils/random.py @@ -184,7 +184,7 @@ def random_choice_csc(n_samples, classes, class_probability=None, random_state=random_state) indices.extend(ind_sample) - # Normalize probabilites for the nonzero elements + # Normalize probabilities for the nonzero elements classes_j_nonzero = classes[j] != 0 class_probability_nz = class_prob_j[classes_j_nonzero] class_probability_nz_norm = (class_probability_nz / From 81e359e4ef44ecf7b32b13222981ef3e7c2c153e Mon Sep 17 00:00:00 2001 From: Sri Krishna Date: Wed, 2 Aug 2017 10:48:52 +0530 Subject: [PATCH 64/86] DOC Update classification.py (#9478) fixes doc formatting. --- sklearn/metrics/classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 09aa4d87b8e21..395725c00d7d9 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -167,6 +167,7 @@ def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None): 2 In the multilabel case with binary label indicators: + >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2))) 0.5 """ From d54815c91b61659561b54778ec11456c247f5002 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 2 Aug 2017 15:47:40 +1000 Subject: [PATCH 65/86] PEP8 fix blank line contains whitespace --- sklearn/metrics/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 395725c00d7d9..be71d2eb84a20 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -167,7 +167,7 @@ def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None): 2 In the multilabel case with binary label indicators: - + >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2))) 0.5 """ From c815caf1ba95e3b36fea1ffb776205696eb17f60 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 6 Aug 2017 14:21:51 +1000 Subject: [PATCH 66/86] DOC Release date --- doc/whats_new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 489b1040b37dc..7ab815d28e3ed 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -1,6 +1,5 @@ .. currentmodule:: sklearn - =============== Release history =============== @@ -8,6 +7,8 @@ Release history Version 0.19 ============ +**August 7, 2017** + Highlights ---------- From 96f08570eda794d140c5afa15262a320717fbab0 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 6 Aug 2017 14:31:51 +1000 Subject: [PATCH 67/86] Update version --- sklearn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index e74466efd8a95..d29d5f81156c1 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -114,7 +114,7 @@ def config_context(**new_config): # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. 
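With the version bump below, downstream code can guard against the API
changes in this release with a version check; a minimal sketch, assuming only
the stdlib and an installed scikit-learn:

    import sklearn
    from distutils.version import LooseVersion

    if LooseVersion(sklearn.__version__) >= LooseVersion('0.19'):
        # e.g. rely on the renamed store_covariance parameter
        pass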
# 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = '0.19.dev0' +__version__ = '0.19.0' try: From dc9ab8028bd52c4492c371ddeef8ea225f5ebaa1 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 6 Aug 2017 21:28:53 +1000 Subject: [PATCH 68/86] DOC fix merge errors in what's new --- doc/whats_new.rst | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index cdc8ecf4fa0e4..0731a359ceb69 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -172,13 +172,11 @@ Trees and ensembles - :func:`tree.export_graphviz` now shows configurable number of decimal places. :issue:`8698` by :user:`Guillaume Lemaitre `. - - :func:`tree.export_graphviz` now shows configurable number of decimal - places. :issue:`8698` by :user:`Guillaume Lemaitre `. - - Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier` - to change output shape of `transform` method to 2 dimensional. - :issue:`7794` by :user:`Ibraim Ganiev ` and - :user:`Herilalaina Rakotoarison `. +- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier` + to change output shape of `transform` method to 2 dimensional. + :issue:`7794` by :user:`Ibraim Ganiev ` and + :user:`Herilalaina Rakotoarison `. Linear, kernelized and related models @@ -323,9 +321,6 @@ Model evaluation and meta-estimators - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` now support online learning using ``partial_fit``. :issue: `8053` by :user:`Peng Yu `. - - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` - now support online learning using ``partial_fit``. - :issue:`8053` by :user:`Peng Yu `. - Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit` :issue:`8282` by :user:`Aman Dalmia `. @@ -4372,7 +4367,6 @@ Highlights by `Andreas Müller`_. - Randomized sparse linear models for feature -- :ref:`randomized_l1`: Randomized sparse linear models for feature selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ - :ref:`label_propagation` for semi-supervised learning, by Clay @@ -4833,7 +4827,6 @@ Changelog `Mathieu Blondel`_ and `Lars Buitinck`_ - Documentation improvements: thumbnails in - example gallery by `Fabian Pedregosa`_. :ref:`example gallery ` by `Fabian Pedregosa`_. - Important bugfixes in :ref:`svm` module (segfaults, bad From e5b892ebb7fcc94e04c65083720d0c9003b12c3c Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay Date: Mon, 7 Aug 2017 01:12:44 +0200 Subject: [PATCH 69/86] FIX Convergence warning and n_iter_ in LabelPropagation (#5893) --- sklearn/semi_supervised/label_propagation.py | 43 ++++++++++--------- .../tests/test_label_propagation.py | 25 ++++++++++- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index c690ac1f151f4..10eebba86f04e 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -34,8 +34,8 @@ >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() ->>> random_unlabeled_points = np.where(np.random.randint(0, 2, -... 
size=len(iris.target))) +>>> rng = np.random.RandomState(42) +>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -53,6 +53,7 @@ """ # Authors: Clay Woolam +# Utkarsh Upadhyay # License: BSD from abc import ABCMeta, abstractmethod @@ -67,13 +68,7 @@ from ..utils.extmath import safe_sparse_dot from ..utils.multiclass import check_classification_targets from ..utils.validation import check_X_y, check_is_fitted, check_array - - -# Helper functions - -def _not_converged(y_truth, y_prediction, tol=1e-3): - """basic convergence check""" - return np.abs(y_truth - y_prediction).sum() > tol +from ..exceptions import ConvergenceWarning class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, @@ -97,7 +92,7 @@ class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, alpha : float Clamping factor - max_iter : float + max_iter : integer Change maximum number of iterations allowed tol : float @@ -264,12 +259,14 @@ def fit(self, X, y): l_previous = np.zeros((self.X_.shape[0], n_classes)) - remaining_iter = self.max_iter unlabeled = unlabeled[:, np.newaxis] if sparse.isspmatrix(graph_matrix): graph_matrix = graph_matrix.tocsr() - while (_not_converged(self.label_distributions_, l_previous, self.tol) - and remaining_iter > 1): + + for self.n_iter_ in range(self.max_iter): + if np.abs(self.label_distributions_ - l_previous).sum() < self.tol: + break + l_previous = self.label_distributions_ self.label_distributions_ = safe_sparse_dot( graph_matrix, self.label_distributions_) @@ -285,7 +282,12 @@ def fit(self, X, y): # clamp self.label_distributions_ = np.multiply( alpha, self.label_distributions_) + y_static - remaining_iter -= 1 + else: + warnings.warn( + 'max_iter=%d was reached without convergence.' % self.max_iter, + category=ConvergenceWarning + ) + self.n_iter_ += 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer @@ -294,7 +296,6 @@ def fit(self, X, y): transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)] self.transduction_ = transduction.ravel() - self.n_iter_ = self.max_iter - remaining_iter return self @@ -324,7 +325,7 @@ class LabelPropagation(BaseLabelPropagation): This parameter will be removed in 0.21. 'alpha' is fixed to zero in 'LabelPropagation'. - max_iter : float + max_iter : integer Change maximum number of iterations allowed tol : float @@ -358,8 +359,8 @@ class LabelPropagation(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() - >>> random_unlabeled_points = np.where(np.random.randint(0, 2, - ... size=len(iris.target))) + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -441,7 +442,7 @@ class LabelSpreading(BaseLabelPropagation): alpha=0 means keeping the initial label information; alpha=1 means replacing all initial information. - max_iter : float + max_iter : integer maximum number of iterations allowed tol : float @@ -475,8 +476,8 @@ class LabelSpreading(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelSpreading >>> label_prop_model = LabelSpreading() >>> iris = datasets.load_iris() - >>> random_unlabeled_points = np.where(np.random.randint(0, 2, - ... 
size=len(iris.target))) + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 3d5bd21a89110..8cd0cce41d7e9 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -9,6 +9,7 @@ from sklearn.semi_supervised import label_propagation from sklearn.metrics.pairwise import rbf_kernel from sklearn.datasets import make_classification +from sklearn.exceptions import ConvergenceWarning from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal @@ -70,7 +71,7 @@ def test_alpha_deprecation(): y[::3] = -1 lp_default = label_propagation.LabelPropagation(kernel='rbf', gamma=0.1) - lp_default_y = assert_no_warnings(lp_default.fit, X, y).transduction_ + lp_default_y = lp_default.fit(X, y).transduction_ lp_0 = label_propagation.LabelPropagation(alpha=0, kernel='rbf', gamma=0.1) lp_0_y = assert_warns(DeprecationWarning, lp_0.fit, X, y).transduction_ @@ -108,7 +109,8 @@ def test_label_propagation_closed_form(): labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0] clf = label_propagation.LabelPropagation(max_iter=10000, - gamma=0.1).fit(X, y) + gamma=0.1) + clf.fit(X, y) # adopting notation from Zhu et al 2002 T_bar = clf._build_graph() Tuu = T_bar[np.meshgrid(unlabelled_idx, unlabelled_idx, indexing='ij')] @@ -145,3 +147,22 @@ def test_convergence_speed(): # this should converge quickly: assert mdl.n_iter_ < 10 assert_array_equal(mdl.predict(X), [0, 1, 1]) + + +def test_convergence_warning(): + # This is a non-regression test for #5774 + X = np.array([[1., 0.], [0., 1.], [1., 2.5]]) + y = np.array([0, 1, -1]) + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1) + assert_warns(ConvergenceWarning, mdl.fit, X, y) + assert_equal(mdl.n_iter_, mdl.max_iter) + + mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1) + assert_warns(ConvergenceWarning, mdl.fit, X, y) + assert_equal(mdl.n_iter_, mdl.max_iter) + + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500) + assert_no_warnings(mdl.fit, X, y) + + mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500) + assert_no_warnings(mdl.fit, X, y) From affcff41edb915f9519306fb6ce17de5322c89eb Mon Sep 17 00:00:00 2001 From: tobycheese Date: Mon, 7 Aug 2017 00:48:07 +0200 Subject: [PATCH 70/86] DOC remove unnecessary line (#9504) --- examples/cluster/plot_cluster_iris.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py index 8b4a24af021e8..e0f39c86b371c 100755 --- a/examples/cluster/plot_cluster_iris.py +++ b/examples/cluster/plot_cluster_iris.py @@ -34,7 +34,6 @@ np.random.seed(5) -centers = [[1, 1], [-1, -1], [1, -1]] iris = datasets.load_iris() X = iris.data y = iris.target From 8d7396c4b1f9c04060e02d745cab5119bc01b5a0 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sun, 6 Aug 2017 11:50:24 +0800 Subject: [PATCH 71/86] DOC Correct what's new for #9108 (#9501) --- doc/whats_new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 0731a359ceb69..5bf0fc258c6b4 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -498,6 +498,9 @@ Decomposition, manifold learning and clustering 
:class:`decomposition.IncrementalPCA`. :issue:`9105` by `Hanmin Qin `_. +- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. + :issue:`9108` by `Hanmin Qin `_. + - Fixed a bug where :class:`cluster.DBSCAN` gives incorrect result when input is a precomputed sparse matrix with initial rows all zero. :issue:`8306` by :user:`Akshay Gupta ` From 5fef319df1e087987da310ac372b623281ada325 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 5 Aug 2017 23:28:08 -0400 Subject: [PATCH 72/86] DOC Fixup of linear svm separating hyperplane plot (#9471) * change data, don't regularize, call plt.show --- examples/svm/plot_separating_hyperplane.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index fafadb2d381d0..9fdbcc785ed2b 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -16,10 +16,10 @@ # we create 40 separable points -X, y = make_blobs(n_samples=40, centers=2, random_state=12, cluster_std=0.35) +X, y = make_blobs(n_samples=40, centers=2, random_state=6) -# fit the model -clf = svm.SVC(kernel='linear') +# fit the model, don't regularize for illustration purposes +clf = svm.SVC(kernel='linear', C=1000) clf.fit(X, y) plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) @@ -42,3 +42,4 @@ # plot support vectors ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, linewidth=1, facecolors='none') +plt.show() From 11c6243b5ae8f760353f53e45c0570229e4f6c25 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sun, 6 Aug 2017 10:24:32 +0800 Subject: [PATCH 73/86] FIX Incorrent implementation of noise_variance_ in PCA._fit_truncated (#9108) --- doc/whats_new.rst | 3 ++ sklearn/decomposition/pca.py | 9 ++++- sklearn/decomposition/tests/test_pca.py | 44 +++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 5bf0fc258c6b4..e05cd2f6a351c 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -238,6 +238,9 @@ Decomposition, manifold learning and clustering ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. :issue:`7685` by :user:`Tommy Löfstedt ` +- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. + :issue:`9108` by `Hanmin Qin `_. + - :class:`decomposition.NMF` now faster when ``beta_loss=0``. :issue:`9277` by :user:`hongkahjun`. diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index de447f1edd6aa..c0f1eb77b5f56 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -201,6 +201,9 @@ class PCA(_BasePCA): explained_variance_ : array, shape (n_components,) The amount of variance explained by each of the selected components. + Equal to n_components largest eigenvalues + of the covariance matrix of X. + .. versionadded:: 0.18 explained_variance_ratio_ : array, shape (n_components,) @@ -232,6 +235,9 @@ class PCA(_BasePCA): http://www.miketipping.com/papers/met-mppca.pdf. It is required to computed the estimated data covariance and score samples. + Equal to the average of (min(n_features, n_samples) - n_components) + smallest eigenvalues of the covariance matrix of X. + References ---------- For n_components == 'mle', this class uses the method of `Thomas P. 
Minka: @@ -494,9 +500,10 @@ def _fit_truncated(self, X, n_components, svd_solver): self.explained_variance_ratio_ = \ self.explained_variance_ / total_var.sum() self.singular_values_ = S.copy() # Store the singular values. - if self.n_components_ < n_features: + if self.n_components_ < min(n_features, n_samples): self.noise_variance_ = (total_var.sum() - self.explained_variance_.sum()) + self.noise_variance_ /= min(n_features, n_samples) - n_components else: self.noise_variance_ = 0. diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index 34b63c0674335..6795013b0790a 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -529,6 +529,50 @@ def test_pca_score3(): assert_true(ll.argmax() == 1) +def test_pca_score_with_different_solvers(): + digits = datasets.load_digits() + X_digits = digits.data + + pca_dict = {svd_solver: PCA(n_components=30, svd_solver=svd_solver, + random_state=0) + for svd_solver in solver_list} + + for pca in pca_dict.values(): + pca.fit(X_digits) + # Sanity check for the noise_variance_. For more details see + # https://github.com/scikit-learn/scikit-learn/issues/7568 + # https://github.com/scikit-learn/scikit-learn/issues/8541 + # https://github.com/scikit-learn/scikit-learn/issues/8544 + assert np.all((pca.explained_variance_ - pca.noise_variance_) >= 0) + + # Compare scores with different svd_solvers + score_dict = {svd_solver: pca.score(X_digits) + for svd_solver, pca in pca_dict.items()} + assert_almost_equal(score_dict['full'], score_dict['arpack']) + assert_almost_equal(score_dict['full'], score_dict['randomized'], + decimal=3) + + +def test_pca_zero_noise_variance_edge_cases(): + # ensure that noise_variance_ is 0 in edge cases + # when n_components == min(n_samples, n_features) + n, p = 100, 3 + + rng = np.random.RandomState(0) + X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) + # arpack raises ValueError for n_components == min(n_samples, + # n_features) + svd_solvers = ['full', 'randomized'] + + for svd_solver in svd_solvers: + pca = PCA(svd_solver=svd_solver, n_components=p) + pca.fit(X) + assert pca.noise_variance_ == 0 + + pca.fit(X.T) + assert pca.noise_variance_ == 0 + + def test_svd_solver_auto(): rng = np.random.RandomState(0) X = rng.uniform(size=(1000, 50)) From 02c496e696410b4d01c6acc4342ca31d859d190a Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 5 Aug 2017 17:35:28 -0600 Subject: [PATCH 74/86] FIX Pass sample_weight as kwargs in VotingClassifier (#9493) --- sklearn/ensemble/tests/test_voting_classifier.py | 15 +++++++++++++++ sklearn/ensemble/voting_classifier.py | 6 +++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index 4765d0e32d0bb..023be79912d12 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -17,6 +17,7 @@ from sklearn.svm import SVC from sklearn.multiclass import OneVsRestClassifier from sklearn.neighbors import KNeighborsClassifier +from sklearn.base import BaseEstimator, ClassifierMixin # Load the iris dataset and randomly permute it @@ -274,6 +275,20 @@ def test_sample_weight(): assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight) +def test_sample_weight_kwargs(): + """Check that VotingClassifier passes sample_weight as kwargs""" + class MockClassifier(BaseEstimator, ClassifierMixin): + """Mock Classifier to check that sample_weight is 
received as kwargs""" + def fit(self, X, y, *args, **sample_weight): + assert_true('sample_weight' in sample_weight) + + clf = MockClassifier() + eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft') + + # Should not raise an error. + eclf.fit(X, y, sample_weight=np.ones((len(y),))) + + def test_set_params(): """set_params should be able to set estimators""" clf1 = LogisticRegression(random_state=123, C=1.0) diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index 88b329d836978..ad6c0125dd664 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -23,10 +23,10 @@ from ..utils.metaestimators import _BaseComposition -def _parallel_fit_estimator(estimator, X, y, sample_weight): +def _parallel_fit_estimator(estimator, X, y, sample_weight=None): """Private function used to fit an estimator within a job.""" if sample_weight is not None: - estimator.fit(X, y, sample_weight) + estimator.fit(X, y, sample_weight=sample_weight) else: estimator.fit(X, y) return estimator @@ -185,7 +185,7 @@ def fit(self, X, y, sample_weight=None): self.estimators_ = Parallel(n_jobs=self.n_jobs)( delayed(_parallel_fit_estimator)(clone(clf), X, transformed_y, - sample_weight) + sample_weight=sample_weight) for clf in clfs if clf is not None) return self From 6ff8790061dac5c041cfb3c54fd175dd256b7f3c Mon Sep 17 00:00:00 2001 From: Julian Kuhlmann Date: Fri, 4 Aug 2017 13:00:48 -0700 Subject: [PATCH 75/86] Bring last code block in line with the image. (#9488) Code from http://scikit-learn.org/stable/auto_examples/decomposition/plot_ica_blind_source_separation.html. --- .../statistical_inference/unsupervised_learning.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index be32fabd96cb8..afe51320414c6 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -305,14 +305,17 @@ a maximum amount of independent information. 
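As added commentary, not part of the patch: the corrected tutorial block,
assembled as a standalone script; a minimal sketch, with the FastICA
estimator step assumed from the tutorial's subsequent lines (not shown in
this hunk):

    import numpy as np
    from scipy import signal
    from sklearn.decomposition import FastICA

    time = np.linspace(0, 10, 2000)
    s1 = np.sin(2 * time)                   # signal 1: sinusoidal
    s2 = np.sign(np.sin(3 * time))          # signal 2: square
    s3 = signal.sawtooth(2 * np.pi * time)  # signal 3: saw tooth
    S = np.c_[s1, s2, s3]
    S += 0.2 * np.random.normal(size=S.shape)  # add noise
    S /= S.std(axis=0)                      # standardize data
    A = np.array([[1, 1, 1], [0.5, 2, 1], [1.5, 1, 2]])  # mixing matrix
    X = np.dot(S, A.T)                      # generate observations

    ica = FastICA(n_components=3)
    S_ = ica.fit_transform(X)               # estimated sources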
It is able to recover :: >>> # Generate sample data + >>> import numpy as np + >>> from scipy import signal >>> time = np.linspace(0, 10, 2000) >>> s1 = np.sin(2 * time) # Signal 1 : sinusoidal signal >>> s2 = np.sign(np.sin(3 * time)) # Signal 2 : square signal - >>> S = np.c_[s1, s2] + >>> s3 = signal.sawtooth(2 * np.pi * time) # Signal 3: saw tooth signal + >>> S = np.c_[s1, s2, s3] >>> S += 0.2 * np.random.normal(size=S.shape) # Add noise >>> S /= S.std(axis=0) # Standardize data >>> # Mix data - >>> A = np.array([[1, 1], [0.5, 2]]) # Mixing matrix + >>> A = np.array([[1, 1, 1], [0.5, 2, 1], [1.5, 1, 2]]) # Mixing matrix >>> X = np.dot(S, A.T) # Generate observations >>> # Compute ICA From ac853921c6b801de52877344711a3ba62df2c651 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 5 Aug 2017 02:26:54 +1000 Subject: [PATCH 76/86] [MRG+1] FIX Add missing mixins to ClassifierChain (#9473) * Add missing mixins to ClassifierChain * Fix import in test --- sklearn/multioutput.py | 2 +- sklearn/tests/test_multioutput.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index d350b1bd6dc26..688507da01fe3 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -368,7 +368,7 @@ def score(self, X, y): return np.mean(np.all(y == y_pred, axis=1)) -class ClassifierChain(BaseEstimator): +class ClassifierChain(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): """A multi-label model that arranges binary classifiers into a chain. Each model makes a prediction in the order specified by the chain using diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 0c58d04c27581..5d5de53bbde6c 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -29,6 +29,7 @@ from sklearn.multioutput import MultiOutputClassifier from sklearn.multioutput import MultiOutputRegressor from sklearn.svm import LinearSVC +from sklearn.base import ClassifierMixin from sklearn.utils import shuffle @@ -380,6 +381,8 @@ def test_classifier_chain_fit_and_predict_with_logistic_regression(): assert_equal([c.coef_.size for c in classifier_chain.estimators_], list(range(X.shape[1], X.shape[1] + Y.shape[1]))) + assert isinstance(classifier_chain, ClassifierMixin) + def test_classifier_chain_fit_and_predict_with_linear_svc(): # Fit classifier chain and verify predict performance using LinearSVC From b01e20b00362a48aeda086d2684b8aa816d2f75d Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 2 Aug 2017 17:39:46 -0400 Subject: [PATCH 77/86] fix wrong assert in test_validation (#9480) --- sklearn/model_selection/tests/test_validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index dedb77026c544..5f650cb644079 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -452,8 +452,8 @@ def check_cross_validate_multi_metric(clf, X, y, scores): assert type(cv_results['test_r2']) == np.ndarray assert (type(cv_results['test_neg_mean_squared_error']) == np.ndarray) - assert type(cv_results['fit_time'] == np.ndarray) - assert type(cv_results['score_time'] == np.ndarray) + assert type(cv_results['fit_time']) == np.ndarray + assert type(cv_results['score_time']) == np.ndarray # Ensure all the times are within sane limits assert np.all(cv_results['fit_time'] >= 0) From fd2e0f7932aa1e8ce92c5e59391b41a13690b38c Mon 
Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 8 Aug 2017 16:02:15 +0800 Subject: [PATCH 78/86] [MRG+1] add scorer based on explained_variance_score (#9259) --- doc/modules/model_evaluation.rst | 3 ++- doc/whats_new.rst | 3 +++ sklearn/metrics/scorer.py | 7 +++++-- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 125a48d46b713..4800569556758 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -81,6 +81,7 @@ Scoring Function 'v_measure_score' :func:`metrics.v_measure_score` **Regression** +'explained_variance' :func:`metrics.explained_variance_score` 'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` 'neg_mean_squared_error' :func:`metrics.mean_squared_error` 'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` @@ -101,7 +102,7 @@ Usage examples: >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] + ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] .. note:: diff --git a/doc/whats_new.rst b/doc/whats_new.rst index e05cd2f6a351c..627034d8a0b86 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -139,6 +139,9 @@ Model selection and evaluation :class:`model_selection.RepeatedStratifiedKFold`. :issue:`8120` by `Neeraj Gangwar`_. +- Added a scorer based on :class:`metrics.explained_variance_score`. + :issue:`9259` by `Hanmin Qin `_. + Miscellaneous - Validation that input data contains no NaN or inf can now be suppressed diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index f13068d477b09..b1f01c1a18e1b 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -26,7 +26,8 @@ from . 
import (r2_score, median_absolute_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, accuracy_score, f1_score, roc_auc_score, average_precision_score, - precision_score, recall_score, log_loss) + precision_score, recall_score, log_loss, + explained_variance_score) from .cluster import adjusted_rand_score from .cluster import homogeneity_score @@ -463,6 +464,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, # Standard regression scores +explained_variance_scorer = make_scorer(explained_variance_score) r2_scorer = make_scorer(r2_score) neg_mean_squared_error_scorer = make_scorer(mean_squared_error, greater_is_better=False) @@ -525,7 +527,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, fowlkes_mallows_scorer = make_scorer(fowlkes_mallows_score) -SCORERS = dict(r2=r2_scorer, +SCORERS = dict(explained_variance=explained_variance_scorer, + r2=r2_scorer, neg_median_absolute_error=neg_median_absolute_error_scorer, neg_mean_absolute_error=neg_mean_absolute_error_scorer, neg_mean_squared_error=neg_mean_squared_error_scorer, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 47c4d334f893a..fc5ba91401eab 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -29,7 +29,6 @@ from sklearn.svm import LinearSVC from sklearn.pipeline import make_pipeline from sklearn.cluster import KMeans -from sklearn.dummy import DummyRegressor from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.datasets import make_blobs @@ -42,8 +41,9 @@ from sklearn.externals import joblib -REGRESSION_SCORERS = ['r2', 'neg_mean_absolute_error', - 'neg_mean_squared_error', 'neg_mean_squared_log_error', +REGRESSION_SCORERS = ['explained_variance', 'r2', + 'neg_mean_absolute_error', 'neg_mean_squared_error', + 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'mean_absolute_error', 'mean_squared_error', 'median_absolute_error'] @@ -68,7 +68,7 @@ def _make_estimators(X_train, y_train, y_ml_train): # Make estimators that make sense to test various scoring methods - sensible_regr = DummyRegressor(strategy='median') + sensible_regr = DecisionTreeRegressor(random_state=0) sensible_regr.fit(X_train, y_train) sensible_clf = DecisionTreeClassifier(random_state=0) sensible_clf.fit(X_train, y_train) From fa794eaa3152422a0198f0e808effc5f6d410789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Tue, 8 Aug 2017 10:21:09 +0200 Subject: [PATCH 79/86] Fix safe_indexing with read-only indices (#9507) --- sklearn/utils/__init__.py | 2 ++ sklearn/utils/tests/test_utils.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 332e856c641db..4b2665cdd4f77 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -142,6 +142,8 @@ def safe_indexing(X, indices): not supported. 
""" if hasattr(X, "iloc"): + # Work-around for indexing with read-only indices in pandas + indices = indices if indices.flags.writeable else indices.copy() # Pandas Dataframes and Series try: return X.iloc[indices] diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index c0fd079a932fb..fa93bf34fe6bc 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -1,4 +1,4 @@ -from itertools import chain +from itertools import chain, product import warnings import numpy as np @@ -200,10 +200,15 @@ def test_safe_indexing_pandas(): # this happens in joblib memmapping X.setflags(write=False) X_df_readonly = pd.DataFrame(X) - with warnings.catch_warnings(record=True): - X_df_ro_indexed = safe_indexing(X_df_readonly, inds) + inds_readonly = inds.copy() + inds_readonly.setflags(write=False) - assert_array_equal(np.array(X_df_ro_indexed), X_indexed) + for this_df, this_inds in product([X_df, X_df_readonly], + [inds, inds_readonly]): + with warnings.catch_warnings(record=True): + X_df_indexed = safe_indexing(this_df, this_inds) + + assert_array_equal(np.array(X_df_indexed), X_indexed) def test_safe_indexing_mock_pandas(): From 6c6d6a268b3e54d7f6a86a5cb2f096dd528d5672 Mon Sep 17 00:00:00 2001 From: Minghui Liu Date: Tue, 8 Aug 2017 05:36:03 -0700 Subject: [PATCH 80/86] Use base.is_classifier instead instead of isinstance (#9482) --- sklearn/ensemble/weight_boosting.py | 4 ++-- sklearn/multioutput.py | 4 ++-- sklearn/neural_network/multilayer_perceptron.py | 5 +++-- sklearn/tree/tests/test_export.py | 4 ++-- sklearn/tree/tree.py | 5 +++-- sklearn/utils/estimator_checks.py | 10 +++++----- 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index 3108717d4676e..a53c57d3495e9 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -29,7 +29,7 @@ from numpy.core.umath_tests import inner1d from .base import BaseEnsemble -from ..base import ClassifierMixin, RegressorMixin, is_regressor +from ..base import ClassifierMixin, RegressorMixin, is_regressor, is_classifier from ..externals import six from ..externals.six.moves import zip from ..externals.six.moves import xrange as range @@ -231,7 +231,7 @@ def staged_score(self, X, y, sample_weight=None): z : float """ for y_pred in self.staged_predict(X): - if isinstance(self, ClassifierMixin): + if is_classifier(self): yield accuracy_score(y, y_pred, sample_weight=sample_weight) else: yield r2_score(y, y_pred, sample_weight=sample_weight) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 688507da01fe3..6c9fbc55f7863 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -18,7 +18,7 @@ import scipy.sparse as sp from abc import ABCMeta, abstractmethod from .base import BaseEstimator, clone, MetaEstimatorMixin -from .base import RegressorMixin, ClassifierMixin +from .base import RegressorMixin, ClassifierMixin, is_classifier from .model_selection import cross_val_predict from .utils import check_array, check_X_y, check_random_state from .utils.fixes import parallel_helper @@ -152,7 +152,7 @@ def fit(self, X, y, sample_weight=None): multi_output=True, accept_sparse=True) - if isinstance(self, ClassifierMixin): + if is_classifier(self): check_classification_targets(y) if y.ndim == 1: diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index af1eca3b201d5..ae6df22c2fc5a 100644 --- 
a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -13,6 +13,7 @@ import warnings from ..base import BaseEstimator, ClassifierMixin, RegressorMixin +from ..base import is_classifier from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer from ..model_selection import train_test_split @@ -268,7 +269,7 @@ def _initialize(self, y, layer_units): self.n_layers_ = len(layer_units) # Output for regression - if not isinstance(self, ClassifierMixin): + if not is_classifier(self): self.out_activation_ = 'identity' # Output for multi class elif self._label_binarizer.y_type_ == 'multiclass': @@ -491,7 +492,7 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads, X, X_val, y, y_val = train_test_split( X, y, random_state=self._random_state, test_size=self.validation_fraction) - if isinstance(self, ClassifierMixin): + if is_classifier(self): y_val = self._label_binarizer.inverse_transform(y_val) else: X_val = None diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 0bf70073d34c7..230c1cc23102d 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -6,7 +6,7 @@ from numpy.random import RandomState -from sklearn.base import ClassifierMixin +from sklearn.base import is_classifier from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.ensemble import GradientBoostingClassifier from sklearn.tree import export_graphviz @@ -292,7 +292,7 @@ def test_precision(): len(search("\.\d+", finding.group()).group()), precision + 1) # check impurity - if isinstance(clf, ClassifierMixin): + if is_classifier(clf): pattern = "gini = \d+\.\d+" else: pattern = "friedman_mse = \d+\.\d+" diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 099f3da39a45b..789ffb8b61cac 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -29,6 +29,7 @@ from ..base import BaseEstimator from ..base import ClassifierMixin from ..base import RegressorMixin +from ..base import is_classifier from ..externals import six from ..utils import check_array from ..utils import check_random_state @@ -123,7 +124,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, # Determine output settings n_samples, self.n_features_ = X.shape - is_classification = isinstance(self, ClassifierMixin) + is_classification = is_classifier(self) y = np.atleast_1d(y) expanded_class_weight = None @@ -413,7 +414,7 @@ def predict(self, X, check_input=True): n_samples = X.shape[0] # Classification - if isinstance(self, ClassifierMixin): + if is_classifier(self): if self.n_outputs_ == 1: return self.classes_.take(np.argmax(proba, axis=1), axis=0) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 4760253a5a43e..ba83535988fad 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -35,8 +35,8 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.base import (clone, ClassifierMixin, RegressorMixin, - TransformerMixin, ClusterMixin, BaseEstimator) +from sklearn.base import (clone, TransformerMixin, ClusterMixin, + BaseEstimator, is_classifier, is_regressor) from sklearn.metrics import accuracy_score, adjusted_rand_score, f1_score from sklearn.random_projection import BaseRandomProjection @@ -208,10 +208,10 @@ def _yield_clustering_checks(name, clusterer): def _yield_all_checks(name, estimator): for check in 
_yield_non_meta_checks(name, estimator): yield check - if isinstance(estimator, ClassifierMixin): + if is_classifier(estimator): for check in _yield_classifier_checks(name, estimator): yield check - if isinstance(estimator, RegressorMixin): + if is_regressor(estimator): for check in _yield_regressor_checks(name, estimator): yield check if isinstance(estimator, TransformerMixin): @@ -980,7 +980,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig): X -= X.min() try: - if isinstance(estimator, ClassifierMixin): + if is_classifier(estimator): classes = np.unique(y) estimator.partial_fit(X, y, classes=classes) else: From 2cc156fecd63c022645592cef8fac83ab0f4f653 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 8 Aug 2017 23:09:28 +1000 Subject: [PATCH 81/86] Update what's new for recent changes --- doc/whats_new.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 627034d8a0b86..fe7d33135c935 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -139,9 +139,6 @@ Model selection and evaluation :class:`model_selection.RepeatedStratifiedKFold`. :issue:`8120` by `Neeraj Gangwar`_. -- Added a scorer based on :class:`metrics.explained_variance_score`. - :issue:`9259` by `Hanmin Qin `_. - Miscellaneous - Validation that input data contains no NaN or inf can now be suppressed @@ -334,6 +331,9 @@ Model evaluation and meta-estimators - More clustering metrics are now available through :func:`metrics.get_scorer` and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. +- A scorer based on :func:`metrics.explained_variance_score` is also available. + :issue:`9259` by `Hanmin Qin `_. + Metrics - :func:`metrics.matthews_corrcoef` now support multiclass classification. @@ -606,6 +606,9 @@ Model evaluation and meta-estimators raised on trying to stack matrices with different dimensions. :issue:`8093` by :user:`Peter Bull `. +- Cross validation now works with Pandas datatypes that have a + read-only index. :issue:`9507` by `Loic Esteve`_. + Metrics - :func:`metrics.average_precision_score` no longer linearly From 0d6740dcbd46ff4707dfb85ccefaf6e7a8a73ef5 Mon
From 8d36abfae2abdfb2c03b526f54b0781a565dcbff Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 8 Aug 2017 23:10:50 +1000 Subject: [PATCH 83/86] DOC Change release date to Thursday --- doc/whats_new.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index e90c2fff4d6dd..29c9f9ab048d6 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -7,8 +7,7 @@ Release history Version 0.19 ============ -**August 7, 2017** -**Release Candidate (0.19b2) July 17, 2017** +**August 10, 2017** Highlights ---------- From e4274b59e8117088d22f6674a1fc9b0ece8e257b Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 9 Aug 2017 09:22:05 +1000 Subject: [PATCH 84/86] DOC list of contributors to 0.19 --- doc/whats_new.rst | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 29c9f9ab048d6..1c4410b46c35b 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -856,6 +856,74 @@ Miscellaneous :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. +Code and Documentation Contributors +----------------------------------- + +Thanks to everyone who has contributed to the maintenance and improvement of the +project since version 0.18, including: + +Joel Nothman, Loic Esteve, Andreas Mueller, Guillaume Lemaitre, Olivier Grisel, +Hanmin Qin, Raghav RV, Alexandre Gramfort, themrmax, Aman Dalmia, Gael +Varoquaux, Naoya Kanai, Tom Dupré la Tour, Rishikesh, Nelson Liu, Taehoon Lee, +Nelle Varoquaux, Aashil, Mikhail Korobov, Sebastin Santy, Joan Massich, Roman +Yurchak, RAKOTOARISON Herilalaina, Thierry Guillemot, Alexandre Abadie, Carol +Willing, Balakumaran Manoharan, Josh Karnofsky, Vlad Niculae, Utkarsh Upadhyay, +Dmitry Petrov, Minghui Liu, Srivatsan, Vincent Pham, Albert Thomas, Jake +VanderPlas, Attractadore, JC Liu, alexandercbooth, chkoar, Óscar Nájera, +Aarshay Jain, Kyle Gilliam, Ramana Subramanyam, CJ Carey, Clement Joudet, David +Robles, He Chen, Joris Van den Bossche, Karan Desai, Katie Luangkote, Leland +McInnes, Maniteja Nandana, Michele Lacchia, Sergei Lebedev, Shubham Bhardwaj, +akshay0724, omtcyfz, rickiepark, waterponey, Vathsala Achar, jbDelafosse, Ralf +Gommers, Ekaterina Krivich, Vivek Kumar, Ishank Gulati, Dave Elliott, ldirer, +Reiichiro Nakano, Levi John Wolf, Mathieu Blondel, Sid Kapur, Dougal J. +Sutherland, midinas, mikebenfield, Sourav Singh, Aseem Bansal, Ibraim Ganiev, +Stephen Hoover, AishwaryaRK, Steven C. Howell, Gary Foreman, Neeraj Gangwar, +Tahar, Jon Crall, dokato, Kathy Chen, ferria, Thomas Moreau, Charlie Brummitt, +Nicolas Goix, Adam Kleczewski, Sam Shleifer, Nikita Singh, Basil Beirouti, +Giorgio Patrini, Manoj Kumar, Rafael Possas, James Bourbeau, James A. 
Bednar, +Janine Harper, Jaye, Jean Helie, Jeremy Steward, Artsiom, John Wei, Jonathan +LIgo, Jonathan Rahn, seanpwilliams, Arthur Mensch, Josh Levy, Julian Kuhlmann, +Julien Aubert, Jörn Hees, Kai, shivamgargsya, Kat Hempstalk, Kaushik +Lakshmikanth, Kennedy, Kenneth Lyons, Kenneth Myers, Kevin Yap, Kirill Bobyrev, +Konstantin Podshumok, Arthur Imbert, Lee Murray, toastedcornflakes, Lera, Li +Li, Arthur Douillard, Mainak Jas, tobycheese, Manraj Singh, Manvendra Singh, +Marc Meketon, MarcoFalke, Matthew Brett, Matthias Gilch, Mehul Ahuja, Melanie +Goetz, Meng, Peng, Michael Dezube, Michal Baumgartner, vibrantabhi19, Artem +Golubin, Milen Paskov, Antonin Carette, Morikko, MrMjauh, NALEPA Emmanuel, +Namiya, Antoine Wendlinger, Narine Kokhlikyan, NarineK, Nate Guerin, Angus +Williams, Ang Lu, Nicole Vavrova, Nitish Pandey, Okhlopkov Daniil Olegovich, +Andy Craze, Om Prakash, Parminder Singh, Patrick Carlson, Patrick Pei, Paul +Ganssle, Paulo Haddad, Paweł Lorek, Peng Yu, Pete Bachant, Peter Bull, Peter +Csizsek, Peter Wang, Pieter Arthur de Jong, Ping-Yao, Chang, Preston Parry, +Puneet Mathur, Quentin Hibon, Andrew Smith, Andrew Jackson, 1kastner, Rameshwar +Bhaskaran, Rebecca Bilbro, Remi Rampin, Andrea Esuli, Rob Hall, Robert +Bradshaw, Romain Brault, Aman Pratik, Ruifeng Zheng, Russell Smith, Sachin +Agarwal, Sailesh Choyal, Samson Tan, Samuël Weber, Sarah Brown, Sebastian +Pölsterl, Sebastian Raschka, Sebastian Saeger, Alyssa Batula, Abhyuday Pratap +Singh, Sergey Feldman, Sergul Aydore, Sharan Yalburgi, willduan, Siddharth +Gupta, Sri Krishna, Almer, Stijn Tonk, Allen Riddell, Theofilos Papapanagiotou, +Alison, Alexis Mignon, Tommy Boucher, Tommy Löfstedt, Toshihiro Kamishima, +Tyler Folkman, Tyler Lanigan, Alexander Junge, Varun Shenoy, Victor Poughon, +Vilhelm von Ehrenheim, Aleksandr Sandrovskii, Alan Yee, Vlasios Vasileiou, +Warut Vijitbenjaronk, Yang Zhang, Yaroslav Halchenko, Yichuan Liu, Yuichi +Fujikawa, affanv14, aivision2020, xor, andreh7, brady salz, campustrampus, +Agamemnon Krasoulis, ditenberg, elena-sharova, filipj8, fukatani, gedeck, +guiniol, guoci, hakaa1, hongkahjun, i-am-xhy, jakirkham, jaroslaw-weber, +jayzed82, jeroko, jmontoyam, jonathan.striebel, josephsalmon, jschendel, +leereeves, martin-hahn, mathurinm, mehak-sachdeva, mlewis1729, mlliou112, +mthorrell, ndingwall, nuffe, yangarbiter, plagree, pldtc325, Breno Freitas, +Brett Olsen, Brian A. Alfano, Brian Burns, polmauri, Brandon Carter, Charlton +Austin, Chayant T15h, Chinmaya Pancholi, Christian Danielsen, Chung Yen, +Chyi-Kwei Yau, pravarmahajan, DOHMATOB Elvis, Daniel LeJeune, Daniel Hnyk, +Darius Morawiec, David DeTomaso, David Gasquez, David Haberthür, David +Heryanto, David Kirkby, David Nicholson, rashchedrin, Deborah Gertrude Digges, +Denis Engemann, Devansh D, Dickson, Bob Baxley, Don86, E. Lynch-Klarup, Ed +Rogers, Elizabeth Ferriss, Ellen-Co2, Fabian Egli, Fang-Chieh Chou, Bing Tian +Dai, Greg Stupp, Grzegorz Szpak, Bertrand Thirion, Hadrien Bertrand, Harizo +Rajaona, zxcvbnius, Henry Lin, Holger Peters, Icyblade Dai, Igor +Andriushchenko, Ilya, Isaac Laughlin, Iván Vallés, Aurélien Bellet, JPFrancoia, +Jacob Schreiber, Asish Mahapatra + .. 
_changes_0_18_2: Version 0.18.2 From 740d92dbe6c3a8b3114bf1aedbee97294700a87c Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 9 Aug 2017 09:06:11 +1000 Subject: [PATCH 85/86] DOC Update news and menu for 0.19 release --- doc/index.rst | 6 ++++-- doc/themes/scikit-learn/layout.html | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index e835de46a660e..c49e5cf2db3cd 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -207,14 +207,16 @@
  • On-going development: What's new (Changelog)
+ • July 2017. scikit-learn 0.19.0 is available for download (Changelog).
+ • June 2017. scikit-learn 0.18.2 is available for download (Changelog).
  • September 2016. scikit-learn 0.18.0 is available for download (Changelog).
  • November 2015. scikit-learn 0.17.0 is available for download (Changelog).
  • March 2015. scikit-learn 0.16.0 is available for download (Changelog).
- • July 2014. scikit-learn 0.15.0 is available for download (Changelog).
  • July 14-20th, 2014: international sprint. During this week-long sprint, we gathered 18 of the core contributors in Paris.
diff --git a/doc/themes/scikit-learn/layout.html b/doc/themes/scikit-learn/layout.html index d659b9ce86179..9a2691c6b1fbb 100644 --- a/doc/themes/scikit-learn/layout.html +++ b/doc/themes/scikit-learn/layout.html @@ -85,9 +85,9 @@
  • FAQ
  • Contributing
- • Scikit-learn 0.18 (stable)
+ • Scikit-learn 0.19 (stable)
+ • Scikit-learn 0.18
  • Scikit-learn 0.17
- • Scikit-learn 0.16
  • PDF documentation
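A usage note on the scorer added in [PATCH 78/86] above: once ``explained_variance`` is registered in the ``SCORERS`` dict, it can be passed anywhere scikit-learn accepts a ``scoring`` string. A minimal sketch with made-up synthetic data (illustrative only; assumes scikit-learn 0.19, where ``cross_val_score`` defaults to 3-fold CV)::

    >>> import numpy as np
    >>> from sklearn.linear_model import Ridge
    >>> from sklearn.model_selection import cross_val_score
    >>> rng = np.random.RandomState(0)
    >>> X = rng.randn(100, 3)
    >>> y = X.dot([1.0, 2.0, 0.5]) + 0.1 * rng.randn(100)  # noisy linear target
    >>> # 'explained_variance' resolves to make_scorer(explained_variance_score),
    >>> # so greater is better and a perfect model scores 1.0.
    >>> scores = cross_val_score(Ridge(), X, y, scoring='explained_variance')
    >>> scores.shape
    (3,)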
From f2d66b80e675392786e1081bea33ab6f3fd3eebb Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 10 Aug 2017 14:47:53 +1000 Subject: [PATCH 86/86] DOC set release date to Friday --- doc/whats_new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 1c4410b46c35b..88d451c77c111 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -7,7 +7,7 @@ Release history Version 0.19 ============ -**August 10, 2017** +**August 12, 2017** Highlights ----------
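A closing note on [PATCH 80/86] above: ``base.is_classifier`` and ``base.is_regressor`` test the estimator's ``_estimator_type`` tag rather than its class ancestry, so they also give the right answer for meta-estimators such as ``Pipeline``, which carries the tag of its final step without subclassing the mixin. A minimal sketch of the behaviour (illustrative only; assumes scikit-learn 0.19)::

    >>> from sklearn.base import ClassifierMixin, is_classifier, is_regressor
    >>> from sklearn.linear_model import LogisticRegression, Ridge
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> is_classifier(LogisticRegression())  # _estimator_type == 'classifier'
    True
    >>> is_regressor(Ridge())  # _estimator_type == 'regressor'
    True
    >>> # A pipeline forwards the _estimator_type of its last step, so the
    >>> # helper answers correctly where an isinstance() check would not:
    >>> pipe = make_pipeline(StandardScaler(), LogisticRegression())
    >>> is_classifier(pipe), isinstance(pipe, ClassifierMixin)
    (True, False)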