diff --git a/doc/about.rst b/doc/about.rst index c4208efdc247a..d85e2cef387d3 100644 --- a/doc/about.rst +++ b/doc/about.rst @@ -67,7 +67,7 @@ Funding `INRIA `_ actively supports this project. It has provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler -(2012-2013) and Olivier Grisel (2013-2015) to work on this project +(2012-2013) and Olivier Grisel (2013-2017) to work on this project full-time. It also hosts coding sprints and other events. .. image:: images/inria-logo.jpg @@ -77,7 +77,7 @@ full-time. It also hosts coding sprints and other events. `Paris-Saclay Center for Data Science `_ funded one year for a developer to work on the project full-time -(2014-2015). +(2014-2015) and 50% of the time of Guillaume Lemaitre (2016-2017). .. image:: images/cds-logo.png :width: 200pt @@ -94,9 +94,9 @@ Environment also funds several students to work on the project part-time. :target: http://cds.nyu.edu/mooresloan/ -`Télécom Paristech `_ funds Manoj Kumar (2014), -Tom Dupré la Tour (2015), Raghav RV (2015-2016) and Thierry Guillemot (2016) to -work on scikit-learn. +`Télécom Paristech `_ funded Manoj Kumar (2014), +Tom Dupré la Tour (2015), Raghav RV (2015-2017), Thierry Guillemot (2016-2017) +and Albert Thomas (2017) to work on scikit-learn. .. image:: themes/scikit-learn/static/img/telecom.png :width: 100pt @@ -104,13 +104,27 @@ work on scikit-learn. :target: http://www.telecom-paristech.fr/ -`Columbia University `_ funds Andreas Mueller since 2016. +`Columbia University `_ funds Andreas Müller since 2016. .. image:: themes/scikit-learn/static/img/columbia.png :width: 100pt :align: center :target: http://www.columbia.edu/ +Andreas Müller also received a grant to improve scikit-learn from the `Alfred P. Sloan Foundation `_ in 2017. + +.. image:: images/sloan_banner.png + :width: 200pt + :align: center + :target: https://sloan.org/ + +`The University of Sydney `_ funds Joel Nothman since July 2017. + +.. 
image:: themes/scikit-learn/static/img/sydney-primary.jpeg + :width: 200pt + :align: center + :target: http://www.sydney.edu.au/ + The following students were sponsored by `Google `_ to work on scikit-learn through the `Google Summer of Code `_ diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 8168434e697e8..f91163fc235c5 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -252,7 +252,7 @@ features:: .. topic:: Related links: - _`Public datasets in svmlight / libsvm format`: http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ + _`Public datasets in svmlight / libsvm format`: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader @@ -268,15 +268,15 @@ DataFrame are also acceptable. Here are some recommended ways to load standard columnar data into a format usable by scikit-learn: -* `pandas.io `_ +* `pandas.io `_ provides tools to read data from common formats including CSV, Excel, JSON and SQL. DataFrames may also be constructed from lists of tuples or dicts. Pandas handles heterogeneous data smoothly and provides tools for manipulation and conversion into a numeric array suitable for scikit-learn. 
-* `scipy.io `_ +* `scipy.io `_ specializes in binary formats often used in scientific computing context such as .mat and .arff -* `numpy/routines.io `_ +* `numpy/routines.io `_ for standard loading of columnar data into numpy arrays * scikit-learn's :func:`datasets.load_svmlight_file` for the svmlight or libSVM sparse format @@ -288,14 +288,14 @@ For some miscellaneous data such as images, videos, and audio, you may wish to refer to: * `skimage.io `_ or - `Imageio `_ + `Imageio `_ for loading images and videos to numpy arrays -* `scipy.misc.imread `_ (requires the `Pillow `_ package) to load pixel intensities data from various image file formats * `scipy.io.wavfile.read - `_ + `_ for reading WAV files into a numpy array Categorical (or nominal) features stored as strings (common in pandas DataFrames) diff --git a/doc/developers/debugging.rst b/doc/developers/debugging.rst deleted file mode 100644 index f3e28110f1da8..0000000000000 --- a/doc/developers/debugging.rst +++ /dev/null @@ -1,51 +0,0 @@ -.. _developers-debugging: - -============================== -Developers' Tips for Debugging -============================== - -Memory errors: debugging Cython with valgrind -============================================= - -While python/numpy's built-in memory management is relatively robust, it can -lead to performance penalties for some routines. For this reason, much of -the high-performance code in scikit-learn in written in cython. This -performance gain comes with a tradeoff, however: it is very easy for memory -bugs to crop up in cython code, especially in situations where that code -relies heavily on pointer arithmetic. - -Memory errors can manifest themselves a number of ways. The easiest ones to -debug are often segmentation faults and related glibc errors. Uninitialized -variables can lead to unexpected behavior that is difficult to track down. -A very useful tool when debugging these sorts of errors is -valgrind_. 
- - -Valgrind is a command-line tool that can trace memory errors in a variety of -code. Follow these steps: - - 1. Install `valgrind`_ on your system. - - 2. Download the python valgrind suppression file: `valgrind-python.supp`_. - - 3. Follow the directions in the `README.valgrind`_ file to customize your - python suppressions. If you don't, you will have spurious output coming - related to the python interpreter instead of your own code. - - 4. Run valgrind as follows:: - - $> valgrind -v --suppressions=valgrind-python.supp python my_test_script.py - -.. _valgrind: http://valgrind.org -.. _`README.valgrind`: http://svn.python.org/projects/python/trunk/Misc/README.valgrind -.. _`valgrind-python.supp`: http://svn.python.org/projects/python/trunk/Misc/valgrind-python.supp - - -The result will be a list of all the memory-related errors, which reference -lines in the C-code generated by cython from your .pyx file. If you examine -the referenced lines in the .c file, you will see comments which indicate the -corresponding location in your .pyx source file. Hopefully the output will -give you clues as to the source of your memory error. - -For more information on valgrind and the array of options it has, see the -tutorials and documentation on the `valgrind web site `_. diff --git a/doc/developers/index.rst b/doc/developers/index.rst index 5ac2d4f202bb6..4463bf50d8b50 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -10,7 +10,7 @@ Developer's Guide .. toctree:: contributing - debugging + tips utilities performance advanced_installation diff --git a/doc/developers/tips.rst b/doc/developers/tips.rst new file mode 100644 index 0000000000000..bbf46965d379c --- /dev/null +++ b/doc/developers/tips.rst @@ -0,0 +1,119 @@ +.. 
_developers-tips: + +=========================== +Developers' Tips and Tricks +=========================== + +Productivity and sanity-preserving tips +======================================= + +In this section we gather some useful advice and tools that may increase your +quality-of-life when reviewing pull requests, running unit tests, and so forth. +Some of these tricks consist of userscripts that require a browser extension +such as `TamperMonkey`_ or `GreaseMonkey`_; to set up userscripts you must have +one of these extensions installed, enabled and running. We provide userscripts +as GitHub gists; to install them, click on the "Raw" button on the gist page. + +.. _TamperMonkey: https://tampermonkey.net +.. _GreaseMonkey: http://www.greasespot.net + +Viewing the rendered HTML documentation for a pull request +---------------------------------------------------------- + +We use CircleCI to build the HTML documentation for every pull request. To +access that documentation, we provide a redirect as described in the +:ref:`documentation section of the contributor guide +`. Instead of typing the address by hand, we provide a +`userscript `_ +that adds a button to every PR. After installing the userscript, navigate to any +GitHub PR; a new button labeled "See CircleCI doc for this PR" should appear in +the top-right area. + +Folding and unfolding outdated diffs on pull requests +----------------------------------------------------- + +GitHub hides discussions on PRs when the corresponding lines of code have been +changed in the meantime. This `userscript +`_ provides a button to +unfold all such hidden discussions at once, so you can catch up. 
+ +Checking out pull requests as remote-tracking branches +------------------------------------------------------ + +In your local fork, add to your ``.git/config``, under the ``[remote +"upstream"]`` heading, the line:: + + fetch = +refs/pull/*/head:refs/remotes/upstream/pr/* + +You may then use ``git checkout pr/PR_NUMBER`` to navigate to the code of the +pull-request with the given number. (`Read more in this gist. +`_) + +Display code coverage in pull requests +-------------------------------------- + +To overlay the code coverage reports generated by the CodeCov continuous +integration, consider `this browser extension +`_. The coverage of each line +will be displayed as a color background behind the line number. + +Useful pytest aliases and flags +------------------------------- + +We recommend using pytest to run unit tests. When a unit test fails, the +following tricks can make debugging easier: + + 1. The command line argument ``pytest -l`` instructs pytest to print the local + variables when a failure occurs. + + 2. The argument ``pytest --pdb`` drops into the Python debugger on failure. To + instead drop into the rich IPython debugger ``ipdb``, you may set up a + shell alias to:: + + pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no + +Debugging memory errors in Cython with valgrind +=============================================== + +While python/numpy's built-in memory management is relatively robust, it can +lead to performance penalties for some routines. For this reason, much of +the high-performance code in scikit-learn is written in cython. This +performance gain comes with a tradeoff, however: it is very easy for memory +bugs to crop up in cython code, especially in situations where that code +relies heavily on pointer arithmetic. + +Memory errors can manifest themselves in a number of ways. The easiest ones to +debug are often segmentation faults and related glibc errors. 
Uninitialized +variables can lead to unexpected behavior that is difficult to track down. +A very useful tool when debugging these sorts of errors is +valgrind_. + + +Valgrind is a command-line tool that can trace memory errors in a variety of +code. Follow these steps: + + 1. Install `valgrind`_ on your system. + + 2. Download the python valgrind suppression file: `valgrind-python.supp`_. + + 3. Follow the directions in the `README.valgrind`_ file to customize your + python suppressions. If you don't, you will have spurious output + related to the python interpreter instead of your own code. + + 4. Run valgrind as follows:: + + $> valgrind -v --suppressions=valgrind-python.supp python my_test_script.py + +.. _valgrind: http://valgrind.org +.. _`README.valgrind`: http://svn.python.org/projects/python/trunk/Misc/README.valgrind +.. _`valgrind-python.supp`: http://svn.python.org/projects/python/trunk/Misc/valgrind-python.supp + + +The result will be a list of all the memory-related errors, which reference +lines in the C-code generated by cython from your .pyx file. If you examine +the referenced lines in the .c file, you will see comments which indicate the +corresponding location in your .pyx source file. Hopefully the output will +give you clues as to the source of your memory error. + +For more information on valgrind and the array of options it has, see the +tutorials and documentation on the `valgrind web site `_. diff --git a/doc/faq.rst b/doc/faq.rst index f11f1e013d434..dcaee6da8b928 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -24,9 +24,9 @@ Apart from scikit-learn, another popular one is `scikit-image `_. Please do not contact the contributors of scikit-learn directly -regarding contributing to scikit-learn. +usually a major and lengthy undertaking, it is recommended to start with +:ref:`known issues `. Please do not contact the contributors +of scikit-learn directly regarding contributing to scikit-learn. 
What's the best way to get help on scikit-learn usage? -------------------------------------------------------------- diff --git a/doc/images/sloan_banner.png b/doc/images/sloan_banner.png new file mode 100644 index 0000000000000..bcb98e8403006 Binary files /dev/null and b/doc/images/sloan_banner.png differ diff --git a/doc/index.rst b/doc/index.rst index 439e70dd94758..c49e5cf2db3cd 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -207,14 +207,16 @@
  • On-going development: What's new (Changelog)
  • +
  • July 2017. scikit-learn 0.19.0 is available for download (Changelog). +
  • +
  • June 2017. scikit-learn 0.18.2 is available for download (Changelog). +
  • September 2016. scikit-learn 0.18.0 is available for download (Changelog).
  • November 2015. scikit-learn 0.17.0 is available for download (Changelog).
  • March 2015. scikit-learn 0.16.0 is available for download (Changelog).
  • -
  • July 2014. scikit-learn 0.15.0 is available for download (Changelog). -
  • July 14-20th, 2014: international sprint. During this week-long sprint, we gathered 18 of the core contributors in Paris. @@ -323,14 +325,15 @@ Funding provided by INRIA and others.
    diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index 0c0af594398a0..9762414ac8cc0 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -44,7 +44,7 @@ with different biases per method: * :class:`RandomForestClassifier` shows the opposite behavior: the histograms show peaks at approximately 0.2 and 0.9 probability, while probabilities close to 0 or 1 are very rare. An explanation for this is given by Niculescu-Mizil - and Caruana [4]: "Methods such as bagging and random forests that average + and Caruana [4]_: "Methods such as bagging and random forests that average predictions from a base set of models can have difficulty making predictions near 0 and 1 because variance in the underlying base models will bias predictions that should be near zero or one away from these values. Because @@ -57,7 +57,7 @@ with different biases per method: ensemble away from 0. We observe this effect most strongly with random forests because the base-level trees trained with random forests have relatively high variance due to feature subseting." As a result, the - calibration curve also referred to as the reliability diagram (Wilks 1995[5]) shows a + calibration curve also referred to as the reliability diagram (Wilks 1995 [5]_) shows a characteristic sigmoid shape, indicating that the classifier could trust its "intuition" more and return probabilties closer to 0 or 1 typically. @@ -65,7 +65,7 @@ with different biases per method: * Linear Support Vector Classification (:class:`LinearSVC`) shows an even more sigmoid curve as the RandomForestClassifier, which is typical for maximum-margin methods - (compare Niculescu-Mizil and Caruana [4]), which focus on hard samples + (compare Niculescu-Mizil and Caruana [4]_), which focus on hard samples that are close to the decision boundary (the support vectors). .. currentmodule:: sklearn.calibration @@ -190,18 +190,18 @@ a similar decrease in log-loss. .. topic:: References: - .. 
[1] Obtaining calibrated probability estimates from decision trees - and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001 + * Obtaining calibrated probability estimates from decision trees + and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001 - .. [2] Transforming Classifier Scores into Accurate Multiclass - Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) + * Transforming Classifier Scores into Accurate Multiclass + Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) - .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to - Regularized Likelihood Methods, J. Platt, (1999) + * Probabilistic Outputs for Support Vector Machines and Comparisons to + Regularized Likelihood Methods, J. Platt, (1999) .. [4] Predicting Good Probabilities with Supervised Learning, - A. Niculescu-Mizil & R. Caruana, ICML 2005 + A. Niculescu-Mizil & R. Caruana, ICML 2005 .. [5] On the combination of forecast probabilities for - consecutive precipitation periods. Wea. Forecasting, 5, 640– - 650., Wilks, D. S., 1990a + consecutive precipitation periods. Wea. Forecasting, 5, 640–650., + Wilks, D. S., 1990a diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index b41de5c108b5c..128f1c85f13e2 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -41,9 +41,34 @@ Functions base.clone config_context - set_config get_config + set_config + +.. _calibration_ref: +:mod:`sklearn.calibration`: Probability Calibration +=================================================== + +.. automodule:: sklearn.calibration + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`calibration` section for further details. + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + calibration.CalibratedClassifierCV + + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + calibration.calibration_curve .. 
_cluster_ref: @@ -80,13 +105,13 @@ Functions :toctree: generated/ :template: function.rst - cluster.estimate_bandwidth - cluster.k_means - cluster.ward_tree cluster.affinity_propagation cluster.dbscan + cluster.estimate_bandwidth + cluster.k_means cluster.mean_shift cluster.spectral_clustering + cluster.ward_tree .. _bicluster_ref: @@ -141,60 +166,21 @@ Classes :template: function.rst covariance.empirical_covariance + covariance.graph_lasso covariance.ledoit_wolf - covariance.shrunk_covariance covariance.oas - covariance.graph_lasso + covariance.shrunk_covariance +.. _cross_decomposition_ref: -:mod:`sklearn.model_selection`: Model Selection -=============================================== +:mod:`sklearn.cross_decomposition`: Cross decomposition +======================================================= -.. automodule:: sklearn.model_selection +.. automodule:: sklearn.cross_decomposition :no-members: :no-inherited-members: -**User guide:** See the :ref:`cross_validation`, :ref:`grid_search` and -:ref:`learning_curve` sections for further details. - -Splitter Classes ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.KFold - model_selection.GroupKFold - model_selection.StratifiedKFold - model_selection.LeaveOneGroupOut - model_selection.LeavePGroupsOut - model_selection.LeaveOneOut - model_selection.LeavePOut - model_selection.RepeatedKFold - model_selection.RepeatedStratifiedKFold - model_selection.ShuffleSplit - model_selection.GroupShuffleSplit - model_selection.StratifiedShuffleSplit - model_selection.PredefinedSplit - model_selection.TimeSeriesSplit - -Splitter Functions ------------------- - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.train_test_split - model_selection.check_cv - -Hyper-parameter optimizers --------------------------- +**User guide:** See the :ref:`cross_decomposition` section for further details. .. currentmodule:: sklearn @@ -202,33 +188,10 @@ Hyper-parameter optimizers :toctree: generated/ :template: class.rst - model_selection.GridSearchCV - model_selection.RandomizedSearchCV - model_selection.ParameterGrid - model_selection.ParameterSampler - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.fit_grid_point - -Model validation ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.cross_validate - model_selection.cross_val_score - model_selection.cross_val_predict - model_selection.permutation_test_score - model_selection.learning_curve - model_selection.validation_curve + cross_decomposition.CCA + cross_decomposition.PLSCanonical + cross_decomposition.PLSRegression + cross_decomposition.PLSSVD .. 
_datasets_ref: @@ -251,33 +214,33 @@ Loaders :template: function.rst datasets.clear_data_home - datasets.get_data_home + datasets.dump_svmlight_file datasets.fetch_20newsgroups datasets.fetch_20newsgroups_vectorized + datasets.fetch_california_housing + datasets.fetch_covtype + datasets.fetch_kddcup99 + datasets.fetch_lfw_pairs + datasets.fetch_lfw_people + datasets.fetch_mldata + datasets.fetch_olivetti_faces + datasets.fetch_rcv1 + datasets.fetch_species_distributions + datasets.get_data_home datasets.load_boston datasets.load_breast_cancer datasets.load_diabetes datasets.load_digits datasets.load_files datasets.load_iris - datasets.load_wine - datasets.fetch_lfw_pairs - datasets.fetch_lfw_people datasets.load_linnerud - datasets.mldata_filename - datasets.fetch_mldata - datasets.fetch_olivetti_faces - datasets.fetch_california_housing - datasets.fetch_covtype - datasets.fetch_kddcup99 - datasets.fetch_rcv1 datasets.load_mlcomp datasets.load_sample_image datasets.load_sample_images - datasets.fetch_species_distributions datasets.load_svmlight_file datasets.load_svmlight_files - datasets.dump_svmlight_file + datasets.load_wine + datasets.mldata_filename Samples generator ----------------- @@ -288,9 +251,11 @@ Samples generator :toctree: generated/ :template: function.rst + datasets.make_biclusters datasets.make_blobs - datasets.make_classification + datasets.make_checkerboard datasets.make_circles + datasets.make_classification datasets.make_friedman1 datasets.make_friedman2 datasets.make_friedman3 @@ -306,8 +271,6 @@ Samples generator datasets.make_sparse_uncorrelated datasets.make_spd_matrix datasets.make_swiss_roll - datasets.make_biclusters - datasets.make_checkerboard .. 
_decomposition_ref: @@ -327,29 +290,49 @@ Samples generator :toctree: generated/ :template: class.rst - decomposition.PCA - decomposition.IncrementalPCA - decomposition.KernelPCA + decomposition.DictionaryLearning decomposition.FactorAnalysis decomposition.FastICA - decomposition.TruncatedSVD + decomposition.IncrementalPCA + decomposition.KernelPCA + decomposition.LatentDirichletAllocation + decomposition.MiniBatchDictionaryLearning + decomposition.MiniBatchSparsePCA decomposition.NMF + decomposition.PCA decomposition.SparsePCA - decomposition.MiniBatchSparsePCA decomposition.SparseCoder - decomposition.DictionaryLearning - decomposition.MiniBatchDictionaryLearning - decomposition.LatentDirichletAllocation + decomposition.TruncatedSVD .. autosummary:: :toctree: generated/ :template: function.rst - decomposition.fastica decomposition.dict_learning decomposition.dict_learning_online + decomposition.fastica decomposition.sparse_encode +.. _lda_ref: + +:mod:`sklearn.discriminant_analysis`: Discriminant Analysis +=========================================================== + +.. automodule:: sklearn.discriminant_analysis + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`lda_qda` section for further details. + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated + :template: class.rst + + discriminant_analysis.LinearDiscriminantAnalysis + discriminant_analysis.QuadraticDiscriminantAnalysis + .. _dummy_ref: :mod:`sklearn.dummy`: Dummy estimators @@ -401,8 +384,8 @@ Samples generator ensemble.GradientBoostingRegressor ensemble.IsolationForest ensemble.RandomForestClassifier - ensemble.RandomTreesEmbedding ensemble.RandomForestRegressor + ensemble.RandomTreesEmbedding ensemble.VotingClassifier .. 
autosummary:: @@ -442,13 +425,13 @@ partial dependence :toctree: generated/ :template: class_without_init.rst - exceptions.NotFittedError exceptions.ChangedBehaviorWarning exceptions.ConvergenceWarning exceptions.DataConversionWarning exceptions.DataDimensionalityWarning exceptions.EfficiencyWarning exceptions.FitFailedWarning + exceptions.NotFittedError exceptions.NonBLASDotWarning exceptions.UndefinedMetricWarning @@ -485,9 +468,9 @@ From images :toctree: generated/ :template: function.rst - feature_extraction.image.img_to_graph - feature_extraction.image.grid_to_graph feature_extraction.image.extract_patches_2d + feature_extraction.image.grid_to_graph + feature_extraction.image.img_to_graph feature_extraction.image.reconstruct_from_patches_2d :template: class.rst @@ -571,8 +554,8 @@ From text :toctree: generated/ :template: class.rst - gaussian_process.GaussianProcessRegressor gaussian_process.GaussianProcessClassifier + gaussian_process.GaussianProcessRegressor Kernels: @@ -580,20 +563,20 @@ Kernels: :toctree: generated/ :template: class_with_call.rst + gaussian_process.kernels.CompoundKernel + gaussian_process.kernels.ConstantKernel + gaussian_process.kernels.DotProduct + gaussian_process.kernels.ExpSineSquared + gaussian_process.kernels.Exponentiation + gaussian_process.kernels.Hyperparameter gaussian_process.kernels.Kernel - gaussian_process.kernels.Sum + gaussian_process.kernels.Matern + gaussian_process.kernels.PairwiseKernel gaussian_process.kernels.Product - gaussian_process.kernels.Exponentiation - gaussian_process.kernels.ConstantKernel - gaussian_process.kernels.WhiteKernel gaussian_process.kernels.RBF - gaussian_process.kernels.Matern gaussian_process.kernels.RationalQuadratic - gaussian_process.kernels.ExpSineSquared - gaussian_process.kernels.DotProduct - gaussian_process.kernels.PairwiseKernel - gaussian_process.kernels.CompoundKernel - gaussian_process.kernels.Hyperparameter + gaussian_process.kernels.Sum + gaussian_process.kernels.WhiteKernel .. 
_isotonic_ref: @@ -618,8 +601,8 @@ Kernels: :toctree: generated :template: function.rst - isotonic.isotonic_regression isotonic.check_increasing + isotonic.isotonic_regression .. _kernel_approximation_ref: @@ -662,27 +645,6 @@ Kernels: kernel_ridge.KernelRidge -.. _lda_ref: - -:mod:`sklearn.discriminant_analysis`: Discriminant Analysis -=========================================================== - -.. automodule:: sklearn.discriminant_analysis - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`lda_qda` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - discriminant_analysis.LinearDiscriminantAnalysis - discriminant_analysis.QuadraticDiscriminantAnalysis - - .. _linear_model_ref: :mod:`sklearn.linear_model`: Generalized Linear Models @@ -763,8 +725,8 @@ Kernels: :toctree: generated :template: class.rst - manifold.LocallyLinearEmbedding manifold.Isomap + manifold.LocallyLinearEmbedding manifold.MDS manifold.SpectralEmbedding manifold.TSNE @@ -774,8 +736,8 @@ Kernels: :template: function.rst manifold.locally_linear_embedding - manifold.spectral_embedding manifold.smacof + manifold.spectral_embedding .. _metrics_ref: @@ -801,8 +763,8 @@ details. :toctree: generated/ :template: function.rst - metrics.make_scorer metrics.get_scorer + metrics.make_scorer Classification metrics ---------------------- @@ -930,9 +892,12 @@ See the :ref:`metrics` section of the user guide for further details. metrics.pairwise.additive_chi2_kernel metrics.pairwise.chi2_kernel + metrics.pairwise.cosine_similarity + metrics.pairwise.cosine_distances metrics.pairwise.distance_metrics metrics.pairwise.euclidean_distances metrics.pairwise.kernel_metrics + metrics.pairwise.laplacian_kernel metrics.pairwise.linear_kernel metrics.pairwise.manhattan_distances metrics.pairwise.pairwise_distances @@ -940,16 +905,13 @@ See the :ref:`metrics` section of the user guide for further details. 
metrics.pairwise.polynomial_kernel metrics.pairwise.rbf_kernel metrics.pairwise.sigmoid_kernel - metrics.pairwise.cosine_similarity - metrics.pairwise.cosine_distances - metrics.pairwise.laplacian_kernel - metrics.pairwise_distances - metrics.pairwise_distances_argmin - metrics.pairwise_distances_argmin_min metrics.pairwise.paired_euclidean_distances metrics.pairwise.paired_manhattan_distances metrics.pairwise.paired_cosine_distances metrics.pairwise.paired_distances + metrics.pairwise_distances + metrics.pairwise_distances_argmin + metrics.pairwise_distances_argmin_min .. _mixture_ref: @@ -969,9 +931,93 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated/ :template: class.rst - mixture.GaussianMixture mixture.BayesianGaussianMixture + mixture.GaussianMixture + +.. _modelselection_ref: + +:mod:`sklearn.model_selection`: Model Selection +=============================================== + +.. automodule:: sklearn.model_selection + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`cross_validation`, :ref:`grid_search` and +:ref:`learning_curve` sections for further details. + +Splitter Classes +---------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + model_selection.GroupKFold + model_selection.GroupShuffleSplit + model_selection.KFold + model_selection.LeaveOneGroupOut + model_selection.LeavePGroupsOut + model_selection.LeaveOneOut + model_selection.LeavePOut + model_selection.PredefinedSplit + model_selection.RepeatedKFold + model_selection.RepeatedStratifiedKFold + model_selection.ShuffleSplit + model_selection.StratifiedKFold + model_selection.StratifiedShuffleSplit + model_selection.TimeSeriesSplit +Splitter Functions +------------------ + +.. currentmodule:: sklearn + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + model_selection.check_cv + model_selection.train_test_split + +Hyper-parameter optimizers +-------------------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + model_selection.GridSearchCV + model_selection.ParameterGrid + model_selection.ParameterSampler + model_selection.RandomizedSearchCV + + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + model_selection.fit_grid_point + +Model validation +---------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + model_selection.cross_validate + model_selection.cross_val_predict + model_selection.cross_val_score + model_selection.learning_curve + model_selection.permutation_test_score + model_selection.validation_curve .. _multiclass_ref: @@ -1011,9 +1057,9 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated :template: class.rst + multioutput.ClassifierChain multioutput.MultiOutputRegressor multioutput.MultiOutputClassifier - multioutput.ClassifierChain .. _naive_bayes_ref: @@ -1032,9 +1078,9 @@ See the :ref:`metrics` section of the user guide for further details. :toctree: generated/ :template: class.rst + naive_bayes.BernoulliNB naive_bayes.GaussianNB naive_bayes.MultinomialNB - naive_bayes.BernoulliNB .. _neighbors_ref: @@ -1054,17 +1100,17 @@ See the :ref:`metrics` section of the user guide for further details. 
:toctree: generated/ :template: class.rst - neighbors.NearestNeighbors - neighbors.KNeighborsClassifier - neighbors.RadiusNeighborsClassifier - neighbors.KNeighborsRegressor - neighbors.RadiusNeighborsRegressor - neighbors.NearestCentroid neighbors.BallTree - neighbors.KDTree neighbors.DistanceMetric + neighbors.KDTree neighbors.KernelDensity + neighbors.KNeighborsClassifier + neighbors.KNeighborsRegressor neighbors.LocalOutlierFactor + neighbors.RadiusNeighborsClassifier + neighbors.RadiusNeighborsRegressor + neighbors.NearestCentroid + neighbors.NearestNeighbors .. autosummary:: :toctree: generated/ @@ -1094,57 +1140,6 @@ See the :ref:`metrics` section of the user guide for further details. neural_network.MLPClassifier neural_network.MLPRegressor - -.. _calibration_ref: - -:mod:`sklearn.calibration`: Probability Calibration -=================================================== - -.. automodule:: sklearn.calibration - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`calibration` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - calibration.CalibratedClassifierCV - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - calibration.calibration_curve - - -.. _cross_decomposition_ref: - -:mod:`sklearn.cross_decomposition`: Cross decomposition -======================================================= - -.. automodule:: sklearn.cross_decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_decomposition` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - cross_decomposition.PLSRegression - cross_decomposition.PLSCanonical - cross_decomposition.CCA - cross_decomposition.PLSSVD - - .. _pipeline_ref: :mod:`sklearn.pipeline`: Pipeline @@ -1160,8 +1155,8 @@ See the :ref:`metrics` section of the user guide for further details. 
:toctree: generated/ :template: class.rst - pipeline.Pipeline pipeline.FeatureUnion + pipeline.Pipeline .. autosummary:: :toctree: generated/ @@ -1287,13 +1282,13 @@ Estimators :toctree: generated/ :template: class.rst - svm.SVC svm.LinearSVC - svm.NuSVC - svm.SVR svm.LinearSVR + svm.NuSVC svm.NuSVR svm.OneClassSVM + svm.SVC + svm.SVR .. autosummary:: :toctree: generated/ @@ -1308,11 +1303,11 @@ Low-level methods :toctree: generated :template: function.rst - svm.libsvm.fit + svm.libsvm.cross_validation svm.libsvm.decision_function + svm.libsvm.fit svm.libsvm.predict svm.libsvm.predict_proba - svm.libsvm.cross_validation .. _tree_ref: @@ -1361,26 +1356,26 @@ Low-level methods :toctree: generated/ :template: function.rst - utils.assert_all_finite utils.as_float_array + utils.assert_all_finite utils.check_X_y utils.check_array utils.check_consistent_length utils.check_random_state - utils.indexable utils.class_weight.compute_class_weight utils.class_weight.compute_sample_weight utils.estimator_checks.check_estimator utils.extmath.safe_sparse_dot + utils.indexable utils.resample utils.safe_indexing utils.shuffle - utils.sparsefuncs.mean_variance_axis utils.sparsefuncs.incr_mean_variance_axis utils.sparsefuncs.inplace_column_scale utils.sparsefuncs.inplace_row_scale utils.sparsefuncs.inplace_swap_row utils.sparsefuncs.inplace_swap_column + utils.sparsefuncs.mean_variance_axis utils.validation.check_is_fitted utils.validation.check_symmetric utils.validation.column_or_1d @@ -1409,25 +1404,25 @@ To be removed in 0.20 :toctree: generated/ :template: deprecated_class.rst - grid_search.ParameterGrid - grid_search.ParameterSampler - grid_search.GridSearchCV - grid_search.RandomizedSearchCV - cross_validation.LeaveOneOut - cross_validation.LeavePOut cross_validation.KFold cross_validation.LabelKFold cross_validation.LeaveOneLabelOut + cross_validation.LeaveOneOut + cross_validation.LeavePOut cross_validation.LeavePLabelOut cross_validation.LabelShuffleSplit - 
cross_validation.StratifiedKFold cross_validation.ShuffleSplit + cross_validation.StratifiedKFold cross_validation.StratifiedShuffleSplit cross_validation.PredefinedSplit decomposition.RandomizedPCA gaussian_process.GaussianProcess - mixture.GMM + grid_search.ParameterGrid + grid_search.ParameterSampler + grid_search.GridSearchCV + grid_search.RandomizedSearchCV mixture.DPGMM + mixture.GMM mixture.VBGMM @@ -1435,11 +1430,11 @@ To be removed in 0.20 :toctree: generated/ :template: deprecated_function.rst - grid_search.fit_grid_point - learning_curve.learning_curve - learning_curve.validation_curve + cross_validation.check_cv cross_validation.cross_val_predict cross_validation.cross_val_score - cross_validation.check_cv cross_validation.permutation_test_score cross_validation.train_test_split + grid_search.fit_grid_point + learning_curve.learning_curve + learning_curve.validation_curve diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index f7977845a8ce2..b18cb3a6adcf7 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -301,7 +301,9 @@ is given. Affinity Propagation can be interesting as it chooses the number of clusters based on the data provided. For this purpose, the two important parameters are the *preference*, which controls how many exemplars are -used, and the *damping factor*. +used, and the *damping factor* which damps the responsibility and +availability messages to avoid numerical oscillations when updating these +messages. The main drawback of Affinity Propagation is its complexity. The algorithm has a time complexity of the order :math:`O(N^2 T)`, where :math:`N` @@ -350,6 +352,13 @@ to be the exemplar of sample :math:`i` is given by: To begin with, all values for :math:`r` and :math:`a` are set to zero, and the calculation of each iterates until convergence. 
+As discussed above, in order to avoid numerical oscillations when updating the +messages, the damping factor :math:`\lambda` is introduced to the iteration process: + +.. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) +.. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) + +where :math:`t` indicates the iteration number. .. _mean_shift: @@ -1334,7 +1343,7 @@ mean of homogeneity and completeness**: .. topic:: References - .. [RH2007] `V-Measure: A conditional entropy-based external cluster evaluation + * `V-Measure: A conditional entropy-based external cluster evaluation measure `_ Andrew Rosenberg and Julia Hirschberg, 2007 diff --git a/doc/modules/covariance.rst b/doc/modules/covariance.rst index 88f40f3896190..2f95051ac9ea3 100644 --- a/doc/modules/covariance.rst +++ b/doc/modules/covariance.rst @@ -95,7 +95,7 @@ bias/variance trade-off, and is discussed below. Ledoit-Wolf shrinkage --------------------- -In their 2004 paper [1], O. Ledoit and M. Wolf propose a formula so as +In their 2004 paper [1]_, O. Ledoit and M. Wolf propose a formula so as to compute the optimal shrinkage coefficient :math:`\alpha` that minimizes the Mean Squared Error between the estimated and the real covariance matrix. @@ -112,10 +112,11 @@ fitting a :class:`LedoitWolf` object to the same sample. for visualizing the performances of the Ledoit-Wolf estimator in terms of likelihood. +.. topic:: References: -[1] O. Ledoit and M. Wolf, "A Well-Conditioned Estimator for Large-Dimensional - Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, - February 2004, pages 365-411. + .. [1] O. Ledoit and M. Wolf, "A Well-Conditioned Estimator for Large-Dimensional + Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, + February 2004, pages 365-411. .. 
_oracle_approximating_shrinkage: @@ -123,7 +124,7 @@ Oracle Approximating Shrinkage ------------------------------ Under the assumption that the data are Gaussian distributed, Chen et -al. [2] derived a formula aimed at choosing a shrinkage coefficient that +al. [2]_ derived a formula aimed at choosing a shrinkage coefficient that yields a smaller Mean Squared Error than the one given by Ledoit and Wolf's formula. The resulting estimator is known as the Oracle Shrinkage Approximating estimator of the covariance. @@ -141,8 +142,10 @@ object to the same sample. Bias-variance trade-off when setting the shrinkage: comparing the choices of Ledoit-Wolf and OAS estimators -[2] Chen et al., "Shrinkage Algorithms for MMSE Covariance Estimation", - IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. +.. topic:: References: + + .. [2] Chen et al., "Shrinkage Algorithms for MMSE Covariance Estimation", + IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. .. topic:: Examples: @@ -266,14 +269,14 @@ perform outlier detection and discard/downweight some observations according to further processing of the data. The ``sklearn.covariance`` package implements a robust estimator of covariance, -the Minimum Covariance Determinant [3]. +the Minimum Covariance Determinant [3]_. Minimum Covariance Determinant ------------------------------ The Minimum Covariance Determinant estimator is a robust estimator of -a data set's covariance introduced by P.J. Rousseeuw in [3]. The idea +a data set's covariance introduced by P.J. Rousseeuw in [3]_. The idea is to find a given proportion (h) of "good" observations which are not outliers and compute their empirical covariance matrix. This empirical covariance matrix is then rescaled to compensate the @@ -283,7 +286,7 @@ weights to observations according to their Mahalanobis distance, leading to a reweighted estimate of the covariance matrix of the data set ("reweighting step"). 
-Rousseeuw and Van Driessen [4] developed the FastMCD algorithm in order +Rousseeuw and Van Driessen [4]_ developed the FastMCD algorithm in order to compute the Minimum Covariance Determinant. This algorithm is used in scikit-learn when fitting an MCD object to data. The FastMCD algorithm also computes a robust estimate of the data set location at @@ -292,11 +295,13 @@ the same time. Raw estimates can be accessed as ``raw_location_`` and ``raw_covariance_`` attributes of a :class:`MinCovDet` robust covariance estimator object. -[3] P. J. Rousseeuw. Least median of squares regression. - J. Am Stat Ass, 79:871, 1984. -[4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, - 1999, American Statistical Association and the American Society - for Quality, TECHNOMETRICS. +.. topic:: References: + + .. [3] P. J. Rousseeuw. Least median of squares regression. + J. Am Stat Ass, 79:871, 1984. + .. [4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, + 1999, American Statistical Association and the American Society + for Quality, TECHNOMETRICS. .. topic:: Examples: diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index ab7d2227447b1..b47726979351f 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -270,12 +270,12 @@ The following sections list utilities to generate indices that can be used to generate dataset splits according to different cross validation strategies. -.. _iid_cv +.. _iid_cv: Cross-validation iterators for i.i.d. data ========================================== -Assuming that some data is Independent Identically Distributed (i.i.d.) is +Assuming that some data is Independent and Identically Distributed (i.i.d.) is making the assumption that all samples stem from the same generative process and that the generative process is assumed to have no memory of past generated samples. @@ -287,10 +287,10 @@ The following cross-validators can be used in such cases. 
While i.i.d. data is a common assumption in machine learning theory, it rarely holds in practice. If one knows that the samples have been generated using a time-dependent process, it's safer to -use a `time-series aware cross-validation scheme ` +use a :ref:`time-series aware cross-validation scheme <timeseries_cv>`. Similarly if we know that the generative process has a group structure (samples from collected from different subjects, experiments, measurement -devices) it safer to use `group-wise cross-validation `. +devices) it is safer to use :ref:`group-wise cross-validation <group_cv>`. K-fold @@ -506,7 +506,7 @@ Stratified Shuffle Split stratified splits, *i.e* which creates splits by preserving the same percentage for each target class as in the complete set. -.. _group_cv +.. _group_cv: Cross-validation iterators for grouped data. ============================================ @@ -532,11 +532,11 @@ parameter. Group k-fold ------------ -class:GroupKFold is a variation of k-fold which ensures that the same group is +:class:`GroupKFold` is a variation of k-fold which ensures that the same group is not represented in both testing and training sets. For example if the data is obtained from different subjects with several samples per-subject and if the model is flexible enough to learn from highly person specific features it -could fail to generalize to new subjects. class:GroupKFold makes it possible +could fail to generalize to new subjects. :class:`GroupKFold` makes it possible to detect this kind of overfitting situations. Imagine you have three subjects, each with an associated number from 1 to 3:: @@ -613,8 +613,6 @@ Example of Leave-2-Group Out:: Group Shuffle Split ------------------- -:class:`GroupShuffleSplit` - The :class:`GroupShuffleSplit` iterator behaves as a combination of :class:`ShuffleSplit` and :class:`LeavePGroupsOut`, and generates a sequence of randomized partitions in which a subset of groups are held @@ -655,7 +653,7 @@ e.g. when searching for hyperparameters. 
For example, when using a validation set, set the ``test_fold`` to 0 for all samples that are part of the validation set, and to -1 for all other samples. -.. _timeseries_cv +.. _timeseries_cv: Cross validation of time series data ==================================== @@ -725,8 +723,7 @@ to shuffle the data indices before splitting them. Note that: shuffling will be different every time ``KFold(..., shuffle=True)`` is iterated. However, ``GridSearchCV`` will use the same shuffling for each set of parameters validated by a single call to its ``fit`` method. -* To ensure results are repeatable (*on the same platform*), use a fixed value - for ``random_state``. +* To get identical results for each split, set ``random_state`` to an integer. Cross validation and model selection ==================================== diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 12a0ff6a74ba0..b766f4dfd4d0c 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -246,7 +246,7 @@ amount of time (e.g., on large datasets). .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. - .. [GEW2006] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized + * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees", Machine Learning, 63(1), 3-42, 2006. .. _random_forest_feature_importance: @@ -915,10 +915,10 @@ averaged. .. _voting_classifier: -VotingClassifier +Voting Classifier ======================== -The idea behind the voting classifier implementation is to combine +The idea behind the :class:`VotingClassifier` is to combine conceptually different machine learning classifiers and use a majority vote or the average predicted probabilities (soft vote) to predict the class labels. 
Such a classifier can be useful for a set of equally well performing model diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst index 0f0adecdd3cf3..f9b767bd2ae89 100644 --- a/doc/modules/feature_selection.rst +++ b/doc/modules/feature_selection.rst @@ -123,10 +123,11 @@ Given an external estimator that assigns weights to features (e.g., the coefficients of a linear model), recursive feature elimination (:class:`RFE`) is to select features by recursively considering smaller and smaller sets of features. First, the estimator is trained on the initial set of features and -weights are assigned to each one of them. Then, features whose absolute weights -are the smallest are pruned from the current set features. That procedure is -recursively repeated on the pruned set until the desired number of features to -select is eventually reached. +the importance of each feature is obtained either through a ``coef_`` attribute +or through a ``feature_importances_`` attribute. Then, the least important +features are pruned from the current set of features. That procedure is recursively +repeated on the pruned set until the desired number of features to select is +eventually reached. :class:`RFECV` performs RFE in a cross-validation loop to find the optimal number of features. diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 7fae49349f342..94cca8999e489 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -601,12 +601,7 @@ shown in the following figure: References ---------- - * `[RW2006] - `_ - **Gaussian Processes for Machine Learning**, - Carl Eduard Rasmussen and Christopher K.I. Williams, MIT Press 2006. - Link to an official complete PDF version of the book - `here `_ . +.. [RW2006] Carl Eduard Rasmussen and Christopher K.I. Williams, "Gaussian Processes for Machine Learning", MIT Press 2006, Link to an official complete PDF version of the book `here `_ . .. 
currentmodule:: sklearn.gaussian_process diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 1867a66594ad4..3851392ed2d88 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -84,7 +84,7 @@ evaluated and the best combination is retained. dataset. This is the best practice for evaluating the performance of a model with grid search. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation` + - See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` for an example of :class:`GridSearchCV` being used to evaluate multiple metrics simultaneously. @@ -183,7 +183,7 @@ the ``best_estimator_`` on the whole dataset. If the search should not be refit, set ``refit=False``. Leaving refit to the default value ``None`` will result in an error when using multiple metrics. -See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation` +See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` for an example usage. Composite estimators and parameter spaces diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e6d0ea882f6d3..018ff884c4ae2 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -1141,7 +1141,7 @@ in the following ways. .. topic:: References: - .. [#f1] Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale estimates, pg 172 + * Peter J. Huber, Elvezio M. 
Ronchetti: Robust Statistics, Concomitant scale estimates, pg 172 Also, this estimator is different from the R implementation of Robust Regression (http://www.ats.ucla.edu/stat/r/dae/rreg.htm) because the R implementation does a weighted least diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 813a39339e848..4800569556758 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -81,6 +81,7 @@ Scoring Function 'v_measure_score' :func:`metrics.v_measure_score` **Regression** +'explained_variance' :func:`metrics.explained_variance_score` 'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` 'neg_mean_squared_error' :func:`metrics.mean_squared_error` 'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` @@ -101,7 +102,7 @@ Usage examples: >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] + ValueError: 'wrong_choice' is not a valid scoring value. 
Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] .. note:: @@ -242,14 +243,14 @@ permitted and will require a wrapper to return a single metric:: >>> # A sample toy binary classification dataset >>> X, y = datasets.make_classification(n_classes=2, random_state=0) >>> svm = LinearSVC(random_state=0) - >>> tp = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[0, 0] - >>> tn = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[0, 0] - >>> fp = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[1, 0] - >>> fn = lambda y_true, y_pred: confusion_matrix(y_true, y_pred)[0, 1] + >>> def tp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 0] + >>> def tn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 0] + >>> def fp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[1, 0] + >>> def fn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 1] >>> scoring = {'tp' : make_scorer(tp), 'tn' : make_scorer(tn), ... 'fp' : make_scorer(fp), 'fn' : make_scorer(fn)} >>> cv_results = cross_validate(svm.fit(X, y), X, y, scoring=scoring) - >>> # Getting the test set false positive scores + >>> # Getting the test set true positive scores >>> print(cv_results['test_tp']) # doctest: +NORMALIZE_WHITESPACE [12 13 15] >>> # Getting the test set false negative scores @@ -670,10 +671,6 @@ binary classification and multilabel indicator format. 
for an example of :func:`precision_recall_curve` usage to evaluate classifier output quality. - * See :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_recovery.py` - for an example of :func:`precision_recall_curve` usage to select - features for sparse linear models. - Binary classification ^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst index 5094372aca960..2eec94f76b1c2 100644 --- a/doc/modules/multiclass.rst +++ b/doc/modules/multiclass.rst @@ -17,42 +17,42 @@ The :mod:`sklearn.multiclass` module implements *meta-estimators* to solve by decomposing such problems into binary classification problems. Multitarget regression is also supported. - - **Multiclass classification** means a classification task with more than - two classes; e.g., classify a set of images of fruits which may be oranges, - apples, or pears. Multiclass classification makes the assumption that each - sample is assigned to one and only one label: a fruit can be either an - apple or a pear but not both at the same time. - - - **Multilabel classification** assigns to each sample a set of target - labels. This can be thought as predicting properties of a data-point - that are not mutually exclusive, such as topics that are relevant for a - document. A text might be about any of religion, politics, finance or - education at the same time or none of these. - - - **Multioutput regression** assigns each sample a set of target - values. This can be thought of as predicting several properties - for each data-point, such as wind direction and magnitude at a - certain location. - - - **Multioutput-multiclass classification** and **multi-task classification** - means that a single estimator has to handle several joint classification - tasks. This is both a generalization of the multi-label classification - task, which only considers binary classification, as well as a - generalization of the multi-class classification task. 
*The output format - is a 2d numpy array or sparse matrix.* - - The set of labels can be different for each output variable. - For instance, a sample could be assigned "pear" for an output variable that - takes possible values in a finite set of species such as "pear", "apple"; - and "blue" or "green" for a second output variable that takes possible values - in a finite set of colors such as "green", "red", "blue", "yellow"... - - This means that any classifiers handling multi-output - multiclass or multi-task classification tasks, - support the multi-label classification task as a special case. - Multi-task classification is similar to the multi-output - classification task with different model formulations. For - more information, see the relevant estimator documentation. +- **Multiclass classification** means a classification task with more than + two classes; e.g., classify a set of images of fruits which may be oranges, + apples, or pears. Multiclass classification makes the assumption that each + sample is assigned to one and only one label: a fruit can be either an + apple or a pear but not both at the same time. + +- **Multilabel classification** assigns to each sample a set of target + labels. This can be thought as predicting properties of a data-point + that are not mutually exclusive, such as topics that are relevant for a + document. A text might be about any of religion, politics, finance or + education at the same time or none of these. + +- **Multioutput regression** assigns each sample a set of target + values. This can be thought of as predicting several properties + for each data-point, such as wind direction and magnitude at a + certain location. + +- **Multioutput-multiclass classification** and **multi-task classification** + means that a single estimator has to handle several joint classification + tasks. 
This is both a generalization of the multi-label classification + task, which only considers binary classification, as well as a + generalization of the multi-class classification task. *The output format + is a 2d numpy array or sparse matrix.* + + The set of labels can be different for each output variable. + For instance, a sample could be assigned "pear" for an output variable that + takes possible values in a finite set of species such as "pear", "apple"; + and "blue" or "green" for a second output variable that takes possible values + in a finite set of colors such as "green", "red", "blue", "yellow"... + + This means that any classifiers handling multi-output + multiclass or multi-task classification tasks, + support the multi-label classification task as a special case. + Multi-task classification is similar to the multi-output + classification task with different model formulations. For + more information, see the relevant estimator documentation. All scikit-learn classifiers are capable of multiclass classification, but the meta-estimators offered by :mod:`sklearn.multiclass` @@ -64,20 +64,69 @@ Below is a summary of the classifiers supported by scikit-learn grouped by strategy; you don't need the meta-estimators in this class if you're using one of these, unless you want custom multiclass behavior: - - Inherently multiclass: :ref:`Naive Bayes `, - :ref:`LDA and QDA `, - :ref:`Decision Trees `, :ref:`Random Forests `, - :ref:`Nearest Neighbors `, - setting ``multi_class='multinomial'`` in - :class:`sklearn.linear_model.LogisticRegression`. - - Support multilabel: :ref:`Decision Trees `, - :ref:`Random Forests `, :ref:`Nearest Neighbors `. - - One-Vs-One: :class:`sklearn.svm.SVC`. - - One-Vs-All: all linear models except :class:`sklearn.svm.SVC`. - -Some estimators also support multioutput-multiclass classification -tasks :ref:`Decision Trees `, :ref:`Random Forests `, -:ref:`Nearest Neighbors `. 
+- **Inherently multiclass:** + + - :class:`sklearn.naive_bayes.BernoulliNB` + - :class:`sklearn.tree.DecisionTreeClassifier` + - :class:`sklearn.tree.ExtraTreeClassifier` + - :class:`sklearn.ensemble.ExtraTreesClassifier` + - :class:`sklearn.naive_bayes.GaussianNB` + - :class:`sklearn.neighbors.KNeighborsClassifier` + - :class:`sklearn.semi_supervised.LabelPropagation` + - :class:`sklearn.semi_supervised.LabelSpreading` + - :class:`sklearn.discriminant_analysis.LinearDiscriminantAnalysis` + - :class:`sklearn.svm.LinearSVC` (setting multi_class="crammer_singer") + - :class:`sklearn.linear_model.LogisticRegression` (setting multi_class="multinomial") + - :class:`sklearn.linear_model.LogisticRegressionCV` (setting multi_class="multinomial") + - :class:`sklearn.neural_network.MLPClassifier` + - :class:`sklearn.neighbors.NearestCentroid` + - :class:`sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis` + - :class:`sklearn.neighbors.RadiusNeighborsClassifier` + - :class:`sklearn.ensemble.RandomForestClassifier` + - :class:`sklearn.linear_model.RidgeClassifier` + - :class:`sklearn.linear_model.RidgeClassifierCV` + + +- **Multiclass as One-Vs-One:** + + - :class:`sklearn.svm.NuSVC` + - :class:`sklearn.svm.SVC`. 
+ - :class:`sklearn.gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_one") + + +- **Multiclass as One-Vs-All:** + + - :class:`sklearn.ensemble.GradientBoostingClassifier` + - :class:`sklearn.gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_rest") + - :class:`sklearn.svm.LinearSVC` (setting multi_class="ovr") + - :class:`sklearn.linear_model.LogisticRegression` (setting multi_class="ovr") + - :class:`sklearn.linear_model.LogisticRegressionCV` (setting multi_class="ovr") + - :class:`sklearn.linear_model.SGDClassifier` + - :class:`sklearn.linear_model.Perceptron` + - :class:`sklearn.linear_model.PassiveAggressiveClassifier` + + +- **Support multilabel:** + + - :class:`sklearn.tree.DecisionTreeClassifier` + - :class:`sklearn.tree.ExtraTreeClassifier` + - :class:`sklearn.ensemble.ExtraTreesClassifier` + - :class:`sklearn.neighbors.KNeighborsClassifier` + - :class:`sklearn.neural_network.MLPClassifier` + - :class:`sklearn.neighbors.RadiusNeighborsClassifier` + - :class:`sklearn.ensemble.RandomForestClassifier` + - :class:`sklearn.linear_model.RidgeClassifierCV` + + +- **Support multiclass-multioutput:** + + - :class:`sklearn.tree.DecisionTreeClassifier` + - :class:`sklearn.tree.ExtraTreeClassifier` + - :class:`sklearn.ensemble.ExtraTreesClassifier` + - :class:`sklearn.neighbors.KNeighborsClassifier` + - :class:`sklearn.neighbors.RadiusNeighborsClassifier` + - :class:`sklearn.ensemble.RandomForestClassifier` + .. warning:: @@ -202,8 +251,8 @@ Below is an example of multiclass learning using OvO:: .. topic:: References: - .. [1] "Pattern Recognition and Machine Learning. Springer", - Christopher M. Bishop, page 183, (First Edition) + * "Pattern Recognition and Machine Learning. Springer", + Christopher M. Bishop, page 183, (First Edition) .. _ecoc: @@ -266,19 +315,19 @@ Below is an example of multiclass learning using Output-Codes:: .. topic:: References: - .. 
[2] "Solving multiclass learning problems via error-correcting output codes", - Dietterich T., Bakiri G., - Journal of Artificial Intelligence Research 2, - 1995. + * "Solving multiclass learning problems via error-correcting output codes", + Dietterich T., Bakiri G., + Journal of Artificial Intelligence Research 2, + 1995. .. [3] "The error coding method and PICTs", James G., Hastie T., Journal of Computational and Graphical statistics 7, 1998. - .. [4] "The Elements of Statistical Learning", - Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) - 2008. + * "The Elements of Statistical Learning", + Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) + 2008. Multioutput regression ====================== @@ -353,7 +402,7 @@ Classifier Chain Classifier chains (see :class:`ClassifierChain`) are a way of combining a number of binary classifiers into a single multi-label model that is capable - of exploiting correlations among targets. +of exploiting correlations among targets. For a multi-label classification problem with N classes, N binary classifiers are assigned an integer between 0 and N-1. These integers @@ -373,5 +422,6 @@ typically many randomly ordered chains are fit and their predictions are averaged together. .. topic:: References: + Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, - "Classifier Chains for Multi-label Classification", 2009. \ No newline at end of file + "Classifier Chains for Multi-label Classification", 2009. diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index 011bb6ea07889..db130403f9023 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -126,8 +126,8 @@ This strategy is illustrated below. .. topic:: References: - .. [RD1999] Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum - covariance determinant estimator" Technometrics 41(3), 212 (1999) + * Rousseeuw, P.J., Van Driessen, K. 
"A fast algorithm for the minimum + covariance determinant estimator" Technometrics 41(3), 212 (1999) .. _isolation_forest: @@ -172,8 +172,8 @@ This strategy is illustrated below. .. topic:: References: - .. [LTZ2008] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." - Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. + * Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." + Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. Local Outlier Factor @@ -228,7 +228,7 @@ This strategy is illustrated below. .. topic:: References: - .. [BKNS2000] Breunig, Kriegel, Ng, and Sander (2000) + * Breunig, Kriegel, Ng, and Sander (2000) `LOF: identifying density-based local outliers. `_ Proc. ACM SIGMOD @@ -272,16 +272,16 @@ multiple modes and :class:`ensemble.IsolationForest` and opposite, the decision rule based on fitting an :class:`covariance.EllipticEnvelope` learns an ellipse, which fits well the inlier distribution. The :class:`ensemble.IsolationForest` - and :class:`neighbors.LocalOutlierFactor` perform as well. + and :class:`neighbors.LocalOutlierFactor` perform as well. - |outlier1| * - As the inlier distribution becomes bimodal, the :class:`covariance.EllipticEnvelope` does not fit well the inliers. However, we can see that :class:`ensemble.IsolationForest`, - :class:`svm.OneClassSVM` and :class:`neighbors.LocalOutlierFactor` - have difficulties to detect the two modes, - and that the :class:`svm.OneClassSVM` + :class:`svm.OneClassSVM` and :class:`neighbors.LocalOutlierFactor` + have difficulties to detect the two modes, + and that the :class:`svm.OneClassSVM` tends to overfit: because it has no model of inliers, it interprets a region where, by chance some outliers are clustered, as inliers. 
@@ -292,7 +292,7 @@ multiple modes and :class:`ensemble.IsolationForest` and :class:`svm.OneClassSVM` is able to recover a reasonable approximation as well as :class:`ensemble.IsolationForest` and :class:`neighbors.LocalOutlierFactor`, - whereas the :class:`covariance.EllipticEnvelope` completely fails. + whereas the :class:`covariance.EllipticEnvelope` completely fails. - |outlier3| .. topic:: Examples: diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index a4e1364a85ae6..18ef7e004c8de 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -199,7 +199,7 @@ matrices as input, as long as ``with_mean=False`` is explicitly passed to the constructor. Otherwise a ``ValueError`` will be raised as silently centering would break the sparsity and would often crash the execution by allocating excessive amounts of memory unintentionally. -:class:`RobustScaler` cannot be fited to sparse inputs, but you can use +:class:`RobustScaler` cannot be fitted to sparse inputs, but you can use the ``transform`` method on sparse inputs. Note that the scalers accept both Compressed Sparse Rows and Compressed diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index f793c34b7f53d..3f577795e24be 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -481,7 +481,10 @@ Regression criteria If the target is a continuous value, then for node :math:`m`, representing a region :math:`R_m` with :math:`N_m` observations, common -criteria to minimise are +criteria to minimise when determining locations for future +splits are Mean Squared Error, which minimizes the L2 error +using mean values at terminal nodes, and Mean Absolute Error, which +minimizes the L1 error using median values at terminal nodes.
Mean Squared Error: diff --git a/doc/related_projects.rst b/doc/related_projects.rst index 877a6beeed60e..70971e934ccac 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -43,9 +43,6 @@ enhance the functionality of scikit-learn's estimators. **Experimentation frameworks** -- `PyMC `_ Bayesian statistical models and - fitting algorithms. - - `REP `_ Environment for conducting data-driven research in a consistent and reproducible way @@ -222,18 +219,19 @@ Other packages useful for data analysis and machine learning. statistical models. More focused on statistical tests and less on prediction than scikit-learn. +- `PyMC `_ Bayesian statistical models and + fitting algorithms. + - `Sacred `_ Tool to help you configure, organize, log and reproduce experiments -- `gensim `_ A library for topic modelling, - document indexing and similarity retrieval - - `Seaborn `_ Visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics. - `Deep Learning `_ A curated list of deep learning software libraries. + Domain specific packages ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -243,6 +241,9 @@ Domain specific packages - `Natural language toolkit (nltk) `_ Natural language processing and some machine learning. +- `gensim `_ A library for topic modelling, + document indexing and similarity retrieval + - `NiLearn `_ Machine learning for neuro-imaging. - `AstroML `_ Machine learning for astronomy. diff --git a/doc/themes/scikit-learn/layout.html b/doc/themes/scikit-learn/layout.html index d659b9ce86179..9a2691c6b1fbb 100644 --- a/doc/themes/scikit-learn/layout.html +++ b/doc/themes/scikit-learn/layout.html @@ -85,9 +85,9 @@
  • FAQ
  • Contributing
  • -
  • Scikit-learn 0.18 (stable)
  • +
  • Scikit-learn 0.19 (stable)
  • +
  • Scikit-learn 0.18
  • Scikit-learn 0.17
  • -
  • Scikit-learn 0.16
  • PDF documentation
  • diff --git a/doc/themes/scikit-learn/static/img/sloan_logo.jpg b/doc/themes/scikit-learn/static/img/sloan_logo.jpg new file mode 100644 index 0000000000000..ea714312753a2 Binary files /dev/null and b/doc/themes/scikit-learn/static/img/sloan_logo.jpg differ diff --git a/doc/themes/scikit-learn/static/img/sydney-primary.jpeg b/doc/themes/scikit-learn/static/img/sydney-primary.jpeg new file mode 100644 index 0000000000000..292e217402f2f Binary files /dev/null and b/doc/themes/scikit-learn/static/img/sydney-primary.jpeg differ diff --git a/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg b/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg new file mode 100644 index 0000000000000..d35e8c724f435 Binary files /dev/null and b/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg differ diff --git a/doc/tutorial/machine_learning_map/svg2imagemap.py b/doc/tutorial/machine_learning_map/svg2imagemap.py index c2e592d5232fb..c34bf17fab0ef 100644 --- a/doc/tutorial/machine_learning_map/svg2imagemap.py +++ b/doc/tutorial/machine_learning_map/svg2imagemap.py @@ -4,7 +4,7 @@ This script converts a subset of SVG into an HTML imagemap Note *subset*. It only handles elements, for which it only pays -attention to the M and L commands. Futher, it only notices the "translate" +attention to the M and L commands. Further, it only notices the "translate" transform. It was written to generate the examples in the documentation for maphilight, diff --git a/doc/tutorial/statistical_inference/putting_together.rst b/doc/tutorial/statistical_inference/putting_together.rst index acac7c03d1d06..556b6b8df0894 100644 --- a/doc/tutorial/statistical_inference/putting_together.rst +++ b/doc/tutorial/statistical_inference/putting_together.rst @@ -17,7 +17,7 @@ can predict variables. We can also create combined estimators: :align: right .. 
literalinclude:: ../../auto_examples/plot_digits_pipe.py - :lines: 26-66 + :lines: 23-63 diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index be32fabd96cb8..afe51320414c6 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -305,14 +305,17 @@ a maximum amount of independent information. It is able to recover :: >>> # Generate sample data + >>> import numpy as np + >>> from scipy import signal >>> time = np.linspace(0, 10, 2000) >>> s1 = np.sin(2 * time) # Signal 1 : sinusoidal signal >>> s2 = np.sign(np.sin(3 * time)) # Signal 2 : square signal - >>> S = np.c_[s1, s2] + >>> s3 = signal.sawtooth(2 * np.pi * time) # Signal 3: saw tooth signal + >>> S = np.c_[s1, s2, s3] >>> S += 0.2 * np.random.normal(size=S.shape) # Add noise >>> S /= S.std(axis=0) # Standardize data >>> # Mix data - >>> A = np.array([[1, 1], [0.5, 2]]) # Mixing matrix + >>> A = np.array([[1, 1, 1], [0.5, 2, 1], [1.5, 1, 2]]) # Mixing matrix >>> X = np.dot(S, A.T) # Generate observations >>> # Compute ICA diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 6b589026548cf..88d451c77c111 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -1,6 +1,5 @@ .. currentmodule:: sklearn - =============== Release history =============== @@ -8,7 +7,7 @@ Release history Version 0.19 ============ -**Release Candidate (0.19b2) July 17, 2017** +**August 12, 2017** Highlights ---------- @@ -22,18 +21,18 @@ algorithms in existing estimators, such as multiplicative update in :class:`decomposition.NMF` and multinomial :class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``). -You can also learn faster. For instance, the :ref:`new option to cache -transformations ` in :class:`pipeline.Pipeline` makes grid -search over pipelines including slow transformations much more efficient. 
And -you can predict faster: if you're sure you know what you're doing, you can turn -off validating that the input is finite using :func:`config_context`. - Cross validation is now able to return the results from multiple metric evaluations. The new :func:`model_selection.cross_validate` can return many scores on the test data as well as training set performance and timings, and we have extended the ``scoring`` and ``refit`` parameters for grid/randomized search :ref:`to handle multiple metrics `. +You can also learn faster. For instance, the :ref:`new option to cache +transformations ` in :class:`pipeline.Pipeline` makes grid +search over pipelines including slow transformations much more efficient. And +you can predict faster: if you're sure you know what you're doing, you can turn +off validating that the input is finite using :func:`config_context`. + We've made some important fixes too. We've fixed a longstanding implementation error in :func:`metrics.average_precision_score`, so please be cautious with prior results reported from that function. A number of errors in the @@ -63,6 +62,7 @@ random sampling procedures. - :class:`linear_model.LassoLars` (bug fix) - :class:`linear_model.LassoLarsIC` (bug fix) - :class:`manifold.TSNE` (bug fix) +- :class:`neighbors.NearestCentroid` (bug fix) - :class:`semi_supervised.LabelSpreading` (bug fix) - :class:`semi_supervised.LabelPropagation` (bug fix) - tree based models where ``min_weight_fraction_leaf`` is used (enhancement) @@ -172,6 +172,11 @@ Trees and ensembles - :func:`tree.export_graphviz` now shows configurable number of decimal places. :issue:`8698` by :user:`Guillaume Lemaitre `. +- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier` + to change output shape of `transform` method to 2 dimensional. + :issue:`7794` by :user:`Ibraim Ganiev ` and + :user:`Herilalaina Rakotoarison `. 
+ Linear, kernelized and related models - :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`, @@ -232,6 +237,9 @@ Decomposition, manifold learning and clustering ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. :issue:`7685` by :user:`Tommy Löfstedt ` +- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. + :issue:`9108` by `Hanmin Qin `_. + - :class:`decomposition.NMF` now faster when ``beta_loss=0``. :issue:`9277` by :user:`hongkahjun`. @@ -322,6 +330,9 @@ Model evaluation and meta-estimators - More clustering metrics are now available through :func:`metrics.get_scorer` and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. +- A scorer based on :func:`metrics.explained_variance_score` is also available. + :issue:`9259` by `Hanmin Qin `_. + Metrics - :func:`metrics.matthews_corrcoef` now support multiclass classification. @@ -490,7 +501,10 @@ Decomposition, manifold learning and clustering in :class:`decomposition.PCA`, :class:`decomposition.RandomizedPCA` and :class:`decomposition.IncrementalPCA`. - :issue:`9105` by `Hanmin Qin `_. + :issue:`9105` by `Hanmin Qin `_. + +- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. + :issue:`9108` by `Hanmin Qin `_. - Fixed a bug where :class:`cluster.DBSCAN` gives incorrect result when input is a precomputed sparse matrix with initial @@ -526,6 +540,9 @@ Decomposition, manifold learning and clustering - Fix bug where :mod:`mixture` ``sample`` methods did not return as many samples as requested. :issue:`7702` by :user:`Levi John Wolf `. +- Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`. + :issue:`9219` by `Hanmin Qin `_. + Preprocessing and feature selection - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` @@ -588,6 +605,9 @@ Model evaluation and meta-estimators raised on trying to stack matrices with different dimensions. :issue:`8093` by :user:`Peter Bull `. 
+- Cross validation now works with Pandas datatypes that have a + read-only index. :issue:`9507` by `Loic Esteve`_. + Metrics - :func:`metrics.average_precision_score` no longer linearly @@ -784,6 +804,13 @@ Miscellaneous :mod:`utils` have been removed or deprecated accordingly. :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai ` +- The ``store_covariances`` and ``covariances_`` parameters of + :class:`discriminant_analysis.QuadraticDiscriminantAnalysis` + have been renamed to ``store_covariance`` and ``covariance_`` to be + consistent with the corresponding parameter names of the + :class:`discriminant_analysis.LinearDiscriminantAnalysis`. They will be + removed in version 0.21. :issue:`7998` by :user:`Jiacheng ` + Removed in 0.19: - ``utils.fixes.argpartition`` @@ -829,6 +856,74 @@ Miscellaneous :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. +Code and Documentation Contributors +----------------------------------- + +Thanks to everyone who has contributed to the maintenance and improvement of the +project since version 0.18, including: + +Joel Nothman, Loic Esteve, Andreas Mueller, Guillaume Lemaitre, Olivier Grisel, +Hanmin Qin, Raghav RV, Alexandre Gramfort, themrmax, Aman Dalmia, Gael +Varoquaux, Naoya Kanai, Tom Dupré la Tour, Rishikesh, Nelson Liu, Taehoon Lee, +Nelle Varoquaux, Aashil, Mikhail Korobov, Sebastin Santy, Joan Massich, Roman +Yurchak, RAKOTOARISON Herilalaina, Thierry Guillemot, Alexandre Abadie, Carol +Willing, Balakumaran Manoharan, Josh Karnofsky, Vlad Niculae, Utkarsh Upadhyay, +Dmitry Petrov, Minghui Liu, Srivatsan, Vincent Pham, Albert Thomas, Jake +VanderPlas, Attractadore, JC Liu, alexandercbooth, chkoar, Óscar Nájera, +Aarshay Jain, Kyle Gilliam, Ramana Subramanyam, CJ Carey, Clement Joudet, David +Robles, He Chen, Joris Van den Bossche, Karan Desai, Katie Luangkote, Leland +McInnes, Maniteja Nandana, Michele Lacchia, Sergei Lebedev, Shubham Bhardwaj, +akshay0724, omtcyfz, rickiepark, waterponey, Vathsala Achar,
jbDelafosse, Ralf +Gommers, Ekaterina Krivich, Vivek Kumar, Ishank Gulati, Dave Elliott, ldirer, +Reiichiro Nakano, Levi John Wolf, Mathieu Blondel, Sid Kapur, Dougal J. +Sutherland, midinas, mikebenfield, Sourav Singh, Aseem Bansal, Ibraim Ganiev, +Stephen Hoover, AishwaryaRK, Steven C. Howell, Gary Foreman, Neeraj Gangwar, +Tahar, Jon Crall, dokato, Kathy Chen, ferria, Thomas Moreau, Charlie Brummitt, +Nicolas Goix, Adam Kleczewski, Sam Shleifer, Nikita Singh, Basil Beirouti, +Giorgio Patrini, Manoj Kumar, Rafael Possas, James Bourbeau, James A. Bednar, +Janine Harper, Jaye, Jean Helie, Jeremy Steward, Artsiom, John Wei, Jonathan +LIgo, Jonathan Rahn, seanpwilliams, Arthur Mensch, Josh Levy, Julian Kuhlmann, +Julien Aubert, Jörn Hees, Kai, shivamgargsya, Kat Hempstalk, Kaushik +Lakshmikanth, Kennedy, Kenneth Lyons, Kenneth Myers, Kevin Yap, Kirill Bobyrev, +Konstantin Podshumok, Arthur Imbert, Lee Murray, toastedcornflakes, Lera, Li +Li, Arthur Douillard, Mainak Jas, tobycheese, Manraj Singh, Manvendra Singh, +Marc Meketon, MarcoFalke, Matthew Brett, Matthias Gilch, Mehul Ahuja, Melanie +Goetz, Meng, Peng, Michael Dezube, Michal Baumgartner, vibrantabhi19, Artem +Golubin, Milen Paskov, Antonin Carette, Morikko, MrMjauh, NALEPA Emmanuel, +Namiya, Antoine Wendlinger, Narine Kokhlikyan, NarineK, Nate Guerin, Angus +Williams, Ang Lu, Nicole Vavrova, Nitish Pandey, Okhlopkov Daniil Olegovich, +Andy Craze, Om Prakash, Parminder Singh, Patrick Carlson, Patrick Pei, Paul +Ganssle, Paulo Haddad, Paweł Lorek, Peng Yu, Pete Bachant, Peter Bull, Peter +Csizsek, Peter Wang, Pieter Arthur de Jong, Ping-Yao, Chang, Preston Parry, +Puneet Mathur, Quentin Hibon, Andrew Smith, Andrew Jackson, 1kastner, Rameshwar +Bhaskaran, Rebecca Bilbro, Remi Rampin, Andrea Esuli, Rob Hall, Robert +Bradshaw, Romain Brault, Aman Pratik, Ruifeng Zheng, Russell Smith, Sachin +Agarwal, Sailesh Choyal, Samson Tan, Samuël Weber, Sarah Brown, Sebastian +Pölsterl, Sebastian Raschka, Sebastian Saeger,
Alyssa Batula, Abhyuday Pratap +Singh, Sergey Feldman, Sergul Aydore, Sharan Yalburgi, willduan, Siddharth +Gupta, Sri Krishna, Almer, Stijn Tonk, Allen Riddell, Theofilos Papapanagiotou, +Alison, Alexis Mignon, Tommy Boucher, Tommy Löfstedt, Toshihiro Kamishima, +Tyler Folkman, Tyler Lanigan, Alexander Junge, Varun Shenoy, Victor Poughon, +Vilhelm von Ehrenheim, Aleksandr Sandrovskii, Alan Yee, Vlasios Vasileiou, +Warut Vijitbenjaronk, Yang Zhang, Yaroslav Halchenko, Yichuan Liu, Yuichi +Fujikawa, affanv14, aivision2020, xor, andreh7, brady salz, campustrampus, +Agamemnon Krasoulis, ditenberg, elena-sharova, filipj8, fukatani, gedeck, +guiniol, guoci, hakaa1, hongkahjun, i-am-xhy, jakirkham, jaroslaw-weber, +jayzed82, jeroko, jmontoyam, jonathan.striebel, josephsalmon, jschendel, +leereeves, martin-hahn, mathurinm, mehak-sachdeva, mlewis1729, mlliou112, +mthorrell, ndingwall, nuffe, yangarbiter, plagree, pldtc325, Breno Freitas, +Brett Olsen, Brian A. Alfano, Brian Burns, polmauri, Brandon Carter, Charlton +Austin, Chayant T15h, Chinmaya Pancholi, Christian Danielsen, Chung Yen, +Chyi-Kwei Yau, pravarmahajan, DOHMATOB Elvis, Daniel LeJeune, Daniel Hnyk, +Darius Morawiec, David DeTomaso, David Gasquez, David Haberthür, David +Heryanto, David Kirkby, David Nicholson, rashchedrin, Deborah Gertrude Digges, +Denis Engemann, Devansh D, Dickson, Bob Baxley, Don86, E. Lynch-Klarup, Ed +Rogers, Elizabeth Ferriss, Ellen-Co2, Fabian Egli, Fang-Chieh Chou, Bing Tian +Dai, Greg Stupp, Grzegorz Szpak, Bertrand Thirion, Hadrien Bertrand, Harizo +Rajaona, zxcvbnius, Henry Lin, Holger Peters, Icyblade Dai, Igor +Andriushchenko, Ilya, Isaac Laughlin, Iván Vallés, Aurélien Bellet, JPFrancoia, +Jacob Schreiber, Asish Mahapatra + .. _changes_0_18_2: Version 0.18.2 @@ -4350,7 +4445,7 @@ Highlights - :ref:`out_of_bag` of generalization error for :ref:`ensemble` by `Andreas Müller`_. 
-- :ref:`randomized_l1`: Randomized sparse linear models for feature +- Randomized sparse linear models for feature selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ - :ref:`label_propagation` for semi-supervised learning, by Clay diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index f7ad4dcb526b5..8a85b0645cb8c 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -77,6 +77,17 @@ # ############################################################################# # Retrieve the data from Internet +def retry(f, n_attempts=3): + "Wrapper function to retry function calls in case of exceptions" + def wrapper(*args, **kwargs): + for i in range(n_attempts): + try: + return f(*args, **kwargs) + except Exception as e: + if i == n_attempts - 1: + raise + return wrapper + def quotes_historical_google(symbol, date1, date2): """Get the historical data from Google finance. @@ -179,8 +190,10 @@ def quotes_historical_google(symbol, date1, date2): symbols, names = np.array(list(symbol_dict.items())).T +# retry is used because quotes_historical_google can temporarily fail +# for various reasons (e.g. empty result from Google API). quotes = [ - quotes_historical_google(symbol, d1, d2) for symbol in symbols + retry(quotes_historical_google)(symbol, d1, d2) for symbol in symbols ] close_prices = np.vstack([q['close'] for q in quotes]) diff --git a/examples/applications/plot_tomography_l1_reconstruction.py b/examples/applications/plot_tomography_l1_reconstruction.py index a8d45938fef30..dc0a1265e27bd 100644 --- a/examples/applications/plot_tomography_l1_reconstruction.py +++ b/examples/applications/plot_tomography_l1_reconstruction.py @@ -101,7 +101,7 @@ def generate_synthetic_data(): rs = np.random.RandomState(0) n_pts = 36 x, y = np.ogrid[0:l, 0:l] - mask_outer = (x - l / 2) ** 2 + (y - l / 2) ** 2 < (l / 2) ** 2 + mask_outer = (x - l / 2.) ** 2 + (y - l / 2.) 
** 2 < (l / 2.) ** 2 mask = np.zeros((l, l)) points = l * rs.rand(2, n_pts) mask[(points[0]).astype(np.int), (points[1]).astype(np.int)] = 1 diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py index 8b4a24af021e8..e0f39c86b371c 100755 --- a/examples/cluster/plot_cluster_iris.py +++ b/examples/cluster/plot_cluster_iris.py @@ -34,7 +34,6 @@ np.random.seed(5) -centers = [[1, 1], [-1, -1], [1, -1]] iris = datasets.load_iris() X = iris.data y = iris.target diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index adb57f003cfbb..d33b77d68a438 100644 --- a/examples/covariance/plot_covariance_estimation.py +++ b/examples/covariance/plot_covariance_estimation.py @@ -98,7 +98,7 @@ # Plot results fig = plt.figure() plt.title("Regularized covariance: likelihood and shrinkage coefficient") -plt.xlabel('Regularizaton parameter: shrinkage coefficient') +plt.xlabel('Regularization parameter: shrinkage coefficient') plt.ylabel('Error: negative log-likelihood on test data') # range shrinkage curve plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood") diff --git a/examples/ensemble/plot_adaboost_hastie_10_2.py b/examples/ensemble/plot_adaboost_hastie_10_2.py index b27636956ef26..4d48d13dd24f2 100644 --- a/examples/ensemble/plot_adaboost_hastie_10_2.py +++ b/examples/ensemble/plot_adaboost_hastie_10_2.py @@ -3,11 +3,11 @@ Discrete versus Real AdaBoost ============================= -This example is based on Figure 10.2 from Hastie et al 2009 [1] and illustrates -the difference in performance between the discrete SAMME [2] boosting -algorithm and real SAMME.R boosting algorithm. Both algorithms are evaluated -on a binary classification task where the target Y is a non-linear function -of 10 input features. 
+This example is based on Figure 10.2 from Hastie et al 2009 [1]_ and +illustrates the difference in performance between the discrete SAMME [2]_ +boosting algorithm and real SAMME.R boosting algorithm. Both algorithms are +evaluated on a binary classification task where the target Y is a non-linear +function of 10 input features. Discrete SAMME AdaBoost adapts based on errors in predicted class labels whereas real SAMME.R uses the predicted class probabilities. diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 39e7cdcb8ef4d..906df85ccf645 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -3,14 +3,14 @@ Multi-class AdaBoosted Decision Trees ===================================== -This example reproduces Figure 1 of Zhu et al [1] and shows how boosting can +This example reproduces Figure 1 of Zhu et al [1]_ and shows how boosting can improve prediction accuracy on a multi-class problem. The classification dataset is constructed by taking a ten-dimensional standard normal distribution and defining three classes separated by nested concentric ten-dimensional spheres such that roughly equal numbers of samples are in each class (quantiles of the :math:`\chi^2` distribution). -The performance of the SAMME and SAMME.R [1] algorithms are compared. SAMME.R +The performance of the SAMME and SAMME.R [1]_ algorithms are compared. SAMME.R uses the probability estimates to update the additive model, while SAMME uses the classifications only. 
As the example illustrates, the SAMME.R algorithm typically converges faster than SAMME, achieving a lower test error with fewer diff --git a/examples/ensemble/plot_adaboost_regression.py b/examples/ensemble/plot_adaboost_regression.py index b5b98d140da1b..0c76ac6af3ae9 100644 --- a/examples/ensemble/plot_adaboost_regression.py +++ b/examples/ensemble/plot_adaboost_regression.py @@ -3,7 +3,7 @@ Decision Tree Regression with AdaBoost ====================================== -A decision tree is boosted using the AdaBoost.R2 [1] algorithm on a 1D +A decision tree is boosted using the AdaBoost.R2 [1]_ algorithm on a 1D sinusoidal dataset with a small amount of Gaussian noise. 299 boosts (300 decision trees) is compared with a single decision tree regressor. As the number of boosts is increased the regressor can fit more diff --git a/examples/ensemble/plot_ensemble_oob.py b/examples/ensemble/plot_ensemble_oob.py index 811cec13b24be..19b01772d5c24 100644 --- a/examples/ensemble/plot_ensemble_oob.py +++ b/examples/ensemble/plot_ensemble_oob.py @@ -8,7 +8,7 @@ :math:`z_i = (x_i, y_i)`. The *out-of-bag* (OOB) error is the average error for each :math:`z_i` calculated using predictions from the trees that do not contain :math:`z_i` in their respective bootstrap sample. This allows the -``RandomForestClassifier`` to be fit and validated whilst being trained [1]. +``RandomForestClassifier`` to be fit and validated whilst being trained [1]_. The example below demonstrates how the OOB error can be measured at the addition of each new tree during training. 
The resulting plot allows a diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py index e5a01240ccdb0..592dd40ca47cb 100644 --- a/examples/ensemble/plot_gradient_boosting_regularization.py +++ b/examples/ensemble/plot_gradient_boosting_regularization.py @@ -4,7 +4,7 @@ ================================ Illustration of the effect of different regularization strategies -for Gradient Boosting. The example is taken from Hastie et al 2009. +for Gradient Boosting. The example is taken from Hastie et al 2009 [1]_. The loss function used is binomial deviance. Regularization via shrinkage (``learning_rate < 1.0``) improves performance considerably. diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py index 2b889d25013d3..5610f471b5d05 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py +++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py @@ -52,7 +52,7 @@ X_test = scaler.transform(X_test) # Turn up tolerance for faster convergence -clf = LogisticRegression(C=50 / train_samples, +clf = LogisticRegression(C=50. / train_samples, multi_class='multinomial', penalty='l1', solver='saga', tol=0.1) clf.fit(X_train, y_train) diff --git a/examples/multioutput/README.txt b/examples/multioutput/README.txt index 57adada325e43..6121721d5fc73 100644 --- a/examples/multioutput/README.txt +++ b/examples/multioutput/README.txt @@ -1,6 +1,6 @@ .. _multioutput_examples: Multioutput methods ----------------- +------------------- -Examples concerning the :mod:`sklearn.multioutput` module. \ No newline at end of file +Examples concerning the :mod:`sklearn.multioutput` module. 
diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py index af649268a6151..6a90e14dfc379 100644 --- a/examples/multioutput/plot_classifier_chain_yeast.py +++ b/examples/multioutput/plot_classifier_chain_yeast.py @@ -5,12 +5,12 @@ Example of using classifier chain on a multilabel dataset. For this example we will use the `yeast -http://mldata.org/repository/data/viewslug/yeast/`_ dataset which -contains 2417 datapoints each with 103 features and 14 possible labels. Each -datapoint has at least one label. As a baseline we first train a logistic -regression classifier for each of the 14 labels. To evaluate the performance -of these classifiers we predict on a held-out test set and calculate the -:ref:`User Guide `. +`_ dataset which contains +2417 datapoints each with 103 features and 14 possible labels. Each +data point has at least one label. As a baseline we first train a logistic +regression classifier for each of the 14 labels. To evaluate the performance of +these classifiers we predict on a held-out test set and calculate the +:ref:`jaccard similarity score `. Next we create 10 classifier chains. Each classifier chain contains a logistic regression model for each of the 14 labels. The models in each @@ -79,7 +79,7 @@ model_scores = [ovr_jaccard_score] + chain_jaccard_scores model_scores.append(ensemble_jaccard_score) -model_names = ('Independent Models', +model_names = ('Independent', 'Chain 1', 'Chain 2', 'Chain 3', @@ -90,21 +90,22 @@ 'Chain 8', 'Chain 9', 'Chain 10', - 'Ensemble Average') + 'Ensemble') -y_pos = np.arange(len(model_names)) -y_pos[1:] += 1 -y_pos[-1] += 1 +x_pos = np.arange(len(model_names)) # Plot the Jaccard similarity scores for the independent model, each of the # chains, and the ensemble (note that the vertical axis on this plot does # not begin at 0). 
-fig = plt.figure(figsize=(7, 4)) -plt.title('Classifier Chain Ensemble') -plt.xticks(y_pos, model_names, rotation='vertical') -plt.ylabel('Jaccard Similarity Score') -plt.ylim([min(model_scores) * .9, max(model_scores) * 1.1]) +fig, ax = plt.subplots(figsize=(7, 4)) +ax.grid(True) +ax.set_title('Classifier Chain Ensemble Performance Comparison') +ax.set_xticks(x_pos) +ax.set_xticklabels(model_names, rotation='vertical') +ax.set_ylabel('Jaccard Similarity Score') +ax.set_ylim([min(model_scores) * .9, max(model_scores) * 1.1]) colors = ['r'] + ['b'] * len(chain_jaccard_scores) + ['g'] -plt.bar(y_pos, model_scores, align='center', alpha=0.5, color=colors) +ax.bar(x_pos, model_scores, alpha=0.5, color=colors) +plt.tight_layout() plt.show() diff --git a/examples/plot_kernel_ridge_regression.py b/examples/plot_kernel_ridge_regression.py index cb91908ed5f89..59e22ea3e6969 100644 --- a/examples/plot_kernel_ridge_regression.py +++ b/examples/plot_kernel_ridge_regression.py @@ -104,7 +104,6 @@ zorder=2, edgecolors=(0, 0, 0)) plt.scatter(X[:100], y[:100], c='k', label='data', zorder=1, edgecolors=(0, 0, 0)) -plt.hold('on') plt.plot(X_plot, y_svr, c='r', label='SVR (fit: %.3fs, predict: %.3fs)' % (svr_fit, svr_predict)) plt.plot(X_plot, y_kr, c='g', diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index ff6f3fc8f31ad..9fdbcc785ed2b 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -12,37 +12,34 @@ import numpy as np import matplotlib.pyplot as plt from sklearn import svm +from sklearn.datasets import make_blobs + # we create 40 separable points -np.random.seed(0) -X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]] -Y = [0] * 20 + [1] * 20 - -# fit the model -clf = svm.SVC(kernel='linear') -clf.fit(X, Y) - -# get the separating hyperplane -w = clf.coef_[0] -a = -w[0] / w[1] -xx = np.linspace(-5, 5) -yy = a * xx - (clf.intercept_[0]) / w[1] - -# 
plot the parallels to the separating hyperplane that pass through the -# support vectors -b = clf.support_vectors_[0] -yy_down = a * xx + (b[1] - a * b[0]) -b = clf.support_vectors_[-1] -yy_up = a * xx + (b[1] - a * b[0]) - -# plot the line, the points, and the nearest vectors to the plane -plt.plot(xx, yy, 'k-') -plt.plot(xx, yy_down, 'k--') -plt.plot(xx, yy_up, 'k--') - -plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], - s=80, facecolors='none') -plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired) - -plt.axis('tight') +X, y = make_blobs(n_samples=40, centers=2, random_state=6) + +# fit the model, don't regularize for illustration purposes +clf = svm.SVC(kernel='linear', C=1000) +clf.fit(X, y) + +plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) + +# plot the decision function +ax = plt.gca() +xlim = ax.get_xlim() +ylim = ax.get_ylim() + +# create grid to evaluate model +xx = np.linspace(xlim[0], xlim[1], 30) +yy = np.linspace(ylim[0], ylim[1], 30) +YY, XX = np.meshgrid(yy, xx) +xy = np.vstack([XX.ravel(), YY.ravel()]).T +Z = clf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins +ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, + linestyles=['--', '-', '--']) +# plot support vectors +ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, + linewidth=1, facecolors='none') plt.show() diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index 438291dc5538d..cf3130a6ae5c5 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -29,7 +29,6 @@ import numpy as np import matplotlib.pyplot as plt from sklearn import svm -#from sklearn.linear_model import SGDClassifier # we create 40 separable points rng = np.random.RandomState(0) @@ -43,25 +42,36 @@ clf = svm.SVC(kernel='linear', C=1.0) clf.fit(X, y) -w = clf.coef_[0] -a = -w[0] / w[1] 
-xx = np.linspace(-5, 5) -yy = a * xx - clf.intercept_[0] / w[1] - - -# get the separating hyperplane using weighted classes +# fit the model and get the separating hyperplane using weighted classes wclf = svm.SVC(kernel='linear', class_weight={1: 10}) wclf.fit(X, y) -ww = wclf.coef_[0] -wa = -ww[0] / ww[1] -wyy = wa * xx - wclf.intercept_[0] / ww[1] - # plot separating hyperplanes and samples -h0 = plt.plot(xx, yy, 'k-', label='no weights') -h1 = plt.plot(xx, wyy, 'k--', label='with weights') plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') plt.legend() -plt.axis('tight') -plt.show() +# plot the decision functions for both classifiers +ax = plt.gca() +xlim = ax.get_xlim() +ylim = ax.get_ylim() + +# create grid to evaluate model +xx = np.linspace(xlim[0], xlim[1], 30) +yy = np.linspace(ylim[0], ylim[1], 30) +YY, XX = np.meshgrid(yy, xx) +xy = np.vstack([XX.ravel(), YY.ravel()]).T + +# get the separating hyperplane +Z = clf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins +a = ax.contour(XX, YY, Z, colors='k', levels=[0], alpha=0.5, linestyles=['-']) + +# get the separating hyperplane for weighted classes +Z = wclf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins for weighted classes +b = ax.contour(XX, YY, Z, colors='r', levels=[0], alpha=0.5, linestyles=['-']) + +plt.legend([a.collections[0], b.collections[0]], ["non weighted", "weighted"], + loc="upper right") diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py index e46675eb0e069..54d2c0b54337b 100644 --- a/examples/svm/plot_svm_regression.py +++ b/examples/svm/plot_svm_regression.py @@ -34,7 +34,6 @@ # Look at the results lw = 2 plt.scatter(X, y, color='darkorange', label='data') -plt.hold('on') plt.plot(X, y_rbf, color='navy', lw=lw, label='RBF model') plt.plot(X, y_lin, color='c', lw=lw, label='Linear model') plt.plot(X, y_poly, color='cornflowerblue', lw=lw, label='Polynomial model') diff 
--git a/sklearn/__init__.py b/sklearn/__init__.py index e74466efd8a95..d29d5f81156c1 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -114,7 +114,7 @@ def config_context(**new_config): # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = '0.19.dev0' +__version__ = '0.19.0' try: diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 398529793880f..8bf94cee95cda 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -197,7 +197,11 @@ class AffinityPropagation(BaseEstimator, ClusterMixin): Parameters ---------- damping : float, optional, default: 0.5 - Damping factor between 0.5 and 1. + Damping factor (between 0.5 and 1) is the extent to + which the current value is maintained relative to + incoming values (weighted 1 - damping). This is in order + to avoid numerical oscillations when updating these + values (messages). max_iter : int, optional, default: 200 Maximum number of iterations. 
diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 29d725bd8ce54..b7560ce970b90 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -30,8 +30,7 @@ # For non fully-connected graphs -def _fix_connectivity(X, connectivity, n_components=None, - affinity="euclidean"): +def _fix_connectivity(X, connectivity, affinity): """ Fixes the connectivity matrix @@ -190,7 +189,8 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False): else: return children_, 1, n_samples, None - connectivity, n_components = _fix_connectivity(X, connectivity) + connectivity, n_components = _fix_connectivity(X, connectivity, + affinity='euclidean') if n_clusters is None: n_nodes = 2 * n_samples - 1 else: @@ -289,7 +289,7 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False): # average and complete linkage -def linkage_tree(X, connectivity=None, n_components=None, +def linkage_tree(X, connectivity=None, n_components='deprecated', n_clusters=None, linkage='complete', affinity="euclidean", return_distance=False): """Linkage agglomerative clustering based on a Feature matrix. 
@@ -368,6 +368,10 @@ def linkage_tree(X, connectivity=None, n_components=None, -------- ward_tree : hierarchical clustering with ward linkage """ + if n_components != 'deprecated': + warnings.warn("n_components was deprecated in 0.18 and " + "will be removed in 0.21", DeprecationWarning) + X = np.asarray(X) if X.ndim == 1: X = np.reshape(X, (-1, 1)) @@ -418,7 +422,8 @@ def linkage_tree(X, connectivity=None, n_components=None, return children_, 1, n_samples, None, distances return children_, 1, n_samples, None - connectivity, n_components = _fix_connectivity(X, connectivity) + connectivity, n_components = _fix_connectivity(X, connectivity, + affinity=affinity) connectivity = connectivity.tocoo() # Put the diagonal to zero diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 986b92e0ce9f4..c4534663236b0 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -36,6 +36,20 @@ from sklearn.utils.testing import assert_warns +def test_deprecation_of_n_components_in_linkage_tree(): + rng = np.random.RandomState(0) + X = rng.randn(50, 100) + # Test for warning of deprecation of n_components in linkage_tree + children, n_nodes, n_leaves, parent = assert_warns(DeprecationWarning, + linkage_tree, + X.T, + n_components=10) + children_t, n_nodes_t, n_leaves_t, parent_t = linkage_tree(X.T) + assert_array_equal(children, children_t) + assert_equal(n_nodes, n_nodes_t) + assert_equal(n_leaves, n_leaves_t) + assert_equal(parent, parent_t) + def test_linkage_misc(): # Misc tests on linkage rng = np.random.RandomState(42) @@ -518,3 +532,30 @@ def test_agg_n_clusters(): msg = ("n_clusters should be an integer greater than 0." " %s was provided." 
% str(agc.n_clusters)) assert_raise_message(ValueError, msg, agc.fit, X) + + +def test_affinity_passed_to_fix_connectivity(): + # Test that the affinity parameter is actually passed to the pairwise + # function + + size = 2 + rng = np.random.RandomState(0) + X = rng.randn(size, size) + mask = np.array([True, False, False, True]) + + connectivity = grid_to_graph(n_x=size, n_y=size, + mask=mask, return_as=np.ndarray) + + class FakeAffinity: + def __init__(self): + self.counter = 0 + + def increment(self, *args, **kwargs): + self.counter += 1 + return self.counter + + fa = FakeAffinity() + + linkage_tree(X, connectivity=connectivity, affinity=fa.increment) + + assert_equal(fa.counter, 3) diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py index 3345f5193e598..2cae73de9b6c2 100644 --- a/sklearn/covariance/graph_lasso_.py +++ b/sklearn/covariance/graph_lasso_.py @@ -221,7 +221,7 @@ def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4, _, _, coefs = lars_path( sub_covariance, row, Xy=row, Gram=sub_covariance, alpha_min=alpha / (n_features - 1), copy_Gram=True, - method='lars', return_path=False) + eps=eps, method='lars', return_path=False) # Update the precision matrix precision_[idx, idx] = ( 1. / (covariance_[idx, idx] diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index 985dda92f990c..de5ee308764bb 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -190,7 +190,7 @@ def select_candidates(X, n_support, n_trials, select=1, n_iter=30, Starting from a random support, the pure data set is found by the c_step procedure introduced by Rousseeuw and Van Driessen in - [Rouseeuw1999]_. + [RV]_. Parameters ---------- @@ -250,7 +250,7 @@ def select_candidates(X, n_support, n_trials, select=1, n_iter=30, References ---------- - .. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance Determinant + .. 
[RV] A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS @@ -339,13 +339,13 @@ def fast_mcd(X, support_fraction=None, such computation levels. Note that only raw estimates are returned. If one is interested in - the correction and reweighting steps described in [Rouseeuw1999]_, + the correction and reweighting steps described in [RouseeuwVan]_, see the MinCovDet object. References ---------- - .. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance + .. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS @@ -580,10 +580,10 @@ class MinCovDet(EmpiricalCovariance): .. [Rouseeuw1984] `P. J. Rousseeuw. Least median of squares regression. J. Am Stat Ass, 79:871, 1984.` - .. [Rouseeuw1999] `A Fast Algorithm for the Minimum Covariance Determinant + .. [Rousseeuw] `A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS` - .. [Butler1993] `R. W. Butler, P. L. Davies and M. Jhun, + .. [ButlerDavies] `R. W. Butler, P. L. Davies and M. Jhun, Asymptotics For The Minimum Covariance Determinant Estimator, The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400` @@ -650,7 +650,7 @@ def correct_covariance(self, data): """Apply a correction to raw Minimum Covariance Determinant estimates. Correction using the empirical correction factor suggested - by Rousseeuw and Van Driessen in [Rouseeuw1984]_. + by Rousseeuw and Van Driessen in [RVD]_. Parameters ---------- @@ -659,6 +659,13 @@ def correct_covariance(self, data): The data set must be the one which was used to compute the raw estimates. + References + ---------- + + .. 
[RVD] `A Fast Algorithm for the Minimum Covariance + Determinant Estimator, 1999, American Statistical Association + and the American Society for Quality, TECHNOMETRICS` + Returns ------- covariance_corrected : array-like, shape (n_features, n_features) @@ -675,7 +682,8 @@ def reweight_covariance(self, data): Re-weight observations using Rousseeuw's method (equivalent to deleting outlying observations from the data set before - computing location and covariance estimates). [Rouseeuw1984]_ + computing location and covariance estimates) described + in [RVDriessen]_. Parameters ---------- @@ -684,6 +692,13 @@ def reweight_covariance(self, data): The data set must be the one which was used to compute the raw estimates. + References + ---------- + + .. [RVDriessen] `A Fast Algorithm for the Minimum Covariance + Determinant Estimator, 1999, American Statistical Association + and the American Society for Quality, TECHNOMETRICS` + Returns ------- location_reweighted : array-like, shape (n_features, ) diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 0d5f56f189b45..51850ad6c8898 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -93,6 +93,7 @@ def scale_face(face): def check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): """Helper function to download any missing LFW data""" + data_home = get_data_home(data_home=data_home) lfw_home = join(data_home, "lfw_home") diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index b5116a3f39baa..705052b3c4fd1 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -274,7 +274,8 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, return data -def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None): +def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None, + download_if_missing=True): """Load the 20 newsgroups dataset and transform it 
into tf-idf vectors. This is a convenience function; the tf-idf transformation is done using the @@ -304,6 +305,10 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None): Specify an download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders. + download_if_missing : optional, True by default + If False, raise an IOError if the data is not locally available + instead of trying to download the data from the source site. + Returns ------- bunch : Bunch object @@ -323,14 +328,16 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None): categories=None, shuffle=True, random_state=12, - remove=remove) + remove=remove, + download_if_missing=download_if_missing) data_test = fetch_20newsgroups(data_home=data_home, subset='test', categories=None, shuffle=True, random_state=12, - remove=remove) + remove=remove, + download_if_missing=download_if_missing) if os.path.exists(target_file): X_train, X_test = joblib.load(target_file) diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index de447f1edd6aa..c0f1eb77b5f56 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -201,6 +201,9 @@ class PCA(_BasePCA): explained_variance_ : array, shape (n_components,) The amount of variance explained by each of the selected components. + Equal to n_components largest eigenvalues + of the covariance matrix of X. + .. versionadded:: 0.18 explained_variance_ratio_ : array, shape (n_components,) @@ -232,6 +235,9 @@ class PCA(_BasePCA): http://www.miketipping.com/papers/met-mppca.pdf. It is required to computed the estimated data covariance and score samples. + Equal to the average of (min(n_features, n_samples) - n_components) + smallest eigenvalues of the covariance matrix of X. + References ---------- For n_components == 'mle', this class uses the method of `Thomas P. 
Minka: @@ -494,9 +500,10 @@ def _fit_truncated(self, X, n_components, svd_solver): self.explained_variance_ratio_ = \ self.explained_variance_ / total_var.sum() self.singular_values_ = S.copy() # Store the singular values. - if self.n_components_ < n_features: + if self.n_components_ < min(n_features, n_samples): self.noise_variance_ = (total_var.sum() - self.explained_variance_.sum()) + self.noise_variance_ /= min(n_features, n_samples) - n_components else: self.noise_variance_ = 0. diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index 34b63c0674335..6795013b0790a 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -529,6 +529,50 @@ def test_pca_score3(): assert_true(ll.argmax() == 1) +def test_pca_score_with_different_solvers(): + digits = datasets.load_digits() + X_digits = digits.data + + pca_dict = {svd_solver: PCA(n_components=30, svd_solver=svd_solver, + random_state=0) + for svd_solver in solver_list} + + for pca in pca_dict.values(): + pca.fit(X_digits) + # Sanity check for the noise_variance_. 
For more details see + # https://github.com/scikit-learn/scikit-learn/issues/7568 + # https://github.com/scikit-learn/scikit-learn/issues/8541 + # https://github.com/scikit-learn/scikit-learn/issues/8544 + assert np.all((pca.explained_variance_ - pca.noise_variance_) >= 0) + + # Compare scores with different svd_solvers + score_dict = {svd_solver: pca.score(X_digits) + for svd_solver, pca in pca_dict.items()} + assert_almost_equal(score_dict['full'], score_dict['arpack']) + assert_almost_equal(score_dict['full'], score_dict['randomized'], + decimal=3) + + +def test_pca_zero_noise_variance_edge_cases(): + # ensure that noise_variance_ is 0 in edge cases + # when n_components == min(n_samples, n_features) + n, p = 100, 3 + + rng = np.random.RandomState(0) + X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) + # arpack raises ValueError for n_components == min(n_samples, + # n_features) + svd_solvers = ['full', 'randomized'] + + for svd_solver in svd_solvers: + pca = PCA(svd_solver=svd_solver, n_components=p) + pca.fit(X) + assert pca.noise_variance_ == 0 + + pca.fit(X.T) + assert pca.noise_variance_ == 0 + + def test_svd_solver_auto(): rng = np.random.RandomState(0) X = rng.uniform(size=(1000, 50)) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 8506d35a76c9a..b44a21668fa0f 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -11,8 +11,8 @@ from __future__ import print_function import warnings - import numpy as np +from .utils import deprecated from scipy import linalg from .externals.six import string_types from .externals.six.moves import xrange @@ -170,7 +170,8 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, Number of components (< n_classes - 1) for dimensionality reduction. store_covariance : bool, optional - Additionally compute class covariance matrix (default False). + Additionally compute class covariance matrix (default False), used + only in 'svd' solver. .. 
versionadded:: 0.17 @@ -245,6 +246,7 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, >>> print(clf.predict([[-0.8, -1]])) [1] """ + def __init__(self, solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=1e-4): self.solver = solver @@ -554,9 +556,9 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Regularizes the covariance estimate as ``(1-reg_param)*Sigma + reg_param*np.eye(n_features)`` - store_covariances : boolean + store_covariance : boolean If True the covariance matrices are computed and stored in the - `self.covariances_` attribute. + `self.covariance_` attribute. .. versionadded:: 0.17 @@ -567,7 +569,7 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Attributes ---------- - covariances_ : list of array-like, shape = [n_features, n_features] + covariance_ : list of array-like, shape = [n_features, n_features] Covariance matrices of each class. means_ : array-like, shape = [n_classes, n_features] @@ -597,7 +599,8 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): >>> clf.fit(X, y) ... 
# doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0, - store_covariances=False, tol=0.0001) + store_covariance=False, + store_covariances=None, tol=0.0001) >>> print(clf.predict([[-0.8, -1]])) [1] @@ -607,21 +610,30 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin): Discriminant Analysis """ - def __init__(self, priors=None, reg_param=0., store_covariances=False, - tol=1.0e-4): + def __init__(self, priors=None, reg_param=0., store_covariance=False, + tol=1.0e-4, store_covariances=None): self.priors = np.asarray(priors) if priors is not None else None self.reg_param = reg_param self.store_covariances = store_covariances + self.store_covariance = store_covariance self.tol = tol + @property + @deprecated("Attribute covariances_ was deprecated in version" + " 0.19 and will be removed in 0.21. Use " + "covariance_ instead") + def covariances_(self): + return self.covariance_ + def fit(self, X, y): """Fit the model according to the given training data and parameters. .. versionchanged:: 0.19 - *store_covariance* has been moved to main constructor. + ``store_covariances`` has been moved to main constructor as + ``store_covariance`` .. versionchanged:: 0.19 - *tol* has been moved to main constructor. + ``tol`` has been moved to main constructor. 
Parameters ---------- @@ -645,7 +657,12 @@ def fit(self, X, y): self.priors_ = self.priors cov = None + store_covariance = self.store_covariance or self.store_covariances if self.store_covariances: + warnings.warn("'store_covariances' was renamed to store_covariance" + " in version 0.19 and will be removed in 0.21.", + DeprecationWarning) + if store_covariance: cov = [] means = [] scalings = [] @@ -665,13 +682,13 @@ def fit(self, X, y): warnings.warn("Variables are collinear") S2 = (S ** 2) / (len(Xg) - 1) S2 = ((1 - self.reg_param) * S2) + self.reg_param - if self.store_covariances: + if self.store_covariance or store_covariance: # cov = V * (S^2 / (n-1)) * V.T cov.append(np.dot(S2 * Vt.T, Vt)) scalings.append(S2) rotations.append(Vt.T) - if self.store_covariances: - self.covariances_ = cov + if self.store_covariance or store_covariance: + self.covariance_ = cov self.means_ = np.asarray(means) self.scalings_ = scalings self.rotations_ = rotations diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index cc7e1b95e89b3..7ea3030bdf120 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -773,8 +773,8 @@ def decision_function(self, X): if self.n_features_ != X.shape[1]: raise ValueError("Number of features of the model must " - "match the input. Model n_features is {1} and " - "input n_features is {2} " + "match the input. Model n_features is {0} and " + "input n_features is {1} " "".format(self.n_features_, X.shape[1])) # Parallel loop diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 51792383eb0cb..53538866be1fc 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -922,6 +922,27 @@ class labels (multi-output problem). was never left out during the bootstrap. In this case, `oob_decision_function_` might contain NaN. 
+ Examples + -------- + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.datasets import make_classification + >>> + >>> X, y = make_classification(n_samples=1000, n_features=4, + ... n_informative=2, n_redundant=0, + ... random_state=0, shuffle=False) + >>> clf = RandomForestClassifier(max_depth=2, random_state=0) + >>> clf.fit(X, y) + RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', + max_depth=2, max_features='auto', max_leaf_nodes=None, + min_impurity_decrease=0.0, min_impurity_split=None, + min_samples_leaf=1, min_samples_split=2, + min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1, + oob_score=False, random_state=0, verbose=0, warm_start=False) + >>> print(clf.feature_importances_) + [ 0.17287856 0.80608704 0.01884792 0.00218648] + >>> print(clf.predict([[0, 0, 0, 0]])) + [1] + Notes ----- The default values for the parameters controlling the size of the trees @@ -1142,6 +1163,26 @@ class RandomForestRegressor(ForestRegressor): oob_prediction_ : array of shape = [n_samples] Prediction computed with out-of-bag estimate on the training set. + Examples + -------- + >>> from sklearn.ensemble import RandomForestRegressor + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=4, n_informative=2, + ... random_state=0, shuffle=False) + >>> regr = RandomForestRegressor(max_depth=2, random_state=0) + >>> regr.fit(X, y) + RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2, + max_features='auto', max_leaf_nodes=None, + min_impurity_decrease=0.0, min_impurity_split=None, + min_samples_leaf=1, min_samples_split=2, + min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1, + oob_score=False, random_state=0, verbose=0, warm_start=False) + >>> print(regr.feature_importances_) + [ 0.17339552 0.81594114 0. 
0.01066333] + >>> print(regr.predict([[0, 0, 0, 0]])) + [-2.50699856] + Notes ----- The default values for the parameters controlling the size of the trees diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index e725d2e6ebe81..a37377fe7bde8 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -448,7 +448,7 @@ class ClassificationLossFunction(six.with_metaclass(ABCMeta, LossFunction)): def _score_to_proba(self, score): """Template method to convert scores to probabilities. - the does not support probabilites raises AttributeError. + the does not support probabilities raises AttributeError. """ raise TypeError('%s does not support predict_proba' % type(self).__name__) diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index d4ed3233f44e7..e8bfc2110bb90 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -53,13 +53,13 @@ def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100): raise ValueError('percentile values must be in [0, 1]') axes = [] + emp_percentiles = mquantiles(X, prob=percentiles, axis=0) for col in range(X.shape[1]): uniques = np.unique(X[:, col]) if uniques.shape[0] < grid_resolution: # feature has low resolution use unique vals axis = uniques else: - emp_percentiles = mquantiles(X, prob=percentiles, axis=0) # create axis based on percentiles and grid resolution axis = np.linspace(emp_percentiles[0, col], emp_percentiles[1, col], diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index c0a46d6c15036..e71462daa3a14 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -19,6 +19,7 @@ from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message +from sklearn.utils.testing import assert_raise_message 
from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.model_selection import GridSearchCV, ParameterGrid @@ -449,6 +450,13 @@ def test_parallel_classification(): decisions2 = ensemble.decision_function(X_test) assert_array_almost_equal(decisions1, decisions2) + X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1)))) + assert_raise_message(ValueError, "Number of features of the model " + "must match the input. Model n_features is {0} " + "and input n_features is {1} " + "".format(X_test.shape[1], X_err.shape[1]), + ensemble.decision_function, X_err) + ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'), n_jobs=1, random_state=0).fit(X_train, y_train) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 65ea8b62a2927..f2a87d8fb559f 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -109,7 +109,7 @@ def make_steps(): assert_not_equal(est1.get_params()['sel__estimator__random_state'], est1.get_params()['clf__random_state']) - # ensure multiple random_state paramaters are invariant to get_params() + # ensure multiple random_state parameters are invariant to get_params() # iteration order class AlphaParamPipeline(Pipeline): diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index d61d8bfac62be..023be79912d12 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -4,6 +4,7 @@ from sklearn.utils.testing import assert_almost_equal, assert_array_equal from sklearn.utils.testing import assert_equal, assert_true, assert_false from sklearn.utils.testing import assert_raise_message +from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression from sklearn.naive_bayes import GaussianNB @@ -16,6 +17,7 @@ from sklearn.svm import SVC from 
sklearn.multiclass import OneVsRestClassifier from sklearn.neighbors import KNeighborsClassifier +from sklearn.base import BaseEstimator, ClassifierMixin # Load the iris dataset and randomly permute it @@ -223,7 +225,7 @@ def test_gridsearch(): grid.fit(iris.data, iris.target) -def test_parallel_predict(): +def test_parallel_fit(): """Check parallel backend of VotingClassifier on toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) @@ -273,6 +275,20 @@ def test_sample_weight(): assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight) +def test_sample_weight_kwargs(): + """Check that VotingClassifier passes sample_weight as kwargs""" + class MockClassifier(BaseEstimator, ClassifierMixin): + """Mock Classifier to check that sample_weight is received as kwargs""" + def fit(self, X, y, *args, **sample_weight): + assert_true('sample_weight' in sample_weight) + + clf = MockClassifier() + eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft') + + # Should not raise an error. 
+ eclf.fit(X, y, sample_weight=np.ones((len(y),))) + + def test_set_params(): """set_params should be able to set estimators""" clf1 = LogisticRegression(random_state=123, C=1.0) @@ -364,3 +380,38 @@ def test_estimator_weights_format(): eclf1.fit(X, y) eclf2.fit(X, y) assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) + + +def test_transform(): + """Check transform method of VotingClassifier on toy dataset.""" + clf1 = LogisticRegression(random_state=123) + clf2 = RandomForestClassifier(random_state=123) + clf3 = GaussianNB() + X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) + y = np.array([1, 1, 2, 2]) + + eclf1 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft').fit(X, y) + eclf2 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft', + flatten_transform=True).fit(X, y) + eclf3 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft', + flatten_transform=False).fit(X, y) + + warn_msg = ("'flatten_transform' default value will be " + "changed to True in 0.21." 
+ "To silence this warning you may" + " explicitly set flatten_transform=False.") + res = assert_warns_message(DeprecationWarning, warn_msg, + eclf1.transform, X) + assert_array_equal(res.shape, (3, 4, 2)) + assert_array_equal(eclf2.transform(X).shape, (4, 6)) + assert_array_equal(eclf3.transform(X).shape, (3, 4, 2)) + assert_array_equal(res.swapaxes(0, 1).reshape((4, 6)), + eclf2.transform(X)) + assert_array_equal(eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)), + eclf2.transform(X)) diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index c4832d7e49a9e..ad6c0125dd664 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -12,6 +12,7 @@ # License: BSD 3 clause import numpy as np +import warnings from ..base import ClassifierMixin from ..base import TransformerMixin @@ -22,10 +23,10 @@ from ..utils.metaestimators import _BaseComposition -def _parallel_fit_estimator(estimator, X, y, sample_weight): +def _parallel_fit_estimator(estimator, X, y, sample_weight=None): """Private function used to fit an estimator within a job.""" if sample_weight is not None: - estimator.fit(X, y, sample_weight) + estimator.fit(X, y, sample_weight=sample_weight) else: estimator.fit(X, y) return estimator @@ -61,6 +62,13 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): The number of jobs to run in parallel for ``fit``. If -1, then the number of jobs is set to the number of cores. + flatten_transform : bool, optional (default=None) + Affects shape of transform output only when voting='soft' + If voting='soft' and flatten_transform=True, transform method returns + matrix with shape (n_samples, n_classifiers * n_classes). If + flatten_transform=False, it returns + (n_classifiers, n_samples, n_classes). 
+ Attributes ---------- estimators_ : list of classifiers @@ -94,18 +102,23 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): [1 1 1 2 2 2] >>> eclf3 = VotingClassifier(estimators=[ ... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], - ... voting='soft', weights=[2,1,1]) + ... voting='soft', weights=[2,1,1], + ... flatten_transform=True) >>> eclf3 = eclf3.fit(X, y) >>> print(eclf3.predict(X)) [1 1 1 2 2 2] + >>> print(eclf3.transform(X).shape) + (6, 6) >>> """ - def __init__(self, estimators, voting='hard', weights=None, n_jobs=1): + def __init__(self, estimators, voting='hard', weights=None, n_jobs=1, + flatten_transform=None): self.estimators = estimators self.voting = voting self.weights = weights self.n_jobs = n_jobs + self.flatten_transform = flatten_transform @property def named_estimators(self): @@ -163,6 +176,7 @@ def fit(self, X, y, sample_weight=None): if n_isnone == len(self.estimators): raise ValueError('All estimators are None. At least one is ' 'required to be a classifier!') + self.le_ = LabelEncoder().fit(y) self.classes_ = self.le_.classes_ self.estimators_ = [] @@ -171,7 +185,7 @@ def fit(self, X, y, sample_weight=None): self.estimators_ = Parallel(n_jobs=self.n_jobs)( delayed(_parallel_fit_estimator)(clone(clf), X, transformed_y, - sample_weight) + sample_weight=sample_weight) for clf in clfs if clf is not None) return self @@ -256,16 +270,30 @@ def transform(self, X): Returns ------- - If `voting='soft'`: - array-like = [n_classifiers, n_samples, n_classes] + If `voting='soft'` and `flatten_transform=True`: + array-like = (n_samples, n_classifiers * n_classes) + otherwise array-like = (n_classifiers, n_samples, n_classes) Class probabilities calculated by each classifier. If `voting='hard'`: array-like = [n_samples, n_classifiers] Class labels predicted by each classifier. 
""" check_is_fitted(self, 'estimators_') + if self.voting == 'soft': - return self._collect_probas(X) + probas = self._collect_probas(X) + if self.flatten_transform is None: + warnings.warn("'flatten_transform' default value will be " + "changed to True in 0.21." + "To silence this warning you may" + " explicitly set flatten_transform=False.", + DeprecationWarning) + return probas + elif not self.flatten_transform: + return probas + else: + return np.hstack(probas) + else: return self._predict(X) diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index 3108717d4676e..a53c57d3495e9 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -29,7 +29,7 @@ from numpy.core.umath_tests import inner1d from .base import BaseEnsemble -from ..base import ClassifierMixin, RegressorMixin, is_regressor +from ..base import ClassifierMixin, RegressorMixin, is_regressor, is_classifier from ..externals import six from ..externals.six.moves import zip from ..externals.six.moves import xrange as range @@ -231,7 +231,7 @@ def staged_score(self, X, y, sample_weight=None): z : float """ for y_pred in self.staged_predict(X): - if isinstance(self, ClassifierMixin): + if is_classifier(self): yield accuracy_score(y, y_pred, sample_weight=sample_weight) else: yield r2_score(y, y_pred, sample_weight=sample_weight) diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index 0204910607f32..d258625897e27 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -20,6 +20,7 @@ def test_feature_hasher_dicts(): assert_array_equal(X1.toarray(), X2.toarray()) +@ignore_warnings(category=DeprecationWarning) def test_feature_hasher_strings(): # mix byte and Unicode strings; note that "foo" is a duplicate in row 0 raw_X = [["foo", "bar", "baz", "foo".encode("ascii")], diff --git 
a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index de6674646c981..9e613b1bca8c1 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -28,7 +28,8 @@ assert_not_equal, assert_almost_equal, assert_in, assert_less, assert_greater, assert_warns_message, assert_raise_message, - clean_warning_registry, SkipTest) + clean_warning_registry, ignore_warnings, + SkipTest) from collections import defaultdict, Mapping from functools import partial @@ -222,6 +223,25 @@ def test_char_wb_ngram_analyzer(): assert_equal(cnga(text)[:6], expected) +def test_word_ngram_analyzer(): + cnga = CountVectorizer(analyzer='word', strip_accents='unicode', + ngram_range=(3, 6)).build_analyzer() + + text = "This \n\tis a test, really.\n\n I met Harry yesterday" + expected = ['this is test', 'is test really', 'test really met'] + assert_equal(cnga(text)[:3], expected) + + expected = ['test really met harry yesterday', + 'this is test really met harry', + 'is test really met harry yesterday'] + assert_equal(cnga(text)[-3:], expected) + + cnga_file = CountVectorizer(input='file', analyzer='word', + ngram_range=(3, 6)).build_analyzer() + file = StringIO(text) + assert_equal(cnga_file(file), cnga(text)) + + def test_countvectorizer_custom_vocabulary(): vocab = {"pizza": 0, "beer": 1} terms = set(vocab.keys()) @@ -480,6 +500,7 @@ def test_tfidf_vectorizer_setters(): assert_true(tv._tfidf.sublinear_tf) +@ignore_warnings(category=DeprecationWarning) def test_hashing_vectorizer(): v = HashingVectorizer() X = v.transform(ALL_FOOD_DOCS) @@ -651,6 +672,7 @@ def test_count_binary_occurrences(): assert_equal(X_sparse.dtype, np.float32) +@ignore_warnings(category=DeprecationWarning) def test_hashed_binary_occurrences(): # by default multiple occurrences are counted as longs test_data = ['aaabc', 'abbde'] @@ -784,6 +806,7 @@ def test_vectorizer_pipeline_cross_validation(): assert_array_equal(cv_scores, 
[1., 1., 1.]) +@ignore_warnings(category=DeprecationWarning) def test_vectorizer_unicode(): # tests that the count vectorizer works with cyrillic. document = ( diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index dc7e9e8e206be..d505099cc6a88 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -39,8 +39,9 @@ class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): coefficients of a linear model), the goal of recursive feature elimination (RFE) is to select features by recursively considering smaller and smaller sets of features. First, the estimator is trained on the initial set of - features and weights are assigned to each one of them. Then, features whose - absolute weights are the smallest are pruned from the current set features. + features and the importance of each feature is obtained either through a + ``coef_`` attribute or through a ``feature_importances_`` attribute. + Then, the least important features are pruned from current set of features. That procedure is recursively repeated on the pruned set until the desired number of features to select is eventually reached. @@ -49,13 +50,9 @@ class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): Parameters ---------- estimator : object - A supervised learning estimator with a `fit` method that updates a - `coef_` attribute that holds the fitted parameters. Important features - must correspond to high absolute values in the `coef_` array. - - For instance, this is the case for most supervised learning - algorithms such as Support Vector Classifiers and Generalized - Linear Models from the `svm` and `linear_model` modules. + A supervised learning estimator with a ``fit`` method that provides + information about feature importance either through a ``coef_`` + attribute or through a ``feature_importances_`` attribute. n_features_to_select : int or None (default=None) The number of features to select. 
If `None`, half of the features @@ -282,13 +279,9 @@ class RFECV(RFE, MetaEstimatorMixin): Parameters ---------- estimator : object - A supervised learning estimator with a `fit` method that updates a - `coef_` attribute that holds the fitted parameters. Important features - must correspond to high absolute values in the `coef_` array. - - For instance, this is the case for most supervised learning - algorithms such as Support Vector Classifiers and Generalized - Linear Models from the `svm` and `linear_model` modules. + A supervised learning estimator with a ``fit`` method that provides + information about feature importance either through a ``coef_`` + attribute or through a ``feature_importances_`` attribute. step : int or float, optional (default=1) If greater than or equal to 1, then `step` corresponds to the (integer) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 2d003429815c9..6bcdd624083e9 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -105,8 +105,8 @@ def sparse_center_data(X, y, fit_intercept, normalize=False): return X, y, X_offset, y_offset, X_std -@deprecated("center_data was deprecated in version 0.18 and will be removed in " - "0.20. Use utilities in preprocessing.data instead") +@deprecated("center_data was deprecated in version 0.18 and will be removed " + "in 0.20. 
Use utilities in preprocessing.data instead") def center_data(X, y, fit_intercept, normalize=False, copy=True, sample_weight=None): """ @@ -520,10 +520,11 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy): n_samples, n_features = X.shape if sparse.isspmatrix(X): + # copy is not needed here as X is not modified inplace when X is sparse precompute = False X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, fit_intercept=fit_intercept, normalize=normalize, - return_mean=True) + copy=False, return_mean=True) else: # copy was done in fit if necessary X, y, X_offset, y_offset, X_scale = _preprocess_data( diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index a1a034cb9eb72..e03aece7f2762 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -614,6 +614,25 @@ class ElasticNet(LinearModel, RegressorMixin): number of iterations run by the coordinate descent solver to reach the specified tolerance. + Examples + -------- + >>> from sklearn.linear_model import ElasticNet + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=2, random_state=0) + >>> regr = ElasticNet(random_state=0) + >>> regr.fit(X, y) + ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5, + max_iter=1000, normalize=False, positive=False, precompute=False, + random_state=0, selection='cyclic', tol=0.0001, warm_start=False) + >>> print(regr.coef_) # doctest: +ELLIPSIS + [ 18.83816048 64.55968825] + >>> print(regr.intercept_) # doctest: +ELLIPSIS + 1.45126075617 + >>> print(regr.predict([[0, 0]])) # doctest: +ELLIPSIS + [ 1.45126076] + + Notes ----- To avoid unnecessary memory duplication the X argument of the fit method @@ -1486,6 +1505,26 @@ class ElasticNetCV(LinearModelCV, RegressorMixin): number of iterations run by the coordinate descent solver to reach the specified tolerance for the optimal alpha. 
+ Examples + -------- + >>> from sklearn.linear_model import ElasticNetCV + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=2, random_state=0) + >>> regr = ElasticNetCV(cv=5, random_state=0) + >>> regr.fit(X, y) + ElasticNetCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=True, + l1_ratio=0.5, max_iter=1000, n_alphas=100, n_jobs=1, + normalize=False, positive=False, precompute='auto', random_state=0, + selection='cyclic', tol=0.0001, verbose=0) + >>> print(regr.alpha_) # doctest: +ELLIPSIS + 0.19947279427 + >>> print(regr.intercept_) # doctest: +ELLIPSIS + 0.398882965428 + >>> print(regr.predict([[0, 0]])) # doctest: +ELLIPSIS + [ 0.39888297] + + Notes ----- For an example, see diff --git a/sklearn/linear_model/passive_aggressive.py b/sklearn/linear_model/passive_aggressive.py index 183049e4fdb55..a82b1c12ffdb6 100644 --- a/sklearn/linear_model/passive_aggressive.py +++ b/sklearn/linear_model/passive_aggressive.py @@ -105,6 +105,25 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): The actual number of iterations to reach the stopping criterion. For multiclass fits, it is the maximum over every binary fit. 
+ Examples + -------- + >>> from sklearn.linear_model import PassiveAggressiveClassifier + >>> from sklearn.datasets import make_classification + >>> + >>> X, y = make_classification(n_features=4, random_state=0) + >>> clf = PassiveAggressiveClassifier(random_state=0) + >>> clf.fit(X, y) + PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None, + fit_intercept=True, loss='hinge', max_iter=5, n_iter=None, + n_jobs=1, random_state=0, shuffle=True, tol=None, verbose=0, + warm_start=False) + >>> print(clf.coef_) + [[ 0.49324685 1.0552176 1.49519589 1.33798314]] + >>> print(clf.intercept_) + [ 2.18438388] + >>> print(clf.predict([[0, 0, 0, 0]])) + [1] + See also -------- @@ -291,6 +310,25 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): n_iter_ : int The actual number of iterations to reach the stopping criterion. + Examples + -------- + >>> from sklearn.linear_model import PassiveAggressiveRegressor + >>> from sklearn.datasets import make_regression + >>> + >>> X, y = make_regression(n_features=4, random_state=0) + >>> regr = PassiveAggressiveRegressor(random_state=0) + >>> regr.fit(X, y) + PassiveAggressiveRegressor(C=1.0, average=False, epsilon=0.1, + fit_intercept=True, loss='epsilon_insensitive', max_iter=5, + n_iter=None, random_state=0, shuffle=True, tol=None, + verbose=0, warm_start=False) + >>> print(regr.coef_) + [ 20.48736655 34.18818427 67.59122734 87.94731329] + >>> print(regr.intercept_) + [-0.02306214] + >>> print(regr.predict([[0, 0, 0, 0]])) + [-0.02306214] + See also -------- diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index a84558823146e..8f3692dc8675b 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -195,8 +195,6 @@ class RandomizedLasso(BaseRandomizedLinearModel): is known as stability selection. In short, features selected more often are considered good features. - Read more in the :ref:`User Guide `. 
- Parameters ---------- alpha : float, 'aic', or 'bic', optional @@ -206,7 +204,7 @@ class RandomizedLasso(BaseRandomizedLinearModel): scaling : float, optional The s parameter used to randomly scale the penalty of different - features (See :ref:`User Guide ` for details ). + features. Should be between 0 and 1. sample_fraction : float, optional @@ -300,11 +298,6 @@ class RandomizedLasso(BaseRandomizedLinearModel): >>> from sklearn.linear_model import RandomizedLasso >>> randomized_lasso = RandomizedLasso() - Notes - ----- - For an example, see :ref:`examples/linear_model/plot_sparse_recovery.py - `. - References ---------- Stability selection @@ -407,8 +400,6 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): randomizations. This is known as stability selection. In short, features selected more often are considered good features. - Read more in the :ref:`User Guide `. - Parameters ---------- C : float or array-like of shape [n_reg_parameter], optional, default=1 @@ -420,7 +411,7 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): scaling : float, optional, default=0.5 The s parameter used to randomly scale the penalty of different - features (See :ref:`User Guide ` for details ). + features. Should be between 0 and 1. sample_fraction : float, optional, default=0.75 @@ -501,11 +492,6 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): >>> from sklearn.linear_model import RandomizedLogisticRegression >>> randomized_logistic = RandomizedLogisticRegression() - Notes - ----- - For an example, see :ref:`examples/linear_model/plot_sparse_recovery.py - `. - References ---------- Stability selection @@ -590,8 +576,6 @@ def lasso_stability_path(X, y, scaling=0.5, random_state=None, verbose=False): """Stability path based on randomized Lasso estimates - Read more in the :ref:`User Guide `. 
- Parameters ---------- X : array-like, shape = [n_samples, n_features] @@ -638,11 +622,6 @@ def lasso_stability_path(X, y, scaling=0.5, random_state=None, scores_path : array, shape = [n_features, n_grid] The scores for each feature along the path. - - Notes - ----- - For an example, see :ref:`examples/linear_model/plot_sparse_recovery.py - `. """ X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo']) rng = check_random_state(random_state) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 6a7f717946481..031520362a528 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -986,7 +986,7 @@ def test_logreg_predict_proba_multinomial(): X, y = make_classification(n_samples=10, n_features=20, random_state=0, n_classes=3, n_informative=10) - # Predicted probabilites using the true-entropy loss should give a + # Predicted probabilities using the true-entropy loss should give a # smaller loss than those using the ovr method. clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs") clf_multi.fit(X, y) @@ -996,7 +996,7 @@ def test_logreg_predict_proba_multinomial(): clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X)) assert_greater(clf_ovr_loss, clf_multi_loss) - # Predicted probabilites using the soft-max function should give a + # Predicted probabilities using the soft-max function should give a # smaller loss than those using the logistic function. 
clf_multi_loss = log_loss(y, clf_multi.predict_proba(X)) clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X)) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 09aa4d87b8e21..be71d2eb84a20 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -167,6 +167,7 @@ def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None): 2 In the multilabel case with binary label indicators: + >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2))) 0.5 """ diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 4e82328f6fc53..0fa3ad793524a 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -11,6 +11,7 @@ import itertools from functools import partial +import warnings import numpy as np from scipy.spatial import distance @@ -467,7 +468,7 @@ def pairwise_distances_argmin(X, Y, axis=1, metric="euclidean", def manhattan_distances(X, Y=None, sum_over_features=True, - size_threshold=5e8): + size_threshold=None): """ Compute the L1 distances between the vectors in X and Y. With sum_over_features equal to False it returns the componentwise @@ -520,6 +521,10 @@ def manhattan_distances(X, Y=None, sum_over_features=True, array([[ 1., 1.], [ 1., 1.]]...) 
""" + if size_threshold is not None: + warnings.warn('Use of the "size_threshold" is deprecated ' + 'in 0.19 and it will be removed in version ' + '0.21 of scikit-learn', DeprecationWarning) X, Y = check_pairwise_arrays(X, Y) if issparse(X) or issparse(Y): diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 2b54896781929..2003ed8b314c8 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -840,7 +840,7 @@ def ndcg_score(y_true, y_score, k=5): """ y_score, y_true = check_X_y(y_score, y_true) - # Make sure we use all the labels (max between the lenght and the higher + # Make sure we use all the labels (max between the length and the higher # number in the array) lb = LabelBinarizer() lb.fit(np.arange(max(np.max(y_true) + 1, len(y_true)))) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 7d213ae39aaed..b1f01c1a18e1b 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -26,7 +26,8 @@ from . import (r2_score, median_absolute_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, accuracy_score, f1_score, roc_auc_score, average_precision_score, - precision_score, recall_score, log_loss) + precision_score, recall_score, log_loss, + explained_variance_score) from .cluster import adjusted_rand_score from .cluster import homogeneity_score @@ -320,7 +321,7 @@ def _check_multimetric_scoring(estimator, scoring=None): value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. - See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None the estimator's default scorer (if available) is used. The return value in that case will be ``{'score': }``. 
@@ -463,6 +464,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, # Standard regression scores +explained_variance_scorer = make_scorer(explained_variance_score) r2_scorer = make_scorer(r2_score) neg_mean_squared_error_scorer = make_scorer(mean_squared_error, greater_is_better=False) @@ -525,7 +527,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, fowlkes_mallows_scorer = make_scorer(fowlkes_mallows_score) -SCORERS = dict(r2=r2_scorer, +SCORERS = dict(explained_variance=explained_variance_scorer, + r2=r2_scorer, neg_median_absolute_error=neg_median_absolute_error_scorer, neg_mean_absolute_error=neg_mean_absolute_error_scorer, neg_mean_squared_error=neg_mean_squared_error_scorer, diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index d8b64b58ca481..242523034e7af 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -12,6 +12,7 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import assert_true +from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.externals.six import iteritems @@ -74,10 +75,10 @@ def test_pairwise_distances(): assert_equal(S.shape[0], X.shape[0]) assert_equal(S.shape[1], Y.shape[0]) assert_array_almost_equal(S, S2) - # Low-level function for manhattan can divide in blocks to avoid - # using too much memory during the broadcasting - S3 = manhattan_distances(X, Y, size_threshold=10) - assert_array_almost_equal(S, S3) + # Using size_threshold argument should raise + # a deprecation warning + assert_warns(DeprecationWarning, + manhattan_distances, X, Y, size_threshold=10) # Test cosine as a string metric versus cosine callable # The string "cosine" uses sklearn.metric, # while the function cosine is scipy.spatial diff --git a/sklearn/metrics/tests/test_score_objects.py 
b/sklearn/metrics/tests/test_score_objects.py index 47c4d334f893a..fc5ba91401eab 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -29,7 +29,6 @@ from sklearn.svm import LinearSVC from sklearn.pipeline import make_pipeline from sklearn.cluster import KMeans -from sklearn.dummy import DummyRegressor from sklearn.linear_model import Ridge, LogisticRegression from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.datasets import make_blobs @@ -42,8 +41,9 @@ from sklearn.externals import joblib -REGRESSION_SCORERS = ['r2', 'neg_mean_absolute_error', - 'neg_mean_squared_error', 'neg_mean_squared_log_error', +REGRESSION_SCORERS = ['explained_variance', 'r2', + 'neg_mean_absolute_error', 'neg_mean_squared_error', + 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'mean_absolute_error', 'mean_squared_error', 'median_absolute_error'] @@ -68,7 +68,7 @@ def _make_estimators(X_train, y_train, y_ml_train): # Make estimators that make sense to test various scoring methods - sensible_regr = DummyRegressor(strategy='median') + sensible_regr = DecisionTreeRegressor(random_state=0) sensible_regr.fit(X_train, y_train) sensible_clf = DecisionTreeClassifier(random_state=0) sensible_clf.fit(X_train, y_train) diff --git a/sklearn/mixture/bayesian_mixture.py b/sklearn/mixture/bayesian_mixture.py index 51c57c7c475a1..642c0aade30d0 100644 --- a/sklearn/mixture/bayesian_mixture.py +++ b/sklearn/mixture/bayesian_mixture.py @@ -131,7 +131,8 @@ class BayesianGaussianMixture(BaseMixture): weight_concentration_prior : float | None, optional. The dirichlet concentration of each component on the weight - distribution (Dirichlet). The higher concentration puts more mass in + distribution (Dirichlet). This is commonly called gamma in the + literature. 
The higher concentration puts more mass in the center and will lead to more components being active, while a lower concentration parameter will lead to more mass at the edge of the mixture weights simplex. The value of the parameter must be greater diff --git a/sklearn/mixture/dpgmm.py b/sklearn/mixture/dpgmm.py index 3d1858c513b2a..c2fd42ab45842 100644 --- a/sklearn/mixture/dpgmm.py +++ b/sklearn/mixture/dpgmm.py @@ -47,7 +47,7 @@ def gammaln(x): @deprecated("The function log_normalize is deprecated in 0.18 and " "will be removed in 0.20.") def log_normalize(v, axis=0): - """Normalized probabilities from unnormalized log-probabilites""" + """Normalized probabilities from unnormalized log-probabilities""" v = np.rollaxis(v, axis) v = v.copy() v -= v.max(axis=0) @@ -672,7 +672,7 @@ class VBGMM(_DPGMMBase): Initialization is with normally-distributed means and identity covariance, for proper convergence. - Read more in the :ref:`User Guide `. + Read more in the :ref:`User Guide `. Parameters ---------- diff --git a/sklearn/mixture/gmm.py b/sklearn/mixture/gmm.py index 79ff8d169dcd8..2c90cb7b92fdf 100644 --- a/sklearn/mixture/gmm.py +++ b/sklearn/mixture/gmm.py @@ -781,7 +781,7 @@ def _validate_covars(covars, covariance_type, n_components): "'spherical', 'tied', 'diag', 'full'") -@deprecated("The functon distribute_covar_matrix_to_match_covariance_type" +@deprecated("The function distribute_covar_matrix_to_match_covariance_type " "is deprecated in 0.18 and will be removed in 0.20.") def distribute_covar_matrix_to_match_covariance_type( tied_cv, covariance_type, n_components): diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 17c588c293eda..ebfa1e9bd3e18 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -801,7 +801,7 @@ class GridSearchCV(BaseSearchCV): value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. 
- See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. @@ -924,7 +924,7 @@ class GridSearchCV(BaseSearchCV): For instance the below given table +------------+-----------+------------+-----------------+---+---------+ - |param_kernel|param_gamma|param_degree|split0_test_score|...|..rank...| + |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...| +============+===========+============+=================+===+=========+ | 'poly' | -- | 2 | 0.8 |...| 2 | +------------+-----------+------------+-----------------+---+---------+ @@ -1111,7 +1111,7 @@ class RandomizedSearchCV(BaseSearchCV): value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. - See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 4bcc0ae1c5349..fbc00f3069e51 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -83,6 +83,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. + + Notes + ----- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ X, y, groups = indexable(X, y, groups) indices = np.arange(_num_samples(X)) @@ -308,6 +314,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. + + Notes + ----- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. 
""" X, y, groups = indexable(X, y, groups) n_samples = _num_samples(X) @@ -567,23 +579,20 @@ def __init__(self, n_splits=3, shuffle=False, random_state=None): super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state) def _make_test_folds(self, X, y=None): - if self.shuffle: - rng = check_random_state(self.random_state) - else: - rng = self.random_state + rng = self.random_state y = np.asarray(y) n_samples = y.shape[0] unique_y, y_inversed = np.unique(y, return_inverse=True) y_counts = np.bincount(y_inversed) min_groups = np.min(y_counts) if np.all(self.n_splits > y_counts): - raise ValueError("All the n_groups for individual classes" - " are less than n_splits=%d." + raise ValueError("n_splits=%d cannot be greater than the" + " number of members in each class." % (self.n_splits)) if self.n_splits > min_groups: warnings.warn(("The least populated class in y has only %d" " members, which is too few. The minimum" - " number of groups for any class cannot" + " number of members in any class cannot" " be less than n_splits=%d." % (min_groups, self.n_splits)), Warning) @@ -645,6 +654,12 @@ def split(self, X, y, groups=None): test : ndarray The testing set indices for that split. + + Notes + ----- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ y = check_array(y, ensure_2d=False, dtype=None) return super(StratifiedKFold, self).split(X, y, groups) @@ -726,6 +741,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. + + Notes + ----- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ X, y, groups = indexable(X, y, groups) n_samples = _num_samples(X) @@ -1164,6 +1185,12 @@ def split(self, X, y=None, groups=None): test : ndarray The testing set indices for that split. 
+ + Notes + ----- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ X, y, groups = indexable(X, y, groups) for train, test in self._iter_indices(X, y, groups): @@ -1578,6 +1605,12 @@ def split(self, X, y, groups=None): test : ndarray The testing set indices for that split. + + Notes + ----- + Randomized CV splitters may return different results for each call of + split. You can make the results identical by setting ``random_state`` + to an integer. """ y = check_array(y, ensure_2d=False, dtype=None) return super(StratifiedShuffleSplit, self).split(X, y, groups) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 1e5ea29740c00..147d741b500b9 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -69,7 +69,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None, value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. - See :ref:`multivalued_scorer_wrapping` for an example. + See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. @@ -803,8 +803,8 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, the dataset into train/test set. scoring : string, callable or None, optional, default: None - A single string (see :ref:`_scoring_parameter`) or a callable - (see :ref:`_scoring`) to evaluate the predictions on the test set. + A single string (see :ref:`scoring_parameter`) or a callable + (see :ref:`scoring`) to evaluate the predictions on the test set. If None the estimator's default scorer, if available, is used. 
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 9dfd49714ee08..5e667727d9dda 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -7,6 +7,7 @@ from itertools import chain, product import pickle import sys +from types import GeneratorType import re import numpy as np @@ -1070,16 +1071,10 @@ def test_search_cv_results_rank_tie_breaking(): cv_results['mean_test_score'][1]) assert_almost_equal(cv_results['mean_train_score'][0], cv_results['mean_train_score'][1]) - try: - assert_almost_equal(cv_results['mean_test_score'][1], - cv_results['mean_test_score'][2]) - except AssertionError: - pass - try: - assert_almost_equal(cv_results['mean_train_score'][1], - cv_results['mean_train_score'][2]) - except AssertionError: - pass + assert_false(np.allclose(cv_results['mean_test_score'][1], + cv_results['mean_test_score'][2])) + assert_false(np.allclose(cv_results['mean_train_score'][1], + cv_results['mean_train_score'][2])) # 'min' rank should be assigned to the tied candidates assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3]) @@ -1421,6 +1416,33 @@ def test_grid_search_cv_splits_consistency(): cv=KFold(n_splits=n_splits)) gs2.fit(X, y) + # Give generator as a cv parameter + assert_true(isinstance(KFold(n_splits=n_splits, + shuffle=True, random_state=0).split(X, y), + GeneratorType)) + gs3 = GridSearchCV(LinearSVC(random_state=0), + param_grid={'C': [0.1, 0.2, 0.3]}, + cv=KFold(n_splits=n_splits, shuffle=True, + random_state=0).split(X, y)) + gs3.fit(X, y) + + gs4 = GridSearchCV(LinearSVC(random_state=0), + param_grid={'C': [0.1, 0.2, 0.3]}, + cv=KFold(n_splits=n_splits, shuffle=True, + random_state=0)) + gs4.fit(X, y) + + def _pop_time_keys(cv_results): + for key in ('mean_fit_time', 'std_fit_time', + 'mean_score_time', 'std_score_time'): + cv_results.pop(key) + return cv_results + + # Check if generators are supported as cv and + # that 
the splits are consistent + np.testing.assert_equal(_pop_time_keys(gs3.cv_results_), + _pop_time_keys(gs4.cv_results_)) + # OneTimeSplitter is a non-re-entrant cv where split can be called only # once if ``cv.split`` is called once per param setting in GridSearchCV.fit # the 2nd and 3rd parameter will not be evaluated as no train/test indices diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index b1bb44efe59c2..300bb8953efae 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -446,9 +446,11 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility(): for cv in (kf, skf): for data in zip((X, X2), (y, y2)): + # Test if the two splits are different + # numpy's assert_array_equal properly compares nested lists try: - np.testing.assert_equal(list(cv.split(*data)), - list(cv.split(*data))) + np.testing.assert_array_equal(list(cv.split(*data)), + list(cv.split(*data))) except AssertionError: pass else: @@ -1188,6 +1190,7 @@ def test_cv_iterable_wrapper(): # results kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y) kf_randomized_iter_wrapped = check_cv(kf_randomized_iter) + # numpy's assert_equal properly compares nested lists np.testing.assert_equal(list(kf_randomized_iter_wrapped.split(X, y)), list(kf_randomized_iter_wrapped.split(X, y))) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index dedb77026c544..5f650cb644079 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -452,8 +452,8 @@ def check_cross_validate_multi_metric(clf, X, y, scores): assert type(cv_results['test_r2']) == np.ndarray assert (type(cv_results['test_neg_mean_squared_error']) == np.ndarray) - assert type(cv_results['fit_time'] == np.ndarray) - assert type(cv_results['score_time'] == np.ndarray) + assert type(cv_results['fit_time']) == np.ndarray + assert type(cv_results['score_time']) == 
np.ndarray + assert type(cv_results['score_time']) == np.ndarray # Ensure all the times are within sane limits assert np.all(cv_results['fit_time'] >= 0) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 3ca3b1ad42a28..a8510cf0a0a85 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -721,7 +721,7 @@ def fit(self, X, y): """ X, y = check_X_y(X, y) if self.code_size <= 0: - raise ValueError("code_size should be greater than 0, got {1}" + raise ValueError("code_size should be greater than 0, got {0}" "".format(self.code_size)) _check_estimator(self.estimator) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 6906d95869f2b..6c9fbc55f7863 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -14,13 +14,11 @@ # # License: BSD 3 clause -from abc import ABCMeta - import numpy as np import scipy.sparse as sp from abc import ABCMeta, abstractmethod from .base import BaseEstimator, clone, MetaEstimatorMixin -from .base import RegressorMixin, ClassifierMixin +from .base import RegressorMixin, ClassifierMixin, is_classifier from .model_selection import cross_val_predict from .utils import check_array, check_X_y, check_random_state from .utils.fixes import parallel_helper @@ -154,7 +152,7 @@ def fit(self, X, y, sample_weight=None): multi_output=True, accept_sparse=True) - if isinstance(self, ClassifierMixin): + if is_classifier(self): check_classification_targets(y) if y.ndim == 1: @@ -309,7 +307,7 @@ class MultiOutputClassifier(MultiOutputEstimator, ClassifierMixin): Attributes ---------- - estimators_ : list of `n_output` estimators + estimators_ : list of ``n_output`` estimators Estimators used for predictions. """ @@ -318,7 +316,7 @@ def __init__(self, estimator, n_jobs=1): def predict_proba(self, X): """Probability estimates. - Returns prediction probabilites for each class of each output. + Returns prediction probabilities for each class of each output. 
Parameters ---------- @@ -370,7 +368,7 @@ def score(self, X, y): return np.mean(np.all(y == y_pred, axis=1)) -class ClassifierChain(BaseEstimator): +class ClassifierChain(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): """A multi-label model that arranges binary classifiers into a chain. Each model makes a prediction in the order specified by the chain using @@ -420,7 +418,7 @@ class ClassifierChain(BaseEstimator): Attributes ---------- classes_ : list - A list of arrays of length len(estimators_) containing the + A list of arrays of length ``len(estimators_)`` containing the class labels for each estimator in the chain. estimators_ : list @@ -456,7 +454,7 @@ def fit(self, X, Y): self : object Returns self. """ - X, Y = check_X_y(X, Y, multi_output=True, accept_sparse=True) + X, Y = check_X_y(X, Y, multi_output=True, accept_sparse=True) random_state = check_random_state(self.random_state) check_array(X, accept_sparse=True) diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py index ac59305e12378..907b379731a2f 100644 --- a/sklearn/neighbors/approximate.py +++ b/sklearn/neighbors/approximate.py @@ -122,8 +122,6 @@ class LSHForest(BaseEstimator, KNeighborsMixin, RadiusNeighborsMixin): points. Its value does not depend on the norm of the vector points but only on their relative angles. - Read more in the :ref:`User Guide `. - Parameters ---------- @@ -132,9 +130,9 @@ class LSHForest(BaseEstimator, KNeighborsMixin, RadiusNeighborsMixin): radius : float, optinal (default = 1.0) Radius from the data point to its neighbors. This is the parameter - space to use by default for the :meth`radius_neighbors` queries. + space to use by default for the :meth:`radius_neighbors` queries. - n_candidates : int (default = 10) + n_candidates : int (default = 50) Minimum number of candidates evaluated per estimator, assuming enough items meet the `min_hash_match` constraint. 
diff --git a/sklearn/neighbors/dist_metrics.pyx b/sklearn/neighbors/dist_metrics.pyx index eb4b292dbdc38..29c83a341b7ba 100755 --- a/sklearn/neighbors/dist_metrics.pyx +++ b/sklearn/neighbors/dist_metrics.pyx @@ -114,7 +114,7 @@ cdef class DistanceMetric: >>> dist = DistanceMetric.get_metric('euclidean') >>> X = [[0, 1, 2], - [3, 4, 5]]) + [3, 4, 5]] >>> dist.pairwise(X) array([[ 0. , 5.19615242], [ 5.19615242, 0. ]]) diff --git a/sklearn/neighbors/lof.py b/sklearn/neighbors/lof.py index 3559d76cf898a..b3686d69d771b 100644 --- a/sklearn/neighbors/lof.py +++ b/sklearn/neighbors/lof.py @@ -85,8 +85,8 @@ class LocalOutlierFactor(NeighborsBase, KNeighborsMixin, UnsupervisedMixin): p : integer, optional (default=2) Parameter for the Minkowski metric from - :ref:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this is - equivalent to using manhattan_distance (l1), and euclidean_distance + :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this + is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. metric_params : dict, optional (default=None) diff --git a/sklearn/neighbors/nearest_centroid.py b/sklearn/neighbors/nearest_centroid.py index d15013a1e299a..ec00ec87aeabf 100644 --- a/sklearn/neighbors/nearest_centroid.py +++ b/sklearn/neighbors/nearest_centroid.py @@ -147,7 +147,7 @@ def fit(self, X, y): dataset_centroid_ = np.mean(X, axis=0) # m parameter for determining deviation - m = np.sqrt((1. / nk) + (1. / n_samples)) + m = np.sqrt((1. / nk) - (1. / n_samples)) # Calculate deviation using the standard deviation of centroids. 
variance = (X - self.centroids_[y_ind]) ** 2 variance = variance.sum(axis=0) diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index 65a0f7d64e249..e50a2e6f07445 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -97,6 +97,20 @@ def test_pickle(): " after pickling (classification).") +def test_shrinkage_correct(): + # Ensure that the shrinking is correct. + # The expected result is calculated by R (pamr), + # which is implemented by the author of the original paper. + # (One needs to modify the code to output the new centroid in pamr.predict) + + X = np.array([[0, 1], [1, 0], [1, 1], [2, 0], [6, 8]]) + y = np.array([1, 1, 2, 2, 2]) + clf = NearestCentroid(shrink_threshold=0.1) + clf.fit(X, y) + expected_result = np.array([[0.7787310, 0.8545292], [2.814179, 2.763647]]) + np.testing.assert_array_almost_equal(clf.centroids_, expected_result) + + def test_shrinkage_threshold_decoded_y(): clf = NearestCentroid(shrink_threshold=0.01) y_ind = np.asarray(y) diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index af1eca3b201d5..ae6df22c2fc5a 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -13,6 +13,7 @@ import warnings from ..base import BaseEstimator, ClassifierMixin, RegressorMixin +from ..base import is_classifier from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer from ..model_selection import train_test_split @@ -268,7 +269,7 @@ def _initialize(self, y, layer_units): self.n_layers_ = len(layer_units) # Output for regression - if not isinstance(self, ClassifierMixin): + if not is_classifier(self): self.out_activation_ = 'identity' # Output for multi class elif self._label_binarizer.y_type_ == 'multiclass': @@ -491,7 +492,7 @@ def
_fit_stochastic(self, X, y, activations, deltas, coef_grads, X, X_val, y, y_val = train_test_split( X, y, random_state=self._random_state, test_size=self.validation_fraction) - if isinstance(self, ClassifierMixin): + if is_classifier(self): y_val = self._label_binarizer.inverse_transform(y_val) else: X_val = None diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index eb19494c83b75..aec1ec7c045de 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -244,6 +244,24 @@ class MinMaxScaler(BaseEstimator, TransformerMixin): .. versionadded:: 0.17 *data_range_* + Examples + -------- + >>> from sklearn.preprocessing import MinMaxScaler + >>> + >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]] + >>> scaler = MinMaxScaler() + >>> print(scaler.fit(data)) + MinMaxScaler(copy=True, feature_range=(0, 1)) + >>> print(scaler.data_max_) + [ 1. 18.] + >>> print(scaler.transform(data)) + [[ 0. 0. ] + [ 0.25 0.25] + [ 0.5 0.5 ] + [ 1. 1. ]] + >>> print(scaler.transform([[2, 2]])) + [[ 1.5 0. ]] + See also -------- minmax_scale: Equivalent function without the estimator API. @@ -504,6 +522,24 @@ class StandardScaler(BaseEstimator, TransformerMixin): The number of samples processed by the estimator. Will be reset on new calls to fit, but increments across ``partial_fit`` calls. + Examples + -------- + >>> from sklearn.preprocessing import StandardScaler + >>> + >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]] + >>> scaler = StandardScaler() + >>> print(scaler.fit(data)) + StandardScaler(copy=True, with_mean=True, with_std=True) + >>> print(scaler.mean_) + [ 0.5 0.5] + >>> print(scaler.transform(data)) + [[-1. -1.] + [-1. -1.] + [ 1. 1.] + [ 1. 1.]] + >>> print(scaler.transform([[2, 2]])) + [[ 3. 3.]] + See also -------- scale: Equivalent function without the estimator API. @@ -909,9 +945,9 @@ class RobustScaler(BaseEstimator, TransformerMixin): and the 3rd quartile (75th quantile). 
Centering and scaling happen independently on each feature (or each - sample, depending on the `axis` argument) by computing the relevant + sample, depending on the ``axis`` argument) by computing the relevant statistics on the samples in the training set. Median and interquartile - range are then stored to be used on later data using the `transform` + range are then stored to be used on later data using the ``transform`` method. Standardization of a dataset is a common requirement for many @@ -928,7 +964,7 @@ class RobustScaler(BaseEstimator, TransformerMixin): ---------- with_centering : boolean, True by default If True, center the data before scaling. - This does not work (and will raise an exception) when attempted on + This will cause ``transform`` to raise an exception when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory. @@ -1023,11 +1059,14 @@ def fit(self, X, y=None): return self def transform(self, X): - """Center and scale the data + """Center and scale the data. + + Can be called on sparse input, provided that ``RobustScaler`` has been + fitted to dense input and ``with_centering=False``. Parameters ---------- - X : array-like + X : {array-like, sparse matrix} The data used to scale along the specified axis. """ if self.with_centering: diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index c690ac1f151f4..10eebba86f04e 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -34,8 +34,8 @@ >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() ->>> random_unlabeled_points = np.where(np.random.randint(0, 2, -... 
size=len(iris.target))) +>>> rng = np.random.RandomState(42) +>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -53,6 +53,7 @@ """ # Authors: Clay Woolam +# Utkarsh Upadhyay # License: BSD from abc import ABCMeta, abstractmethod @@ -67,13 +68,7 @@ from ..utils.extmath import safe_sparse_dot from ..utils.multiclass import check_classification_targets from ..utils.validation import check_X_y, check_is_fitted, check_array - - -# Helper functions - -def _not_converged(y_truth, y_prediction, tol=1e-3): - """basic convergence check""" - return np.abs(y_truth - y_prediction).sum() > tol +from ..exceptions import ConvergenceWarning class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, @@ -97,7 +92,7 @@ class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, alpha : float Clamping factor - max_iter : float + max_iter : integer Change maximum number of iterations allowed tol : float @@ -264,12 +259,14 @@ def fit(self, X, y): l_previous = np.zeros((self.X_.shape[0], n_classes)) - remaining_iter = self.max_iter unlabeled = unlabeled[:, np.newaxis] if sparse.isspmatrix(graph_matrix): graph_matrix = graph_matrix.tocsr() - while (_not_converged(self.label_distributions_, l_previous, self.tol) - and remaining_iter > 1): + + for self.n_iter_ in range(self.max_iter): + if np.abs(self.label_distributions_ - l_previous).sum() < self.tol: + break + l_previous = self.label_distributions_ self.label_distributions_ = safe_sparse_dot( graph_matrix, self.label_distributions_) @@ -285,7 +282,12 @@ def fit(self, X, y): # clamp self.label_distributions_ = np.multiply( alpha, self.label_distributions_) + y_static - remaining_iter -= 1 + else: + warnings.warn( + 'max_iter=%d was reached without convergence.' 
% self.max_iter, + category=ConvergenceWarning + ) + self.n_iter_ += 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer @@ -294,7 +296,6 @@ def fit(self, X, y): transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)] self.transduction_ = transduction.ravel() - self.n_iter_ = self.max_iter - remaining_iter return self @@ -324,7 +325,7 @@ class LabelPropagation(BaseLabelPropagation): This parameter will be removed in 0.21. 'alpha' is fixed to zero in 'LabelPropagation'. - max_iter : float + max_iter : integer Change maximum number of iterations allowed tol : float @@ -358,8 +359,8 @@ class LabelPropagation(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelPropagation >>> label_prop_model = LabelPropagation() >>> iris = datasets.load_iris() - >>> random_unlabeled_points = np.where(np.random.randint(0, 2, - ... size=len(iris.target))) + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) @@ -441,7 +442,7 @@ class LabelSpreading(BaseLabelPropagation): alpha=0 means keeping the initial label information; alpha=1 means replacing all initial information. - max_iter : float + max_iter : integer maximum number of iterations allowed tol : float @@ -475,8 +476,8 @@ class LabelSpreading(BaseLabelPropagation): >>> from sklearn.semi_supervised import LabelSpreading >>> label_prop_model = LabelSpreading() >>> iris = datasets.load_iris() - >>> random_unlabeled_points = np.where(np.random.randint(0, 2, - ... 
size=len(iris.target))) + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 >>> labels = np.copy(iris.target) >>> labels[random_unlabeled_points] = -1 >>> label_prop_model.fit(iris.data, labels) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 3d5bd21a89110..8cd0cce41d7e9 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -9,6 +9,7 @@ from sklearn.semi_supervised import label_propagation from sklearn.metrics.pairwise import rbf_kernel from sklearn.datasets import make_classification +from sklearn.exceptions import ConvergenceWarning from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal @@ -70,7 +71,7 @@ def test_alpha_deprecation(): y[::3] = -1 lp_default = label_propagation.LabelPropagation(kernel='rbf', gamma=0.1) - lp_default_y = assert_no_warnings(lp_default.fit, X, y).transduction_ + lp_default_y = lp_default.fit(X, y).transduction_ lp_0 = label_propagation.LabelPropagation(alpha=0, kernel='rbf', gamma=0.1) lp_0_y = assert_warns(DeprecationWarning, lp_0.fit, X, y).transduction_ @@ -108,7 +109,8 @@ def test_label_propagation_closed_form(): labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0] clf = label_propagation.LabelPropagation(max_iter=10000, - gamma=0.1).fit(X, y) + gamma=0.1) + clf.fit(X, y) # adopting notation from Zhu et al 2002 T_bar = clf._build_graph() Tuu = T_bar[np.meshgrid(unlabelled_idx, unlabelled_idx, indexing='ij')] @@ -145,3 +147,22 @@ def test_convergence_speed(): # this should converge quickly: assert mdl.n_iter_ < 10 assert_array_equal(mdl.predict(X), [0, 1, 1]) + + +def test_convergence_warning(): + # This is a non-regression test for #5774 + X = np.array([[1., 0.], [0., 1.], [1., 2.5]]) + y = np.array([0, 1, -1]) + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1) + 
assert_warns(ConvergenceWarning, mdl.fit, X, y) + assert_equal(mdl.n_iter_, mdl.max_iter) + + mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1) + assert_warns(ConvergenceWarning, mdl.fit, X, y) + assert_equal(mdl.n_iter_, mdl.max_iter) + + mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500) + assert_no_warnings(mdl.fit, X, y) + + mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500) + assert_no_warnings(mdl.fit, X, y) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 4833042827361..7c6642a504ad1 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -108,6 +108,24 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, intercept_ : array, shape = [1] if n_classes == 2 else [n_classes] Constants in decision function. + Examples + -------- + >>> from sklearn.svm import LinearSVC + >>> from sklearn.datasets import make_classification + >>> X, y = make_classification(n_features=4, random_state=0) + >>> clf = LinearSVC(random_state=0) + >>> clf.fit(X, y) + LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, + intercept_scaling=1, loss='squared_hinge', max_iter=1000, + multi_class='ovr', penalty='l2', random_state=0, tol=0.0001, + verbose=0) + >>> print(clf.coef_) + [[ 0.08551385 0.39414796 0.49847831 0.37513797]] + >>> print(clf.intercept_) + [ 0.28418066] + >>> print(clf.predict([[0, 0, 0, 0]])) + [1] + Notes ----- The underlying C implementation uses a random number generator to @@ -302,6 +320,22 @@ class LinearSVR(LinearModel, RegressorMixin): intercept_ : array, shape = [1] if n_classes == 2 else [n_classes] Constants in decision function. 
+ Examples + -------- + >>> from sklearn.svm import LinearSVR + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(n_features=4, random_state=0) + >>> regr = LinearSVR(random_state=0) + >>> regr.fit(X, y) + LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True, + intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000, + random_state=0, tol=0.0001, verbose=0) + >>> print(regr.coef_) + [ 16.35750999 26.91499923 42.30652207 60.47843124] + >>> print(regr.intercept_) + [-4.29756543] + >>> print(regr.predict([[0, 0, 0, 0]])) + [-4.29756543] See also -------- diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index a7a878a73160e..8eb5da1908ba7 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -5,9 +5,11 @@ from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_true +from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns +from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings @@ -223,6 +225,38 @@ def test_lda_scaling(): 'using covariance: %s' % solver) +def test_lda_store_covariance(): + # Test for solver 'lsqr' and 'eigen' + # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers + for solver in ('lsqr', 'eigen'): + clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + # Test the actual attribute: + clf = LinearDiscriminantAnalysis(solver=solver, + store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + assert_array_almost_equal( + clf.covariance_, + np.array([[0.422222, 0.088889], [0.088889, 0.533333]]) + ) + +
# Test for SVD solver, the default is to not set the covariance_ attribute + clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6) + assert_false(hasattr(clf, 'covariance_')) + + # Test the actual attribute: + clf = LinearDiscriminantAnalysis(solver=solver, + store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) + + assert_array_almost_equal( + clf.covariance_, + np.array([[0.422222, 0.088889], [0.088889, 0.533333]]) + ) + + def test_qda(): # QDA classification. # This checks that QDA implements fit and predict and returns @@ -262,26 +296,40 @@ def test_qda_priors(): assert_greater(n_pos2, n_pos) -def test_qda_store_covariances(): +def test_qda_store_covariance(): # The default is to not set the covariances_ attribute clf = QuadraticDiscriminantAnalysis().fit(X6, y6) - assert_true(not hasattr(clf, 'covariances_')) + assert_false(hasattr(clf, 'covariance_')) # Test the actual attribute: - clf = QuadraticDiscriminantAnalysis(store_covariances=True).fit(X6, y6) - assert_true(hasattr(clf, 'covariances_')) + clf = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X6, y6) + assert_true(hasattr(clf, 'covariance_')) assert_array_almost_equal( - clf.covariances_[0], + clf.covariance_[0], np.array([[0.7, 0.45], [0.45, 0.7]]) ) assert_array_almost_equal( - clf.covariances_[1], + clf.covariance_[1], np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]]) ) +def test_qda_deprecation(): + # Test the deprecation + clf = QuadraticDiscriminantAnalysis(store_covariances=True) + assert_warns_message(DeprecationWarning, "'store_covariances' was renamed" + " to store_covariance in version 0.19 and will be " + "removed in 0.21.", clf.fit, X, y) + + # check that covariance_ (and covariances_ with warning) is stored + assert_warns_message(DeprecationWarning, "Attribute covariances_ was " + "deprecated in version 0.19 and will be removed " + "in 0.21.
Use covariance_ instead", getattr, clf, + 'covariances_') + + def test_qda_regularization(): # the default is reg_param=0. and will cause issues # when there is a constant variable diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 7008fff41aaa1..45222a1c12a68 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -704,6 +704,9 @@ def test_ecoc_float_y(): ovo = OutputCodeClassifier(LinearSVC()) assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y) + ovo = OutputCodeClassifier(LinearSVC(), code_size=-1) + assert_raise_message(ValueError, "code_size should be greater than 0," + " got -1", ovo.fit, X, y) def test_pairwise_indices(): diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 0c58d04c27581..5d5de53bbde6c 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -29,6 +29,7 @@ from sklearn.multioutput import MultiOutputClassifier from sklearn.multioutput import MultiOutputRegressor from sklearn.svm import LinearSVC +from sklearn.base import ClassifierMixin from sklearn.utils import shuffle @@ -380,6 +381,8 @@ def test_classifier_chain_fit_and_predict_with_logistic_regression(): assert_equal([c.coef_.size for c in classifier_chain.estimators_], list(range(X.shape[1], X.shape[1] + Y.shape[1]))) + assert isinstance(classifier_chain, ClassifierMixin) + def test_classifier_chain_fit_and_predict_with_linear_svc(): # Fit classifier chain and verify predict performance using LinearSVC diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 0bf70073d34c7..230c1cc23102d 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -6,7 +6,7 @@ from numpy.random import RandomState -from sklearn.base import ClassifierMixin +from sklearn.base import is_classifier from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.ensemble import 
GradientBoostingClassifier from sklearn.tree import export_graphviz @@ -292,7 +292,7 @@ def test_precision(): len(search("\.\d+", finding.group()).group()), precision + 1) # check impurity - if isinstance(clf, ClassifierMixin): + if is_classifier(clf): pattern = "gini = \d+\.\d+" else: pattern = "friedman_mse = \d+\.\d+" diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 93db4eb98f34e..789ffb8b61cac 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -29,6 +29,7 @@ from ..base import BaseEstimator from ..base import ClassifierMixin from ..base import RegressorMixin +from ..base import is_classifier from ..externals import six from ..utils import check_array from ..utils import check_random_state @@ -123,7 +124,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, # Determine output settings n_samples, self.n_features_ = X.shape - is_classification = isinstance(self, ClassifierMixin) + is_classification = is_classifier(self) y = np.atleast_1d(y) expanded_class_weight = None @@ -413,7 +414,7 @@ def predict(self, X, check_input=True): n_samples = X.shape[0] # Classification - if isinstance(self, ClassifierMixin): + if is_classifier(self): if self.n_outputs_ == 1: return self.classes_.take(np.argmax(proba, axis=1), axis=0) @@ -879,8 +880,11 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): criterion : string, optional (default="mse") The function to measure the quality of a split. Supported criteria are "mse" for the mean squared error, which is equal to variance - reduction as feature selection criterion, and "mae" for the mean - absolute error. + reduction as feature selection criterion and minimizes the L2 loss + using the mean of each terminal node, "friedman_mse", which uses mean + squared error with Friedman's improvement score for potential splits, + and "mae" for the mean absolute error, which minimizes the L1 loss + using the median of each terminal node. .. 
versionadded:: 0.18 Mean Absolute Error (MAE) criterion. diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index fc71c387903a3..4b2665cdd4f77 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -135,8 +135,15 @@ def safe_indexing(X, indices): ------- subset Subset of X on first axis + + Notes + ----- + CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are + not supported. """ if hasattr(X, "iloc"): + # Work-around for indexing with read-only indices in pandas + indices = indices if indices.flags.writeable else indices.copy() # Pandas Dataframes and Series try: return X.iloc[indices] diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 4760253a5a43e..ba83535988fad 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -35,8 +35,8 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.base import (clone, ClassifierMixin, RegressorMixin, - TransformerMixin, ClusterMixin, BaseEstimator) +from sklearn.base import (clone, TransformerMixin, ClusterMixin, + BaseEstimator, is_classifier, is_regressor) from sklearn.metrics import accuracy_score, adjusted_rand_score, f1_score from sklearn.random_projection import BaseRandomProjection @@ -208,10 +208,10 @@ def _yield_clustering_checks(name, clusterer): def _yield_all_checks(name, estimator): for check in _yield_non_meta_checks(name, estimator): yield check - if isinstance(estimator, ClassifierMixin): + if is_classifier(estimator): for check in _yield_classifier_checks(name, estimator): yield check - if isinstance(estimator, RegressorMixin): + if is_regressor(estimator): for check in _yield_regressor_checks(name, estimator): yield check if isinstance(estimator, TransformerMixin): @@ -980,7 +980,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig): X -= X.min() try: - if isinstance(estimator, ClassifierMixin): + if is_classifier(estimator): classes = 
np.unique(y) estimator.partial_fit(X, y, classes=classes) else: diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py index 93235f07b467e..044b8c70d8b71 100644 --- a/sklearn/utils/random.py +++ b/sklearn/utils/random.py @@ -184,7 +184,7 @@ def random_choice_csc(n_samples, classes, class_probability=None, random_state=random_state) indices.extend(ind_sample) - # Normalize probabilites for the nonzero elements + # Normalize probabilities for the nonzero elements classes_j_nonzero = classes[j] != 0 class_probability_nz = class_prob_j[classes_j_nonzero] class_probability_nz_norm = (class_probability_nz / diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 8ac31764e89ad..1b3a1ea7e597a 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -6,7 +6,7 @@ from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.testing import (assert_raises_regex, assert_true, - assert_equal) + assert_equal, ignore_warnings) from sklearn.utils.estimator_checks import check_estimator from sklearn.utils.estimator_checks import set_random_state from sklearn.utils.estimator_checks import set_checking_parameters @@ -203,7 +203,9 @@ def test_check_estimator_clones(): for Estimator in [GaussianMixture, LinearRegression, RandomForestClassifier, NMF, SGDClassifier, MiniBatchKMeans]: - est = Estimator() + with ignore_warnings(category=FutureWarning): + # when 'est = SGDClassifier()' + est = Estimator() set_checking_parameters(est) set_random_state(est) # without fitting @@ -211,7 +213,9 @@ def test_check_estimator_clones(): check_estimator(est) assert_equal(old_hash, joblib.hash(est)) - est = Estimator() + with ignore_warnings(category=FutureWarning): + # when 'est = SGDClassifier()' + est = Estimator() set_checking_parameters(est) set_random_state(est) # with fitting diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 
c0fd079a932fb..fa93bf34fe6bc 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -1,4 +1,4 @@ -from itertools import chain +from itertools import chain, product import warnings import numpy as np @@ -200,10 +200,15 @@ def test_safe_indexing_pandas(): # this happens in joblib memmapping X.setflags(write=False) X_df_readonly = pd.DataFrame(X) - with warnings.catch_warnings(record=True): - X_df_ro_indexed = safe_indexing(X_df_readonly, inds) + inds_readonly = inds.copy() + inds_readonly.setflags(write=False) - assert_array_equal(np.array(X_df_ro_indexed), X_indexed) + for this_df, this_inds in product([X_df, X_df_readonly], + [inds, inds_readonly]): + with warnings.catch_warnings(record=True): + X_df_indexed = safe_indexing(this_df, this_inds) + + assert_array_equal(np.array(X_df_indexed), X_indexed) def test_safe_indexing_mock_pandas():