From f16c987592d01890cf61679f7f323bc15cbd1269 Mon Sep 17 00:00:00 2001 From: Logan Thomas Date: Tue, 12 Apr 2022 03:53:27 -0500 Subject: [PATCH] DOC: various spellcheck and typos (#23093) --- asv_benchmarks/benchmarks/linear_model.py | 2 +- doc/computing/computational_performance.rst | 2 +- doc/conftest.py | 2 +- doc/datasets/sample_generators.rst | 4 ++-- doc/developers/advanced_installation.rst | 4 ++-- doc/developers/contributing.rst | 4 ++-- doc/developers/develop.rst | 2 +- doc/glossary.rst | 2 +- doc/inspection.rst | 10 ++++---- doc/modules/clustering.rst | 2 +- doc/modules/cross_validation.rst | 6 ++--- doc/modules/grid_search.rst | 6 ++--- doc/modules/mixture.rst | 4 ++-- doc/modules/tree.rst | 14 +++++------ doc/related_projects.rst | 24 +++++++++---------- .../plot_calibration_multiclass.py | 2 +- 16 files changed, 45 insertions(+), 45 deletions(-) diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py index a533a1a97cfb7..663ceca61d063 100644 --- a/asv_benchmarks/benchmarks/linear_model.py +++ b/asv_benchmarks/benchmarks/linear_model.py @@ -110,7 +110,7 @@ def skip(self, params): class LinearRegressionBenchmark(Predictor, Estimator, Benchmark): """ - Benchmarks for Linear Reagression. + Benchmarks for Linear Regression. """ param_names = ["representation"] diff --git a/doc/computing/computational_performance.rst b/doc/computing/computational_performance.rst index 32a485e21a2a5..ceb0a0af2e66c 100644 --- a/doc/computing/computational_performance.rst +++ b/doc/computing/computational_performance.rst @@ -128,7 +128,7 @@ by quite a bit as only the non zero valued features impact the dot product and thus the model predictions. Hence if you have 100 non zeros in 1e6 dimensional space, you only need 100 multiply and add operation instead of 1e6. 
-Calculation over a dense representation, however, may leverage highly optimised +Calculation over a dense representation, however, may leverage highly optimized vector operations and multithreading in BLAS, and tends to result in fewer CPU cache misses. So the sparsity should typically be quite high (10% non-zeros max, to be checked depending on the hardware) for the sparse input diff --git a/doc/conftest.py b/doc/conftest.py index 719c46fccf2d2..10253efeabf98 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -109,7 +109,7 @@ def skip_if_matplotlib_not_installed(fname): def pytest_runtest_setup(item): fname = item.fspath.strpath - # normalise filename to use forward slashes on Windows for easier handling + # normalize filename to use forward slashes on Windows for easier handling # later fname = fname.replace(os.sep, "/") diff --git a/doc/datasets/sample_generators.rst b/doc/datasets/sample_generators.rst index deed09f24e5fe..7dc123f08424c 100644 --- a/doc/datasets/sample_generators.rst +++ b/doc/datasets/sample_generators.rst @@ -25,7 +25,7 @@ Both :func:`make_blobs` and :func:`make_classification` create multiclass datasets by allocating each class one or more normally-distributed clusters of points. :func:`make_blobs` provides greater control regarding the centers and standard deviations of each cluster, and is used to demonstrate clustering. -:func:`make_classification` specialises in introducing noise by way of: +:func:`make_classification` specializes in introducing noise by way of: correlated, redundant and uninformative features; multiple Gaussian clusters per class; and linear transformations of the feature space. @@ -41,7 +41,7 @@ near-equal-size classes separated by concentric hyperspheres. :func:`make_circles` and :func:`make_moons` generate 2d binary classification datasets that are challenging to certain algorithms (e.g. centroid-based clustering or linear classification), including optional Gaussian noise. -They are useful for visualisation. 
:func:`make_circles` produces Gaussian data +They are useful for visualization. :func:`make_circles` produces Gaussian data with a spherical decision boundary for binary classification, while :func:`make_moons` produces two interleaving half circles. diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index 2e8411e264019..89dc6e5267ded 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -145,8 +145,8 @@ Building Scikit-learn also requires: (before cythonization) will force the build to fail if OpenMP is not supported. -Since version 0.21, scikit-learn automatically detects and use the linear -algebrea library used by SciPy **at runtime**. Scikit-learn has therefore no +Since version 0.21, scikit-learn automatically detects and uses the linear +algebra library used by SciPy **at runtime**. Scikit-learn has therefore no build dependency on BLAS/LAPACK implementations such as OpenBlas, Atlas, Blis or MKL. diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index cfe6fac568970..445a9d2211733 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -381,9 +381,9 @@ In order to ease the reviewing process, we recommend that your contribution complies with the following rules before marking a PR as ``[MRG]``. The **bolded** ones are especially important: -1. **Give your pull request a helpful title** that summarises what your +1. **Give your pull request a helpful title** that summarizes what your contribution does. This title will often become the commit message once - merged so it should summarise your contribution for posterity. In some + merged so it should summarize your contribution for posterity. In some cases "Fix <ISSUE TITLE>" is enough. "Fix #<ISSUE NUMBER>" is never a good title. 
diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index 7041f78df81c9..a60d60260b485 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -336,7 +336,7 @@ estimator:: ... self.my_extra_param = my_extra_param The parameter `deep` will control whether or not the parameters of the -`subsestimator` should be reported. Thus when `deep=True`, the output will be:: +`subestimator` should be reported. Thus when `deep=True`, the output will be:: >>> my_estimator = MyEstimator(subestimator=LogisticRegression()) >>> for param, value in my_estimator.get_params(deep=True).items(): diff --git a/doc/glossary.rst b/doc/glossary.rst index 04dcfa606552e..b52dcde382246 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -1696,7 +1696,7 @@ See concept :term:`attribute`. predictors. ``coef_`` - The weight/coefficient matrix of a generalised linear model + The weight/coefficient matrix of a generalized linear model :term:`predictor`, of shape ``(n_features,)`` for binary classification and single-output regression, ``(n_classes, n_features)`` for multiclass classification and ``(n_targets, n_features)`` for diff --git a/doc/inspection.rst b/doc/inspection.rst index 72305bec73a10..57c1cfc3275e8 100644 --- a/doc/inspection.rst +++ b/doc/inspection.rst @@ -10,14 +10,14 @@ Inspection ---------- Predictive performance is often the main goal of developing machine learning -models. Yet summarising performance with an evaluation metric is often +models. Yet summarizing performance with an evaluation metric is often insufficient: it assumes that the evaluation metric and test dataset perfectly reflect the target domain, which is rarely true. In certain domains, a model needs a certain level of interpretability before it can be deployed. -A model that is exhibiting performance issues needs to be debugged for one to -understand the model's underlying issue. 
The -:mod:`sklearn.inspection` module provides tools to help understand the -predictions from a model and what affects them. This can be used to +A model that is exhibiting performance issues needs to be debugged for one to +understand the model's underlying issue. The +:mod:`sklearn.inspection` module provides tools to help understand the +predictions from a model and what affects them. This can be used to evaluate assumptions and biases of a model, design a better model, or to diagnose issues with model performance. diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 1775ec5386ab5..48b0785c8544c 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -451,7 +451,7 @@ to be specified in advance. It works well for a small number of clusters, but is not advised for many clusters. For two clusters, SpectralClustering solves a convex relaxation of the -`normalised cuts `_ +`normalized cuts `_ problem on the similarity graph: cutting the graph in two so that the weight of the edges cut is small compared to the weights of the edges inside each cluster. This criteria is especially interesting when working on images, where diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 6705a3da4a9b0..3ecab9bd8eb04 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -278,7 +278,7 @@ can be used (otherwise, an exception is raised). over cross-validation folds, whereas :func:`cross_val_predict` simply returns the labels (or probabilities) from several distinct models undistinguished. Thus, :func:`cross_val_predict` is not an appropriate - measure of generalisation error. + measure of generalization error. The function :func:`cross_val_predict` is appropriate for: @@ -844,13 +844,13 @@ cross-validation splitter. 
For example:: Cross validation of time series data ------------------------------------ -Time series data is characterised by the correlation between observations +Time series data is characterized by the correlation between observations that are near in time (*autocorrelation*). However, classical cross-validation techniques such as :class:`KFold` and :class:`ShuffleSplit` assume the samples are independent and identically distributed, and would result in unreasonable correlation between training and testing instances (yielding poor estimates of -generalisation error) on time series data. Therefore, it is very important +generalization error) on time series data. Therefore, it is very important to evaluate our model for time series data on the "future" observations least like those that are used to train the model. To achieve this, one solution is provided by :class:`TimeSeriesSplit`. diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 19910d5a16da3..e92099a9833e2 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -108,7 +108,7 @@ Randomized Parameter Optimization ================================= While using a grid of parameter settings is currently the most widely used method for parameter optimization, other search methods have more -favourable properties. +favorable properties. :class:`RandomizedSearchCV` implements a randomized search over parameters, where each setting is sampled from a distribution over possible parameter values. This has two main benefits over an exhaustive search: @@ -495,10 +495,10 @@ min_resources = 20``. .. _successive_halving_cv_results: -Analysing results with the `cv_results_` attribute +Analyzing results with the `cv_results_` attribute -------------------------------------------------- -The ``cv_results_`` attribute contains useful information for analysing the +The ``cv_results_`` attribute contains useful information for analyzing the results of a search. 
It can be converted to a pandas dataframe with ``df = pd.DataFrame(est.cv_results_)``. The ``cv_results_`` attribute of :class:`HalvingGridSearchCV` and :class:`HalvingRandomSearchCV` is similar diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst index 114b5ea3e8eb3..2037f15fe3ee8 100644 --- a/doc/modules/mixture.rst +++ b/doc/modules/mixture.rst @@ -139,7 +139,7 @@ Choice of the Initialization Method ----------------------------------- There is a choice of four initialization methods (as well as inputting user defined -initial means) to generate the initial centers for the model components: +initial means) to generate the initial centers for the model components: k-means (default) This applies a traditional k-means clustering algorithm. @@ -159,7 +159,7 @@ random_from_data results if the chosen points are too close to each other. random - Centers are chosen as a small pertubation away from the mean of all data. + Centers are chosen as a small perturbation away from the mean of all data. This method is simple but can lead to the model taking longer to converge. .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_init_001.png diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index 4284f27beb4cc..90c16d2ac056f 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -23,10 +23,10 @@ the tree, the more complex the decision rules and the fitter the model. Some advantages of decision trees are: - - Simple to understand and to interpret. Trees can be visualised. + - Simple to understand and to interpret. Trees can be visualized. - Requires little data preparation. Other techniques often require data - normalisation, dummy variables need to be created and blank values to + normalization, dummy variables need to be created and blank values to be removed. Note however that this module does not support missing values. @@ -35,7 +35,7 @@ Some advantages of decision trees are: - Able to handle both numerical and categorical data. 
However scikit-learn implementation does not support categorical variables for now. Other - techniques are usually specialised in analysing datasets that have only one type + techniques are usually specialized in analyzing datasets that have only one type of variable. See :ref:`algorithms ` for more information. @@ -56,7 +56,7 @@ Some advantages of decision trees are: The disadvantages of decision trees include: - Decision-tree learners can create over-complex trees that do not - generalise the data well. This is called overfitting. Mechanisms + generalize the data well. This is called overfitting. Mechanisms such as pruning, setting the minimum number of samples required at a leaf node or setting the maximum depth of the tree are necessary to avoid this problem. @@ -345,7 +345,7 @@ Tips on practical use in gaining more insights about how the decision tree makes predictions, which is important for understanding the important features in the data. - * Visualise your tree as you are training by using the ``export`` + * Visualize your tree as you are training by using the ``export`` function. Use ``max_depth=3`` as an initial tree depth to get a feel for how the tree is fitting to your data, and then increase the depth. @@ -407,7 +407,7 @@ The algorithm creates a multiway tree, finding for each node (i.e. in a greedy manner) the categorical feature that will yield the largest information gain for categorical targets. Trees are grown to their maximum size and then a pruning step is usually applied to improve the -ability of the tree to generalise to unseen data. +ability of the tree to generalize to unseen data. C4.5 is the successor to ID3 and removed the restriction that features must be categorical by dynamically defining a discrete attribute (based @@ -427,7 +427,7 @@ it differs in that it supports numerical target variables (regression) and does not compute rule sets. 
CART constructs binary trees using the feature and threshold that yield the largest information gain at each node. -scikit-learn uses an optimised version of the CART algorithm; however, scikit-learn +scikit-learn uses an optimized version of the CART algorithm; however, scikit-learn implementation does not support categorical variables for now. .. _ID3: https://en.wikipedia.org/wiki/ID3_algorithm diff --git a/doc/related_projects.rst b/doc/related_projects.rst index 24bfd47781b6f..0f5532bd52357 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -7,7 +7,7 @@ Related Projects Projects implementing the scikit-learn estimator API are encouraged to use the `scikit-learn-contrib template `_ which facilitates best practices for testing and documenting estimators. -The `scikit-learn-contrib GitHub organisation `_ +The `scikit-learn-contrib GitHub organization `_ also accepts high-quality contributions of repositories conforming to this template. @@ -45,10 +45,10 @@ enhance the functionality of scikit-learn's estimators. operators to design a machine learning pipeline, including data and feature preprocessors as well as the estimators. Works as a drop-in replacement for a scikit-learn estimator. - + - `Featuretools `_ - A framework to perform automated feature engineering. It can be used for - transforming temporal and relational datasets into feature matrices for + A framework to perform automated feature engineering. It can be used for + transforming temporal and relational datasets into feature matrices for machine learning. - `Neuraxle `_ @@ -66,9 +66,9 @@ enhance the functionality of scikit-learn's estimators. 
**Experimentation frameworks** -- `Neptune `_ Metadata store for MLOps, - built for teams that run a lot of experiments.‌ It gives you a single - place to log, store, display, organize, compare, and query all your +- `Neptune `_ Metadata store for MLOps, + built for teams that run a lot of experiments. It gives you a single + place to log, store, display, organize, compare, and query all your model building metadata. - `Sacred `_ Tool to help you configure, @@ -82,7 +82,7 @@ enhance the functionality of scikit-learn's estimators. wrapper around scikit-learn that makes it easy to run machine learning experiments with multiple learners and large feature sets. -**Model inspection and visualisation** +**Model inspection and visualization** - `dtreeviz `_ A python library for decision tree visualization and model interpretation. @@ -143,7 +143,7 @@ and tasks. **Structured learning** -- `tslearn `_ A machine learning library for time series +- `tslearn `_ A machine learning library for time series that offers tools for pre-processing and feature extraction as well as dedicated models for clustering, classification and regression. - `sktime `_ A scikit-learn compatible toolbox for machine learning with time series including time series classification/regression and (supervised/panel) forecasting. @@ -182,7 +182,7 @@ and tasks. **Federated Learning** -- `Flower `_ A friendly federated learning framework with a +- `Flower `_ A friendly federated learning framework with a unified approach that can federate any workload, any ML framework, and any programming language. **Broad scope** @@ -190,7 +190,7 @@ and tasks. - `mlxtend `_ Includes a number of additional estimators as well as model visualization utilities. -- `scikit-lego `_ A number of scikit-learn compatible +- `scikit-lego `_ A number of scikit-learn compatible custom transformers, models and metrics, focusing on solving practical industry tasks. 
**Other regression and classification** @@ -354,7 +354,7 @@ and promote community efforts. (`source `__) - `Spanish translation `_ (`source `__) - + .. rubric:: Footnotes diff --git a/examples/calibration/plot_calibration_multiclass.py b/examples/calibration/plot_calibration_multiclass.py index 223a7525c09c2..24962a786ea03 100644 --- a/examples/calibration/plot_calibration_multiclass.py +++ b/examples/calibration/plot_calibration_multiclass.py @@ -198,7 +198,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3). # the true class is 'green') generally point towards the green vertex. This # results in fewer over-confident, 0 predicted probabilities and at the same # time an increase in the predicted probabilities of the correct class. -# Thus, the calibrated classifier produces more accurate predicted probablities +# Thus, the calibrated classifier produces more accurate predicted probabilities # that incur a lower :ref:`log loss ` # # We can show this objectively by comparing the :ref:`log loss ` of