From f16c987592d01890cf61679f7f323bc15cbd1269 Mon Sep 17 00:00:00 2001
From: Logan Thomas <logan.thomas005@gmail.com>
Date: Tue, 12 Apr 2022 03:53:27 -0500
Subject: [PATCH] DOC: various spellcheck and typos (#23093)

---
 asv_benchmarks/benchmarks/linear_model.py     |  2 +-
 doc/computing/computational_performance.rst   |  2 +-
 doc/conftest.py                               |  2 +-
 doc/datasets/sample_generators.rst            |  4 ++--
 doc/developers/advanced_installation.rst      |  4 ++--
 doc/developers/contributing.rst               |  4 ++--
 doc/developers/develop.rst                    |  2 +-
 doc/glossary.rst                              |  2 +-
 doc/inspection.rst                            | 10 ++++----
 doc/modules/clustering.rst                    |  2 +-
 doc/modules/cross_validation.rst              |  6 ++---
 doc/modules/grid_search.rst                   |  6 ++---
 doc/modules/mixture.rst                       |  4 ++--
 doc/modules/tree.rst                          | 14 +++++------
 doc/related_projects.rst                      | 24 +++++++++----------
 .../plot_calibration_multiclass.py            |  2 +-
 16 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py
index a533a1a97cfb7..663ceca61d063 100644
--- a/asv_benchmarks/benchmarks/linear_model.py
+++ b/asv_benchmarks/benchmarks/linear_model.py
@@ -110,7 +110,7 @@ def skip(self, params):
 
 class LinearRegressionBenchmark(Predictor, Estimator, Benchmark):
     """
-    Benchmarks for Linear Reagression.
+    Benchmarks for Linear Regression.
     """
 
     param_names = ["representation"]
diff --git a/doc/computing/computational_performance.rst b/doc/computing/computational_performance.rst
index 32a485e21a2a5..ceb0a0af2e66c 100644
--- a/doc/computing/computational_performance.rst
+++ b/doc/computing/computational_performance.rst
@@ -128,7 +128,7 @@ by quite a bit as only the non zero valued features impact the dot product
 and thus the model predictions. Hence if you have 100 non zeros in 1e6
 dimensional space, you only need 100 multiply and add operation instead of 1e6.
 
-Calculation over a dense representation, however, may leverage highly optimised
+Calculation over a dense representation, however, may leverage highly optimized
 vector operations and multithreading in BLAS, and tends to result in fewer CPU
 cache misses. So the sparsity should typically be quite high (10% non-zeros
 max, to be checked depending on the hardware) for the sparse input
diff --git a/doc/conftest.py b/doc/conftest.py
index 719c46fccf2d2..10253efeabf98 100644
--- a/doc/conftest.py
+++ b/doc/conftest.py
@@ -109,7 +109,7 @@ def skip_if_matplotlib_not_installed(fname):
 
 def pytest_runtest_setup(item):
     fname = item.fspath.strpath
-    # normalise filename to use forward slashes on Windows for easier handling
+    # normalize filename to use forward slashes on Windows for easier handling
     # later
     fname = fname.replace(os.sep, "/")
 
diff --git a/doc/datasets/sample_generators.rst b/doc/datasets/sample_generators.rst
index deed09f24e5fe..7dc123f08424c 100644
--- a/doc/datasets/sample_generators.rst
+++ b/doc/datasets/sample_generators.rst
@@ -25,7 +25,7 @@ Both :func:`make_blobs` and :func:`make_classification` create multiclass
 datasets by allocating each class one or more normally-distributed clusters of
 points.  :func:`make_blobs` provides greater control regarding the centers and
 standard deviations of each cluster, and is used to demonstrate clustering.
-:func:`make_classification` specialises in introducing noise by way of:
+:func:`make_classification` specializes in introducing noise by way of:
 correlated, redundant and uninformative features; multiple Gaussian clusters
 per class; and linear transformations of the feature space.
 
@@ -41,7 +41,7 @@ near-equal-size classes separated by concentric hyperspheres.
 :func:`make_circles` and :func:`make_moons` generate 2d binary classification
 datasets that are challenging to certain algorithms (e.g. centroid-based
 clustering or linear classification), including optional Gaussian noise.
-They are useful for visualisation. :func:`make_circles` produces Gaussian data
+They are useful for visualization. :func:`make_circles` produces Gaussian data
 with a spherical decision boundary for binary classification, while
 :func:`make_moons` produces two interleaving half circles.
 
diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst
index 2e8411e264019..89dc6e5267ded 100644
--- a/doc/developers/advanced_installation.rst
+++ b/doc/developers/advanced_installation.rst
@@ -145,8 +145,8 @@ Building Scikit-learn also requires:
    (before cythonization) will force the build to fail if OpenMP is not
    supported.
 
-Since version 0.21, scikit-learn automatically detects and use the linear
-algebrea library used by SciPy **at runtime**. Scikit-learn has therefore no
+Since version 0.21, scikit-learn automatically detects and uses the linear
+algebra library used by SciPy **at runtime**. Scikit-learn has therefore no
 build dependency on BLAS/LAPACK implementations such as OpenBlas, Atlas, Blis
 or MKL.
 
diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst
index cfe6fac568970..445a9d2211733 100644
--- a/doc/developers/contributing.rst
+++ b/doc/developers/contributing.rst
@@ -381,9 +381,9 @@ In order to ease the reviewing process, we recommend that your contribution
 complies with the following rules before marking a PR as ``[MRG]``. The
 **bolded** ones are especially important:
 
-1. **Give your pull request a helpful title** that summarises what your
+1. **Give your pull request a helpful title** that summarizes what your
    contribution does. This title will often become the commit message once
-   merged so it should summarise your contribution for posterity. In some
+   merged so it should summarize your contribution for posterity. In some
    cases "Fix <ISSUE TITLE>" is enough. "Fix #<ISSUE NUMBER>" is never a
    good title.
 
diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst
index 7041f78df81c9..a60d60260b485 100644
--- a/doc/developers/develop.rst
+++ b/doc/developers/develop.rst
@@ -336,7 +336,7 @@ estimator::
     ...         self.my_extra_param = my_extra_param
 
 The parameter `deep` will control whether or not the parameters of the
-`subsestimator` should be reported. Thus when `deep=True`, the output will be::
+`subestimator` should be reported. Thus when `deep=True`, the output will be::
 
     >>> my_estimator = MyEstimator(subestimator=LogisticRegression())
     >>> for param, value in my_estimator.get_params(deep=True).items():
diff --git a/doc/glossary.rst b/doc/glossary.rst
index 04dcfa606552e..b52dcde382246 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -1696,7 +1696,7 @@ See concept :term:`attribute`.
         predictors.
 
     ``coef_``
-        The weight/coefficient matrix of a generalised linear model
+        The weight/coefficient matrix of a generalized linear model
         :term:`predictor`, of shape ``(n_features,)`` for binary classification
         and single-output regression, ``(n_classes, n_features)`` for
         multiclass classification and ``(n_targets, n_features)`` for
diff --git a/doc/inspection.rst b/doc/inspection.rst
index 72305bec73a10..57c1cfc3275e8 100644
--- a/doc/inspection.rst
+++ b/doc/inspection.rst
@@ -10,14 +10,14 @@ Inspection
 ----------
 
 Predictive performance is often the main goal of developing machine learning
-models. Yet summarising performance with an evaluation metric is often
+models. Yet summarizing performance with an evaluation metric is often
 insufficient: it assumes that the evaluation metric and test dataset
 perfectly reflect the target domain, which is rarely true. In certain domains,
 a model needs a certain level of interpretability before it can be deployed.
-A model that is exhibiting performance issues needs to be debugged for one to 
-understand the model's underlying issue. The 
-:mod:`sklearn.inspection` module provides tools to help understand the 
-predictions from a model and what affects them. This can be used to 
+A model that is exhibiting performance issues needs to be debugged for one to
+understand the model's underlying issue. The
+:mod:`sklearn.inspection` module provides tools to help understand the
+predictions from a model and what affects them. This can be used to
 evaluate assumptions and biases of a model, design a better model, or
 to diagnose issues with model performance.
 
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 1775ec5386ab5..48b0785c8544c 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -451,7 +451,7 @@ to be specified in advance. It works well for a small number of clusters,
 but is not advised for many clusters.
 
 For two clusters, SpectralClustering solves a convex relaxation of the
-`normalised cuts <https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf>`_
+`normalized cuts <https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf>`_
 problem on the similarity graph: cutting the graph in two so that the weight of
 the edges cut is small compared to the weights of the edges inside each
 cluster. This criteria is especially interesting when working on images, where
diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 6705a3da4a9b0..3ecab9bd8eb04 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -278,7 +278,7 @@ can be used (otherwise, an exception is raised).
     over cross-validation folds, whereas :func:`cross_val_predict` simply
     returns the labels (or probabilities) from several distinct models
     undistinguished. Thus, :func:`cross_val_predict` is not an appropriate
-    measure of generalisation error.
+    measure of generalization error.
 
 
 The function :func:`cross_val_predict` is appropriate for:
@@ -844,13 +844,13 @@ cross-validation splitter. For example::
 Cross validation of time series data
 ------------------------------------
 
-Time series data is characterised by the correlation between observations
+Time series data is characterized by the correlation between observations
 that are near in time (*autocorrelation*). However, classical
 cross-validation techniques such as :class:`KFold` and
 :class:`ShuffleSplit` assume the samples are independent and
 identically distributed, and would result in unreasonable correlation
 between training and testing instances (yielding poor estimates of
-generalisation error) on time series data. Therefore, it is very important
+generalization error) on time series data. Therefore, it is very important
 to evaluate our model for time series data on the "future" observations
 least like those that are used to train the model. To achieve this, one
 solution is provided by :class:`TimeSeriesSplit`.
diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst
index 19910d5a16da3..e92099a9833e2 100644
--- a/doc/modules/grid_search.rst
+++ b/doc/modules/grid_search.rst
@@ -108,7 +108,7 @@ Randomized Parameter Optimization
 =================================
 While using a grid of parameter settings is currently the most widely used
 method for parameter optimization, other search methods have more
-favourable properties.
+favorable properties.
 :class:`RandomizedSearchCV` implements a randomized search over parameters,
 where each setting is sampled from a distribution over possible parameter values.
 This has two main benefits over an exhaustive search:
@@ -495,10 +495,10 @@ min_resources = 20``.
 
 .. _successive_halving_cv_results:
 
-Analysing results with the `cv_results_` attribute
+Analyzing results with the `cv_results_` attribute
 --------------------------------------------------
 
-The ``cv_results_`` attribute contains useful information for analysing the
+The ``cv_results_`` attribute contains useful information for analyzing the
 results of a search. It can be converted to a pandas dataframe with ``df =
 pd.DataFrame(est.cv_results_)``. The ``cv_results_`` attribute of
 :class:`HalvingGridSearchCV` and :class:`HalvingRandomSearchCV` is similar
diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst
index 114b5ea3e8eb3..2037f15fe3ee8 100644
--- a/doc/modules/mixture.rst
+++ b/doc/modules/mixture.rst
@@ -139,7 +139,7 @@ Choice of the Initialization Method
 -----------------------------------
 
 There is a choice of four initialization methods (as well as inputting user defined
-initial means) to generate the initial centers for the model components: 
+initial means) to generate the initial centers for the model components:
 
 k-means (default)
   This applies a traditional k-means clustering algorithm.
@@ -159,7 +159,7 @@ random_from_data
   results if the chosen points are too close to each other.
 
 random
-  Centers are chosen as a small pertubation away from the mean of all data.
+  Centers are chosen as a small perturbation away from the mean of all data.
   This method is simple but can lead to the model taking longer to converge.
 
 .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_init_001.png
diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst
index 4284f27beb4cc..90c16d2ac056f 100644
--- a/doc/modules/tree.rst
+++ b/doc/modules/tree.rst
@@ -23,10 +23,10 @@ the tree, the more complex the decision rules and the fitter the model.
 
 Some advantages of decision trees are:
 
-    - Simple to understand and to interpret. Trees can be visualised.
+    - Simple to understand and to interpret. Trees can be visualized.
 
     - Requires little data preparation. Other techniques often require data
-      normalisation, dummy variables need to be created and blank values to
+      normalization, dummy variables need to be created and blank values to
       be removed. Note however that this module does not support missing
       values.
 
@@ -35,7 +35,7 @@ Some advantages of decision trees are:
 
     - Able to handle both numerical and categorical data. However scikit-learn
       implementation does not support categorical variables for now. Other
-      techniques are usually specialised in analysing datasets that have only one type
+      techniques are usually specialized in analyzing datasets that have only one type
       of variable. See :ref:`algorithms <tree_algorithms>` for more
       information.
 
@@ -56,7 +56,7 @@ Some advantages of decision trees are:
 The disadvantages of decision trees include:
 
     - Decision-tree learners can create over-complex trees that do not
-      generalise the data well. This is called overfitting. Mechanisms
+      generalize the data well. This is called overfitting. Mechanisms
       such as pruning, setting the minimum number of samples required
       at a leaf node or setting the maximum depth of the tree are
       necessary to avoid this problem.
@@ -345,7 +345,7 @@ Tips on practical use
     in gaining more insights about how the decision tree makes predictions, which is
     important for understanding the important features in the data.
 
-  * Visualise your tree as you are training by using the ``export``
+  * Visualize your tree as you are training by using the ``export``
     function.  Use ``max_depth=3`` as an initial tree depth to get a feel for
     how the tree is fitting to your data, and then increase the depth.
 
@@ -407,7 +407,7 @@ The algorithm creates a multiway tree, finding for each node (i.e. in
 a greedy manner) the categorical feature that will yield the largest
 information gain for categorical targets. Trees are grown to their
 maximum size and then a pruning step is usually applied to improve the
-ability of the tree to generalise to unseen data.
+ability of the tree to generalize to unseen data.
 
 C4.5 is the successor to ID3 and removed the restriction that features
 must be categorical by dynamically defining a discrete attribute (based
@@ -427,7 +427,7 @@ it differs in that it supports numerical target variables (regression) and
 does not compute rule sets. CART constructs binary trees using the feature
 and threshold that yield the largest information gain at each node.
 
-scikit-learn uses an optimised version of the CART algorithm; however, scikit-learn
+scikit-learn uses an optimized version of the CART algorithm; however, scikit-learn
 implementation does not support categorical variables for now.
 
 .. _ID3: https://en.wikipedia.org/wiki/ID3_algorithm
diff --git a/doc/related_projects.rst b/doc/related_projects.rst
index 24bfd47781b6f..0f5532bd52357 100644
--- a/doc/related_projects.rst
+++ b/doc/related_projects.rst
@@ -7,7 +7,7 @@ Related Projects
 Projects implementing the scikit-learn estimator API are encouraged to use
 the `scikit-learn-contrib template <https://github.com/scikit-learn-contrib/project-template>`_
 which facilitates best practices for testing and documenting estimators.
-The `scikit-learn-contrib GitHub organisation <https://github.com/scikit-learn-contrib/scikit-learn-contrib>`_
+The `scikit-learn-contrib GitHub organization <https://github.com/scikit-learn-contrib/scikit-learn-contrib>`_
 also accepts high-quality contributions of repositories conforming to this
 template.
 
@@ -45,10 +45,10 @@ enhance the functionality of scikit-learn's estimators.
   operators to design a machine learning pipeline, including data and feature
   preprocessors as well as the estimators. Works as a drop-in replacement for a
   scikit-learn estimator.
-  
+
 - `Featuretools <https://github.com/alteryx/featuretools>`_
-  A framework to perform automated feature engineering. It can be used for 
-  transforming temporal and relational datasets into feature matrices for 
+  A framework to perform automated feature engineering. It can be used for
+  transforming temporal and relational datasets into feature matrices for
   machine learning.
 
 - `Neuraxle <https://github.com/Neuraxio/Neuraxle>`_
@@ -66,9 +66,9 @@ enhance the functionality of scikit-learn's estimators.
 
 **Experimentation frameworks**
 
-- `Neptune <https://neptune.ai/>`_ Metadata store for MLOps, 
-  built for teams that run a lot of experiments.‌ It gives you a single 
-  place to log, store, display, organize, compare, and query all your 
+- `Neptune <https://neptune.ai/>`_ Metadata store for MLOps,
+  built for teams that run a lot of experiments. It gives you a single
+  place to log, store, display, organize, compare, and query all your
   model building metadata.
 
 - `Sacred <https://github.com/IDSIA/Sacred>`_ Tool to help you configure,
@@ -82,7 +82,7 @@ enhance the functionality of scikit-learn's estimators.
   wrapper around scikit-learn that makes it easy to run machine learning
   experiments with multiple learners and large feature sets.
 
-**Model inspection and visualisation**
+**Model inspection and visualization**
 
 - `dtreeviz <https://github.com/parrt/dtreeviz/>`_ A python library for
   decision tree visualization and model interpretation.
@@ -143,7 +143,7 @@ and tasks.
 
 **Structured learning**
 
-- `tslearn <https://github.com/tslearn-team/tslearn>`_ A machine learning library for time series 
+- `tslearn <https://github.com/tslearn-team/tslearn>`_ A machine learning library for time series
   that offers tools for pre-processing and feature extraction as well as dedicated models for clustering, classification and regression.
 
 - `sktime <https://github.com/alan-turing-institute/sktime>`_ A scikit-learn compatible toolbox for machine learning with time series including time series classification/regression and (supervised/panel) forecasting.
@@ -182,7 +182,7 @@ and tasks.
 
 **Federated Learning**
 
-- `Flower <https://flower.dev/>`_ A friendly federated learning framework with a 
+- `Flower <https://flower.dev/>`_ A friendly federated learning framework with a
   unified approach that can federate any workload, any ML framework, and any programming language.
 
 **Broad scope**
@@ -190,7 +190,7 @@ and tasks.
 - `mlxtend <https://github.com/rasbt/mlxtend>`_ Includes a number of additional
   estimators as well as model visualization utilities.
 
-- `scikit-lego <https://github.com/koaning/scikit-lego>`_ A number of scikit-learn compatible 
+- `scikit-lego <https://github.com/koaning/scikit-lego>`_ A number of scikit-learn compatible
   custom transformers, models and metrics, focusing on solving practical industry tasks.
 
 **Other regression and classification**
@@ -354,7 +354,7 @@ and promote community efforts.
   (`source <https://github.com/mehrdad-dev/scikit-learn>`__)
 - `Spanish translation <https://qu4nt.github.io/sklearn-doc-es/>`_
   (`source <https://github.com/qu4nt/sklearn-doc-es>`__)
-  
+
 
 .. rubric:: Footnotes
 
diff --git a/examples/calibration/plot_calibration_multiclass.py b/examples/calibration/plot_calibration_multiclass.py
index 223a7525c09c2..24962a786ea03 100644
--- a/examples/calibration/plot_calibration_multiclass.py
+++ b/examples/calibration/plot_calibration_multiclass.py
@@ -198,7 +198,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3).
 # the true class is 'green') generally point towards the green vertex. This
 # results in fewer over-confident, 0 predicted probabilities and at the same
 # time an increase in the predicted probabilities of the correct class.
-# Thus, the calibrated classifier produces more accurate predicted probablities
+# Thus, the calibrated classifier produces more accurate predicted probabilities
 # that incur a lower :ref:`log loss <log_loss>`
 #
 # We can show this objectively by comparing the :ref:`log loss <log_loss>` of