From 10e22fd8a474f301fb08690efdcecf85aa131f17 Mon Sep 17 00:00:00 2001
From: Alexander Held <45009355+alexander-held@users.noreply.github.com>
Date: Fri, 15 Oct 2021 21:49:18 +0200
Subject: [PATCH] docs: Add and apply codespell as a pre-commit hook (#1645)

* Add and apply codespell pre-commit hook
  - c.f. https://github.com/codespell-project/codespell/
* Configure codespell to run only over *.py, *.md, *.rst files
* Ignore the HEP specific terms "hist" and "gaus"
---
 .github/ISSUE_TEMPLATE/bug-report.yml | 4 ++--
 .github/ISSUE_TEMPLATE/config.yml | 2 +-
 .pre-commit-config.yaml | 7 +++++++
 docs/examples/notebooks/Recast.ipynb | 2 +-
 .../notebooks/learn/TensorizingInterpolations.ipynb | 2 +-
 docs/faq.rst | 4 ++--
 docs/governance/ROADMAP.rst | 4 ++--
 src/pyhf/constraints.py | 2 +-
 src/pyhf/infer/calculators.py | 2 +-
 src/pyhf/infer/mle.py | 2 +-
 src/pyhf/infer/test_statistics.py | 12 ++++++------
 src/pyhf/modifiers/shapefactor.py | 2 +-
 src/pyhf/optimize/opt_jax.py | 4 ++--
 src/pyhf/pdf.py | 2 +-
 src/pyhf/tensor/jax_backend.py | 8 ++++----
 src/pyhf/tensor/numpy_backend.py | 8 ++++----
 src/pyhf/tensor/pytorch_backend.py | 6 +++---
 src/pyhf/tensor/tensorflow_backend.py | 6 +++---
 tests/test_infer.py | 10 +++++-----
 19 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
index fff0bdc5aa..ae485e863f 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -116,7 +116,7 @@ body:
       label: Actual Results
       description: >-
         Paste verbatim program or command output.
-        Don't wrap it with tripple backticks — your whole input will be
+        Don't wrap it with triple backticks — your whole input will be
         turned into a code snippet automatically.
       render: console
     validations:
@@ -127,7 +127,7 @@
       label: pyhf Version
       description: >-
         Paste verbatim output from `pyhf --version` below, under the prompt line.
-        Don't wrap it with tripple backticks — your whole input will be
+        Don't wrap it with triple backticks — your whole input will be
         turned into a code snippet automatically.
       render: console
       placeholder: |
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 6521ee043b..b8743e39c0 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,7 +1,7 @@
 # Ref: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser
 blank_issues_enabled: true
 contact_links:
-- name: 🙋 Useage Questions
+- name: 🙋 Usage Questions
   url: https://github.com/scikit-hep/pyhf/discussions
   about: |
     Use pyhf's GitHub Discussions to ask "How do I do X with pyhf?".
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2224bdb720..331e5fd29d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -57,3 +57,10 @@ repos:
   hooks:
   - id: nbqa-pyupgrade
     additional_dependencies: [pyupgrade==2.29.0]
+
+- repo: https://github.com/codespell-project/codespell
+  rev: v2.1.0
+  hooks:
+  - id: codespell
+    files: ^.*\.(py|md|rst)$
+    args: ["-w", "-L", "hist,gaus"]
diff --git a/docs/examples/notebooks/Recast.ipynb b/docs/examples/notebooks/Recast.ipynb
index 18fefd4037..4655a7a473 100644
--- a/docs/examples/notebooks/Recast.ipynb
+++ b/docs/examples/notebooks/Recast.ipynb
@@ -68,7 +68,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### The originial statistical Model\n"
+    "### The original statistical Model\n"
    ]
   },
   {
diff --git a/docs/examples/notebooks/learn/TensorizingInterpolations.ipynb b/docs/examples/notebooks/learn/TensorizingInterpolations.ipynb
index e7028c6b31..4cc927e93e 100644
--- a/docs/examples/notebooks/learn/TensorizingInterpolations.ipynb
+++ b/docs/examples/notebooks/learn/TensorizingInterpolations.ipynb
@@ -1184,7 +1184,7 @@
     "    all_histo_deltas_dn = allset_all_histo_deltas_dn[nset]\n",
     "\n",
     "    for nh, histo in enumerate(histoset):\n",
-    "        # bases and exponents need to have an outer product, to esentially tile or repeat over rows/cols\n",
+    "        # bases and exponents need to have an outer product, to essentially tile or repeat over rows/cols\n",
     "        bases_up = np.einsum(\n",
     "            'a,b->ab', np.ones(alphaset.shape), all_histo_deltas_up[nh]\n",
     "        )\n",
diff --git a/docs/faq.rst b/docs/faq.rst
index 136ce16ad3..79a79d4f2c 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -110,10 +110,10 @@ Kyle Cranmer (co-author of :math:`\HiFa{}`) to study if the graph structure and
 differentiation abilities of machine learning frameworks would allow them
 to be effective tools for statistical fits.
 Lukas would give helpful friendly advice on Matthew's project and one night [1]_ over dinner
-in CERN's R1 cafeteria the two were discussing the idea of implimenting :math:`\HiFa{}`
+in CERN's R1 cafeteria the two were discussing the idea of implementing :math:`\HiFa{}`
 in Python using machine learning libraries to drive the computation.
 Continuing the discussion in Lukas's office, Lukas showed Matthew that the core statistical
-machinery could be implimented rather succinctly, and that night
+machinery could be implemented rather succinctly, and that night
 `proceeded to do so `_ and |dubbed the project pyhf|_.
diff --git a/docs/governance/ROADMAP.rst b/docs/governance/ROADMAP.rst
index 4b4c553977..b48134d00d 100644
--- a/docs/governance/ROADMAP.rst
+++ b/docs/governance/ROADMAP.rst
@@ -10,7 +10,7 @@ Overview and Goals
 We will follow loosely Seibert’s `Heirarchy of Needs `__
-|Seibert Heirarchy of Needs SciPy 2019| (`Stan
+|Seibert Hierarchy of Needs SciPy 2019| (`Stan
 Seibert `__, SciPy 2019)
 As a general overview that will include:
@@ -158,7 +158,7 @@ Presentations During Roadmap Timeline
    2019 `__ (November 4-8th, 2019)
-.. |Seibert Heirarchy of Needs SciPy 2019| image:: https://pbs.twimg.com/media/EBYojw8XUAERJhZ?format=png
+.. |Seibert Hierarchy of Needs SciPy 2019| image:: https://pbs.twimg.com/media/EBYojw8XUAERJhZ?format=png
 .. |check| raw:: html
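The codespell hook configured in the .pre-commit-config.yaml hunk above can also be exercised outside of pre-commit. The following is a minimal sketch, not part of this patch, assuming codespell has been installed separately (for example with python -m pip install codespell); it reuses the same flags as the hook's args, and the target path is only an illustrative choice.

# Run codespell over one file with the same options as the pre-commit hook:
# "-w" writes fixes back to disk, "-L hist,gaus" skips the HEP-specific terms.
import subprocess

result = subprocess.run(
    ["codespell", "-w", "-L", "hist,gaus", "src/pyhf/pdf.py"],
    capture_output=True,
    text=True,
    check=False,
)
print(result.stdout or "no spelling corrections needed")

In day-to-day use the hook instead runs through pre-commit itself, either on every commit after pre-commit install or on demand with pre-commit run codespell --all-files.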
diff --git a/src/pyhf/constraints.py b/src/pyhf/constraints.py
index e5141082b7..03ec7de448 100644
--- a/src/pyhf/constraints.py
+++ b/src/pyhf/constraints.py
@@ -244,7 +244,7 @@ def make_pdf(self, pars):
         # similar to expected_data() in constrained_by_poisson
         # we multiply by the appropriate factor to achieve
-        # the desired variance for poisson-type cosntraints
+        # the desired variance for poisson-type constraints
         pois_rates = tensorlib.product(
             tensorlib.stack([nuispars, self.batched_factors]), axis=0
         )
diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
index 26140f8f62..a169c8555f 100644
--- a/src/pyhf/infer/calculators.py
+++ b/src/pyhf/infer/calculators.py
@@ -706,7 +706,7 @@ def __init__(
                 :math:`\tilde{q}_{\mu}`, as defined under the Wald approximation in Equation (62) of
                 :xref:`arXiv:1007.1727` (:func:`~pyhf.infer.test_statistics.qmu_tilde`), ``'q'``
                 performs the calculation using the test statistic :math:`q_{\mu}`
-                (:func:`~pyhf.infer.test_statistics.qmu`), and ``'q0'`` perfoms the calculation using
+                (:func:`~pyhf.infer.test_statistics.qmu`), and ``'q0'`` performs the calculation using
                 the discovery test statistic :math:`q_{0}` (:func:`~pyhf.infer.test_statistics.q0`).
             ntoys (:obj:`int`): Number of toys to use (how many times to sample the underlying distributions).
             track_progress (:obj:`bool`): Whether to display the `tqdm` progress bar or not (outputs to `stderr`).
diff --git a/src/pyhf/infer/mle.py b/src/pyhf/infer/mle.py
index 6de6bd84f6..fffff84131 100644
--- a/src/pyhf/infer/mle.py
+++ b/src/pyhf/infer/mle.py
@@ -12,7 +12,7 @@ def __dir__():
 def twice_nll(pars, data, pdf):
     r"""
     Two times the negative log-likelihood of the model parameters, :math:`\left(\mu, \boldsymbol{\theta}\right)`, given the observed data.
-    It is used in the calculation of the test statistic, :math:`t_{\mu}`, as defiend in Equation (8) in :xref:`arXiv:1007.1727`
+    It is used in the calculation of the test statistic, :math:`t_{\mu}`, as defined in Equation (8) in :xref:`arXiv:1007.1727`

     .. math::

diff --git a/src/pyhf/infer/test_statistics.py b/src/pyhf/infer/test_statistics.py
index 18d75d1738..6c56b15314 100644
--- a/src/pyhf/infer/test_statistics.py
+++ b/src/pyhf/infer/test_statistics.py
@@ -20,7 +20,7 @@ def _qmu_like(
     Clipped version of _tmu_like where the returned test statistic
     is 0 if muhat > 0 else tmu_like_stat.

-    If the lower bound of the POI is 0 this automatically implments
+    If the lower bound of the POI is 0 this automatically implements
     qmu_tilde. Otherwise this is qmu (no tilde).
     """
     tensorlib, optimizer = get_backend()
@@ -41,7 +41,7 @@ def _tmu_like(
     """
     Basic Profile Likelihood test statistic.

-    If the lower bound of the POI is 0 this automatically implments
+    If the lower bound of the POI is 0 this automatically implements
     tmu_tilde. Otherwise this is tmu (no tilde).
     """
     tensorlib, optimizer = get_backend()
@@ -63,7 +63,7 @@ def _tmu_like(
 def qmu(mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False):
     r"""
     The test statistic, :math:`q_{\mu}`, for establishing an upper
-    limit on the strength parameter, :math:`\mu`, as defiend in
+    limit on the strength parameter, :math:`\mu`, as defined in
     Equation (14) in :xref:`arXiv:1007.1727`

     .. math::
@@ -152,7 +152,7 @@ def qmu_tilde(
     r"""
     The "alternative" test statistic, :math:`\tilde{q}_{\mu}`, for establishing
     an upper limit on the strength parameter, :math:`\mu`, for models with
-    bounded POI, as defiend in Equation (16) in :xref:`arXiv:1007.1727`
+    bounded POI, as defined in Equation (16) in :xref:`arXiv:1007.1727`

     .. math::
        :nowrap:
@@ -242,7 +242,7 @@ def qmu_tilde(
 def tmu(mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False):
     r"""
     The test statistic, :math:`t_{\mu}`, for establishing a two-sided
-    interval on the strength parameter, :math:`\mu`, as defiend in Equation (8)
+    interval on the strength parameter, :math:`\mu`, as defined in Equation (8)
     in :xref:`arXiv:1007.1727`

     .. math::
@@ -325,7 +325,7 @@ def tmu_tilde(
     r"""
     The test statistic, :math:`\tilde{t}_{\mu}`, for establishing a two-sided
     interval on the strength parameter, :math:`\mu`, for models with
-    bounded POI, as defiend in Equation (11) in :xref:`arXiv:1007.1727`
+    bounded POI, as defined in Equation (11) in :xref:`arXiv:1007.1727`

     .. math::
diff --git a/src/pyhf/modifiers/shapefactor.py b/src/pyhf/modifiers/shapefactor.py
index 7bb34b8e81..d888d45102 100644
--- a/src/pyhf/modifiers/shapefactor.py
+++ b/src/pyhf/modifiers/shapefactor.py
@@ -148,7 +148,7 @@ def __init__(self, modifiers, pdfconfig, builder_data, batch_size=None):
             (len(shapefactor_mods), self.batch_size or 1, 1),
         )
         # access field is now
-        # e.g. for a 3 channnel (3 bins, 2 bins, 5 bins) model
+        # e.g. for a 3 channel (3 bins, 2 bins, 5 bins) model
         # [
         #   [0 1 2 0 1 0 1 2 3 4] (number of rows according to batch_size but at least 1)
         #   [0 1 2 0 1 0 1 2 3 4]
diff --git a/src/pyhf/optimize/opt_jax.py b/src/pyhf/optimize/opt_jax.py
index 41725029ef..5567678844 100644
--- a/src/pyhf/optimize/opt_jax.py
+++ b/src/pyhf/optimize/opt_jax.py
@@ -50,7 +50,7 @@ def wrap_objective(objective, data, pdf, stitch_pars, do_grad=False, jit_pieces=
     if do_grad:

         def func(pars):
-            # need to conver to tuple to make args hashable
+            # need to convert to tuple to make args hashable
             return _jitted_objective_and_grad(
                 pars,
                 data,
@@ -65,7 +65,7 @@ def func(pars):
     else:

         def func(pars):
-            # need to conver to tuple to make args hashable
+            # need to convert to tuple to make args hashable
             return _jitted_objective(
                 pars,
                 data,
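The test_statistics.py docstrings corrected above all refer to the test statistics of arXiv:1007.1727. For reference, here is a minimal sketch, not part of this patch, of evaluating qmu_tilde through the public API; the two-bin signal and background numbers are illustrative only.

# Illustrative only: build a simple two-bin counting model and evaluate the
# qmu_tilde test statistic at a POI hypothesis of mu = 1.0.
import pyhf

model = pyhf.simplemodels.uncorrelated_background(
    signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
)
data = [51, 48] + model.config.auxdata  # main channel counts + auxiliary data

q_tilde = pyhf.infer.test_statistics.qmu_tilde(
    1.0,  # tested POI value mu
    data,
    model,
    model.config.suggested_init(),
    model.config.suggested_bounds(),
    model.config.suggested_fixed(),
)
print(q_tilde)

Because the suggested bounds put the POI's lower bound at 0, this is the bounded-POI ("tilde") variant described in Equation (16) of the reference.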
diff --git a/src/pyhf/pdf.py b/src/pyhf/pdf.py
index 4643b797f0..cd246495bf 100644
--- a/src/pyhf/pdf.py
+++ b/src/pyhf/pdf.py
@@ -514,7 +514,7 @@ def logpdf(self, maindata, pars):
         Compute the logarithm of the value of the probability density.

         Args:
-            maindata (:obj:`tensor`): The main channnel data (a subset of the full data in a HistFactory model)
+            maindata (:obj:`tensor`): The main channel data (a subset of the full data in a HistFactory model)
             pars (:obj:`tensor`): The model parameters

         Returns:
diff --git a/src/pyhf/tensor/jax_backend.py b/src/pyhf/tensor/jax_backend.py
index 9ea52c2fd5..f5867ded10 100644
--- a/src/pyhf/tensor/jax_backend.py
+++ b/src/pyhf/tensor/jax_backend.py
@@ -155,7 +155,7 @@ def tile(self, tensor_in, repeats):
     def conditional(self, predicate, true_callable, false_callable):
         """
-        Runs a callable conditional on the boolean value of the evaulation of a predicate
+        Runs a callable conditional on the boolean value of the evaluation of a predicate

         Example:

@@ -169,8 +169,8 @@ def conditional(self, predicate, true_callable, false_callable):
         Args:
             predicate (:obj:`scalar`): The logical condition that determines which callable to evaluate
-            true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`true`
-            false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`false`
+            true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`true`
+            false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`false`

         Returns:
             JAX ndarray: The output of the callable that was evaluated
@@ -216,7 +216,7 @@ def astensor(self, tensor_in, dtype="float"):
             tensor_in (Number or Tensor): Tensor object

         Returns:
-            `jaxlib.xla_extension.DeviceArray`: A multi-dimensional, fixed-size homogenous array.
+            `jaxlib.xla_extension.DeviceArray`: A multi-dimensional, fixed-size homogeneous array.
         """
         # TODO: Remove doctest:+ELLIPSIS when JAX API stabilized
         try:
diff --git a/src/pyhf/tensor/numpy_backend.py b/src/pyhf/tensor/numpy_backend.py
index 1d248300f8..f3601b17a5 100644
--- a/src/pyhf/tensor/numpy_backend.py
+++ b/src/pyhf/tensor/numpy_backend.py
@@ -140,7 +140,7 @@ def tile(self, tensor_in, repeats):
     def conditional(self, predicate, true_callable, false_callable):
         """
-        Runs a callable conditional on the boolean value of the evaulation of a predicate
+        Runs a callable conditional on the boolean value of the evaluation of a predicate

         Example:

@@ -154,8 +154,8 @@ def conditional(self, predicate, true_callable, false_callable):
         Args:
             predicate (:obj:`scalar`): The logical condition that determines which callable to evaluate
-            true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`true`
-            false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`false`
+            true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`true`
+            false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`false`

         Returns:
             NumPy ndarray: The output of the callable that was evaluated
@@ -201,7 +201,7 @@ def astensor(self, tensor_in, dtype='float'):
             tensor_in (Number or Tensor): Tensor object

         Returns:
-            `numpy.ndarray`: A multi-dimensional, fixed-size homogenous array.
+            `numpy.ndarray`: A multi-dimensional, fixed-size homogeneous array.
""" try: dtype = self.dtypemap[dtype] diff --git a/src/pyhf/tensor/pytorch_backend.py b/src/pyhf/tensor/pytorch_backend.py index 153151f524..0b22ea8275 100644 --- a/src/pyhf/tensor/pytorch_backend.py +++ b/src/pyhf/tensor/pytorch_backend.py @@ -93,7 +93,7 @@ def erfinv(self, tensor_in): def conditional(self, predicate, true_callable, false_callable): """ - Runs a callable conditional on the boolean value of the evaulation of a predicate + Runs a callable conditional on the boolean value of the evaluation of a predicate Example: @@ -107,8 +107,8 @@ def conditional(self, predicate, true_callable, false_callable): Args: predicate (:obj:`scalar`): The logical condition that determines which callable to evaluate - true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`true` - false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`false` + true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`true` + false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`false` Returns: PyTorch Tensor: The output of the callable that was evaluated diff --git a/src/pyhf/tensor/tensorflow_backend.py b/src/pyhf/tensor/tensorflow_backend.py index be41e8f488..1d58d7d20b 100644 --- a/src/pyhf/tensor/tensorflow_backend.py +++ b/src/pyhf/tensor/tensorflow_backend.py @@ -129,7 +129,7 @@ def tile(self, tensor_in, repeats): def conditional(self, predicate, true_callable, false_callable): """ - Runs a callable conditional on the boolean value of the evaulation of a predicate + Runs a callable conditional on the boolean value of the evaluation of a predicate Example: >>> import pyhf @@ -143,8 +143,8 @@ def conditional(self, predicate, true_callable, false_callable): Args: predicate (:obj:`scalar`): The logical condition that determines which callable to evaluate - true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`true` - false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evalutes to :code:`false` + true_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`true` + false_callable (:obj:`callable`): The callable that is evaluated when the :code:`predicate` evaluates to :code:`false` Returns: TensorFlow Tensor: The output of the callable that was evaluated diff --git a/tests/test_infer.py b/tests/test_infer.py index 520c4a9356..92a3467585 100644 --- a/tests/test_infer.py +++ b/tests/test_infer.py @@ -130,7 +130,7 @@ def test_hypotest_return_tail_probs(tmpdir, hypotest_args, test_stat): def test_hypotest_return_expected(tmpdir, hypotest_args, test_stat): """ Check that the return structure of pyhf.infer.hypotest with the - additon of the return_expected keyword arg is as expected + addition of the return_expected keyword arg is as expected """ tb = pyhf.tensorlib @@ -152,7 +152,7 @@ def test_hypotest_return_expected(tmpdir, hypotest_args, test_stat): def test_hypotest_return_expected_set(tmpdir, hypotest_args, test_stat): """ Check that the return structure of pyhf.infer.hypotest with the - additon of the return_expected_set keyword arg is as expected + addition of the return_expected_set keyword arg is as expected """ tb = pyhf.tensorlib @@ -195,7 +195,7 @@ def test_hypotest_return_calculator( ): """ Check that the return structure of pyhf.infer.hypotest 
diff --git a/tests/test_infer.py b/tests/test_infer.py
index 520c4a9356..92a3467585 100644
--- a/tests/test_infer.py
+++ b/tests/test_infer.py
@@ -130,7 +130,7 @@ def test_hypotest_return_tail_probs(tmpdir, hypotest_args, test_stat):
 def test_hypotest_return_expected(tmpdir, hypotest_args, test_stat):
     """
     Check that the return structure of pyhf.infer.hypotest with the
-    additon of the return_expected keyword arg is as expected
+    addition of the return_expected keyword arg is as expected
     """
     tb = pyhf.tensorlib
@@ -152,7 +152,7 @@ def test_hypotest_return_expected(tmpdir, hypotest_args, test_stat):
 def test_hypotest_return_expected_set(tmpdir, hypotest_args, test_stat):
     """
     Check that the return structure of pyhf.infer.hypotest with the
-    additon of the return_expected_set keyword arg is as expected
+    addition of the return_expected_set keyword arg is as expected
     """
     tb = pyhf.tensorlib
@@ -195,7 +195,7 @@ def test_hypotest_return_calculator(
 ):
     """
     Check that the return structure of pyhf.infer.hypotest with the
-    additon of the return_calculator keyword arg is as expected
+    addition of the return_calculator keyword arg is as expected
     """
     *_, model = hypotest_args
@@ -423,7 +423,7 @@ def test_emperical_distribution(tmpdir, hypotest_args):
 def test_toy_calculator(tmpdir, hypotest_args):
     """
-    Check that the toy calculator is peforming as expected
+    Check that the toy calculator is performing as expected
     """
     np.random.seed(0)
     mu_test, data, model = hypotest_args
@@ -469,7 +469,7 @@ def test_toy_calculator(tmpdir, hypotest_args):
 def test_fixed_poi(tmpdir, hypotest_args):
     """
     Check that the return structure of pyhf.infer.hypotest with the
-    additon of the return_expected keyword arg is as expected
+    addition of the return_expected keyword arg is as expected
     """
     _, _, pdf = hypotest_args
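For context on the return_expected and return_expected_set keyword arguments named in the test docstrings above, here is a minimal sketch, not part of this patch, using the same kind of illustrative toy model as in the earlier sketch.

# Observed CLs and the (-2, -1, 0, +1, +2) sigma expected CLs band for mu = 1.0.
import pyhf

model = pyhf.simplemodels.uncorrelated_background(
    signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
)
data = [51, 48] + model.config.auxdata

cls_obs, cls_exp_band = pyhf.infer.hypotest(
    1.0, data, model, test_stat="qtilde", return_expected_set=True
)
print(cls_obs)
print(cls_exp_band)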