Skip to content

Commit

Permalink
chore: Move from lintian to a sphinx spelling plugin (canonical#3639)
Browse files Browse the repository at this point in the history
  • Loading branch information
holmanb committed Dec 8, 2023
1 parent 3e7caf3 commit 305ec6b
Show file tree
Hide file tree
Showing 8 changed files with 409 additions and 43 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/check_format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,12 @@ jobs:
- name: "Install dependencies"
run: |
sudo DEBIAN_FRONTEND=noninteractive apt-get -qy update
sudo DEBIAN_FRONTEND=noninteractive apt-get -qy install tox lintian
sudo DEBIAN_FRONTEND=noninteractive apt-get -qy install tox
- name: "Spellcheck"
run: |
make check_spelling
tox
env:
TOXENV: doc-spelling
- name: "Build docs"
env:
TOXENV: doc
Expand Down
43 changes: 2 additions & 41 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -124,47 +124,8 @@ fmt:
fmt-tip:
tox -e do_format_tip && tox -e check_format_tip

# Spell check && filter false positives
_CHECK_SPELLING := find doc -type f -exec spellintian {} + | \
grep -v -e 'doc/rtd/topics/cli.rst: modules modules' \
-e 'doc/examples/cloud-config-mcollective.txt: WARNING WARNING' \
-e 'doc/examples/cloud-config-power-state.txt: Bye Bye' \
-e 'doc/examples/cloud-config.txt: Bye Bye' \
-e 'doc/rtd/topics/cli.rst: DOCS DOCS' \
-e 'doc/summit/2023_summit_shared_notes.md: Moustafa Moustafa' \
-e 'dependant'


# For CI we require a failing return code when spellintian finds spelling errors
check_spelling:
@! $(_CHECK_SPELLING)

# Manipulate the output of spellintian into a valid "sed" command which is run
# to fix the error
#
# Example spellintian output:
#
# doc/examples/kernel-cmdline.txt: everthing -> everything
#
# The "fix_spelling" target manipulates the above output into the following command
# and runs that command.
#
# sed -i "s/everthing/everything/g" doc/examples/kernel-cmdline.txt
#
# awk notes:
#
# -F ': | -> ' means use the strings ": " or " -> " as field delimiters
# \047 is octal for single quote
# $$2 will contain the second field, ($ must be escaped because this is in a Makefile)
#
# Limitation: duplicate words with newline between them are not automatically fixed
fix_spelling:
@$(_CHECK_SPELLING) | \
sed 's/ (duplicate word)//g' | \
awk -F ': | -> ' '{printf "sed -i \047s/%s/%s/g\047 %s\n", $$2, $$3, $$1}' | \
sh

.PHONY: all check test lint clean rpm srpm deb deb-src yaml
.PHONY: check_version clean_pyc
.PHONY: unittest style-check fix_spelling render-template benchmark-generator
.PHONY: clean_pytest clean_packaging check_spelling clean_release doc
.PHONY: unittest style-check render-template benchmark-generator
.PHONY: clean_pytest clean_packaging clean_release doc
1 change: 1 addition & 0 deletions doc-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ sphinx==7.1.2
sphinx-design
sphinx-copybutton
sphinx-notfound-page
sphinxcontrib-spelling
9 changes: 9 additions & 0 deletions doc/rtd/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,17 @@
"sphinx.ext.autodoc",
"sphinx.ext.autosectionlabel",
"sphinx.ext.viewcode",
"sphinxcontrib.spelling",
]


# Spelling settings for sphinxcontrib.spelling
# https://docs.ubuntu.com/styleguide/en/
spelling_warning = True

# Uses case-independent spelling matches from doc/rtd/spelling_word_list.txt
spelling_filters = ["spelling.WordListFilter"]

# The suffix of source filenames.
source_suffix = ".rst"

Expand Down
79 changes: 79 additions & 0 deletions doc/rtd/spelling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import pathlib
import re

import enchant


class WordListFilter(enchant.tokenize.Filter):
    """Tokenizer filter that ignores allowlisted words and patterns.

    Two files located next to this module drive the filter:

    - ``spelling_word_list.txt``: lowercase words, sorted, one per line.
      A token is ignored when its lowercased form is in this list.
    - ``spelling_regex_list.txt``: regular expressions, one per line.
      A token is ignored when ``re.search`` finds any of them in it.

    Tokens containing "-" or "/" are additionally split into sub-tokens
    on those characters.
    """

    # File names are resolved relative to the directory of this module.
    word_list = "spelling_word_list.txt"
    regex_list = "spelling_regex_list.txt"

    def __init__(self, *args, **kwargs):
        """Load and validate the allowlist files.

        All arguments are forwarded unchanged to the parent filter.

        Raises:
            Exception: if the word list contains uppercase characters or
                is not sorted (see ``_validate_lines``).
            OSError: if either allowlist file cannot be opened.
        """
        # Keyword arguments must be unpacked with "**"; a single "*" would
        # pass the dict's keys to the parent as positional arguments.
        super().__init__(*args, **kwargs)
        directory = pathlib.Path(__file__).parent

        with open(directory / self.word_list, encoding="utf-8") as f:
            lines = f.read().splitlines()
        self._validate_lines(lines)
        self.word_set = set(lines)
        print(f"Loaded {self.word_list}: {lines})")

        with open(directory / self.regex_list, encoding="utf-8") as f:
            regex_lines = f.read().splitlines()
        self.regex_set = set(regex_lines)
        print(f"Loaded {self.regex_list}: {regex_lines}")

    def _validate_lines(self, lines):
        """Check that the word list is lowercase and sorted.

        Args:
            lines: the entries of the word list file, in file order.

        Raises:
            Exception: if any entry contains an uppercase character, or if
                the entries are not in sorted order. The unsorted message
                shows the first out-of-place entry and its neighbours.
        """
        if any(line != line.lower() for line in lines):
            raise Exception(
                f"Uppercase characters in {self.word_list} detected. "
                "Please use lowercase characters for legibility."
            )

        expected = sorted(lines)
        if lines == expected:
            return

        # Index of the first entry that differs from the sorted order.
        idx = next(
            i
            for i, (actual, wanted) in enumerate(zip(lines, expected))
            if actual != wanted
        )
        previous_item = lines[idx - 1] if idx > 0 else None
        first_misordered = lines[idx]
        next_item = lines[idx + 1] if idx + 1 < len(lines) else None
        unordered = (
            f"[..., {previous_item}, {first_misordered}, "
            f"{next_item}, ...]"
        )
        raise Exception(
            f"Unsorted {self.word_list} detected. "
            f"Please sort for legibility. Unordered list: {unordered}"
        )

    def _in_word_list(self, word):
        """Return True if the lowercased word is in spelling_word_list.txt."""
        return word.lower() in self.word_set

    def _in_word_regex(self, word):
        """Return True if any pattern in spelling_regex_list.txt matches.

        Patterns are applied with ``re.search``, so they may match anywhere
        in the word unless the pattern itself is anchored.
        """
        return any(re.search(regex, word) for regex in self.regex_set)

    def _skip(self, word):
        """Return True for words allowed by either allowlist file."""
        return self._in_word_list(word) or self._in_word_regex(word)

    def _split(self, word):
        """Split a word into sub-tokens on "-" and "/".

        Yields ``(token, i)`` for every sub-token that is not allowlisted,
        where ``i`` is the sub-token's position in the split result.

        A word containing neither separator yields nothing.
        """
        # NOTE(review): `i` is the index of the sub-token, not a character
        # offset into `word` - confirm this is what the consumer expects.
        # NOTE(review): confirm that yielding nothing for words without a
        # separator is the intended interaction with the parent filter.
        if "-" in word or "/" in word:
            for i, token in enumerate(re.split("-|/", word)):
                if self._skip(token):
                    continue
                yield token, i
14 changes: 14 additions & 0 deletions doc/rtd/spelling_regex_list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
\.py$
\.sources$
\.list$
\.yml$
\.yaml$
cloud-init
ami\-
ubuntu\:
IPv[46]
^/
$/
ecdsa-sha2-nistp
ed25519
1.0/config/user
Loading

0 comments on commit 305ec6b

Please sign in to comment.