# Copyright Kevin Deldycke <kevin@deldycke.com> and contributors.
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

from __future__ import annotations

import sys
import tarfile
from importlib import metadata
from operator import itemgetter
from pathlib import Path

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib  # type: ignore[import-not-found]

import requests
from boltons.strutils import camel2under

# boltons' ``issubclass`` is a tolerant variant of the builtin: it returns False
# instead of raising ``TypeError`` when the first argument is not a class, which
# lets ``collect_classes`` below blindly filter a module's ``__dict__``.
from boltons.typeutils import issubclass
from pygments.filter import Filter
from pygments.filters import get_filter_by_name
from pygments.formatter import Formatter
from pygments.formatters import get_formatter_by_name
from pygments.lexer import Lexer
from pygments.lexers import find_lexer_class_by_name, get_lexer_by_name

from click_extra import pygments as extra_pygments
from click_extra.pygments import DEFAULT_TOKEN_TYPE, collect_session_lexers

PROJECT_ROOT = Path(__file__).parent.parent


def test_ansi_lexers_candidates(tmp_path):
"""Look into Pygments test suite to find all ANSI lexers candidates.
Good candidates for ANSI colorization are lexers that are producing
``Generic.Output`` tokens, which are often used by REPL-like and scripting
terminal to render text in a console.
The list is manually maintained in Click Extra code, and this test is here to
detect new candidates from new releases of Pygments.
.. attention::
The Pygments source code is downloaded from GitHub in the form of an archive,
and extracted in a temporary folder.
The version of Pygments used for this test is the one installed in the current
environment.
.. danger:: Security check
While extracting the archive, we double check we are not fed an archive
exploiting relative ``..`` or ``.`` path attacks.
"""
    version = metadata.version("pygments")
    source_url = (
        f"https://github.com/pygments/pygments/archive/refs/tags/{version}.tar.gz"
    )
    base_folder = f"pygments-{version}"
    archive_path = tmp_path / f"{base_folder}.tar.gz"

    # Download the source distribution from GitHub.
    with requests.get(source_url) as response:
        assert response.ok
        archive_path.write_bytes(response.content)
    assert archive_path.exists()
    assert archive_path.is_file()
    assert archive_path.stat().st_size > 0

    # Locations of lexer artifacts in the test suite.
    parser_token_traces = {
        str(tmp_path / base_folder / "tests" / "examplefiles" / "*" / "*.output"),
        str(tmp_path / base_folder / "tests" / "snippets" / "*" / "*.txt"),
    }

    # Browse the downloaded package to find the test suite, and inspect the
    # traces of parsed tokens used as gold masters for lexer tests.
    lexer_candidates = set()
    with tarfile.open(archive_path, "r:gz") as tar:
        for member in tar.getmembers():
            # Skip non-test files.
            if not member.isfile():
                continue

            # XXX Security check: guard against relative ``..`` or ``.`` path
            # attacks on extraction.
            filename = tmp_path.joinpath(member.name).resolve()
            assert filename.is_relative_to(tmp_path)

            # Skip files that are not part of the test suite data.
            match = False
            for pattern in parser_token_traces:
                if filename.match(pattern):
                    match = True
                    break
            if not match:
                continue

            file = tar.extractfile(member)
            # Skip empty files.
            if not file:
                continue

            content = file.read().decode("utf-8")

            # Only keep lexers whose traces contain generic, terminal-like
            # output tokens.
            if f" {'.'.join(DEFAULT_TOKEN_TYPE)}\n" not in content:
                continue

            # Extract the lexer alias from the test file path.
            lexer_candidates.add(filename.parent.name)

    assert lexer_candidates
    lexer_classes = {find_lexer_class_by_name(alias) for alias in lexer_candidates}

    # We cannot test for strict equality yet, as some ANSI-ready lexers do not
    # have any test artifacts producing ``Generic.Output`` tokens.
    assert lexer_classes <= set(collect_session_lexers())


def collect_classes(klass, prefix="Ansi"):
    """Returns all classes defined in ``click_extra.pygments`` that are subclasses
    of ``klass``, and whose name starts with the provided ``prefix``."""
    klasses = {}
    for name, var in extra_pygments.__dict__.items():
        if issubclass(var, klass) and name.startswith(prefix):
            klasses[name] = var
    return klasses
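
# A hypothetical illustration of the helper above: assuming ``click_extra.pygments``
# defines an ``AnsiHtmlFormatter`` class, ``collect_classes(Formatter)`` would
# return a mapping like ``{"AnsiHtmlFormatter": AnsiHtmlFormatter}``.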


def get_pyproject_section(*section_path: str) -> dict[str, str]:
    """Descends into the TOML tree of ``pyproject.toml`` to reach the value
    specified by ``section_path``."""
    toml_path = PROJECT_ROOT.joinpath("pyproject.toml").resolve()
    section: dict = tomllib.loads(toml_path.read_text(encoding="utf-8"))
    for section_id in section_path:
        section = section[section_id]
    return section
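
# For instance, ``get_pyproject_section("project", "entry-points", "pygments.lexers")``
# descends to the table declared under ``[project.entry-points."pygments.lexers"]``
# in ``pyproject.toml``, which is how the entry point tests below use it.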


def check_entry_points(entry_points: dict[str, str], *section_path: str) -> None:
    """Check the provided entry points are exactly the ones declared in
    ``pyproject.toml`` at ``section_path``."""
    entry_points = dict(sorted(entry_points.items(), key=itemgetter(0)))
    project_entry_points = get_pyproject_section(*section_path)
    assert project_entry_points == entry_points


def test_formatter_entry_points():
    entry_points = {}
    for name in collect_classes(Formatter):
        entry_id = camel2under(name).replace("_", "-")
        entry_points[entry_id] = f"click_extra.pygments:{name}"

    check_entry_points(entry_points, "project", "entry-points", "pygments.formatters")
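
# For example, a hypothetical ``AnsiHtmlFormatter`` class would map to the
# ``ansi-html-formatter`` entry point ID: ``camel2under`` first yields
# ``"ansi_html_formatter"``, then underscores are swapped for dashes.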


def test_filter_entry_points():
    entry_points = {}
    for name in collect_classes(Filter):
        entry_id = camel2under(name).replace("_", "-")
        entry_points[entry_id] = f"click_extra.pygments:{name}"

    check_entry_points(entry_points, "project", "entry-points", "pygments.filters")


def test_lexer_entry_points():
    entry_points = {}
    for lexer in collect_session_lexers():
        # Check an ANSI lexer variant is available for import from Click Extra.
        ansi_lexer_id = f"Ansi{lexer.__name__}"
        assert ansi_lexer_id in extra_pygments.__dict__

        # Transform the ANSI lexer class ID into an entry point ID, dropping the
        # ``lexer`` word (e.g. a hypothetical ``AnsiBashSessionLexer`` becomes
        # ``ansi-bash-session``).
        entry_id = "-".join(
            w for w in camel2under(ansi_lexer_id).split("_") if w != "lexer"
        )

        # Generate the lexer entry point.
        class_path = f"click_extra.pygments:{ansi_lexer_id}"
        entry_points[entry_id] = class_path

    check_entry_points(entry_points, "project", "entry-points", "pygments.lexers")


def test_registered_formatters():
    for klass in collect_classes(Formatter).values():
        for alias in klass.aliases:
            get_formatter_by_name(alias)


def test_registered_filters():
    for name in collect_classes(Filter):
        entry_id = camel2under(name).replace("_", "-")
        get_filter_by_name(entry_id)


def test_registered_lexers():
    for klass in collect_classes(Lexer).values():
        for alias in klass.aliases:
            get_lexer_by_name(alias)


def test_ansi_lexers_doc():
    doc_content = PROJECT_ROOT.joinpath("docs/pygments.md").read_text(encoding="utf-8")
    for lexer in collect_session_lexers():
        assert lexer.__name__ in doc_content