Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create binary wheels with mypyc #242

Merged
merged 5 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Limit inline nesting to prevent mypyc stack overflow
  • Loading branch information
hukkin committed Nov 27, 2024
commit 4c2c363e57b01dc4b9ae6926dbe71c8a91cd1223
44 changes: 34 additions & 10 deletions src/tomli/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from collections.abc import Iterable
import string
import sys
from types import MappingProxyType
from typing import IO, Any, Final, NamedTuple
import warnings
Expand All @@ -20,6 +21,17 @@
)
from ._types import Key, ParseFloat, Pos

# Inline tables/arrays are implemented using recursion. Pathologically
# nested documents cause pure Python to raise RecursionError (which is OK),
# but mypyc binary wheels will crash unrecoverably (not OK). According to
# mypyc docs this will be fixed in the future:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until mypyc's fix lands, this library has to limit recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it allows more nesting than pure Python does, yet still
# seems to be far lower than the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))

# Neither of these sets include quotation mark or backslash. They are
Expand Down Expand Up @@ -393,7 +405,7 @@ def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
def key_value_rule(
src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
pos, key, value = parse_key_value_pair(src, pos, parse_float)
pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
key_parent, key_stem = key[:-1], key[-1]
abs_key_parent = header + key_parent

Expand Down Expand Up @@ -425,7 +437,7 @@ def key_value_rule(


def parse_key_value_pair(
src: str, pos: Pos, parse_float: ParseFloat
src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
pos, key = parse_key(src, pos)
try:
Expand All @@ -436,7 +448,7 @@ def parse_key_value_pair(
raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
pos += 1
pos = skip_chars(src, pos, TOML_WS)
pos, value = parse_value(src, pos, parse_float)
pos, value = parse_value(src, pos, parse_float, nest_lvl)
return pos, key, value


Expand Down Expand Up @@ -479,15 +491,17 @@ def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
return parse_basic_str(src, pos, multiline=False)


def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]:
def parse_array(
src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list]:
pos += 1
array: list = []

pos = skip_comments_and_array_ws(src, pos)
if src.startswith("]", pos):
return pos + 1, array
while True:
pos, val = parse_value(src, pos, parse_float)
pos, val = parse_value(src, pos, parse_float, nest_lvl)
array.append(val)
pos = skip_comments_and_array_ws(src, pos)

Expand All @@ -503,7 +517,9 @@ def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]
return pos + 1, array


def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]:
def parse_inline_table(
src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict]:
pos += 1
nested_dict = NestedDict()
flags = Flags()
Expand All @@ -512,7 +528,7 @@ def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos
if src.startswith("}", pos):
return pos + 1, nested_dict.dict
while True:
pos, key, value = parse_key_value_pair(src, pos, parse_float)
pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
key_parent, key_stem = key[:-1], key[-1]
if flags.is_(key, Flags.FROZEN):
raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
Expand Down Expand Up @@ -654,8 +670,16 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:


def parse_value( # noqa: C901
src: str, pos: Pos, parse_float: ParseFloat
src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
if nest_lvl > MAX_INLINE_NESTING:
# Pure Python should have raised RecursionError already.
# This ensures mypyc binaries eventually do the same.
raise RecursionError(
"TOML inline arrays/tables are nested more than the allowed"
f" {MAX_INLINE_NESTING} levels"
)

try:
char: str | None = src[pos]
except IndexError:
Expand Down Expand Up @@ -685,11 +709,11 @@ def parse_value( # noqa: C901

# Arrays
if char == "[":
return parse_array(src, pos, parse_float)
return parse_array(src, pos, parse_float, nest_lvl + 1)

# Inline tables
if char == "{":
return parse_inline_table(src, pos, parse_float)
return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

# Dates and times
datetime_match = RE_DATETIME.match(src, pos)
Expand Down
21 changes: 21 additions & 0 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import datetime
from decimal import Decimal as D
from pathlib import Path
import sys
import tempfile
import unittest

Expand Down Expand Up @@ -103,7 +104,27 @@ def test_inline_array_recursion_limit(self):
recursive_array_toml = "arr = " + nest_count * "[" + nest_count * "]"
tomllib.loads(recursive_array_toml)

nest_count = sys.getrecursionlimit() + 2
recursive_array_toml = "arr = " + nest_count * "[" + nest_count * "]"
with self.assertRaisesRegex(
RecursionError,
r"maximum recursion depth exceeded"
r"|"
r"TOML inline arrays/tables are nested more than the allowed [0-9]+ levels",
):
tomllib.loads(recursive_array_toml)

def test_inline_table_recursion_limit(self):
nest_count = 310
recursive_table_toml = nest_count * "key = {" + nest_count * "}"
tomllib.loads(recursive_table_toml)

nest_count = sys.getrecursionlimit() + 2
recursive_table_toml = nest_count * "key = {" + nest_count * "}"
with self.assertRaisesRegex(
RecursionError,
r"maximum recursion depth exceeded"
r"|"
r"TOML inline arrays/tables are nested more than the allowed [0-9]+ levels",
):
tomllib.loads(recursive_table_toml)
Loading