diff --git a/crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py b/crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py new file mode 100644 index 0000000000000..97c46925ebff2 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py @@ -0,0 +1,3 @@ +def foo # comment +def bar(): ... +def baz diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 6313f2169427b..d1c9b7e2a4409 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -261,12 +261,59 @@ impl<'src> Parser<'src> { } fn node_range(&self, start: TextSize) -> TextRange { - // It's possible during error recovery that the parsing didn't consume any tokens. In that case, - // `last_token_end` still points to the end of the previous token but `start` is the start of the current token. - // Calling `TextRange::new(start, self.last_token_end)` would panic in that case because `start > end`. - // This path "detects" this case and creates an empty range instead. - if self.node_start() == start { - TextRange::empty(start) + // It's possible during error recovery that the parsing didn't consume any tokens. In that + // case, `last_token_end` still points to the end of the previous token but `start` is the + // start of the current token. Calling `TextRange::new(start, self.last_token_end)` would + // panic in that case because `start > end`. This path "detects" this case and creates an + // empty range instead. + // + // The reason it's `<=` instead of just `==` is because there could be whitespaces between + // the two tokens. For example: + // + // ```python + // # last token end + // # | current token (newline) start + // # v v + // def foo \n + // # ^ + // # assume there's trailing whitespace here + // ``` + // + // Or, there could tokens that are considered "trivia" and thus aren't emitted by the token + // source. These are comments and non-logical newlines. For example: + // + // ```python + // # last token end + // # v + // def foo # comment\n + // # ^ current token (newline) start + // ``` + // + // In either of the above cases, there's a "gap" between the end of the last token and start + // of the current token. + if self.last_token_end <= start { + // We need to create an empty range at the last token end instead of the start because + // otherwise this node range will fall outside the range of it's parent node. Taking + // the above example: + // + // ```python + // if True: + // # function start + // # | function end + // # v v + // def foo # comment + // # ^ current token start + // ``` + // + // Here, the current token start is the start of parameter range but the function ends + // at `foo`. Even if there's a function body, the range of parameters would still be + // before the comment. + + // test_err node_range_with_gaps + // def foo # comment + // def bar(): ... + // def baz + TextRange::empty(self.last_token_end) } else { TextRange::new(start, self.last_token_end) } diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index 4edf574021253..9a768bf9179c5 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -1663,23 +1663,19 @@ impl<'src> Parser<'src> { // x = 10 let type_params = self.try_parse_type_params(); + // test_ok function_def_parameter_range + // def foo( + // first: int, + // second: int, + // ) -> int: ... + // test_err function_def_unclosed_parameter_list // def foo(a: int, b: // def foo(): // return 42 // def foo(a: int, b: str // x = 10 - let parameters_start = self.node_start(); - self.expect(TokenKind::Lpar); - let mut parameters = self.parse_parameters(FunctionKind::FunctionDef); - self.expect(TokenKind::Rpar); - - // test_ok function_def_parameter_range - // def foo( - // first: int, - // second: int, - // ) -> int: ... - parameters.range = self.node_range(parameters_start); + let parameters = self.parse_parameters(FunctionKind::FunctionDef); let returns = if self.eat(TokenKind::Rarrow) { if self.at_expr() { @@ -2844,19 +2840,16 @@ impl<'src> Parser<'src> { pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters { let start = self.node_start(); + if matches!(function_kind, FunctionKind::FunctionDef) { + self.expect(TokenKind::Lpar); + } + // TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping` // has where if there are multiple kwarg or vararg, the last one will win and // the parser will drop the previous ones. Another thing is the vararg and kwarg // uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot // recover well from `*args=(1, 2)`. - let mut parameters = ast::Parameters { - range: TextRange::default(), - posonlyargs: vec![], - args: vec![], - kwonlyargs: vec![], - vararg: None, - kwarg: None, - }; + let mut parameters = ast::Parameters::empty(TextRange::default()); let mut seen_default_param = false; // `a=10` let mut seen_positional_only_separator = false; // `/` @@ -3094,6 +3087,10 @@ impl<'src> Parser<'src> { self.add_error(ParseErrorType::ExpectedKeywordParam, star_range); } + if matches!(function_kind, FunctionKind::FunctionDef) { + self.expect(TokenKind::Rpar); + } + parameters.range = self.node_range(start); // test_err params_duplicate_names diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@node_range_with_gaps.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@node_range_with_gaps.py.snap new file mode 100644 index 0000000000000..1456d1a7a8ede --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@node_range_with_gaps.py.snap @@ -0,0 +1,122 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py +--- +## AST + +``` +Module( + ModModule { + range: 0..41, + body: [ + FunctionDef( + StmtFunctionDef { + range: 0..7, + is_async: false, + decorator_list: [], + name: Identifier { + id: "foo", + range: 4..7, + }, + type_params: None, + parameters: Parameters { + range: 7..7, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [], + }, + ), + FunctionDef( + StmtFunctionDef { + range: 18..32, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 22..25, + }, + type_params: None, + parameters: Parameters { + range: 25..27, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Expr( + StmtExpr { + range: 29..32, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 29..32, + }, + ), + }, + ), + ], + }, + ), + FunctionDef( + StmtFunctionDef { + range: 33..40, + is_async: false, + decorator_list: [], + name: Identifier { + id: "baz", + range: 37..40, + }, + type_params: None, + parameters: Parameters { + range: 40..40, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | def foo # comment + | ^ Syntax Error: Expected '(', found newline +2 | def bar(): ... +3 | def baz + | + + + | +1 | def foo # comment +2 | def bar(): ... + | ^^^ Syntax Error: Expected ')', found 'def' +3 | def baz + | + + + | +1 | def foo # comment +2 | def bar(): ... +3 | def baz + | ^ Syntax Error: Expected '(', found newline + | + + + | +2 | def bar(): ... +3 | def baz + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@unterminated_fstring_newline_recovery.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@unterminated_fstring_newline_recovery.py.snap index 06cf6d494b53c..2807538b954af 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@unterminated_fstring_newline_recovery.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@unterminated_fstring_newline_recovery.py.snap @@ -167,7 +167,7 @@ Module( conversion: None, format_spec: Some( FStringFormatSpec { - range: 43..43, + range: 42..42, elements: [], }, ),