# Use Tokens from parsed type annotation or parsed source (#11740)
## Summary

This PR fixes a bug where the checker would request tokens at an offset that
is invalid with respect to the source code.

Taking the source code from the linked issue as an example:
```py
relese_version :"0.0is 64"
```

Now, this isn't really a valid type annotation, but that doesn't matter here:
whether or not the annotation is valid, Ruff shouldn't panic.

The checker would visit the parsed type annotation (`0.0is 64`) and try to
detect any violations. Certain rule logic requests the tokens for that
annotation, but the request would fail because, for the original source code,
the lexer only produces a single `String` token for the annotation. This
worked before because the lexer was invoked anew for each rule.

The solution is to store the parsed type annotation on the checker while it's
in a typing context and, when available, use the tokens from that instead.
This is enforced by a new API on the checker for retrieving the tokens.
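
The fallback can be sketched as a standalone Rust program. The types here are simplified stand-ins for Ruff's `Parsed<ModModule>`, `Parsed<ModExpression>`, and `Tokens` (the real types hold a full token stream and AST), so this illustrates the pattern rather than the actual API:

```rust
// Simplified stand-in for Ruff's `Parsed<_>` output: just a token list.
struct Parsed {
    tokens: Vec<&'static str>,
}

struct Checker<'a> {
    /// Tokens of the whole parsed module.
    parsed: &'a Parsed,
    /// Set while visiting a string type annotation, cleared afterwards.
    parsed_type_annotation: Option<&'a Parsed>,
}

impl<'a> Checker<'a> {
    /// Prefer the tokens of the annotation currently being visited;
    /// fall back to the tokens of the parsed source code.
    fn tokens(&self) -> &'a [&'static str] {
        match self.parsed_type_annotation {
            Some(annotation) => &annotation.tokens,
            None => &self.parsed.tokens,
        }
    }
}

fn main() {
    let module = Parsed {
        tokens: vec!["variable", ":", "\"123 is not y\""],
    };
    let annotation = Parsed {
        tokens: vec!["123", "is", "not", "y"],
    };

    let mut checker = Checker {
        parsed: &module,
        parsed_type_annotation: None,
    };
    // Outside an annotation, rules see the module's tokens, where the
    // annotation is a single string token.
    assert_eq!(checker.tokens()[2], "\"123 is not y\"");

    // Inside the annotation, rules see its own tokens, so offsets into
    // the annotation are valid and no panic can occur.
    checker.parsed_type_annotation = Some(&annotation);
    assert_eq!(checker.tokens(), ["123", "is", "not", "y"]);
}
```

Because every rule goes through `tokens()`, clearing `parsed_type_annotation` after visiting the annotation restores the module-level view automatically.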

However, this means there are now two ways to get the tokens via the checker
API. A follow-up PR (#11741) will restrict this by exposing only `tokens` and
`comment_ranges` as methods and limiting access to the parsed source code.

fixes: #11736 

## Test Plan

- [x] Add a test case for `F632` rule and update the snapshot
- [x] Check all affected rules
- [x] No ecosystem changes
dhruvmanila authored Jun 5, 2024
1 parent eed6d78 commit b021b5b
Showing 21 changed files with 159 additions and 31 deletions.
3 changes: 3 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pyflakes/F632.py
@@ -29,3 +29,6 @@
# Regression test for
if values[1is not None ] is not '-':
pass

# Regression test for https://github.com/astral-sh/ruff/issues/11736
variable: "123 is not y"
4 changes: 4 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pyupgrade/UP012.py
@@ -80,3 +80,7 @@
# Regression test for: https://github.com/astral-sh/ruff/issues/7455#issuecomment-1722459882
def _match_ignore(line):
input=stdin and'\n'.encode()or None

# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: '"foo".encode("utf-8")'
@@ -0,0 +1,4 @@
# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: 'open("foo", "r")'

@@ -125,3 +125,7 @@
'Hello %s' % bar.baz

'Hello %s' % bar['bop']

# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: "'%s + %s' % (1, 2)"
@@ -265,3 +265,7 @@ async def c():

# The call should be removed, but the string itself should remain.
"".format(self.project)

# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: "'{} + {}'.format(x, y)"
19 changes: 17 additions & 2 deletions crates/ruff_linter/src/checkers/ast/mod.rs
@@ -49,7 +49,7 @@ use ruff_python_ast::{helpers, str, visitor, PySourceType};
use ruff_python_codegen::{Generator, Stylist};
use ruff_python_index::Indexer;
use ruff_python_parser::typing::{parse_type_annotation, AnnotationKind};
use ruff_python_parser::Parsed;
use ruff_python_parser::{Parsed, Tokens};
use ruff_python_semantic::all::{DunderAllDefinition, DunderAllFlags};
use ruff_python_semantic::analyze::{imports, typing};
use ruff_python_semantic::{
@@ -176,8 +176,10 @@ impl ExpectedDocstringKind {
}

pub(crate) struct Checker<'a> {
/// The parsed [`Parsed`].
/// The [`Parsed`] output for the source code.
parsed: &'a Parsed<ModModule>,
/// The [`Parsed`] output for the type annotation the checker is currently in.
parsed_type_annotation: Option<&'a Parsed<ModExpression>>,
/// The [`Path`] to the file under analysis.
path: &'a Path,
/// The [`Path`] to the package containing the current file.
@@ -243,6 +245,7 @@ impl<'a> Checker<'a> {
) -> Checker<'a> {
Checker {
parsed,
parsed_type_annotation: None,
settings,
noqa_line_for,
noqa,
@@ -328,6 +331,16 @@ impl<'a> Checker<'a> {
self.parsed
}

/// Returns the [`Tokens`] for the parsed type annotation if the checker is in a typing context
/// or the parsed source code.
pub(crate) fn tokens(&self) -> &'a Tokens {
if let Some(parsed_type_annotation) = self.parsed_type_annotation {
parsed_type_annotation.tokens()
} else {
self.parsed.tokens()
}
}

/// The [`Locator`] for the current file, which enables extraction of source code from byte
/// offsets.
pub(crate) const fn locator(&self) -> &'a Locator<'a> {
@@ -2160,6 +2173,7 @@ impl<'a> Checker<'a> {
parse_type_annotation(string_expr, self.locator.contents())
{
let parsed_annotation = allocator.alloc(parsed_annotation);
self.parsed_type_annotation = Some(parsed_annotation);

let annotation = string_expr.value.to_str();
let range = string_expr.range();
@@ -2187,6 +2201,7 @@ impl<'a> Checker<'a> {
self.semantic.flags |=
SemanticModelFlags::TYPE_DEFINITION | type_definition_flag;
self.visit_expr(parsed_annotation.expr());
self.parsed_type_annotation = None;
} else {
if self.enabled(Rule::ForwardAnnotationSyntaxError) {
self.diagnostics.push(Diagnostic::new(
@@ -96,7 +96,7 @@ pub(crate) fn invalid_literal_comparison(
{
let mut diagnostic = Diagnostic::new(IsLiteral { cmp_op: op.into() }, expr.range());
if lazy_located.is_none() {
lazy_located = Some(locate_cmp_ops(expr, checker.parsed().tokens()));
lazy_located = Some(locate_cmp_ops(expr, checker.tokens()));
}
if let Some(located_op) = lazy_located.as_ref().and_then(|located| located.get(index)) {
assert_eq!(located_op.op, *op);
36 changes: 16 additions & 20 deletions crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs
@@ -170,11 +170,10 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option<Fix> {
)
.unwrap_or(target.range())
.start();
let end =
match_token_after(checker.parsed().tokens(), target.end(), |token| {
token == TokenKind::Equal
})?
.start();
let end = match_token_after(checker.tokens(), target.end(), |token| {
token == TokenKind::Equal
})?
.start();
let edit = Edit::deletion(start, end);
Some(Fix::unsafe_edit(edit))
} else {
@@ -206,10 +205,9 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option<Fix> {
// If the expression is complex (`x = foo()`), remove the assignment,
// but preserve the right-hand side.
let start = statement.start();
let end = match_token_after(checker.parsed().tokens(), start, |token| {
token == TokenKind::Equal
})?
.start();
let end =
match_token_after(checker.tokens(), start, |token| token == TokenKind::Equal)?
.start();
let edit = Edit::deletion(start, end);
Some(Fix::unsafe_edit(edit))
} else {
@@ -228,19 +226,17 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option<Fix> {
if let Some(optional_vars) = &item.optional_vars {
if optional_vars.range() == binding.range() {
// Find the first token before the `as` keyword.
let start = match_token_before(
checker.parsed().tokens(),
item.context_expr.start(),
|token| token == TokenKind::As,
)?
.end();
let start =
match_token_before(checker.tokens(), item.context_expr.start(), |token| {
token == TokenKind::As
})?
.end();

// Find the first colon, comma, or closing bracket after the `as` keyword.
let end =
match_token_or_closing_brace(checker.parsed().tokens(), start, |token| {
token == TokenKind::Colon || token == TokenKind::Comma
})?
.start();
let end = match_token_or_closing_brace(checker.tokens(), start, |token| {
token == TokenKind::Colon || token == TokenKind::Comma
})?
.start();

let edit = Edit::deletion(start, end);
return Some(Fix::unsafe_edit(edit));
@@ -203,6 +203,8 @@ F632.py:30:4: F632 [*] Use `!=` to compare constant literals
30 |-if values[1is not None ] is not '-':
30 |+if values[1is not None ] != '-':
31 31 | pass
32 32 |
33 33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736

F632.py:30:11: F632 [*] Use `!=` to compare constant literals
|
@@ -220,5 +222,20 @@ F632.py:30:11: F632 [*] Use `!=` to compare constant literals
30 |-if values[1is not None ] is not '-':
30 |+if values[1!= None ] is not '-':
31 31 | pass
32 32 |
33 33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736

F632.py:34:12: F632 [*] Use `!=` to compare constant literals
|
33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736
34 | variable: "123 is not y"
| ^^^^^^^^^^^^ F632
|
= help: Replace `is not` with `!=`

Safe fix
31 31 | pass
32 32 |
33 33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736
34 |-variable: "123 is not y"
34 |+variable: "123 != y"
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pyupgrade/mod.rs
@@ -58,6 +58,7 @@ mod tests {
#[test_case(Rule::QuotedAnnotation, Path::new("UP037_0.py"))]
#[test_case(Rule::QuotedAnnotation, Path::new("UP037_1.py"))]
#[test_case(Rule::RedundantOpenModes, Path::new("UP015.py"))]
#[test_case(Rule::RedundantOpenModes, Path::new("UP015_1.py"))]
#[test_case(Rule::ReplaceStdoutStderr, Path::new("UP022.py"))]
#[test_case(Rule::ReplaceUniversalNewlines, Path::new("UP021.py"))]
#[test_case(Rule::SuperCallWithParameters, Path::new("UP008.py"))]
@@ -719,7 +719,7 @@ pub(crate) fn deprecated_import(checker: &mut Checker, import_from_stmt: &StmtIm
module,
checker.locator(),
checker.stylist(),
checker.parsed().tokens(),
checker.tokens(),
checker.settings.target_version,
);

2 changes: 1 addition & 1 deletion crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs
@@ -409,7 +409,7 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F
};

let mut patches: Vec<(TextRange, FStringConversion)> = vec![];
let mut tokens = checker.parsed().tokens().in_range(call.func.range()).iter();
let mut tokens = checker.tokens().in_range(call.func.range()).iter();
let end = loop {
let Some(token) = tokens.next() else {
unreachable!("Should break from the `Tok::Dot` arm");
@@ -460,7 +460,7 @@ pub(crate) fn printf_string_formatting(
}

if let Some(prev_end) = prev_end {
for token in checker.parsed().tokens().after(prev_end) {
for token in checker.tokens().after(prev_end) {
match token.kind() {
// If we hit a right paren, we have to preserve it.
TokenKind::Rpar => {
@@ -79,7 +79,7 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall)
call,
&keyword.value,
mode.replacement_value(),
checker.parsed().tokens(),
checker.tokens(),
));
}
}
@@ -93,7 +93,7 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall)
call,
mode_param,
mode.replacement_value(),
checker.parsed().tokens(),
checker.tokens(),
));
}
}
@@ -165,7 +165,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal
diagnostic.set_fix(replace_with_bytes_literal(
checker.locator(),
call,
checker.parsed().tokens(),
checker.tokens(),
));
checker.diagnostics.push(diagnostic);
} else if let EncodingArg::Keyword(kwarg) = encoding_arg {
@@ -557,6 +557,8 @@ UP012.py:82:17: UP012 [*] Unnecessary call to `encode` as UTF-8
81 | def _match_ignore(line):
82 | input=stdin and'\n'.encode()or None
| ^^^^^^^^^^^^^ UP012
83 |
84 | # Not a valid type annotation but this test shouldn't result in a panic.
|
= help: Rewrite as bytes literal

@@ -566,5 +568,22 @@ UP012.py:82:17: UP012 [*] Unnecessary call to `encode` as UTF-8
81 81 | def _match_ignore(line):
82 |- input=stdin and'\n'.encode()or None
82 |+ input=stdin and b'\n' or None
83 83 |
84 84 | # Not a valid type annotation but this test shouldn't result in a panic.
85 85 | # Refer: https://github.com/astral-sh/ruff/issues/11736

UP012.py:86:5: UP012 [*] Unnecessary call to `encode` as UTF-8
|
84 | # Not a valid type annotation but this test shouldn't result in a panic.
85 | # Refer: https://github.com/astral-sh/ruff/issues/11736
86 | x: '"foo".encode("utf-8")'
| ^^^^^^^^^^^^^^^^^^^^^ UP012
|
= help: Rewrite as bytes literal

Safe fix
83 83 |
84 84 | # Not a valid type annotation but this test shouldn't result in a panic.
85 85 | # Refer: https://github.com/astral-sh/ruff/issues/11736
86 |-x: '"foo".encode("utf-8")'
86 |+x: 'b"foo"'
@@ -0,0 +1,18 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
---
UP015_1.py:3:5: UP015 [*] Unnecessary open mode parameters
|
1 | # Not a valid type annotation but this test shouldn't result in a panic.
2 | # Refer: https://github.com/astral-sh/ruff/issues/11736
3 | x: 'open("foo", "r")'
| ^^^^^^^^^^^^^^^^ UP015
|
= help: Remove open mode parameters

Safe fix
1 1 | # Not a valid type annotation but this test shouldn't result in a panic.
2 2 | # Refer: https://github.com/astral-sh/ruff/issues/11736
3 |-x: 'open("foo", "r")'
3 |+x: 'open("foo")'
4 4 |
@@ -978,13 +978,16 @@ UP031_0.py:125:1: UP031 [*] Use format specifiers instead of percent format
125 |+'Hello {}'.format(bar.baz)
126 126 |
127 127 | 'Hello %s' % bar['bop']
128 128 |

UP031_0.py:127:1: UP031 [*] Use format specifiers instead of percent format
|
125 | 'Hello %s' % bar.baz
126 |
127 | 'Hello %s' % bar['bop']
| ^^^^^^^^^^^^^^^^^^^^^^^ UP031
128 |
129 | # Not a valid type annotation but this test shouldn't result in a panic.
|
= help: Replace with format specifiers

@@ -994,3 +997,22 @@ UP031_0.py:127:1: UP031 [*] Use format specifiers instead of percent format
126 126 |
127 |-'Hello %s' % bar['bop']
127 |+'Hello {}'.format(bar['bop'])
128 128 |
129 129 | # Not a valid type annotation but this test shouldn't result in a panic.
130 130 | # Refer: https://github.com/astral-sh/ruff/issues/11736

UP031_0.py:131:5: UP031 [*] Use format specifiers instead of percent format
|
129 | # Not a valid type annotation but this test shouldn't result in a panic.
130 | # Refer: https://github.com/astral-sh/ruff/issues/11736
131 | x: "'%s + %s' % (1, 2)"
| ^^^^^^^^^^^^^^^^^^ UP031
|
= help: Replace with format specifiers

Unsafe fix
128 128 |
129 129 | # Not a valid type annotation but this test shouldn't result in a panic.
130 130 | # Refer: https://github.com/astral-sh/ruff/issues/11736
131 |-x: "'%s + %s' % (1, 2)"
131 |+x: "'{} + {}'.format(1, 2)"
@@ -1361,6 +1361,8 @@ UP032_0.py:267:1: UP032 [*] Use f-string instead of `format` call
266 | # The call should be removed, but the string itself should remain.
267 | "".format(self.project)
| ^^^^^^^^^^^^^^^^^^^^^^^ UP032
268 |
269 | # Not a valid type annotation but this test shouldn't result in a panic.
|
= help: Convert to f-string

@@ -1370,3 +1372,22 @@ UP032_0.py:267:1: UP032 [*] Use f-string instead of `format` call
266 266 | # The call should be removed, but the string itself should remain.
267 |-"".format(self.project)
267 |+""
268 268 |
269 269 | # Not a valid type annotation but this test shouldn't result in a panic.
270 270 | # Refer: https://github.com/astral-sh/ruff/issues/11736

UP032_0.py:271:5: UP032 [*] Use f-string instead of `format` call
|
269 | # Not a valid type annotation but this test shouldn't result in a panic.
270 | # Refer: https://github.com/astral-sh/ruff/issues/11736
271 | x: "'{} + {}'.format(x, y)"
| ^^^^^^^^^^^^^^^^^^^^^^ UP032
|
= help: Convert to f-string

Safe fix
268 268 |
269 269 | # Not a valid type annotation but this test shouldn't result in a panic.
270 270 | # Refer: https://github.com/astral-sh/ruff/issues/11736
271 |-x: "'{} + {}'.format(x, y)"
271 |+x: "f'{x} + {y}'"
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs
@@ -216,7 +216,7 @@ fn create_fix(
range,
kind,
locator,
checker.parsed().tokens(),
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());
@@ -210,7 +210,7 @@ impl<'a> StringLiteralDisplay<'a> {
self.range(),
*sequence_kind,
locator,
checker.parsed().tokens(),
checker.tokens(),
elements,
)?;
assert_eq!(analyzed_sequence.len(), self.elts.len());
