From e93f378635ddda6b0f26ff47cbe54109c3c20fbc Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 5 May 2023 09:37:56 +0200 Subject: [PATCH] Refactor whitespace around operator (#4223) --- .../missing_whitespace_after_keyword.rs | 9 +- .../missing_whitespace_around_operator.rs | 259 +++++++++++------- ...ules__pycodestyle__tests__E225_E22.py.snap | 72 ++++- ...ules__pycodestyle__tests__E226_E22.py.snap | 19 ++ crates/ruff_python_ast/src/token_kind.rs | 80 ++---- 5 files changed, 269 insertions(+), 170 deletions(-) diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs index a754becd5445d..6460713f55960 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs @@ -1,11 +1,9 @@ -use itertools::Itertools; -use ruff_text_size::TextRange; - use crate::checkers::logical_lines::LogicalLinesContext; use crate::rules::pycodestyle::rules::logical_lines::LogicalLine; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::token_kind::TokenKind; +use ruff_text_size::TextRange; #[violation] pub struct MissingWhitespaceAfterKeyword; @@ -22,7 +20,10 @@ pub(crate) fn missing_whitespace_after_keyword( line: &LogicalLine, context: &mut LogicalLinesContext, ) { - for (tok0, tok1) in line.tokens().iter().tuple_windows() { + for window in line.tokens().windows(2) { + let tok0 = &window[0]; + let tok1 = &window[1]; + let tok0_kind = tok0.kind(); let tok1_kind = tok1.kind(); diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs index edd064fcd1955..5200a8bdf55fd 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs @@ -1,10 +1,9 @@ use crate::checkers::logical_lines::LogicalLinesContext; -use ruff_diagnostics::Violation; +use crate::rules::pycodestyle::rules::logical_lines::{LogicalLine, LogicalLineToken}; +use ruff_diagnostics::{DiagnosticKind, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::token_kind::TokenKind; -use ruff_text_size::{TextRange, TextSize}; - -use crate::rules::pycodestyle::rules::logical_lines::LogicalLine; +use ruff_text_size::TextRange; // E225 #[violation] @@ -56,131 +55,179 @@ pub(crate) fn missing_whitespace_around_operator( line: &LogicalLine, context: &mut LogicalLinesContext, ) { - #[derive(Copy, Clone, Eq, PartialEq)] - enum NeedsSpace { - Yes, - No, - Unset, - } - - let mut needs_space_main = NeedsSpace::No; - let mut needs_space_aux = NeedsSpace::Unset; - let mut prev_end_aux = TextSize::default(); let mut parens = 0u32; - let mut prev_type: TokenKind = TokenKind::EndOfFile; - let mut prev_end = TextSize::default(); + let mut prev_token: Option<&LogicalLineToken> = None; + let mut tokens = line.tokens().iter().peekable(); - for token in line.tokens() { + while let Some(token) = tokens.next() { let kind = token.kind(); - if kind.is_skip_comment() { + if kind.is_trivia() { continue; } match kind { TokenKind::Lpar | TokenKind::Lambda => parens += 1, - TokenKind::Rpar => parens -= 1, + TokenKind::Rpar => parens = parens.saturating_sub(1), _ => {} }; - let needs_space = needs_space_main == NeedsSpace::Yes - || needs_space_aux != NeedsSpace::Unset - || prev_end_aux != TextSize::new(0); - if needs_space { - if token.start() > prev_end { - if needs_space_main != NeedsSpace::Yes && needs_space_aux != NeedsSpace::Yes { + let needs_space = if kind == TokenKind::Equal && parens > 0 { + // Allow keyword args or defaults: foo(bar=None). + NeedsSpace::No + } else if kind == TokenKind::Slash { + // Tolerate the "/" operator in function definition + // For more info see PEP570 + + // `def f(a, /, b):` or `def f(a, b, /):` or `f = lambda a, /:` + // ^ ^ ^ + let slash_in_func = matches!( + tokens.peek().map(|t| t.kind()), + Some(TokenKind::Comma | TokenKind::Rpar | TokenKind::Colon) + ); + + NeedsSpace::from(!slash_in_func) + } else if kind.is_unary() || kind == TokenKind::DoubleStar { + let is_binary = prev_token.map_or(false, |prev_token| { + let prev_kind = prev_token.kind(); + + // Check if the operator is used as a binary operator. + // Allow unary operators: -123, -x, +1. + // Allow argument unpacking: foo(*args, **kwargs) + matches!( + prev_kind, + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace + ) || !(prev_kind.is_operator() + || prev_kind.is_keyword() + || prev_kind.is_soft_keyword()) + }); + + if is_binary { + if kind == TokenKind::DoubleStar { + // Enforce consistent spacing, but don't enforce whitespaces. + NeedsSpace::Optional + } else { + NeedsSpace::Yes + } + } else { + NeedsSpace::No + } + } else if is_whitespace_needed(kind) { + NeedsSpace::Yes + } else { + NeedsSpace::No + }; + + if needs_space != NeedsSpace::No { + let has_leading_trivia = prev_token.map_or(true, |prev| { + prev.end() < token.start() || prev.kind().is_trivia() + }); + + let has_trailing_trivia = tokens.peek().map_or(true, |next| { + token.end() < next.start() || next.kind().is_trivia() + }); + + match (has_leading_trivia, has_trailing_trivia) { + // Operator with trailing but no leading space, enforce consistent spacing + (false, true) => { context.push( MissingWhitespaceAroundOperator, - TextRange::empty(prev_end_aux), + TextRange::empty(token.start()), ); } - needs_space_main = NeedsSpace::No; - needs_space_aux = NeedsSpace::Unset; - prev_end_aux = TextSize::new(0); - } else if kind == TokenKind::Greater - && matches!(prev_type, TokenKind::Less | TokenKind::Minus) - { - // Tolerate the "<>" operator, even if running Python 3 - // Deal with Python 3's annotated return value "->" - } else if prev_type == TokenKind::Slash - && matches!(kind, TokenKind::Comma | TokenKind::Rpar | TokenKind::Colon) - || (prev_type == TokenKind::Rpar && kind == TokenKind::Colon) - { - // Tolerate the "/" operator in function definition - // For more info see PEP570 - } else { - if needs_space_main == NeedsSpace::Yes || needs_space_aux == NeedsSpace::Yes { - context.push(MissingWhitespaceAroundOperator, TextRange::empty(prev_end)); - } else if prev_type != TokenKind::DoubleStar { - if prev_type == TokenKind::Percent { - context.push( - MissingWhitespaceAroundModuloOperator, - TextRange::empty(prev_end_aux), - ); - } else if !prev_type.is_arithmetic() { - context.push( - MissingWhitespaceAroundBitwiseOrShiftOperator, - TextRange::empty(prev_end_aux), - ); - } else { + // Operator with leading but no trailing space, enforce consistent spacing. + (true, false) => { + context.push( + MissingWhitespaceAroundOperator, + TextRange::empty(token.end()), + ); + } + // Operator with no space, require spaces if it is required by the operator. + (false, false) => { + if needs_space == NeedsSpace::Yes { context.push( - MissingWhitespaceAroundArithmeticOperator, - TextRange::empty(prev_end_aux), + diagnostic_kind_for_operator(kind), + TextRange::empty(token.start()), ); } } - needs_space_main = NeedsSpace::No; - needs_space_aux = NeedsSpace::Unset; - prev_end_aux = TextSize::new(0); - } - } else if (kind.is_operator() || matches!(kind, TokenKind::Name)) - && prev_end != TextSize::default() - { - if kind == TokenKind::Equal && parens > 0 { - // Allow keyword args or defaults: foo(bar=None). - } else if kind.is_whitespace_needed() { - needs_space_main = NeedsSpace::Yes; - needs_space_aux = NeedsSpace::Unset; - prev_end_aux = TextSize::new(0); - } else if kind.is_unary() { - // Check if the operator is used as a binary operator - // Allow unary operators: -123, -x, +1. - // Allow argument unpacking: foo(*args, **kwargs) - if (matches!( - prev_type, - TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace - )) || (!prev_type.is_operator() - && !prev_type.is_keyword() - && !prev_type.is_soft_keyword()) - { - needs_space_main = NeedsSpace::Unset; - needs_space_aux = NeedsSpace::Unset; - prev_end_aux = TextSize::new(0); + (true, true) => { + // Operator has leading and trailing space, all good } - } else if kind.is_whitespace_optional() { - needs_space_main = NeedsSpace::Unset; - needs_space_aux = NeedsSpace::Unset; - prev_end_aux = TextSize::new(0); } + } - if needs_space_main == NeedsSpace::Unset { - // Surrounding space is optional, but ensure that - // trailing space matches opening space - prev_end_aux = prev_end; - needs_space_aux = if token.start() == prev_end { - NeedsSpace::No - } else { - NeedsSpace::Yes - }; - } else if needs_space_main == NeedsSpace::Yes && token.start() == prev_end_aux { - // A needed opening space was not found - context.push(MissingWhitespaceAroundOperator, TextRange::empty(prev_end)); - needs_space_main = NeedsSpace::No; - needs_space_aux = NeedsSpace::Unset; - prev_end_aux = TextSize::new(0); - } + prev_token = Some(token); + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +enum NeedsSpace { + /// Needs a leading and trailing space. + Yes, + + /// Doesn't need a leading or trailing space. Or in other words, we don't care how many + /// leading or trailing spaces that token has. + No, + + /// Needs consistent leading and trailing spacing. The operator needs spacing if + /// * it has a leading space + /// * it has a trailing space + Optional, +} + +impl From for NeedsSpace { + fn from(value: bool) -> Self { + if value { + NeedsSpace::Yes + } else { + NeedsSpace::No } - prev_type = kind; - prev_end = token.end(); } } + +fn diagnostic_kind_for_operator(operator: TokenKind) -> DiagnosticKind { + if operator == TokenKind::Percent { + DiagnosticKind::from(MissingWhitespaceAroundModuloOperator) + } else if operator.is_bitwise_or_shift() { + DiagnosticKind::from(MissingWhitespaceAroundBitwiseOrShiftOperator) + } else if operator.is_arithmetic() { + DiagnosticKind::from(MissingWhitespaceAroundArithmeticOperator) + } else { + DiagnosticKind::from(MissingWhitespaceAroundOperator) + } +} + +fn is_whitespace_needed(kind: TokenKind) -> bool { + matches!( + kind, + TokenKind::DoubleStarEqual + | TokenKind::StarEqual + | TokenKind::SlashEqual + | TokenKind::DoubleSlashEqual + | TokenKind::PlusEqual + | TokenKind::MinusEqual + | TokenKind::NotEqual + | TokenKind::Less + | TokenKind::Greater + | TokenKind::PercentEqual + | TokenKind::CircumflexEqual + | TokenKind::AmperEqual + | TokenKind::VbarEqual + | TokenKind::EqEqual + | TokenKind::LessEqual + | TokenKind::GreaterEqual + | TokenKind::LeftShiftEqual + | TokenKind::RightShiftEqual + | TokenKind::Equal + | TokenKind::And + | TokenKind::Or + | TokenKind::In + | TokenKind::Is + | TokenKind::Rarrow + | TokenKind::ColonEqual + | TokenKind::Slash + | TokenKind::Percent + ) || kind.is_arithmetic() + || kind.is_bitwise_or_shift() +} diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E225_E22.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E225_E22.py.snap index 0d8ec1b68c0d3..61728196aacf6 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E225_E22.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E225_E22.py.snap @@ -10,6 +10,16 @@ E22.py:54:13: E225 Missing whitespace around operator 57 | submitted+= 1 | +E22.py:56:10: E225 Missing whitespace around operator + | +56 | submitted +=1 +57 | #: E225 +58 | submitted+= 1 + | E225 +59 | #: E225 +60 | c =-1 + | + E22.py:58:4: E225 Missing whitespace around operator | 58 | submitted+= 1 @@ -100,12 +110,12 @@ E22.py:74:12: E225 Missing whitespace around operator 78 | i=i+ 1 | -E22.py:76:3: E225 Missing whitespace around operator +E22.py:76:2: E225 Missing whitespace around operator | 76 | _1kB = _1MB>> 10 77 | #: E225 E225 78 | i=i+ 1 - | E225 + | E225 79 | #: E225 E225 80 | i=i +1 | @@ -120,12 +130,12 @@ E22.py:76:4: E225 Missing whitespace around operator 80 | i=i +1 | -E22.py:78:3: E225 Missing whitespace around operator +E22.py:78:2: E225 Missing whitespace around operator | 78 | i=i+ 1 79 | #: E225 E225 80 | i=i +1 - | E225 + | E225 81 | #: E225 82 | i = 1and 1 | @@ -140,6 +150,46 @@ E22.py:78:6: E225 Missing whitespace around operator 82 | i = 1and 1 | +E22.py:80:6: E225 Missing whitespace around operator + | +80 | i=i +1 +81 | #: E225 +82 | i = 1and 1 + | E225 +83 | #: E225 +84 | i = 1or 0 + | + +E22.py:82:6: E225 Missing whitespace around operator + | +82 | i = 1and 1 +83 | #: E225 +84 | i = 1or 0 + | E225 +85 | #: E225 +86 | 1is 1 + | + +E22.py:84:2: E225 Missing whitespace around operator + | +84 | i = 1or 0 +85 | #: E225 +86 | 1is 1 + | E225 +87 | #: E225 +88 | 1in [] + | + +E22.py:86:2: E225 Missing whitespace around operator + | +86 | 1is 1 +87 | #: E225 +88 | 1in [] + | E225 +89 | #: E225 +90 | i = 1 @2 + | + E22.py:88:8: E225 Missing whitespace around operator | 88 | 1in [] @@ -160,12 +210,12 @@ E22.py:90:6: E225 Missing whitespace around operator 94 | i=i+1 | -E22.py:92:3: E225 Missing whitespace around operator +E22.py:92:2: E225 Missing whitespace around operator | 92 | i = 1@ 2 93 | #: E225 E226 94 | i=i+1 - | E225 + | E225 95 | #: E225 E226 96 | i =i+1 | @@ -180,6 +230,16 @@ E22.py:94:4: E225 Missing whitespace around operator 98 | i= i+1 | +E22.py:96:2: E225 Missing whitespace around operator + | + 96 | i =i+1 + 97 | #: E225 E226 + 98 | i= i+1 + | E225 + 99 | #: E225 E226 +100 | c = (a +b)*(a - b) + | + E22.py:98:9: E225 Missing whitespace around operator | 98 | i= i+1 diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E226_E22.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E226_E22.py.snap index c13c83a120483..769264c53d61a 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E226_E22.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E226_E22.py.snap @@ -50,6 +50,15 @@ E22.py:100:11: E226 Missing whitespace around arithmetic operator 103 | #: | +E22.py:104:6: E226 Missing whitespace around arithmetic operator + | +104 | #: E226 +105 | z = 2//30 + | E226 +106 | #: E226 E226 +107 | c = (a+b) * (a-b) + | + E22.py:106:7: E226 Missing whitespace around arithmetic operator | 106 | z = 2//30 @@ -120,4 +129,14 @@ E22.py:116:12: E226 Missing whitespace around arithmetic operator 120 | def halves(n): | +E22.py:119:14: E226 Missing whitespace around arithmetic operator + | +119 | #: E226 +120 | def halves(n): +121 | return (i//2 for i in range(n)) + | E226 +122 | #: E227 +123 | _1kB = _1MB>>10 + | + diff --git a/crates/ruff_python_ast/src/token_kind.rs b/crates/ruff_python_ast/src/token_kind.rs index 2f44ea2e1ec7f..b465e39860824 100644 --- a/crates/ruff_python_ast/src/token_kind.rs +++ b/crates/ruff_python_ast/src/token_kind.rs @@ -167,61 +167,9 @@ pub enum TokenKind { } impl TokenKind { - #[inline] - pub const fn is_whitespace_needed(&self) -> bool { - matches!( - self, - TokenKind::DoubleStarEqual - | TokenKind::StarEqual - | TokenKind::SlashEqual - | TokenKind::DoubleSlashEqual - | TokenKind::PlusEqual - | TokenKind::MinusEqual - | TokenKind::NotEqual - | TokenKind::Less - | TokenKind::Greater - | TokenKind::PercentEqual - | TokenKind::CircumflexEqual - | TokenKind::AmperEqual - | TokenKind::VbarEqual - | TokenKind::EqEqual - | TokenKind::LessEqual - | TokenKind::GreaterEqual - | TokenKind::LeftShiftEqual - | TokenKind::RightShiftEqual - | TokenKind::Equal - | TokenKind::And - | TokenKind::Or - | TokenKind::In - | TokenKind::Is - | TokenKind::Rarrow - ) - } - - #[inline] - pub const fn is_whitespace_optional(&self) -> bool { - self.is_arithmetic() - || matches!( - self, - TokenKind::CircumFlex - | TokenKind::Amper - | TokenKind::Vbar - | TokenKind::LeftShift - | TokenKind::RightShift - | TokenKind::Percent - ) - } - #[inline] pub const fn is_unary(&self) -> bool { - matches!( - self, - TokenKind::Plus - | TokenKind::Minus - | TokenKind::Star - | TokenKind::DoubleStar - | TokenKind::RightShift - ) + matches!(self, TokenKind::Plus | TokenKind::Minus | TokenKind::Star) } #[inline] @@ -315,6 +263,11 @@ impl TokenKind { | TokenKind::Ellipsis | TokenKind::ColonEqual | TokenKind::Colon + | TokenKind::And + | TokenKind::Or + | TokenKind::Not + | TokenKind::In + | TokenKind::Is ) } @@ -324,7 +277,7 @@ impl TokenKind { } #[inline] - pub const fn is_skip_comment(&self) -> bool { + pub const fn is_trivia(&self) -> bool { matches!( self, TokenKind::Newline @@ -344,10 +297,29 @@ impl TokenKind { | TokenKind::Plus | TokenKind::Minus | TokenKind::Slash + | TokenKind::DoubleSlash | TokenKind::At ) } + #[inline] + pub const fn is_bitwise_or_shift(&self) -> bool { + matches!( + self, + TokenKind::LeftShift + | TokenKind::LeftShiftEqual + | TokenKind::RightShift + | TokenKind::RightShiftEqual + | TokenKind::Amper + | TokenKind::AmperEqual + | TokenKind::Vbar + | TokenKind::VbarEqual + | TokenKind::CircumFlex + | TokenKind::CircumflexEqual + | TokenKind::Tilde + ) + } + #[inline] pub const fn is_soft_keyword(&self) -> bool { matches!(self, TokenKind::Match | TokenKind::Case)