Skip to content

Commit

Permalink
Move triple-quoted string detection into Indexer method (#4495)
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh authored May 18, 2023
1 parent 0e4d174 commit d3b1834
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 33 deletions.
3 changes: 1 addition & 2 deletions crates/ruff/src/checkers/physical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ pub(crate) fn check_physical_lines(

let mut commented_lines_iter = indexer.comment_ranges().iter().peekable();
let mut doc_lines_iter = doc_lines.iter().peekable();
let string_lines = indexer.triple_quoted_string_ranges();

for (index, line) in locator.contents().universal_newlines().enumerate() {
while commented_lines_iter
Expand Down Expand Up @@ -151,7 +150,7 @@ pub(crate) fn check_physical_lines(
}

if enforce_tab_indentation {
if let Some(diagnostic) = tab_indentation(&line, string_lines) {
if let Some(diagnostic) = tab_indentation(&line, indexer) {
diagnostics.push(diagnostic);
}
}
Expand Down
27 changes: 7 additions & 20 deletions crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::newlines::Line;
use ruff_python_ast::source_code::Indexer;
use ruff_python_ast::whitespace::leading_space;

#[violation]
Expand All @@ -16,31 +17,17 @@ impl Violation for TabIndentation {
}

/// W191
pub(crate) fn tab_indentation(line: &Line, string_ranges: &[TextRange]) -> Option<Diagnostic> {
pub(crate) fn tab_indentation(line: &Line, indexer: &Indexer) -> Option<Diagnostic> {
let indent = leading_space(line);
if let Some(tab_index) = indent.find('\t') {
let tab_offset = line.start() + TextSize::try_from(tab_index).unwrap();

let string_range_index = string_ranges.binary_search_by(|range| {
if tab_offset < range.start() {
std::cmp::Ordering::Greater
} else if range.contains(tab_offset) {
std::cmp::Ordering::Equal
} else {
std::cmp::Ordering::Less
}
});

// If the tab character is within a multi-line string, abort.
if string_range_index.is_ok() {
None
} else {
Some(Diagnostic::new(
let tab_offset = line.start() + TextSize::try_from(tab_index).unwrap();
if indexer.triple_quoted_string_range(tab_offset).is_none() {
return Some(Diagnostic::new(
TabIndentation,
TextRange::at(line.start(), indent.text_len()),
))
));
}
} else {
None
}
None
}
33 changes: 22 additions & 11 deletions crates/ruff_python_ast/src/source_code/indexer.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).

use crate::source_code::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{StringKind, Tok};

use crate::source_code::Locator;

pub struct Indexer {
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
comment_ranges: Vec<TextRange>,
Expand Down Expand Up @@ -105,18 +106,28 @@ impl Indexer {
&self.continuation_lines
}

/// Borrow every recorded triple-quoted string range as a slice.
///
/// The ranges are ordered by increasing [`TextRange::start`], and no two of them overlap.
pub fn triple_quoted_string_ranges(&self) -> &[TextRange] {
    let ranges: &[TextRange] = &self.triple_quoted_string_ranges;
    ranges
}

/// Returns `true` if the given offset is part of a continuation line.
///
/// The offset is first mapped to the start of its line via [`Locator::line_start`]; that
/// line start is then looked up in the stored continuation lines with a binary search.
pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
    self.continuation_lines
        .binary_search(&locator.line_start(offset))
        .is_ok()
}

/// Return the [`TextRange`] of the triple-quoted string containing a given offset, or `None`
/// if no recorded triple-quoted string range contains it.
///
/// The lookup is a binary search, so it relies on the stored ranges being sorted by
/// [`TextRange::start`] and non-overlapping.
pub fn triple_quoted_string_range(&self, offset: TextSize) -> Option<TextRange> {
    use std::cmp::Ordering;

    let ranges = &self.triple_quoted_string_ranges;
    ranges
        .binary_search_by(|candidate| {
            if candidate.contains(offset) {
                // The probe range holds the offset: this is the match.
                Ordering::Equal
            } else if offset < candidate.start() {
                // The probe range lies after the offset, so search to the left.
                Ordering::Greater
            } else {
                // The probe range ends at or before the offset, so search to the right.
                Ordering::Less
            }
        })
        .ok()
        .map(|index| ranges[index])
}

/// Return the [`TextRange`] of the f-string containing a given offset.
pub fn f_string_range(&self, offset: TextSize) -> Option<TextRange> {
let Ok(string_range_index) = self.f_string_ranges.binary_search_by(|range| {
Expand Down Expand Up @@ -228,7 +239,7 @@ import os
let contents = r#""this is a single-quoted string""#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(indexer.triple_quoted_string_ranges(), []);
assert_eq!(indexer.triple_quoted_string_ranges, []);

let contents = r#"
"""
Expand All @@ -238,7 +249,7 @@ import os
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.triple_quoted_string_ranges(),
indexer.triple_quoted_string_ranges,
[TextRange::new(TextSize::from(13), TextSize::from(71))]
);

Expand All @@ -250,7 +261,7 @@ import os
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.triple_quoted_string_ranges(),
indexer.triple_quoted_string_ranges,
[TextRange::new(TextSize::from(13), TextSize::from(107))]
);

Expand All @@ -267,7 +278,7 @@ import os
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.triple_quoted_string_ranges(),
indexer.triple_quoted_string_ranges,
&[
TextRange::new(TextSize::from(13), TextSize::from(85)),
TextRange::new(TextSize::from(98), TextSize::from(161))
Expand Down

0 comments on commit d3b1834

Please sign in to comment.