Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow underscores in integer literals #3746

Merged
merged 4 commits into from
Dec 9, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 44 additions & 8 deletions compiler/noirc_frontend/src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
position: Position,
done: bool,
skip_comments: bool,
skip_whitespaces: bool,

Check warning on line 19 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Spellcheck / Spellcheck

Unknown word (whitespaces)
}

pub type SpannedTokenResult = Result<SpannedToken, LexerErrorKind>;
Expand All @@ -43,7 +43,7 @@
position: 0,
done: false,
skip_comments: true,
skip_whitespaces: true,

Check warning on line 46 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Spellcheck / Spellcheck

Unknown word (whitespaces)
}
}

Expand All @@ -52,8 +52,8 @@
self
}

pub fn skip_whitespaces(mut self, flag: bool) -> Self {

Check warning on line 55 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Spellcheck / Spellcheck

Unknown word (whitespaces)
self.skip_whitespaces = flag;

Check warning on line 56 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Spellcheck / Spellcheck

Unknown word (whitespaces)
self
}

Expand Down Expand Up @@ -96,7 +96,7 @@
match self.next_char() {
Some(x) if x.is_whitespace() => {
let spanned = self.eat_whitespace(x);
if self.skip_whitespaces {

Check warning on line 99 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Spellcheck / Spellcheck

Unknown word (whitespaces)
self.next_token()
} else {
Ok(spanned)
Expand Down Expand Up @@ -323,11 +323,29 @@
let start = self.position;

let integer_str = self.eat_while(Some(initial_char), |ch| {
ch.is_ascii_digit() | ch.is_ascii_hexdigit() | (ch == 'x')
ch.is_ascii_digit() | ch.is_ascii_hexdigit() | (ch == 'x') | (ch == '_')
});

let end = self.position;

// We want to enforce some simple rules about usage of underscores:
// 1. Underscores cannot appear at the end of an integer literal, e.g. 0x123_.
// 2. There cannot be more than one underscore consecutively, e.g. 0x5__5, 5__5.
//
// We're not concerned with an underscore at the beginning of a decimal literal
// such as `_5` as this would be lexed into an ident rather than an integer literal.
let invalid_underscore_location = integer_str.ends_with('_');
let consecutive_underscores = integer_str.contains("__");
if invalid_underscore_location || consecutive_underscores {
return Err(LexerErrorKind::InvalidIntegerLiteral {
span: Span::inclusive(start, end),
found: integer_str,
});
}

// Underscores need to be stripped out before the literal can be converted to a `FieldElement`.
let integer_str = integer_str.replace('_', "");

let integer = match FieldElement::try_from_str(&integer_str) {
None => {
return Err(LexerErrorKind::InvalidIntegerLiteral {
Expand Down Expand Up @@ -930,15 +948,33 @@
}

#[test]
fn test_eat_hex_int() {
let input = "0x05";

let expected = vec![Token::Int(5_i128.into())];
let mut lexer = Lexer::new(input);
fn test_eat_integer_literals() {
let test_cases: Vec<(&str, Token)> = vec![
("0x05", Token::Int(5_i128.into())),
("5", Token::Int(5_i128.into())),
("0x1234_5678", Token::Int(0x1234_5678_u128.into())),
("0x_01", Token::Int(0x1_u128.into())),
("1_000_000", Token::Int(1_000_000_u128.into())),
];

for token in expected.into_iter() {
for (input, expected_token) in test_cases {
let mut lexer = Lexer::new(input);
let got = lexer.next_token().unwrap();
assert_eq!(got, token);
assert_eq!(got.token(), &expected_token);
}
}

#[test]
fn test_reject_invalid_underscores_in_integer_literal() {
    // Each literal either ends with an underscore or contains two consecutive
    // underscores; lexing any of them must yield `InvalidIntegerLiteral`.
    let invalid_literals = ["0x05_", "5_", "5__5", "0x5__5"];

    for input in invalid_literals {
        let mut lexer = Lexer::new(input);
        let result = lexer.next_token();
        let rejected = matches!(result, Err(LexerErrorKind::InvalidIntegerLiteral { .. }));
        assert!(rejected, "expected {input} to throw error");
    }
}

Expand Down
Loading