From 6d5e7cb406d0f6f228be42567203f23ecc7028b7 Mon Sep 17 00:00:00 2001 From: fef Date: Fri, 29 Jul 2022 03:37:20 +0200 Subject: [PATCH] lex: fix erroneous keyword recognition I tried to parse a function called "fn_with_callback" for the upcoming function support and realized that the lexer mistook the first two letters for the respective keyword. So that is fixed now, I guess. --- src/lex/mod.rs | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/lex/mod.rs b/src/lex/mod.rs index b744d8d..574a7e5 100644 --- a/src/lex/mod.rs +++ b/src/lex/mod.rs @@ -192,7 +192,11 @@ impl Lexer { if kinds.contains(&t.kind) { Ok(t) } else { - self.syntax_error(format!("Expected one of {:?}, got {}", kinds, t.kind)) + self.syntax_error(if kinds.len() == 1 { + format!("Expected {}, got {}", kinds[0], t.kind) + } else { + format!("Expected one of {:?}, got {}", kinds, t.kind) + }) } } Some(Err(e)) => Err(e), @@ -213,7 +217,18 @@ impl Lexer { // keywords are always at least 2 characters long as per the language spec let first_char = kw.raw.chars().next().unwrap(); if current == first_char && self.skip_if_match(&kw.raw[1..]) { - return self.token_ok(kw.kind); + // We need to account for identifiers that just happen to start + // with a keyword name. For example, "settings" begins with the + // "set" keyword, but is obviously still an identifier. 
+ let is_really_a_keyword = match self.cursor.peek() { + Some(c) if c.is_ident_part() => false, + _ => true, + }; + if is_really_a_keyword { + return self.token_ok(kw.kind); + } else { + break; + } } } @@ -222,7 +237,7 @@ impl Lexer { fn read_ident(&mut self) -> Result { for c in &mut self.cursor { - if !c.is_ascii_alphanumeric() && c != '_' { + if !c.is_ident_part() { self.cursor.prev(); break; } @@ -326,3 +341,17 @@ impl Lexer { )) } } + +trait IsIdentifier { + fn is_ident_start(&self) -> bool; + fn is_ident_part(&self) -> bool; +} + +impl IsIdentifier for char { + fn is_ident_start(&self) -> bool { + self.is_ascii_alphabetic() || self == &'_' + } + fn is_ident_part(&self) -> bool { + self.is_ascii_alphanumeric() || self == &'_' + } +}