diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index e6b97f0..b8274d8 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -4,11 +4,16 @@ use std::str::Chars;
 mod cursor;
 use cursor::Cursor;
 
-mod token;
+pub(crate) mod token;
 use token::Token;
 
 pub struct Lexer<'a> {
     cursor: Cursor<'a>,
+    /// Tokens lexed successfully so far, oldest first.
+    history: Vec<Token>,
+    /// Number of tokens at the end of `history` that `next` will replay
+    /// before it reads from `cursor` again.
+    offset: usize,
     token_line: usize,
     token_col: usize,
 }
@@ -39,7 +44,17 @@ impl Iterator for Lexer<'_> {
     type Item = Result<Token, SyntaxError>;
 
     fn next(&mut self) -> Option<Result<Token, SyntaxError>> {
-        Some(match self.cursor.next()? {
+        // Hand out rewound tokens before reading anything new.
+        if self.offset > 0 {
+            // `prev` keeps `offset <= history.len()`, so the index is in range.
+            // The token is cloned because it stays in `history`
+            // (assumes `Token: Clone` -- TODO confirm).
+            let tmp = self.history[self.history.len() - self.offset].clone();
+            self.offset -= 1;
+            return Some(Ok(tmp));
+        }
+
+        let result = match self.cursor.next()? {
             c if c.is_ascii_whitespace() => {
                 self.cursor.skip_whitespace();
                 self.cursor.chop();
@@ -47,11 +62,18 @@ impl Iterator for Lexer<'_> {
             }
             ',' => self.token_ok(token::Kind::Comma),
             ';' => self.token_ok(token::Kind::Semi),
-            '=' => self.token_ok(token::Kind::Eq),
             '{' => self.token_ok(token::Kind::OBrace),
             '}' => self.token_ok(token::Kind::CBrace),
             '[' => self.token_ok(token::Kind::OBracket),
             ']' => self.token_ok(token::Kind::CBracket),
+
+            '=' => self.token_ok(token::Kind::Eq),
+            '+' => self.token_ok(token::Kind::Plus),
+            '-' => self.token_ok(token::Kind::Minus),
+            '*' => self.token_ok(token::Kind::Asterisk),
+            '/' => self.token_ok(token::Kind::Slash),
+            '%' => self.token_ok(token::Kind::Percent),
+
             '#' => self.read_comment(),
             '"' => self.read_string_literal(),
             '0' => self.read_prefix_int_literal(),
@@ -59,7 +81,13 @@ impl Iterator for Lexer<'_> {
             _c @ 'A'..='Z' => self.read_ident(),
             _c @ 'a'..='z' => self.read_keyword_or_ident(), // keywords are always lowercase
             c => self.syntax_error(format!("Unexpected character '{}'", c)),
-        })
+        };
+
+        // Only successful tokens are recorded; an `Err` is never replayed.
+        if let Ok(token) = &result {
+            self.history.push(token.clone());
+        }
+        Some(result)
     }
 }
 
@@ -67,11 +95,57 @@ impl<'a> Lexer<'a> {
     pub fn new(stream: Chars<'a>) -> Lexer<'a> {
         Lexer {
             cursor: Cursor::new(stream),
+            history: Vec::new(),
+            offset: 0,
             token_line: 1,
             token_col: 1,
         }
     }
 
+    /// Returns the next token without consuming it.
+    ///
+    /// A successfully lexed token is queued again, so the following call to
+    /// `next` yields it once more. An `Err` is returned without rewinding:
+    /// errors are not stored in `history`, so there is nothing to replay.
+    pub fn peek(&mut self) -> Option<Result<Token, SyntaxError>> {
+        let t = self.next()?;
+        if t.is_ok() {
+            self.prev();
+        }
+        Some(t)
+    }
+
+    /// Steps back one token so that the next call to `next` replays it.
+    ///
+    /// Returns the token that will be replayed, or `None` (changing nothing)
+    /// when there is no earlier token to step back to.
+    pub fn prev(&mut self) -> Option<&Token> {
+        let idx = self.history.len().checked_sub(self.offset + 1)?;
+        self.offset += 1;
+        self.history.get(idx)
+    }
+
+    /// Consumes the next token and returns it if it has the given `kind`.
+    ///
+    /// A token of another kind or the end of input produces a syntax error;
+    /// an error from `next` is passed through unchanged.
+    pub fn expect_kind(&mut self, kind: token::Kind) -> Result<Token, SyntaxError> {
+        match self.next() {
+            Some(Ok(t)) if t.kind == kind => Ok(t),
+            Some(Ok(t)) => self.syntax_error(format!("Expected {}, got {}", kind, t.kind)),
+            Some(Err(e)) => Err(e),
+            None => self.syntax_error("Unexpected EOF"),
+        }
+    }
+
+    /// Consumes the next token, turning the end of input into a syntax error.
+    pub fn require_next(&mut self) -> Result<Token, SyntaxError> {
+        match self.next() {
+            Some(t) => t,
+            None => self.syntax_error("Unexpected EOF"),
+        }
+    }
+
     fn read_keyword_or_ident(&mut self) -> Result<Token, SyntaxError> {
         let current = self.cursor.current().unwrap();
         for kw in &KEYWORDS {
@@ -124,7 +198,7 @@ impl<'a> Lexer<'a> {
             Some('o') => self.read_int_literal(8),
             Some('b') => self.read_int_literal(2),
             Some(c) => self.syntax_error(format!("Unexpected character '{}'", c)),
-            None => self.syntax_error(String::from("Unexpected end-of-file")),
+            None => self.syntax_error("Unexpected end-of-file"),
         }
     }
 
@@ -175,7 +249,11 @@ impl<'a> Lexer<'a> {
         true
     }
 
-    fn syntax_error(&mut self, msg: String) -> Result<Token, SyntaxError> {
+    /// Builds an `Err` holding a `SyntaxError` positioned at the cursor's
+    /// current line and column. `msg` may be a string literal or a `String`.
+    fn syntax_error(&mut self, msg: impl Into<String>) -> Result<Token, SyntaxError> {
+        // The unchanged remainder of this body was written for `msg: String`.
+        let msg: String = msg.into();
         Err(SyntaxError {
             line: self.cursor.line(),
             col: self.cursor.col(),
diff --git a/src/lex/token.rs b/src/lex/token.rs
index ae749c1..3f971e9 100644
--- a/src/lex/token.rs
+++ b/src/lex/token.rs
@@ -18,17 +18,23 @@ impl fmt::Display for Token {
     }
 }
 
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 pub enum Kind {
     Ident,
     OBrace,
     CBrace,
     OBracket,
     CBracket,
-    Eq,
     Comma,
     Semi,
 
+    Eq,
+    Plus,
+    Minus,
+    Asterisk,
+    Slash,
+    Percent,
+
     DependKeyword,
     IncludeKeyword,
     ModuleKeyword,
@@ -52,10 +58,16 @@ impl fmt::Display for Kind {
             Kind::CBrace => "cbrace",
             Kind::OBracket => "obracket",
             Kind::CBracket => "cbracket",
-            Kind::Eq => "eq",
             Kind::Comma => "comma",
             Kind::Semi => "semi",
 
+            Kind::Eq => "eq",
+            Kind::Plus => "plus",
+            Kind::Minus => "minus",
+            Kind::Asterisk => "asterisk",
+            Kind::Slash => "slash",
+            Kind::Percent => "percent",
+
             Kind::DependKeyword => "keyword",
             Kind::IncludeKeyword => "keyword",
             Kind::ModuleKeyword => "keyword",