lex: support backwards seeking and more tokens

The lexer now records every token it encounters
in an internal history, so that peeking, seeking
backwards and similar operations are possible.
This will come in handy for the parser.  While I
was at it, I also added more primitive tokens
like + - * / % because I'm probably going to need
them later anyway.
main
anna 2 years ago
parent 8ae3bb2f57
commit 68254757a3
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -4,11 +4,13 @@ use std::str::Chars;
mod cursor;
use cursor::Cursor;
mod token;
pub(crate) mod token;
use token::Token;
/// Tokenizer that yields `Token`s from a character stream and remembers
/// every token it produced so that callers can step backwards.
pub struct Lexer<'a> {
/// Character-level cursor over the input stream.
cursor: Cursor<'a>,
/// Every token that was lexed successfully, in the order it was produced.
history: Vec<Token>,
/// Number of tokens the lexer is currently rewound by; while this is
/// non-zero, `next` replays tokens from `history` instead of reading
/// from `cursor`.
offset: usize,
/// NOTE(review): presumably the line on which the current token starts;
/// only its initial value (1) is visible here — confirm.
token_line: usize,
/// NOTE(review): presumably the column at which the current token starts;
/// only its initial value (1) is visible here — confirm.
token_col: usize,
}
@ -39,7 +41,13 @@ impl Iterator for Lexer<'_> {
type Item = Result<Token, SyntaxError>;
fn next(&mut self) -> Option<Result<Token, SyntaxError>> {
Some(match self.cursor.next()? {
if self.offset > 0 {
let tmp = self.history[self.history.len() - self.offset];
self.offset -= 1;
return Some(Ok(tmp));
}
let result = match self.cursor.next()? {
c if c.is_ascii_whitespace() => {
self.cursor.skip_whitespace();
self.cursor.chop();
@ -47,11 +55,18 @@ impl Iterator for Lexer<'_> {
}
',' => self.token_ok(token::Kind::Comma),
';' => self.token_ok(token::Kind::Semi),
'=' => self.token_ok(token::Kind::Eq),
'{' => self.token_ok(token::Kind::OBrace),
'}' => self.token_ok(token::Kind::CBrace),
'[' => self.token_ok(token::Kind::OBracket),
']' => self.token_ok(token::Kind::CBracket),
'=' => self.token_ok(token::Kind::Eq),
'+' => self.token_ok(token::Kind::Plus),
'-' => self.token_ok(token::Kind::Minus),
'*' => self.token_ok(token::Kind::Asterisk),
'/' => self.token_ok(token::Kind::Slash),
'%' => self.token_ok(token::Kind::Percent),
'#' => self.read_comment(),
'"' => self.read_string_literal(),
'0' => self.read_prefix_int_literal(),
@ -59,7 +74,12 @@ impl Iterator for Lexer<'_> {
_c @ 'A'..='Z' => self.read_ident(),
_c @ 'a'..='z' => self.read_keyword_or_ident(), // keywords are always lowercase
c => self.syntax_error(format!("Unexpected character '{}'", c)),
})
};
if let Ok(token) = result {
self.history.push(token);
}
Some(result)
}
}
@ -67,11 +87,43 @@ impl<'a> Lexer<'a> {
pub fn new(stream: Chars<'a>) -> Lexer<'a> {
Lexer {
cursor: Cursor::new(stream),
history: Vec::new(),
offset: 0,
token_line: 1,
token_col: 1,
}
}
/// Returns the next token without consuming it.
///
/// The token is read through `next` and, if it was lexed successfully,
/// the lexer is immediately rewound by one so the following call to
/// `next` yields the same token again.
///
/// Returns `None` when `next` returns `None`.  A syntax error is returned
/// as `Some(Err(..))` and is *not* rewound.
pub fn peek(&mut self) -> Option<Result<Token, SyntaxError>> {
    let result = self.next()?;
    // Only successful tokens are recorded in the history.  Rewinding after
    // an error would step back over the previous, already consumed token
    // and make `next` replay it.
    if result.is_ok() {
        self.prev();
    }
    Some(result)
}
/// Steps back by one token and returns the token that was stepped over,
/// i.e. the token the next call to `next` will yield again.
///
/// Returns `None`, leaving the lexer untouched, when there is nothing
/// left to rewind to: either no token has been recorded yet or the lexer
/// is already rewound to the first recorded token.
pub fn prev(&mut self) -> Option<&Token> {
    // `next` replays `history[len - offset]`, so after bumping `offset`
    // the replayed token sits at `len - (offset + 1)`.  `checked_sub`
    // refuses to rewind past the start of the history, which would
    // otherwise make that index computation underflow.
    let index = self.history.len().checked_sub(self.offset + 1)?;
    self.offset += 1;
    self.history.get(index)
}
/// Consumes the next token and checks that it is of the given `kind`.
///
/// # Errors
///
/// * Propagates the syntax error if the next token could not be lexed.
/// * Returns a syntax error if the token has a different kind.
/// * Returns a syntax error if the input is exhausted.
pub fn expect_kind(&mut self, kind: token::Kind) -> Result<Token, SyntaxError> {
    match self.next() {
        Some(result) => {
            // Unwrap exactly once: applying `?` to the same `Result`
            // repeatedly would use it after it has been moved.
            let token = result?;
            if token.kind == kind {
                Ok(token)
            } else {
                self.syntax_error(&format!("Expected {}, got {}", kind, token.kind))
            }
        }
        None => self.syntax_error("Unexpected EOF"),
    }
}
/// Consumes and returns the next token, treating the end of input as a
/// syntax error.
///
/// # Errors
///
/// Returns whatever error `next` produced, or an "Unexpected EOF" syntax
/// error when no token is left.
pub fn require_next(&mut self) -> Result<Token, SyntaxError> {
    if let Some(outcome) = self.next() {
        return outcome;
    }
    self.syntax_error("Unexpected EOF")
}
fn read_keyword_or_ident(&mut self) -> Result<Token, SyntaxError> {
let current = self.cursor.current().unwrap();
for kw in &KEYWORDS {
@ -124,7 +176,7 @@ impl<'a> Lexer<'a> {
Some('o') => self.read_int_literal(8),
Some('b') => self.read_int_literal(2),
Some(c) => self.syntax_error(format!("Unexpected character '{}'", c)),
None => self.syntax_error(String::from("Unexpected end-of-file")),
None => self.syntax_error("Unexpected end-of-file"),
}
}
@ -175,7 +227,7 @@ impl<'a> Lexer<'a> {
true
}
fn syntax_error<T>(&mut self, msg: String) -> Result<T, SyntaxError> {
fn syntax_error<T>(&mut self, msg: &str) -> Result<T, SyntaxError> {
Err(SyntaxError {
line: self.cursor.line(),
col: self.cursor.col(),

@ -18,17 +18,23 @@ impl fmt::Display for Token {
}
}
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kind {
Ident,
OBrace,
CBrace,
OBracket,
CBracket,
Eq,
Comma,
Semi,
Eq,
Plus,
Minus,
Asterisk,
Slash,
Percent,
DependKeyword,
IncludeKeyword,
ModuleKeyword,
@ -52,10 +58,16 @@ impl fmt::Display for Kind {
Kind::CBrace => "cbrace",
Kind::OBracket => "obracket",
Kind::CBracket => "cbracket",
Kind::Eq => "eq",
Kind::Comma => "comma",
Kind::Semi => "semi",
Kind::Eq => "eq",
Kind::Plus => "plus",
Kind::Minus => "minus",
Kind::Asterisk => "asterisk",
Kind::Slash => "slash",
Kind::Percent => "percent",
Kind::DependKeyword => "keyword",
Kind::IncludeKeyword => "keyword",
Kind::ModuleKeyword => "keyword",

Loading…
Cancel
Save