@ -4,11 +4,13 @@ use std::str::Chars;
mod cursor ;
mod cursor ;
use cursor ::Cursor ;
use cursor ::Cursor ;
mod token ;
pub ( crate ) mod token ;
use token ::Token ;
use token ::Token ;
pub struct Lexer < ' a > {
pub struct Lexer < ' a > {
cursor : Cursor < ' a > ,
cursor : Cursor < ' a > ,
history : Vec < Token > ,
offset : usize ,
token_line : usize ,
token_line : usize ,
token_col : usize ,
token_col : usize ,
}
}
@ -39,7 +41,13 @@ impl Iterator for Lexer<'_> {
// NOTE(review): diff-rendered region — unchanged lines appear twice (old and
// new side) and the +/- markers are stripped; read one copy of each pair.
// The tail of the whitespace arm falls between the two hunks and is not
// visible here.
type Item = Result < Token , SyntaxError > ;
type Item = Result < Token , SyntaxError > ;
// Produces the next token, or None at end of input.
// If prev() has rewound the stream (offset > 0), tokens are re-served from
// `history` instead of lexing new input.
fn next ( & mut self ) -> Option < Result < Token , SyntaxError > > {
fn next ( & mut self ) -> Option < Result < Token , SyntaxError > > {
Some ( match self . cursor . next ( ) ? {
// Replay path: offset counts how far we are rewound; serve
// history[len - offset] and step forward again.
if self . offset > 0 {
// Indexing moves the token out by value — assumes Token: Copy. TODO confirm.
let tmp = self . history [ self . history . len ( ) - self . offset ] ;
self . offset - = 1 ;
return Some ( Ok ( tmp ) ) ;
}
// Fresh-lex path: dispatch on the next character from the cursor.
let result = match self . cursor . next ( ) ? {
// Whitespace is skipped and the pending span discarded via chop();
// the rest of this arm is outside the visible hunks.
c if c . is_ascii_whitespace ( ) = > {
c if c . is_ascii_whitespace ( ) = > {
self . cursor . skip_whitespace ( ) ;
self . cursor . skip_whitespace ( ) ;
self . cursor . chop ( ) ;
self . cursor . chop ( ) ;
@ -47,11 +55,18 @@ impl Iterator for Lexer<'_> {
}
}
// Single-character punctuation tokens.
',' = > self . token_ok ( token ::Kind ::Comma ) ,
',' = > self . token_ok ( token ::Kind ::Comma ) ,
';' = > self . token_ok ( token ::Kind ::Semi ) ,
';' = > self . token_ok ( token ::Kind ::Semi ) ,
'=' = > self . token_ok ( token ::Kind ::Eq ) ,
'{' = > self . token_ok ( token ::Kind ::OBrace ) ,
'{' = > self . token_ok ( token ::Kind ::OBrace ) ,
'}' = > self . token_ok ( token ::Kind ::CBrace ) ,
'}' = > self . token_ok ( token ::Kind ::CBrace ) ,
'[' = > self . token_ok ( token ::Kind ::OBracket ) ,
'[' = > self . token_ok ( token ::Kind ::OBracket ) ,
']' = > self . token_ok ( token ::Kind ::CBracket ) ,
']' = > self . token_ok ( token ::Kind ::CBracket ) ,
// '=' arm relocated below the brackets; arithmetic operators are new
// in this revision.
'=' = > self . token_ok ( token ::Kind ::Eq ) ,
'+' = > self . token_ok ( token ::Kind ::Plus ) ,
'-' = > self . token_ok ( token ::Kind ::Minus ) ,
'*' = > self . token_ok ( token ::Kind ::Asterisk ) ,
'/' = > self . token_ok ( token ::Kind ::Slash ) ,
'%' = > self . token_ok ( token ::Kind ::Percent ) ,
// Multi-character tokens delegate to dedicated reader methods.
'#' = > self . read_comment ( ) ,
'#' = > self . read_comment ( ) ,
'"' = > self . read_string_literal ( ) ,
'"' = > self . read_string_literal ( ) ,
'0' = > self . read_prefix_int_literal ( ) ,
'0' = > self . read_prefix_int_literal ( ) ,
@ -59,7 +74,12 @@ impl Iterator for Lexer<'_> {
_c @ 'A' ..= 'Z' = > self . read_ident ( ) ,
_c @ 'A' ..= 'Z' = > self . read_ident ( ) ,
_c @ 'a' ..= 'z' = > self . read_keyword_or_ident ( ) , // keywords are always lowercase
_c @ 'a' ..= 'z' = > self . read_keyword_or_ident ( ) , // keywords are always lowercase
// NOTE(review): syntax_error now takes &str (see signature change below)
// but this call still passes a format! String — likely needs
// `&format!(...)`; verify this compiles against the new signature.
c = > self . syntax_error ( format! ( "Unexpected character '{}'" , c ) ) ,
c = > self . syntax_error ( format! ( "Unexpected character '{}'" , c ) ) ,
} )
} ;
// Only Ok tokens are recorded for prev()/peek() replay.
// NOTE(review): Err results are never pushed, so rewinding after an
// error replays an older, unrelated token — confirm this is intended.
if let Ok ( token ) = result {
self . history . push ( token ) ;
}
Some ( result )
}
}
}
}
@ -67,11 +87,43 @@ impl<'a> Lexer<'a> {
pub fn new ( stream : Chars < ' a > ) -> Lexer < ' a > {
pub fn new ( stream : Chars < ' a > ) -> Lexer < ' a > {
Lexer {
Lexer {
cursor : Cursor ::new ( stream ) ,
cursor : Cursor ::new ( stream ) ,
history : Vec ::new ( ) ,
offset : 0 ,
token_line : 1 ,
token_line : 1 ,
token_col : 1 ,
token_col : 1 ,
}
}
}
}
/// Returns the next token without consuming it (implemented as `next()`
/// followed by `prev()`).
///
/// `next()` records only `Ok` tokens in the replay history, so after an
/// `Err` result we must NOT rewind: doing so would make the following
/// `next()` replay an older, unrelated token. An error result is
/// therefore returned but consumed.
pub fn peek(&mut self) -> Option<Result<Token, SyntaxError>> {
    let t = self.next()?;
    if t.is_ok() {
        // Rewind so the caller's next `next()` yields this same token.
        self.prev();
    }
    Some(t)
}
/// Rewinds the lexer by one token and returns a reference to the token
/// that the next call to `next()` will yield again.
///
/// Returns `None` (leaving state unchanged) when there is no further
/// recorded history to rewind into. The `offset >= history.len()` guard
/// fixes two defects in the previous version: repeated calls always
/// returned `history.last()` instead of successively older tokens, and
/// `offset` could grow past `history.len()`, making
/// `history.len() - offset` underflow-panic inside `next()`.
pub fn prev(&mut self) -> Option<&Token> {
    if self.offset >= self.history.len() {
        return None;
    }
    self.offset += 1;
    // This is exactly the element the replay path in next() will serve.
    Some(&self.history[self.history.len() - self.offset])
}
/// Consumes the next token and checks that it has the expected `kind`.
///
/// Returns the token on a match; otherwise a `SyntaxError` describing the
/// mismatch, or "Unexpected EOF" when the stream is exhausted.
pub fn expect_kind(&mut self, kind: token::Kind) -> Result<Token, SyntaxError> {
    match self.next() {
        Some(t) => {
            // Unwrap once: the original applied `t?` twice, re-consuming
            // the Result and relying on Token being Copy.
            let t = t?;
            if t.kind == kind {
                Ok(t)
            } else {
                // syntax_error takes &str; borrow the formatted String
                // (the original passed the String itself).
                self.syntax_error(&format!("Expected {}, got {}", kind, t.kind))
            }
        }
        None => self.syntax_error("Unexpected EOF"),
    }
}
/// Like `next()`, but end-of-input is reported as a `SyntaxError`
/// ("Unexpected EOF") instead of `None`.
pub fn require_next(&mut self) -> Result<Token, SyntaxError> {
    if let Some(t) = self.next() {
        t
    } else {
        self.syntax_error("Unexpected EOF")
    }
}
// NOTE(review): the definitions below are all cut by diff-hunk boundaries;
// only the visible fragments are annotated, nothing is reconstructed.
// Reads a lowercase-led word, trying each entry of KEYWORDS before falling
// back to an identifier (callers dispatch 'a'..='z' here). Body continues
// past this hunk.
fn read_keyword_or_ident ( & mut self ) -> Result < Token , SyntaxError > {
fn read_keyword_or_ident ( & mut self ) -> Result < Token , SyntaxError > {
let current = self . cursor . current ( ) . unwrap ( ) ;
let current = self . cursor . current ( ) . unwrap ( ) ;
for kw in & KEYWORDS {
for kw in & KEYWORDS {
@ -124,7 +176,7 @@ impl<'a> Lexer<'a> {
// Fragment of read_prefix_int_literal: radix dispatch after a leading '0'
// ('o' octal, 'b' binary; other prefixes handled above this hunk —
// presumably 'x' for hex, not visible here).
Some ( 'o' ) = > self . read_int_literal ( 8 ) ,
Some ( 'o' ) = > self . read_int_literal ( 8 ) ,
Some ( 'b' ) = > self . read_int_literal ( 2 ) ,
Some ( 'b' ) = > self . read_int_literal ( 2 ) ,
// NOTE(review): this arm still passes a format! String, but the patch
// changes syntax_error to take &str (see below) — likely needs
// `&format!(...)`; verify against the new signature.
Some ( c ) = > self . syntax_error ( format! ( "Unexpected character '{}'" , c ) ) ,
Some ( c ) = > self . syntax_error ( format! ( "Unexpected character '{}'" , c ) ) ,
// Call site updated old -> new for the &str signature.
None = > self . syntax_error ( String ::from ( "Unexpected end-of-file" ) ) ,
None = > self . syntax_error ( "Unexpected end-of-file" ) ,
}
}
}
}
@ -175,7 +227,7 @@ impl<'a> Lexer<'a> {
// Tail of an unnamed predicate (returns true); its head is outside this view.
true
true
}
}
// Builds an Err(SyntaxError) at the cursor's current line/column.
// Signature changed old -> new: msg: String becomes msg: &str.
fn syntax_error < T > ( & mut self , msg : String ) -> Result < T , SyntaxError > {
fn syntax_error < T > ( & mut self , msg : & str ) -> Result < T , SyntaxError > {
Err ( SyntaxError {
Err ( SyntaxError {
line : self . cursor . line ( ) ,
line : self . cursor . line ( ) ,
col : self . cursor . col ( ) ,
col : self . cursor . col ( ) ,