cursor: refactor to only work on strings

main
anna 2 years ago
parent c7fc83f844
commit c7e4244972
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -1,27 +1,27 @@
use std::fmt; use std::{fmt, io};
use std::fmt::Formatter;
use crate::ast::tree::Type; use crate::ast::tree::Type;
use crate::lex::token::Position;
/// This is just a wrapper for the actual error types. /// This is just a wrapper for the actual error types.
/// I have no idea whether this is good design (probably not), /// I have no idea whether this is good design (probably not),
/// but idc for now. Shouldn't be too hard to change the API /// but idc for now. Shouldn't be too hard to change the API
/// later on bc each component of the compiler has its own /// later on bc each component of the compiler has its own
/// wrappers for instantiating errors anyway. /// wrappers for instantiating errors anyway.
#[derive(Debug)]
pub struct Error { pub struct Error {
e: Box<dyn ErrorDetails>, e: Box<dyn ErrorDetails>,
} }
impl Error { impl Error {
pub fn syntax_error(file: String, line: usize, col: usize, msg: String) -> Error { pub fn syntax_error(pos: Position, msg: String) -> Error {
Error { Error {
e: Box::new(SyntaxError::new(file, line, col, msg)) e: Box::new(SyntaxError::new(pos, msg)),
} }
} }
pub fn type_error(file: String, line: usize, col: usize, expected: Type, actual: Type) -> Error { pub fn type_error(pos: Position, expected: Type, actual: Type) -> Error {
Error { Error {
e: Box::new(TypeError::new(file, line, col, expected, actual)) e: Box::new(TypeError::new(pos, expected, actual)),
} }
} }
@ -56,13 +56,6 @@ trait ErrorDetails {
fn name(&self) -> &str; fn name(&self) -> &str;
} }
#[derive(Debug)]
struct Position {
file: String,
line: usize,
col: usize,
}
#[derive(Debug)] #[derive(Debug)]
pub struct SyntaxError { pub struct SyntaxError {
pos: Position, pos: Position,
@ -72,31 +65,35 @@ pub struct SyntaxError {
#[derive(Debug)] #[derive(Debug)]
pub struct TypeError { pub struct TypeError {
pos: Position, pos: Position,
expected: Type, msg: String,
actual: Type,
} }
impl fmt::Display for dyn ErrorDetails { impl fmt::Debug for Error {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} in {}:{}:{}: {}", self.name(), self.file(), self.line(), self.col(), self.msg()) let e = &self.e;
write!(
f,
"{} in {}:{}:{}: {}",
e.name(),
e.file(),
e.line(),
e.col(),
e.msg()
)
} }
} }
impl SyntaxError { impl SyntaxError {
pub fn new(file: String, line: usize, col: usize, msg: String) -> SyntaxError { pub fn new(pos: Position, msg: String) -> SyntaxError {
SyntaxError { SyntaxError { pos, msg }
pos: Position { file, line, col },
msg,
}
} }
} }
impl TypeError { impl TypeError {
pub fn new(file: String, line: usize, col: usize, expected: Type, actual: Type) -> TypeError { pub fn new(pos: Position, expected: Type, actual: Type) -> TypeError {
TypeError { TypeError {
pos: Position { file, line, col }, pos,
expected, msg: format!("Expected type {}, got {} instead", expected, actual),
actual,
} }
} }
} }
@ -137,7 +134,7 @@ impl ErrorDetails for TypeError {
} }
fn msg(&self) -> &String { fn msg(&self) -> &String {
&format!("Expected type {}, got {} instead", self.expected, self.actual) &self.msg
} }
fn name(&self) -> &str { fn name(&self) -> &str {

@ -1,52 +1,41 @@
use std::str::Chars; /// Convenience helper for iterating over the characters in a string.
/// Supports backwards seeking and tracks line/column numbers.
/// A cursor for iterating over individual characters in a stream. pub struct Cursor {
/// Supports backwards seeking. raw: Vec<char>, // array of all characters
pub struct Cursor<'a> { pos: usize, // index (in `raw`) of the *next* character to be read
stream: Chars<'a>, line_lengths: Vec<usize>, // previous line lengths (for seeking back)
offset: usize, // where we pull our characters from line: usize, // current line (counting from 1)
history: Vec<char>, // how many chars we are behind the stream position (when seeking back) col: usize, // current column (counting from 1)
line_lengths: Vec<usize>, // complete list of all characters we've read so far chop: usize, // value of `pos` when `chop()` was called the last time
line: usize, // length of all previous lines (for seeking back) current: Option<char>, // current character
col: usize, // current line (starting from 1)
pos: usize, // current column in line (starting from 1)
chop: usize,
current: Option<char>,
} }
impl Iterator for Cursor<'_> { impl Iterator for Cursor {
type Item = char; type Item = char;
fn next(&mut self) -> Option<char> { fn next(&mut self) -> Option<char> {
let c = if self.offset > 0 { if self.pos < self.raw.len() {
let tmp = self.history[self.history.len() - self.offset]; let c = self.raw[self.pos];
self.offset -= 1; self.pos += 1;
tmp
} else {
let tmp = self.stream.next()?;
self.history.push(tmp);
tmp
};
self.current = Some(c); if c == '\n' {
self.new_line();
} else {
self.col += 1;
}
if c == '\n' { self.current = Some(c);
self.new_line(); Some(c)
} else { } else {
self.col += 1; None
} }
self.pos += 1;
Some(c)
} }
} }
impl<'a> Cursor<'a> { impl Cursor {
pub fn new(stream: Chars<'a>) -> Cursor<'a> { pub fn new(raw: String) -> Cursor {
Cursor { Cursor {
stream, raw: Vec::from_iter(raw.chars()),
offset: 0,
history: Vec::new(),
line_lengths: Vec::new(), line_lengths: Vec::new(),
line: 1, line: 1,
col: 0, // increments in first call to next() col: 0, // increments in first call to next()
@ -58,18 +47,20 @@ impl<'a> Cursor<'a> {
/// Reverse the cursor by a single character. /// Reverse the cursor by a single character.
pub fn prev(&mut self) -> Option<char> { pub fn prev(&mut self) -> Option<char> {
if self.history.len() == 0 { if self.pos > 0 {
None self.pos -= 1;
} else { let c = self.raw[self.pos];
self.offset += 1;
let c = self.history[self.history.len() - self.offset];
if self.col == 0 { if self.col == 0 {
self.prev_line(); self.prev_line();
} else { } else {
self.col -= 1; self.col -= 1;
} }
self.pos -= 1;
self.current = Some(c);
Some(c) Some(c)
} else {
None
} }
} }
@ -103,22 +94,20 @@ impl<'a> Cursor<'a> {
/// Return the next character without actually advancing the cursor. /// Return the next character without actually advancing the cursor.
pub fn peek(&mut self) -> Option<char> { pub fn peek(&mut self) -> Option<char> {
if self.offset > 0 { let c = self.next()?;
Some(self.history[self.history.len() - self.offset]) self.prev();
} else { Some(c)
let c = self.next()?;
self.prev();
Some(c)
}
} }
pub fn skip_whitespace(&mut self) { pub fn skip_whitespace(&mut self) {
self.seek_while(|c| c.is_ascii_whitespace()); self.seek_while(|c| c.is_ascii_whitespace());
} }
/// Return a string of every character since
/// the last time this method was called.
pub fn chop(&mut self) -> String { pub fn chop(&mut self) -> String {
assert!(self.pos >= self.chop); assert!(self.pos >= self.chop);
let s = String::from_iter(self.history[self.chop..self.pos].into_iter()); let s = String::from_iter(self.raw[self.chop..self.pos].into_iter());
self.chop = self.pos; self.chop = self.pos;
s s
} }
@ -149,7 +138,7 @@ impl<'a> Cursor<'a> {
fn prev_line(&mut self) { fn prev_line(&mut self) {
assert!(self.line > 0); assert!(self.line > 0);
assert!(self.col == 0); assert_eq!(self.col, 0);
self.col = self.line_lengths.pop().unwrap(); self.col = self.line_lengths.pop().unwrap();
self.line -= 1; self.line -= 1;
} }

@ -1,16 +1,14 @@
use std::str::Chars;
mod cursor; mod cursor;
use cursor::Cursor; use cursor::Cursor;
pub(crate) mod token; pub(crate) mod token;
use token::Token; use token::{Position, Token};
use crate::error::Error; use crate::error::Error;
pub struct Lexer<'a> { pub struct Lexer {
file: String, file: String,
cursor: Cursor<'a>, cursor: Cursor,
history: Vec<Token>, history: Vec<Token>,
offset: usize, offset: usize,
token_line: usize, token_line: usize,
@ -27,6 +25,7 @@ struct KeywordMap {
} }
const fn kw(raw: &'static str, kind: token::Kind) -> KeywordMap { const fn kw(raw: &'static str, kind: token::Kind) -> KeywordMap {
assert!(raw.len() >= 2);
KeywordMap { raw, kind } KeywordMap { raw, kind }
} }
@ -39,7 +38,7 @@ static KEYWORDS: [KeywordMap; 6] = [
kw("type", token::Kind::TypeKeyword), kw("type", token::Kind::TypeKeyword),
]; ];
impl Iterator for Lexer<'_> { impl Iterator for Lexer {
type Item = Result<Token, Error>; type Item = Result<Token, Error>;
fn next(&mut self) -> Option<Result<Token, Error>> { fn next(&mut self) -> Option<Result<Token, Error>> {
@ -70,7 +69,8 @@ impl Iterator for Lexer<'_> {
'%' => self.token_ok(token::Kind::Percent), '%' => self.token_ok(token::Kind::Percent),
'#' => { '#' => {
self.read_comment().unwrap(); // this can't fail // this can't fail
self.read_comment().unwrap();
// we don't need comments for now and they would // we don't need comments for now and they would
// only confuse the parser so let's just Not // only confuse the parser so let's just Not
self.next()? self.next()?
@ -90,11 +90,11 @@ impl Iterator for Lexer<'_> {
} }
} }
impl<'a> Lexer<'a> { impl Lexer {
pub fn new(file: String, stream: Chars<'a>) -> Lexer<'a> { pub fn new(filename: String, raw: String) -> Lexer {
Lexer { Lexer {
file, file: filename,
cursor: Cursor::new(stream), cursor: Cursor::new(raw),
history: Vec::new(), history: Vec::new(),
offset: 0, offset: 0,
token_line: 1, token_line: 1,
@ -123,12 +123,18 @@ impl<'a> Lexer<'a> {
} }
pub fn expect_kind(&mut self, kind: token::Kind) -> Result<Token, Error> { pub fn expect_kind(&mut self, kind: token::Kind) -> Result<Token, Error> {
self.expect_kinds(&[kind])
}
pub fn expect_kinds(&mut self, kinds: &[token::Kind]) -> Result<Token, Error> {
match self.next() { match self.next() {
Some(Ok(t)) => if t.kind == kind { Some(Ok(t)) => {
Ok(t) if kinds.contains(&t.kind) {
} else { Ok(t)
self.syntax_error(format!("Expected {}, got {}", kind, t.kind)) } else {
}, self.syntax_error(format!("Expected one of {:?}, got {}", kinds, t.kind))
}
}
Some(Err(e)) => Err(e), Some(Err(e)) => Err(e),
None => self.syntax_error(String::from("Unexpected EOF")), None => self.syntax_error(String::from("Unexpected EOF")),
} }
@ -189,7 +195,7 @@ impl<'a> Lexer<'a> {
} }
fn read_prefix_int_literal(&mut self) -> Result<Token, Error> { fn read_prefix_int_literal(&mut self) -> Result<Token, Error> {
assert_eq!(self.cursor.next(), Some('0')); assert_eq!(self.cursor.current(), Some('0'));
match self.cursor.next() { match self.cursor.next() {
Some('x') => self.read_int_literal(16), Some('x') => self.read_int_literal(16),
Some('o') => self.read_int_literal(8), Some('o') => self.read_int_literal(8),
@ -215,8 +221,11 @@ impl<'a> Lexer<'a> {
fn token(&mut self, kind: token::Kind, raw: String) -> Token { fn token(&mut self, kind: token::Kind, raw: String) -> Token {
let t = Token { let t = Token {
kind, kind,
line: self.token_line, pos: Position {
col: self.token_col, file: self.file.clone(),
line: self.token_line,
col: self.token_col,
},
raw, raw,
}; };
self.token_line = self.cursor.line(); self.token_line = self.cursor.line();
@ -248,10 +257,12 @@ impl<'a> Lexer<'a> {
fn syntax_error<T>(&mut self, msg: String) -> Result<T, Error> { fn syntax_error<T>(&mut self, msg: String) -> Result<T, Error> {
Err(Error::syntax_error( Err(Error::syntax_error(
self.file.clone(), Position {
self.cursor.line(), file: self.file.clone(),
self.cursor.col(), line: self.cursor.line(),
msg col: self.cursor.col(),
},
msg,
)) ))
} }
} }

@ -4,14 +4,18 @@ use std::fmt;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Token { pub struct Token {
pub kind: Kind, pub kind: Kind,
/// line of the first character (starting from 1) pub pos: Position,
pub line: usize,
/// column of the first character (starting from 1)
pub col: usize,
/// raw text /// raw text
pub raw: String, pub raw: String,
} }
#[derive(Debug, Clone)]
pub struct Position {
pub file: String,
pub line: usize,
pub col: usize,
}
impl fmt::Display for Token { impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}: \"{}\"", self.kind, self.raw) write!(f, "{}: \"{}\"", self.kind, self.raw)

@ -7,7 +7,7 @@ mod error;
fn main() { fn main() {
let s = fs::read_to_string("test.gaybuild").unwrap(); let s = fs::read_to_string("test.gaybuild").unwrap();
let lexer = Lexer::new(String::from("test.gaybuild"), s.chars()); let lexer = Lexer::new(String::from("test.gaybuild"), s);
for token in lexer { for token in lexer {
println!("{}", token.unwrap()); println!("{}", token.unwrap());
} }

Loading…
Cancel
Save