cursor: refactor to only work on strings

main
anna 2 years ago
parent c7fc83f844
commit c7e4244972
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -1,27 +1,27 @@
use std::fmt;
use std::fmt::Formatter;
use std::{fmt, io};
use crate::ast::tree::Type;
use crate::lex::token::Position;
/// This is just a wrapper for the actual error types.
///
/// The concrete error (for example a `SyntaxError` or a `TypeError`) is
/// stored type-erased behind a boxed `ErrorDetails` trait object.
///
/// Whether this is good design is an open question (probably not), but it
/// is good enough for now. It should not be too hard to change the API
/// later on, because each component of the compiler has its own wrappers
/// for instantiating errors anyway.
#[derive(Debug)]
pub struct Error {
// the concrete error, accessed only through the `ErrorDetails` trait
e: Box<dyn ErrorDetails>,
}
impl Error {
pub fn syntax_error(file: String, line: usize, col: usize, msg: String) -> Error {
pub fn syntax_error(pos: Position, msg: String) -> Error {
Error {
e: Box::new(SyntaxError::new(file, line, col, msg))
e: Box::new(SyntaxError::new(pos, msg)),
}
}
pub fn type_error(file: String, line: usize, col: usize, expected: Type, actual: Type) -> Error {
pub fn type_error(pos: Position, expected: Type, actual: Type) -> Error {
Error {
e: Box::new(TypeError::new(file, line, col, expected, actual))
e: Box::new(TypeError::new(pos, expected, actual)),
}
}
@ -56,13 +56,6 @@ trait ErrorDetails {
fn name(&self) -> &str;
}
#[derive(Debug)]
struct Position {
file: String,
line: usize,
col: usize,
}
#[derive(Debug)]
pub struct SyntaxError {
pos: Position,
@ -72,31 +65,35 @@ pub struct SyntaxError {
#[derive(Debug)]
pub struct TypeError {
pos: Position,
expected: Type,
actual: Type,
msg: String,
}
impl fmt::Display for dyn ErrorDetails {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{} in {}:{}:{}: {}", self.name(), self.file(), self.line(), self.col(), self.msg())
impl fmt::Debug for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let e = &self.e;
write!(
f,
"{} in {}:{}:{}: {}",
e.name(),
e.file(),
e.line(),
e.col(),
e.msg()
)
}
}
impl SyntaxError {
pub fn new(file: String, line: usize, col: usize, msg: String) -> SyntaxError {
SyntaxError {
pos: Position { file, line, col },
msg,
}
pub fn new(pos: Position, msg: String) -> SyntaxError {
SyntaxError { pos, msg }
}
}
impl TypeError {
pub fn new(file: String, line: usize, col: usize, expected: Type, actual: Type) -> TypeError {
pub fn new(pos: Position, expected: Type, actual: Type) -> TypeError {
TypeError {
pos: Position { file, line, col },
expected,
actual,
pos,
msg: format!("Expected type {}, got {} instead", expected, actual),
}
}
}
@ -137,7 +134,7 @@ impl ErrorDetails for TypeError {
}
fn msg(&self) -> &String {
&format!("Expected type {}, got {} instead", self.expected, self.actual)
&self.msg
}
fn name(&self) -> &str {

@ -1,52 +1,41 @@
use std::str::Chars;
/// A cursor for iterating over individual characters in a stream.
/// Supports backwards seeking.
pub struct Cursor<'a> {
stream: Chars<'a>,
offset: usize, // where we pull our characters from
history: Vec<char>, // how many chars we are behind the stream position (when seeking back)
line_lengths: Vec<usize>, // complete list of all characters we've read so far
line: usize, // length of all previous lines (for seeking back)
col: usize, // current line (starting from 1)
pos: usize, // current column in line (starting from 1)
chop: usize,
current: Option<char>,
/// Convenience helper for iterating over the characters in a string.
/// Supports backwards seeking and tracks line/column numbers.
///
/// The input string is expanded into a `Vec<char>` when the cursor is
/// created, so stepping forwards or backwards is an index adjustment on
/// `pos` plus the matching line/column bookkeeping.
pub struct Cursor {
raw: Vec<char>, // array of all characters
pos: usize, // index (in `raw`) of the *next* character to be read
line_lengths: Vec<usize>, // previous line lengths (for seeking back)
line: usize, // current line (counting from 1)
col: usize, // current column (counting from 1)
chop: usize, // value of `pos` when `chop()` was called the last time
current: Option<char>, // current character
}
impl Iterator for Cursor<'_> {
impl Iterator for Cursor {
type Item = char;
fn next(&mut self) -> Option<char> {
let c = if self.offset > 0 {
let tmp = self.history[self.history.len() - self.offset];
self.offset -= 1;
tmp
} else {
let tmp = self.stream.next()?;
self.history.push(tmp);
tmp
};
if self.pos < self.raw.len() {
let c = self.raw[self.pos];
self.pos += 1;
self.current = Some(c);
if c == '\n' {
self.new_line();
} else {
self.col += 1;
}
if c == '\n' {
self.new_line();
self.current = Some(c);
Some(c)
} else {
self.col += 1;
None
}
self.pos += 1;
Some(c)
}
}
impl<'a> Cursor<'a> {
pub fn new(stream: Chars<'a>) -> Cursor<'a> {
impl Cursor {
pub fn new(raw: String) -> Cursor {
Cursor {
stream,
offset: 0,
history: Vec::new(),
raw: Vec::from_iter(raw.chars()),
line_lengths: Vec::new(),
line: 1,
col: 0, // increments in first call to next()
@ -58,18 +47,20 @@ impl<'a> Cursor<'a> {
/// Reverse the cursor by a single character.
pub fn prev(&mut self) -> Option<char> {
if self.history.len() == 0 {
None
} else {
self.offset += 1;
let c = self.history[self.history.len() - self.offset];
if self.pos > 0 {
self.pos -= 1;
let c = self.raw[self.pos];
if self.col == 0 {
self.prev_line();
} else {
self.col -= 1;
}
self.pos -= 1;
self.current = Some(c);
Some(c)
} else {
None
}
}
@ -103,22 +94,20 @@ impl<'a> Cursor<'a> {
/// Return the next character without actually advancing the cursor.
pub fn peek(&mut self) -> Option<char> {
if self.offset > 0 {
Some(self.history[self.history.len() - self.offset])
} else {
let c = self.next()?;
self.prev();
Some(c)
}
let c = self.next()?;
self.prev();
Some(c)
}
/// Skip over ASCII whitespace by handing an `is_ascii_whitespace`
/// predicate to `seek_while`.
///
/// Whatever `seek_while` returns is discarded.
// NOTE(review): presumably the cursor ends up in front of the first
// non-whitespace character; confirm against `seek_while`, which decides
// where exactly seeking stops.
pub fn skip_whitespace(&mut self) {
self.seek_while(|c| c.is_ascii_whitespace());
}
/// Return a string of every character since
/// the last time this method was called.
pub fn chop(&mut self) -> String {
assert!(self.pos >= self.chop);
let s = String::from_iter(self.history[self.chop..self.pos].into_iter());
let s = String::from_iter(self.raw[self.chop..self.pos].into_iter());
self.chop = self.pos;
s
}
@ -149,7 +138,7 @@ impl<'a> Cursor<'a> {
fn prev_line(&mut self) {
assert!(self.line > 0);
assert!(self.col == 0);
assert_eq!(self.col, 0);
self.col = self.line_lengths.pop().unwrap();
self.line -= 1;
}

@ -1,16 +1,14 @@
use std::str::Chars;
mod cursor;
use cursor::Cursor;
pub(crate) mod token;
use token::Token;
use token::{Position, Token};
use crate::error::Error;
pub struct Lexer<'a> {
pub struct Lexer {
file: String,
cursor: Cursor<'a>,
cursor: Cursor,
history: Vec<Token>,
offset: usize,
token_line: usize,
@ -27,6 +25,7 @@ struct KeywordMap {
}
/// Build a `KeywordMap` entry that pairs the keyword text `raw` with
/// its token `kind`.
///
/// # Panics
///
/// Panics if `raw` is shorter than two bytes. Because this is a `const fn`
/// used to initialize the `KEYWORDS` static, a keyword that is too short
/// is rejected at compile time.
const fn kw(raw: &'static str, kind: token::Kind) -> KeywordMap {
// `str::len` counts bytes, not characters
// NOTE(review): the reason for the two-byte minimum is not stated here.
assert!(raw.len() >= 2);
KeywordMap { raw, kind }
}
@ -39,7 +38,7 @@ static KEYWORDS: [KeywordMap; 6] = [
kw("type", token::Kind::TypeKeyword),
];
impl Iterator for Lexer<'_> {
impl Iterator for Lexer {
type Item = Result<Token, Error>;
fn next(&mut self) -> Option<Result<Token, Error>> {
@ -70,7 +69,8 @@ impl Iterator for Lexer<'_> {
'%' => self.token_ok(token::Kind::Percent),
'#' => {
self.read_comment().unwrap(); // this can't fail
// this can't fail
self.read_comment().unwrap();
// we don't need comments for now and they would
// only confuse the parser so let's just Not
self.next()?
@ -90,11 +90,11 @@ impl Iterator for Lexer<'_> {
}
}
impl<'a> Lexer<'a> {
pub fn new(file: String, stream: Chars<'a>) -> Lexer<'a> {
impl Lexer {
pub fn new(filename: String, raw: String) -> Lexer {
Lexer {
file,
cursor: Cursor::new(stream),
file: filename,
cursor: Cursor::new(raw),
history: Vec::new(),
offset: 0,
token_line: 1,
@ -123,12 +123,18 @@ impl<'a> Lexer<'a> {
}
/// Read the next token and require it to be of the given `kind`.
///
/// This is the single-kind convenience form of `expect_kinds`: the kind
/// is wrapped in a one-element array and the result of `expect_kinds`
/// is returned unchanged.
pub fn expect_kind(&mut self, kind: token::Kind) -> Result<Token, Error> {
    let accepted = [kind];
    self.expect_kinds(&accepted)
}
pub fn expect_kinds(&mut self, kinds: &[token::Kind]) -> Result<Token, Error> {
match self.next() {
Some(Ok(t)) => if t.kind == kind {
Ok(t)
} else {
self.syntax_error(format!("Expected {}, got {}", kind, t.kind))
},
Some(Ok(t)) => {
if kinds.contains(&t.kind) {
Ok(t)
} else {
self.syntax_error(format!("Expected one of {:?}, got {}", kinds, t.kind))
}
}
Some(Err(e)) => Err(e),
None => self.syntax_error(String::from("Unexpected EOF")),
}
@ -189,7 +195,7 @@ impl<'a> Lexer<'a> {
}
fn read_prefix_int_literal(&mut self) -> Result<Token, Error> {
assert_eq!(self.cursor.next(), Some('0'));
assert_eq!(self.cursor.current(), Some('0'));
match self.cursor.next() {
Some('x') => self.read_int_literal(16),
Some('o') => self.read_int_literal(8),
@ -215,8 +221,11 @@ impl<'a> Lexer<'a> {
fn token(&mut self, kind: token::Kind, raw: String) -> Token {
let t = Token {
kind,
line: self.token_line,
col: self.token_col,
pos: Position {
file: self.file.clone(),
line: self.token_line,
col: self.token_col,
},
raw,
};
self.token_line = self.cursor.line();
@ -248,10 +257,12 @@ impl<'a> Lexer<'a> {
fn syntax_error<T>(&mut self, msg: String) -> Result<T, Error> {
Err(Error::syntax_error(
self.file.clone(),
self.cursor.line(),
self.cursor.col(),
msg
Position {
file: self.file.clone(),
line: self.cursor.line(),
col: self.cursor.col(),
},
msg,
))
}
}

@ -4,14 +4,18 @@ use std::fmt;
#[derive(Debug, Clone)]
pub struct Token {
pub kind: Kind,
/// line of the first character (starting from 1)
pub line: usize,
/// column of the first character (starting from 1)
pub col: usize,
pub pos: Position,
/// raw text
pub raw: String,
}
/// A location in a source file.
///
/// Stored in every `Token` and passed to the error constructors so that
/// diagnostics can point at the offending place in the input.
#[derive(Debug, Clone)]
pub struct Position {
/// name of the file this position refers to
pub file: String,
/// line number (for tokens: line of the first character, starting from 1)
pub line: usize,
/// column number (for tokens: column of the first character, starting from 1)
pub col: usize,
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}: \"{}\"", self.kind, self.raw)

@ -7,7 +7,7 @@ mod error;
fn main() {
let s = fs::read_to_string("test.gaybuild").unwrap();
let lexer = Lexer::new(String::from("test.gaybuild"), s.chars());
let lexer = Lexer::new(String::from("test.gaybuild"), s);
for token in lexer {
println!("{}", token.unwrap());
}

Loading…
Cancel
Save