lex: fix incorrect cursor position tracking
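Cursor position tracking finally works: `pos` now holds the index of the *current* character (starting at -1 before the first call to `next()`) instead of the index of the next character to be read. This also fixes an off-by-one error that made the first token disappear.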
This commit is contained in:
parent
ec8c8916b7
commit
de42513728
2 changed files with 45 additions and 43 deletions
|
@ -2,33 +2,36 @@
|
|||
/// Supports backwards seeking and tracks line/column numbers.
|
||||
pub struct Cursor {
|
||||
raw: Vec<char>, // array of all characters
|
||||
pos: usize, // index (in `raw`) of the *next* character to be read
|
||||
pos: isize, // index (in `raw`) of the *current* character
|
||||
line_lengths: Vec<usize>, // previous line lengths (for seeking back)
|
||||
line: usize, // current line (counting from 1)
|
||||
col: usize, // current column (counting from 1)
|
||||
chop: usize, // value of `pos` when `chop()` was called the last time
|
||||
current: Option<char>, // current character
|
||||
chop: usize, // position of first character to include in next `chop()`
|
||||
current: Option<char>, // current character (last value returned by `next()`)
|
||||
}
|
||||
|
||||
impl Iterator for Cursor {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<char> {
|
||||
if self.pos < self.raw.len() {
|
||||
let c = self.raw[self.pos];
|
||||
self.pos += 1;
|
||||
|
||||
if self.pos < self.raw.len() as isize {
|
||||
if self.current == Some('\n') {
|
||||
self.new_line();
|
||||
self.line_lengths.push(self.col);
|
||||
self.line += 1;
|
||||
self.col = 1;
|
||||
} else {
|
||||
self.col += 1;
|
||||
}
|
||||
|
||||
self.current = Some(c);
|
||||
Some(c)
|
||||
} else {
|
||||
None
|
||||
self.pos += 1;
|
||||
if self.pos < self.raw.len() as isize {
|
||||
self.current = Some(self.raw[self.pos as usize]);
|
||||
} else {
|
||||
self.current = None;
|
||||
}
|
||||
}
|
||||
|
||||
self.current
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -38,8 +41,8 @@ impl Cursor {
|
|||
raw: Vec::from_iter(raw.chars()),
|
||||
line_lengths: Vec::new(),
|
||||
line: 1,
|
||||
col: 0, // increments in first call to next()
|
||||
pos: 0,
|
||||
col: 0,
|
||||
pos: -1,
|
||||
chop: 0,
|
||||
current: None,
|
||||
}
|
||||
|
@ -49,18 +52,19 @@ impl Cursor {
|
|||
pub fn prev(&mut self) -> Option<char> {
|
||||
if self.pos > 0 {
|
||||
self.pos -= 1;
|
||||
let c = self.raw[self.pos];
|
||||
self.current = Some(self.raw[self.pos as usize]);
|
||||
|
||||
self.col -= 1;
|
||||
if self.col == 0 {
|
||||
self.prev_line();
|
||||
self.line -= 1;
|
||||
self.col = self.line_lengths.pop().unwrap();
|
||||
}
|
||||
|
||||
self.current = Some(c);
|
||||
Some(c)
|
||||
} else {
|
||||
None
|
||||
} else if self.pos == 0 {
|
||||
self.pos = -1;
|
||||
self.current = None;
|
||||
}
|
||||
|
||||
self.current
|
||||
}
|
||||
|
||||
/// Seek backward and return all characters that were encountered.
|
||||
|
@ -80,11 +84,11 @@ impl Cursor {
|
|||
/// Seek forward until the `test` callback returns false.
|
||||
pub fn seek_while(&mut self, test: fn(c: char) -> bool) -> Vec<char> {
|
||||
let mut v = Vec::new();
|
||||
while let Some(c) = self.peek() {
|
||||
while let Some(c) = self.next() {
|
||||
if test(c) {
|
||||
v.push(c);
|
||||
self.next();
|
||||
} else {
|
||||
self.prev();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -105,10 +109,15 @@ impl Cursor {
|
|||
/// Return a string of every character since
|
||||
/// the last time this method was called.
|
||||
pub fn chop(&mut self) -> String {
|
||||
assert!(self.pos >= self.chop);
|
||||
let s = String::from_iter(self.raw[self.chop..self.pos].into_iter());
|
||||
self.chop = self.pos;
|
||||
s
|
||||
let first = self.chop;
|
||||
let last = if self.pos == self.raw.len() as isize {
|
||||
self.pos - 1
|
||||
} else {
|
||||
self.pos
|
||||
} as usize;
|
||||
assert!(first <= last);
|
||||
self.chop = last + 1;
|
||||
String::from_iter(self.raw[first..=last].into_iter())
|
||||
}
|
||||
|
||||
/// Return the line number (starting from 1) of the last
|
||||
|
@ -128,17 +137,4 @@ impl Cursor {
|
|||
pub fn current(&self) -> Option<char> {
|
||||
self.current
|
||||
}
|
||||
|
||||
fn new_line(&mut self) {
|
||||
self.line_lengths.push(self.col);
|
||||
self.col = 1;
|
||||
self.line += 1;
|
||||
}
|
||||
|
||||
fn prev_line(&mut self) {
|
||||
assert!(self.line > 0);
|
||||
assert_eq!(self.col, 0);
|
||||
self.col = self.line_lengths.pop().unwrap();
|
||||
self.line -= 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,8 +68,11 @@ impl Iterator for Lexer {
|
|||
c if c.is_ascii_whitespace() => {
|
||||
self.cursor.skip_whitespace();
|
||||
self.cursor.chop();
|
||||
self.token_line = self.cursor.line();
|
||||
self.token_col = self.cursor.col();
|
||||
if self.cursor.next().is_some() {
|
||||
self.token_line = self.cursor.line();
|
||||
self.token_col = self.cursor.col();
|
||||
self.cursor.prev();
|
||||
}
|
||||
self.next()?
|
||||
}
|
||||
',' => self.token_ok(token::Kind::Comma),
|
||||
|
@ -180,11 +183,14 @@ impl Lexer {
|
|||
}
|
||||
|
||||
pub fn prev(&mut self) -> Option<&Token> {
|
||||
if self.offset < self.history.len() - 1 {
|
||||
if self.offset + 1 < self.history.len() {
|
||||
self.offset += 1;
|
||||
let prev = &self.history[self.history.len() - self.offset];
|
||||
Some(prev)
|
||||
} else {
|
||||
if self.history.len() == 1 {
|
||||
self.offset = 1;
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue