ast: add expression support

This adds the basic set of expressions:
assignments, binary/unary expressions, function
calls, and array accesses.  Binary operator
precedence is also taken into account.
Parenthesized expressions are still missing, but
those should be easy to add.
main
anna 2 years ago
parent f71684bc38
commit e33c2022c5
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -7,7 +7,9 @@ use crate::lex::Lexer;
use std::fs; use std::fs;
use std::io; use std::io;
use crate::ast::tree::Operator;
#[derive(PartialEq)]
enum Scope { enum Scope {
File, File,
Target, Target,
@ -41,40 +43,48 @@ impl Parser {
let mut nodes = Vec::new(); let mut nodes = Vec::new();
self.scope.push(Scope::File); self.scope.push(Scope::File);
while let Some(result) = self.lexer.next() { while self.lexer.peek().is_some() {
let token = result?; nodes.push(self.parse_stmt()?);
let node = match token.kind {
token::Kind::TargetKeyword => self.parse_target(),
token::Kind::SetKeyword => self.parse_set_expr(),
_ => self.syntax_error(format!("Unexpected token {}", token.kind), &token),
}?;
nodes.push(node);
} }
self.scope.pop();
Ok(tree::Node::File { Ok(tree::Node::File {
name: self.filename.clone(), name: self.filename.clone(),
content: nodes, content: nodes,
}) })
} }
fn parse_target(&mut self) -> Result<tree::Node, Error> { fn parse_stmt(&mut self) -> Result<tree::Node, Error> {
let token = self.lexer.peek_or_err()?;
match token.kind {
token::Kind::DependKeyword => self.parse_depend_stmt(),
token::Kind::SetKeyword => self.parse_set_stmt(),
token::Kind::SourceKeyword => self.parse_source_stmt(),
token::Kind::TargetKeyword => self.parse_target_stmt(),
token::Kind::TypeKeyword => self.parse_type_stmt(),
token::Kind::Ident => self.parse_expr_stmt(),
_ => self.syntax_error(format!("Unexpected token {}", token), &token),
}
}
fn parse_target_stmt(&mut self) -> Result<tree::Node, Error> {
self.assert_scope(Scope::File)?;
self.assert_scope_not(Scope::Target)?;
self.scope.push(Scope::Target); self.scope.push(Scope::Target);
self.lexer.expect_kind(token::Kind::TargetKeyword)?;
let name_token = self.lexer.expect_kind(token::Kind::Ident)?; let name_token = self.lexer.expect_kind(token::Kind::Ident)?;
self.lexer.expect_kind(token::Kind::OBrace)?; self.lexer.expect_kind(token::Kind::OBrace)?;
let mut children = Vec::new(); let mut children = Vec::new();
while let Some(result) = self.lexer.next() { while let Some(result) = self.lexer.peek() {
let token = result?; match result?.kind {
match token.kind { token::Kind::CBrace => {
token::Kind::DependKeyword => children.push(self.parse_depend_expr()?), self.lexer.next();
token::Kind::TypeKeyword => children.push(self.parse_type_expr()?), break;
token::Kind::SourceKeyword => children.push(self.parse_source_expr()?),
token::Kind::SetKeyword => children.push(self.parse_set_expr()?),
token::Kind::CBrace => break,
_ => {
return self.syntax_error(format!("Unexpected token \"{}\"", token.raw), &token)
} }
_ => children.push(self.parse_stmt()?),
} }
} }
@ -85,32 +95,205 @@ impl Parser {
}) })
} }
fn parse_depend_expr(&mut self) -> Result<tree::Node, Error> { fn parse_depend_stmt(&mut self) -> Result<tree::Node, Error> {
self.assert_scope(Scope::Target)?;
self.scope.push(Scope::DepList); self.scope.push(Scope::DepList);
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; self.lexer.expect_kind(token::Kind::DependKeyword)?;
let rvalue = self.parse_expr(&[token::Kind::Semi])?;
self.scope.pop(); self.scope.pop();
Ok(tree::Node::DepList(Box::new(rvalue))) Ok(tree::Node::DepList(Box::new(rvalue)))
} }
fn parse_set_expr(&mut self) -> Result<tree::Node, Error> { fn parse_set_stmt(&mut self) -> Result<tree::Node, Error> {
let lvalue = self.parse_lvalue(&[token::Kind::Eq])?; self.assert_scope(Scope::File)?;
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; self.lexer.expect_kind(token::Kind::SetKeyword)?;
Ok(tree::Node::SetExpr { let expr = self.parse_expr(&[token::Kind::Semi])?;
name: Box::new(lvalue), match expr {
val: Box::new(rvalue), tree::Node::BinaryExpr { op, lhs, rhs } => {
}) if op == Operator::Eq {
Ok(tree::Node::SetExpr {
name: lhs,
val: rhs,
})
} else {
self.syntax_error(format!("Invalid operator"), self.lexer.current().unwrap())
}
}
_ => self.syntax_error(format!("Expected an assignment"), self.lexer.current().unwrap())
}
} }
fn parse_type_expr(&mut self) -> Result<tree::Node, Error> { fn parse_type_stmt(&mut self) -> Result<tree::Node, Error> {
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; self.assert_scope(Scope::Target)?;
Ok(tree::Node::TypeExpr(Box::new(rvalue))) self.lexer.expect_kind(token::Kind::TypeKeyword)?;
let expr = self.parse_expr(&[token::Kind::Semi])?;
Ok(tree::Node::TypeExpr(Box::new(expr)))
} }
fn parse_source_expr(&mut self) -> Result<tree::Node, Error> { fn parse_source_stmt(&mut self) -> Result<tree::Node, Error> {
self.assert_scope(Scope::Target)?;
self.lexer.expect_kind(token::Kind::SourceKeyword)?;
self.scope.push(Scope::SourceList); self.scope.push(Scope::SourceList);
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; let source = self.parse_expr(&[token::Kind::Semi])?;
self.scope.pop(); self.scope.pop();
Ok(tree::Node::SourceList(Box::new(rvalue))) Ok(tree::Node::SourceList(Box::new(source)))
}
/// Parse a statement that consists of a single expression.
///
/// The expression is handed to `parse_expr` with `;` as its only terminator.
fn parse_expr_stmt(&mut self) -> Result<tree::Node, Error> {
    let stmt_end = [token::Kind::Semi];
    self.parse_expr(&stmt_end)
}
/// Parse one expression that ends at any of the `terminators`.
///
/// Returns a syntax error when `Scope::File` is not on the scope stack,
/// when the input has ended ("Unexpected EOF"), or when the next token
/// cannot start an expression. An error produced by the lexer while
/// peeking is propagated unchanged.
///
/// # Panics
///
/// Panics in the EOF case if the lexer has no current token to attach the
/// error to.
fn parse_expr(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
    self.assert_scope(Scope::File)?;

    match self.lexer.peek() {
        None => self.syntax_error(
            String::from("Unexpected EOF"),
            &self.lexer.current().unwrap(),
        ),
        Some(peeked) => {
            let first = peeked?;
            if first.kind.is_start_of_expr() {
                self.parse_assignment_expr_or_higher(terminators)
            } else {
                self.syntax_error(String::from("Expected an expression"), &first)
            }
        }
    }
}
/// Parse a primary expression, optionally followed by an assignment.
///
/// If the token after the primary expression is an assignment operator,
/// the right-hand side is parsed as a binary expression and the result is
/// a `BinaryExpr` carrying that operator. Otherwise the next token must be
/// one of `terminators`, and the primary expression is returned as is.
///
/// NOTE(review): when peeking yields nothing or a lexer error, the primary
/// expression is returned without a terminator having been seen — confirm
/// that this is intended.
fn parse_assignment_expr_or_higher(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
    let target = self.parse_primary_expr()?;

    let lookahead = match self.lexer.peek() {
        Some(Ok(tok)) => tok,
        _ => return Ok(target),
    };

    if !lookahead.kind.is_assignment_op() {
        // Not an assignment: the expression has to end right here.
        self.lexer.expect_kinds(terminators)?;
        return Ok(target);
    }

    let op_token = self.lexer.require_next()?;
    let op = Operator::from_token(&op_token)?;
    let value = self.parse_binary_expr_or_higher(terminators)?;
    Ok(tree::Node::BinaryExpr {
        op,
        lhs: Box::new(target),
        rhs: Box::new(value),
    })
}
/// Parse a sequence of binary operations, building a left-associative tree.
///
/// The first operand is parsed as a unary expression. For every following
/// operator, the right-hand side is parsed by `parse_partial_binary_expr`,
/// which absorbs any operators of strictly higher precedence, so that
/// tighter-binding operators end up deeper in the tree.
///
/// Parsing stops when one of `terminators` is peeked; that terminator is
/// consumed before returning. It also stops, without consuming anything,
/// when peeking yields no token or a lexer error.
///
/// # Errors
///
/// Returns an error if an operand fails to parse, if the token following an
/// operand cannot be converted into an `Operator`, or if that token has no
/// binary operator precedence. The last case previously caused a panic.
fn parse_binary_expr_or_higher(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
    let mut expr = self.parse_unary_expr_or_higher()?;

    while let Some(Ok(token)) = self.lexer.peek() {
        if terminators.contains(&token.kind) {
            self.lexer.next();
            break;
        }

        let op = Operator::from_token(&token)?;
        // A token may map to an operator that is not usable in binary
        // position; report that instead of unwrapping a `None`.
        let precedence = match token.kind.binary_op_precedence() {
            Some(precedence) => precedence,
            None => {
                return self.syntax_error(
                    format!("Expected a binary operator, got {}", token),
                    &token,
                )
            }
        };
        // Only consume the operator once it is known to be valid.
        self.lexer.next();

        expr = tree::Node::BinaryExpr {
            op,
            lhs: Box::new(expr),
            rhs: Box::new(self.parse_partial_binary_expr(precedence, terminators)?),
        };
    }

    Ok(expr)
}
/// Parse the right-hand operand of a binary operator of the given
/// `precedence`.
///
/// Starts with a unary expression and keeps folding in following operators
/// for as long as their precedence is strictly greater than `precedence`.
/// The first token that is not such an operator is left unconsumed for the
/// caller, so `terminators` are never consumed here; they are only passed
/// on to the recursive calls.
fn parse_partial_binary_expr(&mut self, precedence: u32, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
    let mut operand = self.parse_unary_expr_or_higher()?;

    while let Some(Ok(next)) = self.lexer.peek() {
        let tighter = match next.kind.binary_op_precedence() {
            Some(p) if p > precedence => p,
            // Not an operator, or one that binds no tighter: the caller handles it.
            _ => break,
        };

        let op = Operator::from_token(&next)?;
        self.lexer.next();
        operand = tree::Node::BinaryExpr {
            op,
            lhs: Box::new(operand),
            rhs: Box::new(self.parse_partial_binary_expr(tighter, terminators)?),
        };
    }

    Ok(operand)
}
/// Parse a primary expression with at most one leading `!` or `-`.
///
/// When the next token is `!` or `-`, it is consumed and the primary
/// expression that follows is wrapped in a `UnaryExpr`. In every other
/// case, including end of input, parsing is delegated to
/// `parse_primary_expr`. A lexer error seen while peeking is propagated.
fn parse_unary_expr_or_higher(&mut self) -> Result<tree::Node, Error> {
    let prefix = match self.lexer.peek() {
        Some(peeked) => peeked?,
        None => return self.parse_primary_expr(),
    };

    match prefix.kind {
        token::Kind::Bang | token::Kind::Minus => {
            // Consume the unary operator token before parsing its operand.
            self.lexer.next();
            let op = Operator::from_token(&prefix)?;
            let operand = self.parse_primary_expr()?;
            Ok(tree::Node::UnaryExpr {
                op,
                node: Box::new(operand),
            })
        }
        _ => self.parse_primary_expr(),
    }
}
/// Parse a primary expression: an identifier (optionally followed by a call
/// or index suffix), an integer literal, a string literal, or an array.
///
/// Integer literals whose second character is `x`, `o` or `b` are read as
/// hexadecimal, octal or binary respectively, using the digits after the
/// two-character prefix. All other integer literals are read as decimal.
///
/// # Errors
///
/// Returns an error if no token is available, if the token cannot start a
/// primary expression, or if an integer literal cannot be represented as an
/// `i128`. The last case previously caused a panic.
fn parse_primary_expr(&mut self) -> Result<tree::Node, Error> {
    let token = self.lexer.require_next()?;
    match token.kind {
        token::Kind::Ident => {
            let ident = tree::Node::Ident(String::from(token.raw));
            self.parse_primary_expr_rest(ident)
        }
        token::Kind::IntLiteral => {
            // Borrow rather than move so the token stays usable for error reporting.
            let raw = token.raw.as_str();
            let parsed = match raw.chars().nth(1) {
                Some('x') => i128::from_str_radix(&raw[2..], 16),
                Some('o') => i128::from_str_radix(&raw[2..], 8),
                Some('b') => i128::from_str_radix(&raw[2..], 2),
                _ => raw.parse::<i128>(),
            };
            match parsed {
                Ok(num) => Ok(tree::Node::Int(num)),
                Err(err) => self.syntax_error(
                    format!("Invalid integer literal \"{}\": {}", raw, err),
                    &token,
                ),
            }
        }
        token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)),
        token::Kind::OBracket => self.parse_array(),
        _ => self.syntax_error(format!("Unexpected token {}", token.kind), &token),
    }
}
/// Parse an optional suffix after the already parsed expression `start`.
///
/// Peeks at the next token: `(` turns `start` into a `CallExpr` with the
/// parsed parameter list, and `[` turns it into an `ArrayExpr` with the
/// parsed index expression. For any other token, or when peeking yields
/// no token or a lexer error, `start` is returned unchanged.
///
/// Only a single suffix is handled; the result is not checked for further
/// suffixes.
fn parse_primary_expr_rest(&mut self, start: tree::Node) -> Result<tree::Node, Error> {
if let Some(Ok(token)) = self.lexer.peek() {
match token.kind {
token::Kind::OParen => {
// function call: consume the `(` and parse the parameters
self.lexer.next();
let params = self.parse_param_list()?;
Ok(tree::Node::CallExpr {
func: Box::new(start),
params,
})
}
token::Kind::OBracket => {
// array index: consume the `[` and parse the index up to `]`
self.lexer.next();
let index = self.parse_expr(&[token::Kind::CBracket])?;
Ok(tree::Node::ArrayExpr {
array: Box::new(start),
index: Box::new(index),
})
}
// anything else is not part of this expression
_ => Ok(start),
}
} else {
Ok(start)
}
} }
fn parse_array(&mut self) -> Result<tree::Node, Error> { fn parse_array(&mut self) -> Result<tree::Node, Error> {
@ -120,31 +303,53 @@ impl Parser {
self.lexer.next(); self.lexer.next();
break; break;
} else { } else {
elements.push(self.parse_rvalue(&[token::Kind::Comma, token::Kind::CBracket])?); elements.push(self.parse_expr(&[token::Kind::Comma, token::Kind::CBracket])?);
} }
} }
Ok(tree::Node::Array(elements)) Ok(tree::Node::Array(elements))
} }
fn parse_lvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> { fn parse_param_list(&mut self) -> Result<Vec<tree::Node>, Error> {
// this will be expanded when we have array accesses and similar stuff let mut params = Vec::new();
let ident_token = self.lexer.expect_kind(token::Kind::Ident)?; while let Some(result) = self.lexer.peek() {
self.lexer.expect_kinds(terminators)?; match result?.kind {
Ok(tree::Node::Ident(ident_token.raw)) token::Kind::CParen => {
self.lexer.next();
break;
},
_ => {
params.push(self.parse_expr(&[token::Kind::Comma, token::Kind::CParen])?);
if self.lexer.current().unwrap().kind == token::Kind::CParen {
break;
}
},
}
}
Ok(params)
}
/// Ensure that `scope` is present somewhere in the parser's scope list.
///
/// Returns `Ok(())` if it is. Otherwise returns a syntax error that names
/// the lexer's current token.
///
/// Panics on the error path if the lexer has no current token.
fn assert_scope(&self, scope: Scope) -> Result<(), Error> {
if self.scope.contains(&scope) {
Ok(())
} else {
// the current token is the one that is not allowed in this scope
let token = self.lexer.current().unwrap();
self.syntax_error(
format!("Token {} cannot be used in this context", token),
token,
)
}
} }
fn parse_rvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> { fn assert_scope_not(&self, scope: Scope) -> Result<(), Error> {
// this will also be expanded to support more complex expressions, naturally if self.scope.contains(&scope) {
let token = self.lexer.require_next()?; let token = self.lexer.current().unwrap();
let node = match token.kind { self.syntax_error(
token::Kind::Ident => Ok(tree::Node::Ident(token.raw)), format!("Token {} cannot be used in this context", token),
token::Kind::IntLiteral => Ok(tree::Node::Int(token.raw.parse().unwrap())), token,
token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)), )
token::Kind::OBracket => self.parse_array(), } else {
k => self.syntax_error(format!("Unexpected {}", k), &token), Ok(())
}?; }
self.lexer.expect_kinds(terminators)?;
Ok(node)
} }
fn syntax_error<T>(&self, msg: String, token: &Token) -> Result<T, Error> { fn syntax_error<T>(&self, msg: String, token: &Token) -> Result<T, Error> {

@ -14,6 +14,16 @@ pub enum Node {
Int(i128), Int(i128),
String(String), String(String),
Array(Vec<Node>), Array(Vec<Node>),
ArrayExpr {
// array access
array: Box<Node>,
index: Box<Node>,
},
CallExpr {
// function call
func: Box<Node>,
params: Vec<Node>,
},
UnaryExpr { UnaryExpr {
op: Operator, op: Operator,
node: Box<Node>, node: Box<Node>,
@ -74,56 +84,6 @@ pub enum Operator {
CaretEq, CaretEq,
} }
impl Operator {
pub const fn precedence(&self) -> u8 {
match self {
Operator::Eq => 2,
Operator::PlusEq => 2,
Operator::MinusEq => 2,
Operator::AsteriskEq => 2,
Operator::AsteriskAsteriskEq => 2,
Operator::SlashEq => 2,
Operator::PercentEq => 2,
Operator::AmpEq => 2,
Operator::PipeEq => 2,
Operator::CaretEq => 2,
Operator::GtGtEq => 2,
Operator::LtLtEq => 2,
Operator::PipePipe => 3,
Operator::AmpAmp => 4,
Operator::EqEq => 5,
Operator::BangEq => 5,
Operator::Lt => 5,
Operator::LtEq => 5,
Operator::Gt => 5,
Operator::GtEq => 5,
Operator::Pipe => 6,
Operator::Caret => 7,
Operator::Amp => 8,
Operator::GtGt => 9,
Operator::LtLt => 9,
Operator::Plus => 10,
Operator::Minus => 10,
Operator::Asterisk => 11,
Operator::Slash => 11,
Operator::Percent => 11,
Operator::AsteriskAsterisk => 12,
Operator::Bang => 13,
}
}
}
impl Node { impl Node {
pub fn walk(&self, cb: fn(node: &Node, depth: u32)) { pub fn walk(&self, cb: fn(node: &Node, depth: u32)) {
self.visit(cb, 0); self.visit(cb, 0);
@ -140,6 +100,16 @@ impl Node {
node.visit(cb, depth); node.visit(cb, depth);
} }
} }
Node::ArrayExpr { array, index } => {
array.visit(cb, depth);
index.visit(cb, depth);
}
Node::CallExpr { func, params } => {
func.visit(cb, depth);
for p in params {
p.visit(cb, depth);
}
}
Node::UnaryExpr { op, node } => node.visit(cb, depth), Node::UnaryExpr { op, node } => node.visit(cb, depth),
Node::BinaryExpr { op, lhs, rhs } => { Node::BinaryExpr { op, lhs, rhs } => {
lhs.visit(cb, depth); lhs.visit(cb, depth);
@ -182,6 +152,8 @@ impl fmt::Display for Node {
Node::DepList(_) => "depend", Node::DepList(_) => "depend",
Node::SourceList(_) => "source", Node::SourceList(_) => "source",
Node::Array(_) => "<array>", Node::Array(_) => "<array>",
Node::ArrayExpr { array, index } => "<array-access>",
Node::CallExpr { func, params } => "<call>",
Node::UnaryExpr { op, node } => op.raw(), Node::UnaryExpr { op, node } => op.raw(),
Node::BinaryExpr { op, lhs, rhs } => op.raw(), Node::BinaryExpr { op, lhs, rhs } => op.raw(),
Node::TypeExpr(_) => "type", Node::TypeExpr(_) => "type",

@ -1,4 +1,4 @@
use std::{fmt, io}; use std::fmt;
use crate::ast::tree::Type; use crate::ast::tree::Type;
use crate::lex::token::Position; use crate::lex::token::Position;

@ -146,6 +146,12 @@ impl Lexer {
Some(t) Some(t)
} }
/// Return the next token without leaving it consumed.
///
/// The token is fetched with `require_next` and the lexer is then stepped
/// back with `prev`. If `require_next` fails, its error is returned and the
/// lexer is not stepped back.
pub fn peek_or_err(&mut self) -> Result<Token, Error> {
    self.require_next().map(|upcoming| {
        self.prev();
        upcoming
    })
}
pub fn prev(&mut self) -> Option<&Token> { pub fn prev(&mut self) -> Option<&Token> {
if self.offset < self.history.len() - 1 { if self.offset < self.history.len() - 1 {
self.offset += 1; self.offset += 1;

@ -153,7 +153,7 @@ impl Kind {
Kind::Slash => Some(11), Kind::Slash => Some(11),
Kind::Percent => Some(11), Kind::Percent => Some(11),
Kind::AsteriskAsterisk => Some(12), Kind::AsteriskAsterisk => Some(12),
_ => None _ => None,
} }
} }
} }

@ -9,6 +9,9 @@ set BUILD_PREFIX = "build";
# a target is a single component. # a target is a single component.
# targets can depend on other targets. # targets can depend on other targets.
target kern { target kern {
owo = 1 + 2 * 3 * 4 - 5;
# the type keyword defines whether the target is supposed to be # the type keyword defines whether the target is supposed to be
# compiled into an executable binary (exe) or a library (lib). # compiled into an executable binary (exe) or a library (lib).
type exe; type exe;
@ -23,6 +26,7 @@ target kern {
} }
target libk { target libk {
print("hello, world");
type lib; type lib;
depend arch; depend arch;
source "libk/lib.rs"; source "libk/lib.rs";

Loading…
Cancel
Save