pub(crate) mod tree;

use crate::ast::tree::{File, Operator};
use crate::error::Error;
use crate::lex::token;
use crate::lex::token::Token;
use crate::lex::Lexer;
use std::fs;
use std::io;
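
/// The syntactic constructs the parser can be nested in. The parser keeps a
/// stack of these for context-sensitive checks, e.g. `break` is only valid
/// somewhere inside a loop and `depend` only inside a target block.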
#[derive(PartialEq)]
enum Scope {
    File,
    Target,
    DepList,
    SourceList,
    Function,
    Loop,
}
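
/// Recursive-descent parser: wraps the `Lexer` that produces the token
/// stream and tracks the stack of scopes it is currently nested in.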
struct Parser {
    lexer: Lexer,
    scope: Vec<Scope>,
    filename: String,
}
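
/// Read the file at `filename` and parse it into an AST. The outer `Result`
/// carries I/O errors from reading the file, the inner one carries syntax
/// errors from parsing its contents.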
pub fn parse(filename: String) -> io::Result<Result<tree::Node, Error>> {
    let raw: String = fs::read_to_string(filename.clone())?;
    let mut p = Parser::new(filename, raw);
    Ok(p.parse_file())
}
/// All of the parsing functions expect their leading token not to have been
/// consumed yet, meaning you need to use `self.lexer.peek()` when deciding
/// which parsing function to call next. They consume every token up to and
/// *including* the terminating one (like a semicolon or closing brace).
///
/// Note: For now, the production rules for grammatical elements are more
/// like "freestyle guidelines" to help people understand the code.
/// In their final form, they will most likely be much more restrictive.
impl Parser {
    pub fn new(filename: String, raw: String) -> Parser {
        let lexer = Lexer::new(filename.clone(), raw);
        Parser {
            lexer,
            scope: Vec::new(),
            filename,
        }
    }

    /// ```notrust
    /// File
    ///     : Statement [ File ]
    /// ```
    pub fn parse_file(&mut self) -> Result<tree::Node, Error> {
        let mut nodes = Vec::new();
        self.scope.push(Scope::File);
        while self.lexer.peek().is_some() {
            nodes.push(self.parse_stmt()?);
        }
        self.scope.pop();
        Ok(tree::Node::File(
            token::Position {
                file: self.filename.clone(),
                line: 0,
                col: 0,
            },
            File {
                name: self.filename.clone(),
                content: nodes,
            },
        ))
    }

    /// ```notrust
    /// Statement
    ///     : BreakStatement
    ///     | DependStatement
    ///     | ForStatement
    ///     | Function
    ///     | IfStatement
    ///     | ReturnStatement
    ///     | SetStatement
    ///     | SourceStatement
    ///     | TargetStatement
    ///     | TypeStatement
    ///     | WhileStatement
    ///     | ExpressionStatement
    /// ```
    fn parse_stmt(&mut self) -> Result<tree::Node, Error> {
        let token = self.lexer.peek_or_err()?;
        match token.kind {
            token::Kind::BreakKeyword => self.parse_break_stmt(),
            token::Kind::DependKeyword => self.parse_depend_stmt(),
            token::Kind::FnKeyword => self.parse_fn(false),
            token::Kind::ForKeyword => self.parse_for_stmt(),
            token::Kind::IfKeyword => self.parse_if_stmt(),
            token::Kind::ReturnKeyword => self.parse_return_stmt(),
            token::Kind::SetKeyword => self.parse_set_stmt(),
            token::Kind::SourceKeyword => self.parse_source_stmt(),
            token::Kind::TargetKeyword => self.parse_target_stmt(),
            token::Kind::TypeKeyword => self.parse_type_stmt(),
            token::Kind::WhileKeyword => self.parse_while_stmt(),
            k if k.is_start_of_expr() => self.parse_expr_stmt(),
            _ => self.syntax_error(format!("Unexpected token {}", token), &token),
        }
    }

    /// ```notrust
    /// BreakStatement
    ///     : "break" ";"
    /// ```
    fn parse_break_stmt(&mut self) -> Result<tree::Node, Error> {
        let break_keyword = self.lexer.expect_kind(token::Kind::BreakKeyword)?;
        self.assert_scope(Scope::Loop)?;
        self.lexer.expect_kind(token::Kind::Semi)?;
        Ok(tree::Node::Break(break_keyword.pos))
    }

    /// ```notrust
    /// BlockStatement
    ///     : "{" [ StatementList ] "}"
    ///
    /// StatementList
    ///     : Statement [ StatementList ]
    /// ```
    fn parse_block_stmt(&mut self) -> Result<tree::Node, Error> {
        let mut nodes = Vec::new();
        let obrace = self.lexer.expect_kind(token::Kind::OBrace)?;
        while let Some(result) = self.lexer.peek() {
            match result?.kind {
                token::Kind::CBrace => {
                    self.lexer.next();
                    break;
                }
                _ => nodes.push(self.parse_stmt()?),
            }
        }
        Ok(tree::Node::Block(obrace.pos, nodes))
    }

    /// ```notrust
    /// ReturnStatement
    ///     : "return" Expression ";"
    /// ```
    fn parse_return_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::Function)?;
        let return_keyword = self.lexer.expect_kind(token::Kind::ReturnKeyword)?;
        let expr = self.parse_expr(&[token::Kind::Semi])?;
        Ok(tree::Node::ReturnStmt(return_keyword.pos, Box::new(expr)))
    }

    /// ```notrust
    /// TargetStatement
    ///     : "target" Identifier BlockStatement
    /// ```
    fn parse_target_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::File)?;
        self.assert_scope_not(Scope::Target)?;
        self.scope.push(Scope::Target);
        let target_keyword = self.lexer.expect_kind(token::Kind::TargetKeyword)?;
        let name_token = self.lexer.expect_kind(token::Kind::Ident)?;
        let name = tree::Node::Ident(name_token.pos, name_token.raw);
        let children = self.parse_block_stmt()?;
        self.scope.pop();
        Ok(tree::Node::make_target_stmt(
            target_keyword.pos,
            name,
            children,
        ))
    }

    /// ```notrust
    /// DependStatement
    ///     : "depend" Expression ";"
    /// ```
    fn parse_depend_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::Target)?;
        self.scope.push(Scope::DepList);
        let depend_keyword = self.lexer.expect_kind(token::Kind::DependKeyword)?;
        let rvalue = self.parse_expr(&[token::Kind::Semi])?;
        self.scope.pop();
        Ok(tree::Node::DepList(depend_keyword.pos, Box::new(rvalue)))
    }

    /// ```notrust
    /// IfStatement
    ///     : "if" "(" Expression ")" BlockStatement [ "else" BlockStatement ]
    /// ```
    fn parse_if_stmt(&mut self) -> Result<tree::Node, Error> {
        let if_keyword = self.lexer.expect_kind(token::Kind::IfKeyword)?;
        self.lexer.expect_kind(token::Kind::OParen)?;
        let condition = self.parse_expr(&[token::Kind::CParen])?;
        let then_block = self.parse_block_stmt()?;
        let else_block = if self.lexer.next_if(token::Kind::ElseKeyword).is_some() {
            Some(self.parse_block_stmt()?)
        } else {
            None
        };
        Ok(tree::Node::make_if_stmt(
            if_keyword.pos,
            condition,
            then_block,
            else_block,
        ))
    }

    /// ```notrust
    /// WhileStatement
    ///     : "while" "(" Expression ")" BlockStatement
    /// ```
    fn parse_while_stmt(&mut self) -> Result<tree::Node, Error> {
        let while_keyword = self.lexer.expect_kind(token::Kind::WhileKeyword)?;
        self.scope.push(Scope::Loop);
        self.lexer.expect_kind(token::Kind::OParen)?;
        let condition = self.parse_expr(&[token::Kind::CParen])?;
        let body = self.parse_block_stmt()?;
        self.scope.pop();
        Ok(tree::Node::make_while_stmt(
            while_keyword.pos,
            condition,
            body,
        ))
    }

    /// ```notrust
    /// ForStatement
    ///     : "for" "(" [ Expression ] ";" [ Expression ] ";" [ Expression ] ")" BlockStatement
    /// ```
    fn parse_for_stmt(&mut self) -> Result<tree::Node, Error> {
        let for_keyword = self.lexer.expect_kind(token::Kind::ForKeyword)?;
        self.scope.push(Scope::Loop);
        self.lexer.expect_kind(token::Kind::OParen)?;
        let terminators = [token::Kind::Semi, token::Kind::Semi, token::Kind::CParen];
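        // The three header clauses (setup, condition, step) are terminated by
        // ';', ';' and ')' respectively; each clause may also be left empty.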
        let mut exprs = Vec::new();
        for i in 0..3 {
            exprs.push(if self.lexer.next_if(terminators[i]).is_some() {
                None
            } else {
                Some(self.parse_expr(&terminators[i..=i])?)
            });
        }
        let body = self.parse_block_stmt()?;
        self.scope.pop();
        let exec = exprs.pop().unwrap();
        let condition = exprs.pop().unwrap();
        let setup = exprs.pop().unwrap();
        Ok(tree::Node::make_for_stmt(
            for_keyword.pos,
            setup,
            condition,
            exec,
            body,
        ))
    }

    /// ```notrust
    /// SetStatement
    ///     : "set" AssignmentExpression ";"
    /// ```
    fn parse_set_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::File)?;
        let set_keyword = self.lexer.expect_kind(token::Kind::SetKeyword)?;
        let expr = self.parse_expr(&[token::Kind::Semi])?;
        match expr {
            tree::Node::BinaryExpr(_, expr) => {
                if expr.op == Operator::Eq {
                    Ok(tree::Node::make_set_stmt(
                        set_keyword.pos,
                        *expr.lhs,
                        *expr.rhs,
                    ))
                } else {
                    self.syntax_error(
                        String::from("Invalid operator"),
                        self.lexer.current().unwrap(),
                    )
                }
            }
            _ => self.syntax_error(
                String::from("Expected an assignment"),
                self.lexer.current().unwrap(),
            ),
        }
    }

    /// ```notrust
    /// TypeStatement
    ///     : "type" Expression ";"
    /// ```
    fn parse_type_stmt(&mut self) -> Result<tree::Node, Error> {
        let type_keyword = self.lexer.expect_kind(token::Kind::TypeKeyword)?;
        self.assert_scope(Scope::Target)?;
        let expr = self.parse_expr(&[token::Kind::Semi])?;
        Ok(tree::Node::TypeStmt(type_keyword.pos, Box::new(expr)))
    }

    /// ```notrust
    /// SourceStatement
    ///     : "source" Expression ";"
    /// ```
    fn parse_source_stmt(&mut self) -> Result<tree::Node, Error> {
        let source_keyword = self.lexer.expect_kind(token::Kind::SourceKeyword)?;
        self.assert_scope(Scope::Target)?;
        self.scope.push(Scope::SourceList);
        let source = self.parse_expr(&[token::Kind::Semi])?;
        self.scope.pop();
        Ok(tree::Node::SourceList(source_keyword.pos, Box::new(source)))
    }

    /// ```notrust
    /// ExpressionStatement
    ///     : Expression ";"
    /// ```
    fn parse_expr_stmt(&mut self) -> Result<tree::Node, Error> {
        self.parse_expr(&[token::Kind::Semi])
    }

    /// ```notrust
    /// Expression
    ///     : AssignmentExpression
    ///     | BinaryExpression
    ///     | UnaryExpression
    ///     | PrimaryExpression
    /// ```
    fn parse_expr(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::File)?;
        if let Some(result) = self.lexer.peek() {
            let token = result?;
            if !token.kind.is_start_of_expr() {
                self.syntax_error(String::from("Expected an expression"), &token)
            } else {
                self.parse_assignment_expr_or_higher(terminators)
            }
        } else {
            self.syntax_error(
                String::from("Unexpected EOF"),
                &self.lexer.current().unwrap(),
            )
        }
    }

    /// Parse an assignment expression.
    /// This works much like parsing any other binary expression, except that
    /// assignment operators are right associative, which is why they need a
    /// separate function. Other than that, this method behaves just like
    /// `parse_binary_expr_or_higher()` (it even returns the same kind of
    /// tree node).
    ///
    /// ```notrust
    /// AssignmentExpression
    ///     : PrimaryExpression AssignmentOperator Expression
    ///
    /// AssignmentOperator
    ///     : "=" | "+=" | "-=" | "*=" | "/=" | "%="
    ///     | "&=" | "|=" | "^=" | ">>=" | "<<="
    /// ```
    fn parse_assignment_expr_or_higher(
        &mut self,
        terminators: &[token::Kind],
    ) -> Result<tree::Node, Error> {
        // We speculate that this is an assignment expression, so we need to be
        // able to undo our work in case that speculation turns out to be wrong
        // and parse_binary_expr_or_higher() has to take over instead.
        let bookmark = self.lexer.save();
        let lhs = self.parse_primary_expr()?;
        if let Some(Ok(token)) = self.lexer.peek() {
            if token.kind.is_assignment_op() {
                let op_token = self.lexer.require_next()?;
                let op = Operator::from_token(&op_token)?;
                let rhs = self.parse_binary_expr_or_higher(terminators)?;
                return Ok(tree::Node::make_binary_expr(op_token.pos, lhs, op, rhs));
            } else if token.kind.binary_op_precedence().is_some() {
                // shoot, this wasn't an assignment, all of our work was useless
                self.lexer.restore(bookmark);
                return self.parse_binary_expr_or_higher(terminators);
            } else {
                self.lexer.expect_kinds(terminators)?;
            }
        }
        Ok(lhs)
    }

    /// Binary expressions are generally left associative (except for
    /// assignments, which are handled separately in
    /// `parse_assignment_expr_or_higher()`). However, things get a little
    /// trickier once you take into account that there are 9 different levels
    /// of precedence.
    ///
    /// ```notrust
    /// BinaryExpression
    ///     : Expression BinaryOperator Expression
    ///
    /// BinaryOperator
    ///     : "||" | "&&" | "==" | "!=" | "<" | "<=" | ">" | ">="
    ///     | "|" | "^" | "&" | "<<" | ">>" | "+" | "-" | "*" | "/" | "%"
    /// ```
    fn parse_binary_expr_or_higher(
        &mut self,
        terminators: &[token::Kind],
    ) -> Result<tree::Node, Error> {
        let mut lhs = self.parse_unary_expr_or_higher()?;
        while let Some(Ok(token)) = self.lexer.peek() {
            if terminators.contains(&token.kind) {
                self.lexer.next();
                break;
            }
            // A token that is neither a terminator nor a binary operator is
            // reported as a syntax error instead of panicking on unwrap().
            let precedence = match token.kind.binary_op_precedence() {
                Some(p) => p,
                None => return self.syntax_error(format!("Unexpected token {}", token), &token),
            };
            let op = Operator::from_token(&token)?;
            self.lexer.next();
            lhs = tree::Node::make_binary_expr(
                token.pos,
                lhs,
                op,
                self.parse_binary_rhs(precedence, terminators)?,
            );
        }
        Ok(lhs)
    }

    /// This is for parsing the right-hand side of a binary expression.
    /// If the expression is followed by another operator with higher precedence, we need to
    /// consume that entire subexpression and return it to the caller. This is best described
    /// by the following two examples: The left one would be the result of `1 + 2 - 3`, and
    /// the right one is `1 + 2 * 3` (note how the plus operator moves to the top of the tree
    /// in the right example due to the multiplication operator's higher precedence).
    ///
    /// ```notrust
    ///       -              +
    ///      / \            / \
    ///     +   3          1   *
    ///    / \                / \
    ///   1   2              2   3
    /// ```
    ///
    /// `parse_binary_expr_or_higher()` handles left-associative chains purely through
    /// iteration. It always calls this method to try and parse any chained binary expressions
    /// of higher precedence. In the simplest case, this method will only read one unary
    /// expression or higher and immediately return (if the following binary operator has
    /// equal or lower precedence). In other cases, it invokes one recursion per increase in
    /// precedence.
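    ///
    /// Walking through `1 + 2 * 3`: `parse_binary_expr_or_higher()` reads `1`, sees `+` and
    /// calls this method with the precedence of `+`. This method reads `2`, sees `*` with a
    /// higher precedence, recurses, and hands the whole `2 * 3` subtree back, so it ends up
    /// as the right-hand side of the `+` node, as in the right tree above.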
    fn parse_binary_rhs(
        &mut self,
        precedence: u32,
        terminators: &[token::Kind],
    ) -> Result<tree::Node, Error> {
        let mut lhs = self.parse_unary_expr_or_higher()?;
        while let Some(Ok(token)) = self.lexer.peek() {
            if let Some(new_precedence) = token.kind.binary_op_precedence() {
                if new_precedence > precedence {
                    let op = Operator::from_token(&token)?;
                    self.lexer.next();
                    lhs = tree::Node::make_binary_expr(
                        token.pos,
                        lhs,
                        op,
                        self.parse_binary_rhs(new_precedence, terminators)?,
                    );
                } else {
                    break;
                }
            } else {
                break;
            }
        }
        Ok(lhs)
    }

    /// ```notrust
    /// UnaryExpression
    ///     : UnaryOperator Expression
    ///
    /// UnaryOperator
    ///     : "!" | "-"
    /// ```
    fn parse_unary_expr_or_higher(&mut self) -> Result<tree::Node, Error> {
        if let Some(result) = self.lexer.peek() {
            let token = result?;
            if token.kind == token::Kind::Bang || token.kind == token::Kind::Minus {
                self.lexer.next(); // consume unary operator token
                let op = Operator::from_token(&token)?;
                let expr = self.parse_primary_expr()?;
                return Ok(tree::Node::make_unary_expr(token.pos, op, expr));
            }
        }
        self.parse_primary_expr()
    }

    /// ```notrust
    /// PrimaryExpression
    ///     : "(" Expression ")"
    ///     | ArrayExpression
    ///     | CallExpression
    ///     | MemberExpression
    ///     | Identifier
    ///     | StringLiteral
    ///     | IntLiteral
    ///     | ArrayLiteral
    ///     | BoolLiteral
    ///
    /// ArrayExpression
    ///     : PrimaryExpression "[" Expression "]"
    ///
    /// CallExpression
    ///     : PrimaryExpression "(" [ ParameterList ] ")"
    ///
    /// ParameterList
    ///     : Expression [ "," ]
    ///     | Expression "," ParameterList
    ///
    /// MemberExpression
    ///     : PrimaryExpression "." PrimaryExpression
    /// ```
    fn parse_primary_expr(&mut self) -> Result<tree::Node, Error> {
        let token = self.lexer.require_next()?;
        match token.kind {
            token::Kind::OParen => {
                let expr = self.parse_binary_expr_or_higher(&[token::Kind::CParen])?;
                self.parse_primary_expr_rest(expr)
            }
            token::Kind::Ident => {
                let ident = tree::Node::Ident(token.pos, String::from(token.raw));
                self.parse_primary_expr_rest(ident)
            }
            token::Kind::IntLiteral => {
                let raw = token.raw;
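                // Integer literals may carry a radix prefix: 0x... is parsed
                // as hexadecimal, 0o... as octal and 0b... as binary;
                // everything else is treated as a plain decimal literal.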
                let num = match raw.chars().nth(1) {
                    Some('x') => i128::from_str_radix(&raw[2..], 16),
                    Some('o') => i128::from_str_radix(&raw[2..], 8),
                    Some('b') => i128::from_str_radix(&raw[2..], 2),
                    _ => raw.parse(),
                }
                .unwrap();
                Ok(tree::Node::Int(token.pos, num))
            }
            token::Kind::StringLiteral => Ok(tree::Node::String(token.pos, token.raw)),
            token::Kind::TrueKeyword => Ok(tree::Node::Bool(token.pos, true)),
            token::Kind::FalseKeyword => Ok(tree::Node::Bool(token.pos, false)),
            token::Kind::FnKeyword => {
                self.lexer.prev(); // parse_fn() expects to consume the keyword
                self.parse_fn(true)
            }
            token::Kind::OBracket => {
                let elements =
                    self.parse_delimited_list(token::Kind::Comma, token::Kind::CBracket, true)?;
                Ok(tree::Node::Array(token.pos, elements))
            }
            _ => self.syntax_error(format!("Unexpected token {}", token.kind), &token),
        }
    }

    /// Parse an optional appendix to a primary expression, i.e. an array
    /// index, a function call or a member access. These can also be chained,
    /// for example when dealing with a matrix or a function returning another
    /// function like this:
    ///
    /// ```notrust
    /// matrix[y][x]
    /// array_of_functions[index](params)
    /// function_returning_an_array(params)[index]
    /// (fn(a, b) { return a + b; })(1, 2)
    /// ```
    fn parse_primary_expr_rest(&mut self, start: tree::Node) -> Result<tree::Node, Error> {
        if let Some(Ok(token)) = self.lexer.peek() {
            match token.kind {
                token::Kind::OParen => {
                    // function call
                    self.lexer.next();
                    let params =
                        self.parse_delimited_list(token::Kind::Comma, token::Kind::CParen, false)?;
                    self.parse_primary_expr_rest(tree::Node::make_call_expr(
                        token.pos, start, params,
                    ))
                }
                token::Kind::OBracket => {
                    // array index
                    self.lexer.next();
                    let index = self.parse_expr(&[token::Kind::CBracket])?;
                    self.parse_primary_expr_rest(tree::Node::make_array_expr(
                        token.pos, start, index,
                    ))
                }
                token::Kind::Dot => {
                    // member access
                    self.lexer.next();
                    let member = self.lexer.expect_kind(token::Kind::Ident)?;
                    self.parse_primary_expr_rest(tree::Node::make_binary_expr(
                        token.pos,
                        start,
                        Operator::Dot,
                        tree::Node::Ident(member.pos, member.raw),
                    ))
                }
                _ => Ok(start),
            }
        } else {
            Ok(start)
        }
    }

    /// ```notrust
    /// Function
    ///     : "fn" [ Identifier ] "(" [ ParameterList ] ")" BlockStatement
    ///
    /// ParameterList
    ///     : Identifier [ "," ParameterList ]
    /// ```
    fn parse_fn(&mut self, allow_anonymous: bool) -> Result<tree::Node, Error> {
        self.scope.push(Scope::Function);
        let fn_keyword = self.lexer.expect_kind(token::Kind::FnKeyword)?;
        // the function name is optional (there are inline anonymous functions)
        let name = if let Some(Ok(token)) = self.lexer.peek() {
            if token.kind == token::Kind::Ident {
                self.lexer.next();
                Some(tree::Node::Ident(token.pos, token.raw))
            } else {
                None
            }
        } else {
            None
        };
        let oparen = self.lexer.expect_kind(token::Kind::OParen)?;
        if name.is_none() && !allow_anonymous {
            // anonymous functions are not allowed when defining a function as
            // a standalone statement (that only works for inline functions)
            return self.syntax_error(String::from("Function name required"), &oparen);
        }
        let params = self.parse_delimited_list(token::Kind::Comma, token::Kind::CParen, false)?;
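        // function parameters must be plain identifiers; any other kind of
        // expression in the parameter list is rejected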
        for p in &params {
            match p {
                tree::Node::Ident(_, _) => continue,
                _ => {
                    return self.syntax_error(
                        String::from("Not an identifier"),
                        &self.lexer.current().unwrap(),
                    )
                }
            }
        }
        let body = self.parse_block_stmt()?;
        self.scope.pop();
        Ok(tree::Node::make_fn(fn_keyword.pos, name, params, body))
    }

    /// Parse a terminated, delimited list of expressions. This is used for
    /// parameter lists in function calls and elements in array literals.
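    /// For example, `f(a, b)` and `[1, 2, 3,]` both go through this method;
    /// of the current callers, only array literals allow a trailing delimiter.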
    fn parse_delimited_list(
        &mut self,
        delimiter: token::Kind,
        terminator: token::Kind,
        allow_trailing_delimiter: bool,
    ) -> Result<Vec<tree::Node>, Error> {
        let mut list = Vec::new();
        // In the simplest case, we immediately see the terminator.
        // That means we are already finished and return an empty list.
        if let Some(Ok(token)) = self.lexer.peek() {
            if token.kind == terminator {
                self.lexer.next();
                return Ok(list);
            }
        }
        // now we know the list must contain at least one item
        while self.lexer.peek().is_some() {
            list.push(self.parse_expr(&[delimiter, terminator])?);
            let current = self.lexer.current().unwrap();
            if current.kind == terminator {
                // this is the end of the list, we are finished
                break;
            } else if current.kind == delimiter {
                // depending on whether trailing delimiters are allowed,
                // this might still be the end of the list
                if let Some(Ok(token)) = self.lexer.peek() {
                    if token.kind == terminator && allow_trailing_delimiter {
                        // so we saw a trailing delimiter followed by the
                        // terminator *and* trailing delimiters are allowed;
                        // this means we are finished here
                        self.lexer.next();
                        break;
                    }
                }
            } else {
                // this should never happen since parse_expr() always returns
                // with the current token kind being one of the ones specified
                // (otherwise it would return a syntax error, in which case we
                // wouldn't even reach this entire if block in the first place)
                panic!("parse_expr() ended with an illegal token");
            }
        }
        Ok(list)
    }

    /// Ensure that the `scope` stack contains a certain scope.
    fn assert_scope(&self, scope: Scope) -> Result<(), Error> {
        if self.scope.contains(&scope) {
            Ok(())
        } else {
            let token = self.lexer.current().unwrap();
            self.syntax_error(
                format!("Token {} cannot be used in this context", token),
                token,
            )
        }
    }

    /// Ensure that the `scope` stack does not contain a certain scope.
    fn assert_scope_not(&self, scope: Scope) -> Result<(), Error> {
        if self.scope.contains(&scope) {
            let token = self.lexer.current().unwrap();
            self.syntax_error(
                format!("Token {} cannot be used in this context", token),
                token,
            )
        } else {
            Ok(())
        }
    }
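
    /// Build a syntax error at `token`'s position with the given message.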
    fn syntax_error<T>(&self, msg: String, token: &Token) -> Result<T, Error> {
        Err(Error::syntax_error(token.pos.clone(), msg))
    }
}
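
// A couple of smoke tests sketching how the parser is driven. The inputs and
// filenames below are assumptions based on the grammar comments above, not
// taken from an existing test suite, so treat them as illustrative only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_input_parses_to_an_empty_file_node() {
        // "empty.build" is a made-up filename; parse_file() never touches the
        // filesystem, it only records the name in the resulting node.
        let mut p = Parser::new(String::from("empty.build"), String::new());
        assert!(p.parse_file().is_ok());
    }

    #[test]
    fn simple_set_statement_parses() {
        // `set x = 1;` follows the SetStatement production documented above.
        let mut p = Parser::new(String::from("set.build"), String::from("set x = 1;"));
        assert!(p.parse_file().is_ok());
    }
}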