pub(crate) mod tree;

use crate::ast::tree::Operator;
use crate::error::Error;
use crate::lex::token;
use crate::lex::token::Token;
use crate::lex::Lexer;

use std::fs;
use std::io;

#[derive(PartialEq)]
enum Scope {
    File,
    Target,
    DepList,
    SourceList,
    Function,
}

struct Parser {
    lexer: Lexer,
    scope: Vec<Scope>,
    filename: String,
}

pub fn parse(filename: String) -> io::Result<Result<tree::Node, Error>> {
    let raw: String = fs::read_to_string(filename.clone())?;
    let mut p = Parser::new(filename, raw);
    Ok(p.parse_file())
}

/// All of the functions expect the leading token not to have been consumed yet,
/// meaning you need to use `self.lexer.peek()` when determining which parsing
/// function to call next. They consume every token up to and *including* the
/// terminating one (like a semicolon or closing brace).
///
/// Note: For now, the production rules for grammatical elements are more
/// like "freestyle guidelines" to help people understand the code.
/// In their final form, they will most likely be much more restrictive.
impl Parser {
    pub fn new(filename: String, raw: String) -> Parser {
        let lexer = Lexer::new(filename.clone(), raw);
        Parser {
            lexer,
            scope: Vec::new(),
            filename,
        }
    }

    /// ```notrust
    /// File
    ///   : Statement [ File ]
    /// ```
    pub fn parse_file(&mut self) -> Result<tree::Node, Error> {
        let mut nodes = Vec::new();
        self.scope.push(Scope::File);

        while self.lexer.peek().is_some() {
            nodes.push(self.parse_stmt()?);
        }

        self.scope.pop();
        Ok(tree::Node::File {
            name: self.filename.clone(),
            content: nodes,
        })
    }

    /// ```notrust
    /// Statement
    ///   : DependStatement
    ///   | Function
    ///   | IfStatement
    ///   | ReturnStatement
    ///   | SetStatement
    ///   | SourceStatement
    ///   | TargetStatement
    ///   | TypeStatement
    ///   | ExpressionStatement
    /// ```
    fn parse_stmt(&mut self) -> Result<tree::Node, Error> {
        let token = self.lexer.peek_or_err()?;
        match token.kind {
            token::Kind::DependKeyword => self.parse_depend_stmt(),
            token::Kind::FnKeyword => self.parse_fn(false),
            token::Kind::IfKeyword => self.parse_if_stmt(),
            token::Kind::ReturnKeyword => self.parse_return_stmt(),
            token::Kind::SetKeyword => self.parse_set_stmt(),
            token::Kind::SourceKeyword => self.parse_source_stmt(),
            token::Kind::TargetKeyword => self.parse_target_stmt(),
            token::Kind::TypeKeyword => self.parse_type_stmt(),
            k if k.is_start_of_expr() => self.parse_expr_stmt(),
            _ => self.syntax_error(format!("Unexpected token {}", token), &token),
        }
    }

    /// ```notrust
    /// BlockStatement
    ///   : "{" [ StatementList ] "}"
    ///
    /// StatementList
    ///   : Statement [ StatementList ]
    /// ```
    fn parse_block_stmt(&mut self) -> Result<tree::Node, Error> {
        let mut nodes = Vec::new();
        self.lexer.expect_kind(token::Kind::OBrace)?;

        while let Some(result) = self.lexer.peek() {
            match result?.kind {
                token::Kind::CBrace => {
                    self.lexer.next();
                    break;
                }
                _ => nodes.push(self.parse_stmt()?),
            }
        }

        Ok(tree::Node::Block(nodes))
    }

    /// ```notrust
    /// ReturnStatement
    ///   : "return" Expression ";"
    /// ```
    fn parse_return_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::Function)?;
        self.lexer.expect_kind(token::Kind::ReturnKeyword)?;
        let expr = self.parse_expr(&[token::Kind::Semi])?;
        Ok(tree::Node::ReturnStmt(Box::new(expr)))
    }

    /// ```notrust
    /// TargetStatement
    ///   : "target" Expression BlockStatement
    /// ```
    fn parse_target_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::File)?;
        self.assert_scope_not(Scope::Target)?;
        self.scope.push(Scope::Target);

        self.lexer.expect_kind(token::Kind::TargetKeyword)?;
        let name_token = self.lexer.expect_kind(token::Kind::Ident)?;
        let children = self.parse_block_stmt()?;

        self.scope.pop();
        Ok(tree::Node::Target {
            name: Box::new(tree::Node::Ident(name_token.raw)),
            content: Box::new(children),
        })
    }
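
    // Illustrative sketch (not taken from a real build file): based on the
    // grammar comments in this file, a target and the statements scoped to it
    // are expected to look roughly like
    //
    //     target demo {
    //         type "executable";
    //         source ["main.c", "util.c"];
    //         depend "somelib";
    //     }
    //
    // `parse_target_stmt()` pushes `Scope::Target` before parsing the block,
    // which is what lets the `type`, `source` and `depend` statements below
    // pass their `assert_scope(Scope::Target)` checks. The exact literal and
    // identifier forms here are assumptions about the lexer, not confirmed
    // surface syntax.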

    /// ```notrust
    /// DependStatement
    ///   : "depend" Expression ";"
    /// ```
    fn parse_depend_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::Target)?;
        self.scope.push(Scope::DepList);

        self.lexer.expect_kind(token::Kind::DependKeyword)?;
        let rvalue = self.parse_expr(&[token::Kind::Semi])?;

        self.scope.pop();
        Ok(tree::Node::DepList(Box::new(rvalue)))
    }

    /// ```notrust
    /// IfStatement
    ///   : "if" "(" Expression ")" BlockStatement [ "else" BlockStatement ]
    /// ```
    fn parse_if_stmt(&mut self) -> Result<tree::Node, Error> {
        self.lexer.expect_kind(token::Kind::IfKeyword)?;
        self.lexer.expect_kind(token::Kind::OParen)?;
        let condition = self.parse_expr(&[token::Kind::CParen])?;
        let then_block = self.parse_block_stmt()?;

        let mut else_block = None;
        if let Some(Ok(token)) = self.lexer.peek() {
            if token.kind == token::Kind::ElseKeyword {
                self.lexer.next();
                else_block = Some(Box::new(self.parse_block_stmt()?));
            }
        }

        Ok(tree::Node::IfStmt {
            condition: Box::new(condition),
            then_block: Box::new(then_block),
            else_block,
        })
    }

    /// ```notrust
    /// SetStatement
    ///   : "set" AssignmentExpression ";"
    /// ```
    fn parse_set_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::File)?;
        self.lexer.expect_kind(token::Kind::SetKeyword)?;
        let expr = self.parse_expr(&[token::Kind::Semi])?;

        match expr {
            tree::Node::BinaryExpr { op, lhs, rhs } => {
                if op == Operator::Eq {
                    Ok(tree::Node::SetExpr {
                        name: lhs,
                        val: rhs,
                    })
                } else {
                    self.syntax_error(
                        String::from("Invalid operator"),
                        self.lexer.current().unwrap(),
                    )
                }
            }
            _ => self.syntax_error(
                String::from("Expected an assignment"),
                self.lexer.current().unwrap(),
            ),
        }
    }

    /// ```notrust
    /// TypeStatement
    ///   : "type" Expression ";"
    /// ```
    fn parse_type_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::Target)?;
        self.lexer.expect_kind(token::Kind::TypeKeyword)?;
        let expr = self.parse_expr(&[token::Kind::Semi])?;
        Ok(tree::Node::TypeExpr(Box::new(expr)))
    }

    /// ```notrust
    /// SourceStatement
    ///   : "source" Expression ";"
    /// ```
    fn parse_source_stmt(&mut self) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::Target)?;
        self.lexer.expect_kind(token::Kind::SourceKeyword)?;

        self.scope.push(Scope::SourceList);
        let source = self.parse_expr(&[token::Kind::Semi])?;
        self.scope.pop();

        Ok(tree::Node::SourceList(Box::new(source)))
    }

    /// ```notrust
    /// ExpressionStatement
    ///   : Expression ";"
    /// ```
    fn parse_expr_stmt(&mut self) -> Result<tree::Node, Error> {
        self.parse_expr(&[token::Kind::Semi])
    }

    /// ```notrust
    /// Expression
    ///   : AssignmentExpression
    ///   | BinaryExpression
    ///   | UnaryExpression
    ///   | PrimaryExpression
    /// ```
    fn parse_expr(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
        self.assert_scope(Scope::File)?;

        if let Some(result) = self.lexer.peek() {
            let token = result?;
            if !token.kind.is_start_of_expr() {
                self.syntax_error(String::from("Expected an expression"), &token)
            } else {
                self.parse_assignment_expr_or_higher(terminators)
            }
        } else {
            self.syntax_error(
                String::from("Unexpected EOF"),
                &self.lexer.current().unwrap(),
            )
        }
    }
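
    // The expression parsers below form a precedence ladder; each level
    // delegates to the next one for anything it does not handle itself:
    //
    //     parse_expr()
    //       -> parse_assignment_expr_or_higher()
    //       -> parse_binary_expr_or_higher()
    //       -> parse_unary_expr_or_higher()
    //       -> parse_primary_expr()  (plus parse_primary_expr_rest())
    //
    // For example, `set flags = 1 + 2;` reaches parse_expr() via
    // parse_set_stmt(), comes back as a BinaryExpr with `op == Operator::Eq`,
    // and is then rewritten into a SetExpr by parse_set_stmt(). (The surface
    // syntax in this example is only illustrative.)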

    /// Parse an assignment expression.
    /// This is no different from parsing any other binary expression, except
    /// that assignment operators are right associative, which is why they get
    /// a separate function. Other than that, this method behaves just like
    /// `parse_binary_expr_or_higher()` (it even returns the same kind of
    /// tree node).
    ///
    /// ```notrust
    /// AssignmentExpression
    ///   : PrimaryExpression AssignmentOperator Expression
    ///
    /// AssignmentOperator
    ///   : "=" | "+=" | "-=" | "*=" | "/=" | "%="
    ///   | "&=" | "|=" | "^=" | ">>=" | "<<="
    /// ```
    fn parse_assignment_expr_or_higher(
        &mut self,
        terminators: &[token::Kind],
    ) -> Result<tree::Node, Error> {
        // We speculate on this being an assignment expression, so we need to
        // be able to undo our work in case this speculation doesn't hold true,
        // so that parse_binary_expr_or_higher() can do its thing.
        let bookmark = self.lexer.save();
        let lhs = self.parse_primary_expr()?;

        if let Some(Ok(token)) = self.lexer.peek() {
            if token.kind.is_assignment_op() {
                let op_token = self.lexer.require_next()?;
                let op = Operator::from_token(&op_token)?;
                let rhs = self.parse_binary_expr_or_higher(terminators)?;
                return Ok(tree::Node::BinaryExpr {
                    op,
                    lhs: Box::new(lhs),
                    rhs: Box::new(rhs),
                });
            } else if token.kind.binary_op_precedence().is_some() {
                // Shoot, this wasn't an assignment; all of our work was useless.
                self.lexer.restore(bookmark);
                return self.parse_binary_expr_or_higher(terminators);
            } else {
                self.lexer.expect_kinds(terminators)?;
            }
        }

        Ok(lhs)
    }

    /// Binary expressions are generally left associative (except for
    /// assignments, which are handled separately in
    /// `parse_assignment_expr_or_higher()`). However, things get a little more
    /// tricky once you take into account that there are 9 different levels of
    /// precedence.
    ///
    /// ```notrust
    /// BinaryExpression
    ///   : Expression BinaryOperator Expression
    ///
    /// BinaryOperator
    ///   : "||" | "&&" | "==" | "!=" | "<" | "<=" | ">" | ">="
    ///   | "|" | "^" | "&" | "<<" | ">>" | "+" | "-" | "*" | "/" | "%"
    /// ```
    fn parse_binary_expr_or_higher(
        &mut self,
        terminators: &[token::Kind],
    ) -> Result<tree::Node, Error> {
        let mut expr = self.parse_unary_expr_or_higher()?;

        while let Some(Ok(token)) = self.lexer.peek() {
            if terminators.contains(&token.kind) {
                self.lexer.next();
                break;
            }

            let op = Operator::from_token(&token)?;
            self.lexer.next();
            let precedence = token.kind.binary_op_precedence().unwrap();

            expr = tree::Node::BinaryExpr {
                op,
                lhs: Box::new(expr),
                rhs: Box::new(self.parse_binary_rhs(precedence, terminators)?),
            };
        }

        Ok(expr)
    }
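
    // A short trace (illustrative only) of how the method above and the one
    // below cooperate on `1 + 2 * 3;` with `;` as the terminator:
    //
    //     parse_binary_expr_or_higher()
    //         reads `1`, sees `+`, consumes it
    //         -> parse_binary_rhs(prec(+))
    //                reads `2`, peeks `*`  (prec(*) > prec(+))
    //                -> parse_binary_rhs(prec(*)) reads `3`, peeks `;`, returns 3
    //                returns BinaryExpr(2 * 3)
    //         expr = BinaryExpr(1 + (2 * 3))
    //         peeks `;` (a terminator), consumes it, returns
    //
    // For `1 + 2 - 3;` the inner call returns plain `2` instead, because `-`
    // does not bind tighter than `+`, so the loop in
    // parse_binary_expr_or_higher() builds the left-associative tree
    // ((1 + 2) - 3) itself.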

    /// This is for parsing the right-hand side of a binary expression.
    /// If the expression is followed by another operator with higher precedence, we need to
    /// consume that entire subexpression and return it to the caller. This is best described
    /// by the following two examples: the left one is the result of `1 + 2 - 3`, and
    /// the right one is `1 + 2 * 3` (note how the plus operator moves to the top of the tree
    /// in the right example due to the multiplication operator's higher precedence).
    ///
    /// ```notrust
    ///     -         +
    ///    / \       / \
    ///   +   3     1   *
    ///  / \           / \
    /// 1   2         2   3
    /// ```
    ///
    /// `parse_binary_expr_or_higher()` parses only left associatively, through iteration.
    /// It always calls this method to try and parse any chained binary expressions of higher
    /// precedence. In the simplest case, this method reads only one unary expression or
    /// higher and immediately returns (if the following binary operator has equal or lower
    /// precedence). In other cases, it invokes one recursion per increase in precedence.
    fn parse_binary_rhs(
        &mut self,
        precedence: u32,
        terminators: &[token::Kind],
    ) -> Result<tree::Node, Error> {
        let mut lhs = self.parse_unary_expr_or_higher()?;

        while let Some(Ok(token)) = self.lexer.peek() {
            if let Some(new_precedence) = token.kind.binary_op_precedence() {
                if new_precedence > precedence {
                    let op = Operator::from_token(&token)?;
                    self.lexer.next();
                    lhs = tree::Node::BinaryExpr {
                        op,
                        lhs: Box::new(lhs),
                        rhs: Box::new(self.parse_binary_rhs(new_precedence, terminators)?),
                    };
                } else {
                    break;
                }
            } else {
                break;
            }
        }

        Ok(lhs)
    }

    /// ```notrust
    /// UnaryExpression
    ///   : UnaryOperator Expression
    ///
    /// UnaryOperator
    ///   : "!" | "-"
    /// ```
    fn parse_unary_expr_or_higher(&mut self) -> Result<tree::Node, Error> {
        if let Some(result) = self.lexer.peek() {
            let token = result?;
            if token.kind == token::Kind::Bang || token.kind == token::Kind::Minus {
                self.lexer.next(); // consume unary operator token
                let op = Operator::from_token(&token)?;
                let expr = self.parse_primary_expr()?;
                return Ok(tree::Node::UnaryExpr {
                    op,
                    node: Box::new(expr),
                });
            }
        }

        self.parse_primary_expr()
    }

    /// ```notrust
    /// PrimaryExpression
    ///   : "(" Expression ")"
    ///   | ArrayExpression
    ///   | CallExpression
    ///   | Identifier
    ///   | StringLiteral
    ///   | IntLiteral
    ///   | ArrayLiteral
    ///   | BoolLiteral
    ///
    /// ArrayExpression
    ///   : PrimaryExpression "[" Expression "]"
    ///
    /// CallExpression
    ///   : PrimaryExpression "(" [ ParameterList ] ")"
    ///
    /// ParameterList
    ///   : Expression [ "," ]
    ///   | Expression "," ParameterList
    /// ```
    fn parse_primary_expr(&mut self) -> Result<tree::Node, Error> {
        let token = self.lexer.require_next()?;
        match token.kind {
            token::Kind::OParen => {
                let expr = self.parse_binary_expr_or_higher(&[token::Kind::CParen])?;
                self.parse_primary_expr_rest(expr)
            }
            token::Kind::Ident => {
                let ident = tree::Node::Ident(String::from(token.raw));
                self.parse_primary_expr_rest(ident)
            }
            token::Kind::IntLiteral => {
                let raw = token.raw;
                let num = match raw.chars().nth(1) {
                    Some('x') => i128::from_str_radix(&raw[2..], 16),
                    Some('o') => i128::from_str_radix(&raw[2..], 8),
                    Some('b') => i128::from_str_radix(&raw[2..], 2),
                    _ => raw.parse(),
                }
                .unwrap();
                Ok(tree::Node::Int(num))
            }
            token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)),
            token::Kind::TrueKeyword => Ok(tree::Node::Bool(true)),
            token::Kind::FalseKeyword => Ok(tree::Node::Bool(false)),
            token::Kind::FnKeyword => {
                self.lexer.prev(); // parse_fn() expects to consume the keyword
                self.parse_fn(true)
            }
            token::Kind::OBracket => {
                let elements =
                    self.parse_delimited_list(token::Kind::Comma, token::Kind::CBracket, true)?;
                Ok(tree::Node::Array(elements))
            }
            _ => self.syntax_error(format!("Unexpected token {}", token.kind), &token),
        }
    }
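
    // Note on integer literals (illustrative, assuming the lexer keeps the
    // radix prefix in `Token::raw`): parse_primary_expr() dispatches on the
    // second character of the raw text, so
    //
    //     "42"    -> Node::Int(42)    (decimal fallback via str::parse)
    //     "0xff"  -> Node::Int(255)   (base 16)
    //     "0o755" -> Node::Int(493)   (base 8)
    //     "0b101" -> Node::Int(5)     (base 2)
    //
    // A malformed literal currently panics on the `.unwrap()` instead of
    // producing a syntax error.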

    /// Parse an optional appendix to a primary expression, i.e. an array access
    /// or function call. These can also be chained, for example when dealing
    /// with a matrix or a function returning another function, like this:
    ///
    /// ```notrust
    /// matrix[y][x]
    /// array_of_functions[index](params)
    /// function_returning_an_array(params)[index]
    /// (fn(a, b) { return a + b; })(1, 2)
    /// ```
    fn parse_primary_expr_rest(&mut self, start: tree::Node) -> Result<tree::Node, Error> {
        if let Some(Ok(token)) = self.lexer.peek() {
            match token.kind {
                token::Kind::OParen => {
                    // function call
                    self.lexer.next();
                    let params =
                        self.parse_delimited_list(token::Kind::Comma, token::Kind::CParen, false)?;
                    self.parse_primary_expr_rest(tree::Node::CallExpr {
                        func: Box::new(start),
                        params,
                    })
                }
                token::Kind::OBracket => {
                    // array index
                    self.lexer.next();
                    let index = self.parse_expr(&[token::Kind::CBracket])?;
                    self.parse_primary_expr_rest(tree::Node::ArrayExpr {
                        array: Box::new(start),
                        index: Box::new(index),
                    })
                }
                _ => Ok(start),
            }
        } else {
            Ok(start)
        }
    }

    /// ```notrust
    /// Function
    ///   : "fn" [ Identifier ] "(" [ ParameterList ] ")" BlockStatement
    ///
    /// ParameterList
    ///   : Identifier [ "," ParameterList ]
    /// ```
    fn parse_fn(&mut self, allow_anonymous: bool) -> Result<tree::Node, Error> {
        self.scope.push(Scope::Function);
        self.lexer.expect_kind(token::Kind::FnKeyword)?;

        // The function name is optional (there are inline anonymous functions).
        let name = if let Some(Ok(token)) = self.lexer.peek() {
            if token.kind == token::Kind::Ident {
                self.lexer.next();
                Some(Box::new(tree::Node::Ident(token.raw)))
            } else {
                None
            }
        } else {
            None
        };

        let oparen = self.lexer.expect_kind(token::Kind::OParen)?;
        if name.is_none() && !allow_anonymous {
            // Anonymous functions are not allowed for definitions as a block
            // statement (you can only do that with inline functions).
            return self.syntax_error(String::from("Function name required"), &oparen);
        }

        let params = self.parse_delimited_list(token::Kind::Comma, token::Kind::CParen, false)?;
        for p in &params {
            match p {
                tree::Node::Ident(_) => continue,
                _ => {
                    return self.syntax_error(
                        String::from("Not an identifier"),
                        &self.lexer.current().unwrap(),
                    )
                }
            }
        }

        let body = self.parse_block_stmt()?;
        self.scope.pop();
        Ok(tree::Node::Fn {
            name,
            params,
            body: Box::new(body),
        })
    }
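
    // How the list helper below is used elsewhere in this file (the element
    // syntax is only illustrative):
    //
    //     [1, 2, 3,]    parse_delimited_list(Comma, CBracket, true)
    //                   trailing comma allowed in array literals
    //     f(a, b)       parse_delimited_list(Comma, CParen, false)
    //                   no trailing comma in call or fn parameter lists
    //
    // In both cases the terminator itself is consumed before returning.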

    /// Parse a terminated, delimited list of expressions. This is used for
    /// parameter lists in function calls and elements in array literals.
    fn parse_delimited_list(
        &mut self,
        delimiter: token::Kind,
        terminator: token::Kind,
        allow_trailing_delimiter: bool,
    ) -> Result<Vec<tree::Node>, Error> {
        let mut list = Vec::new();

        // In the simplest case, we immediately see the terminator.
        // That means we are already finished and return an empty list.
        if let Some(Ok(token)) = self.lexer.peek() {
            if token.kind == terminator {
                self.lexer.next();
                return Ok(list);
            }
        }

        // now we know the list must contain at least one item
        while self.lexer.peek().is_some() {
            list.push(self.parse_expr(&[delimiter, terminator])?);

            let current = self.lexer.current().unwrap();
            if current.kind == terminator {
                // this is the end of the list, we are finished
                break;
            } else if current.kind == delimiter {
                // depending on whether trailing delimiters are allowed,
                // this might still be the end of the list
                if let Some(Ok(token)) = self.lexer.peek() {
                    if token.kind == terminator && allow_trailing_delimiter {
                        // so we saw a trailing delimiter followed by the
                        // terminator *and* trailing delimiters are allowed;
                        // this means we are finished here
                        self.lexer.next();
                        break;
                    }
                }
            } else {
                // this should never happen since parse_expr() always returns
                // with the current token kind being one of the ones specified
                // (otherwise it would return a syntax error, in which case we
                // wouldn't even reach this entire if block in the first place)
                panic!("parse_expr() ended with an illegal token");
            }
        }

        Ok(list)
    }

    /// Ensure that the `scope` stack contains a certain scope.
    fn assert_scope(&self, scope: Scope) -> Result<(), Error> {
        if self.scope.contains(&scope) {
            Ok(())
        } else {
            let token = self.lexer.current().unwrap();
            self.syntax_error(
                format!("Token {} cannot be used in this context", token),
                token,
            )
        }
    }

    /// Ensure that the `scope` stack does not contain a certain scope.
    fn assert_scope_not(&self, scope: Scope) -> Result<(), Error> {
        if self.scope.contains(&scope) {
            let token = self.lexer.current().unwrap();
            self.syntax_error(
                format!("Token {} cannot be used in this context", token),
                token,
            )
        } else {
            Ok(())
        }
    }

    fn syntax_error<T>(&self, msg: String, token: &Token) -> Result<T, Error> {
        Err(Error::syntax_error(token.pos.clone(), msg))
    }
}
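
// A minimal smoke-test sketch, not part of the original test suite. It only
// asserts that parsing succeeds; the surface syntax in `src` is inferred from
// the grammar comments above and may not match the real lexer exactly.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_a_simple_file() {
        // `set` requires file scope, which parse_file() pushes for us.
        let src = String::from("set flags = 1 + 2 * 3;");
        let mut parser = Parser::new(String::from("<test>"), src);
        assert!(parser.parse_file().is_ok());
    }
}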