From 05e73aeb2168f5e984963b4df56d1c01b9f272de Mon Sep 17 00:00:00 2001
From: fef
Date: Thu, 28 Jul 2022 18:38:36 +0200
Subject: [PATCH] ast: refactor and add production rule docs

As usual, this is nowhere near finished, but it's a good start.
---
 src/ast/mod.rs   | 181 ++++++++++++++++++++++++++++++++++++++++++-----
 src/ast/tree.rs  |   2 +-
 src/lex/token.rs |   6 +-
 3 files changed, 169 insertions(+), 20 deletions(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 0c43e0c..b20d751 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -5,9 +5,9 @@ use crate::lex::token;
 use crate::lex::token::Token;
 use crate::lex::Lexer;

+use crate::ast::tree::Operator;
 use std::fs;
 use std::io;
-use crate::ast::tree::Operator;

 #[derive(PartialEq)]
 enum Scope {
@@ -29,6 +29,14 @@ pub fn parse(filename: String) -> io::Result<Result<tree::Node, Error>> {
     Ok(p.parse_file())
 }

+/// All of the parsing functions expect that the leading token has not been
+/// consumed yet, meaning you need to use `self.lexer.peek()` when determining
+/// which parsing function to call next. They consume every token up to and
+/// *including* the terminating one (like a semicolon or closing brace).
+///
+/// Note: For now, the production rules for grammatical elements are more
+/// like "freestyle guidelines" to help people understand the code.
+/// In their final form, they will most likely be much more restrictive.
 impl Parser {
     pub fn new(filename: String, raw: String) -> Parser {
         let lexer = Lexer::new(filename.clone(), raw);
@@ -39,6 +47,10 @@ impl Parser {
         }
     }

+    /// ```notrust
+    /// File
+    ///     : Statement [ File ]
+    /// ```
     pub fn parse_file(&mut self) -> Result<tree::Node, Error> {
         let mut nodes = Vec::new();
         self.scope.push(Scope::File);
@@ -54,6 +66,15 @@ impl Parser {
         })
     }

+    /// ```notrust
+    /// Statement
+    ///     : DependStatement
+    ///     | SetStatement
+    ///     | SourceStatement
+    ///     | TargetStatement
+    ///     | TypeStatement
+    ///     | ExpressionStatement
+    /// ```
     fn parse_stmt(&mut self) -> Result<tree::Node, Error> {
         let token = self.lexer.peek_or_err()?;
         match token.kind {
@@ -67,6 +88,10 @@ impl Parser {
         }
     }

+    /// ```notrust
+    /// TargetStatement
+    ///     : "target" Expression BlockStatement
+    /// ```
     fn parse_target_stmt(&mut self) -> Result<tree::Node, Error> {
         self.assert_scope(Scope::File)?;
         self.assert_scope_not(Scope::Target)?;
@@ -95,6 +120,10 @@ impl Parser {
         })
     }

+    /// ```notrust
+    /// DependStatement
+    ///     : "depend" Expression ";"
+    /// ```
     fn parse_depend_stmt(&mut self) -> Result<tree::Node, Error> {
         self.assert_scope(Scope::Target)?;
         self.scope.push(Scope::DepList);
@@ -104,6 +133,10 @@ impl Parser {
         Ok(tree::Node::DepList(Box::new(rvalue)))
     }

+    /// ```notrust
+    /// SetStatement
+    ///     : "set" AssignmentExpression ";"
+    /// ```
     fn parse_set_stmt(&mut self) -> Result<tree::Node, Error> {
         self.assert_scope(Scope::File)?;
         self.lexer.expect_kind(token::Kind::SetKeyword)?;
@@ -119,10 +152,17 @@ impl Parser {
                     self.syntax_error(format!("Invalid operator"), self.lexer.current().unwrap())
                 }
             }
-            _ => self.syntax_error(format!("Expected an assignment"), self.lexer.current().unwrap())
+            _ => self.syntax_error(
+                format!("Expected an assignment"),
+                self.lexer.current().unwrap(),
+            ),
         }
     }

+    /// ```notrust
+    /// TypeStatement
+    ///     : "type" Expression ";"
+    /// ```
     fn parse_type_stmt(&mut self) -> Result<tree::Node, Error> {
         self.assert_scope(Scope::Target)?;
         self.lexer.expect_kind(token::Kind::TypeKeyword)?;
@@ -130,6 +170,10 @@ impl Parser {
         Ok(tree::Node::TypeExpr(Box::new(expr)))
     }

+    /// ```notrust
+    /// SourceStatement
+    ///     : "source" Expression ";"
+    /// ```
     fn parse_source_stmt(&mut self) -> Result<tree::Node, Error> {
         self.assert_scope(Scope::Target)?;
         self.lexer.expect_kind(token::Kind::SourceKeyword)?;
@@ -139,10 +183,21 @@ impl Parser {
         Ok(tree::Node::SourceList(Box::new(source)))
     }

+    /// ```notrust
+    /// ExpressionStatement
+    ///     : Expression ";"
+    /// ```
     fn parse_expr_stmt(&mut self) -> Result<tree::Node, Error> {
         self.parse_expr(&[token::Kind::Semi])
     }

+    /// ```notrust
+    /// Expression
+    ///     : AssignmentExpression
+    ///     | BinaryExpression
+    ///     | UnaryExpression
+    ///     | PrimaryExpression
+    /// ```
     fn parse_expr(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
         self.assert_scope(Scope::File)?;
         let expr = if let Some(result) = self.lexer.peek() {
@@ -161,7 +216,18 @@ impl Parser {
         expr
     }

-    fn parse_assignment_expr_or_higher(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
+    /// ```notrust
+    /// AssignmentExpression
+    ///     : PrimaryExpression AssignmentOperator Expression
+    ///
+    /// AssignmentOperator
+    ///     : "=" | "+=" | "-=" | "*=" | "/=" | "%="
+    ///     | "&=" | "|=" | "^=" | ">>=" | "<<="
+    /// ```
+    fn parse_assignment_expr_or_higher(
+        &mut self,
+        terminators: &[token::Kind],
+    ) -> Result<tree::Node, Error> {
         let lhs = self.parse_primary_expr()?;
         if let Some(Ok(token)) = self.lexer.peek() {
             if token.kind.is_assignment_op() {
@@ -183,7 +249,18 @@ impl Parser {
     /// Binary expressions are generally left associative.
     /// However, things get a little more tricky when taking the fact that there
     /// are 9 different levels of precedence into account.
-    fn parse_binary_expr_or_higher(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
+    ///
+    /// ```notrust
+    /// BinaryExpression
+    ///     : Expression BinaryOperator Expression
+    ///
+    /// BinaryOperator
+    ///     : "|" | "^" | "&" | "<<" | ">>" | "+" | "-" | "*" | "/" | "%"
+    /// ```
+    fn parse_binary_expr_or_higher(
+        &mut self,
+        terminators: &[token::Kind],
+    ) -> Result<tree::Node, Error> {
         let mut expr = self.parse_unary_expr_or_higher()?;

         while let Some(Ok(token)) = self.lexer.peek() {
@@ -198,14 +275,38 @@ impl Parser {
             expr = tree::Node::BinaryExpr {
                 op,
                 lhs: Box::new(expr),
-                rhs: Box::new(self.parse_partial_binary_expr(precedence, terminators)?),
+                rhs: Box::new(self.parse_binary_rhs(precedence, terminators)?),
             };
         }

         Ok(expr)
     }

-    fn parse_partial_binary_expr(&mut self, precedence: u32, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
+    /// This is for parsing the right-hand side of a binary expression.
+    /// If the expression is followed by another operator with higher precedence, we need to
+    /// consume that entire subexpression and return it to the caller. This is best described
+    /// by the following two examples: The left one would be the result of `1 + 2 - 3`, and
+    /// the right one is `1 + 2 * 3` (note how the plus operator moves to the top of the tree
+    /// in the right example due to the multiplication operator's higher precedence).
+    ///
+    /// ```notrust
+    ///       -                 +
+    ///      / \               / \
+    ///     +   3             1   *
+    ///    / \                   / \
+    ///   1   2                 2   3
+    /// ```
+    ///
+    /// `parse_binary_expr_or_higher()` parses only left associatively through iteration.
+    /// It always calls this method to try and parse any chained binary expressions of higher
+    /// precedence. In the simplest case, this method will only read one unary expression
+    /// or higher and immediately return (if the following binary operator has equal or lower
+    /// precedence). In other cases, it invokes one recursion per increase in precedence.
+    fn parse_binary_rhs(
+        &mut self,
+        precedence: u32,
+        terminators: &[token::Kind],
+    ) -> Result<tree::Node, Error> {
         let mut lhs = self.parse_unary_expr_or_higher()?;

         while let Some(Ok(token)) = self.lexer.peek() {
@@ -216,7 +317,7 @@ impl Parser {
                 lhs = tree::Node::BinaryExpr {
                     op,
                     lhs: Box::new(lhs),
-                    rhs: Box::new(self.parse_partial_binary_expr(new_precedence, terminators)?),
+                    rhs: Box::new(self.parse_binary_rhs(new_precedence, terminators)?),
                 };
             } else {
                 break;
@@ -229,6 +330,13 @@ impl Parser {
         Ok(lhs)
     }

+    /// ```notrust
+    /// UnaryExpression
+    ///     : UnaryOperator Expression
+    ///
+    /// UnaryOperator
+    ///     : "!" | "-"
+    /// ```
     fn parse_unary_expr_or_higher(&mut self) -> Result<tree::Node, Error> {
         if let Some(result) = self.lexer.peek() {
             let token = result?;
@@ -245,13 +353,33 @@ impl Parser {
         self.parse_primary_expr()
     }

+    /// ```notrust
+    /// PrimaryExpression
+    ///     : "(" Expression ")"
+    ///     | ArrayExpression
+    ///     | CallExpression
+    ///     | Identifier
+    ///     | StringLiteral
+    ///     | IntLiteral
+    ///     | ArrayLiteral
+    ///
+    /// ArrayExpression
+    ///     : PrimaryExpression "[" Expression "]"
+    ///
+    /// CallExpression
+    ///     : PrimaryExpression "(" [ ParameterList ] ")"
+    ///
+    /// ParameterList
+    ///     : Expression [ "," ]
+    ///     | Expression "," ParameterList
+    /// ```
     fn parse_primary_expr(&mut self) -> Result<tree::Node, Error> {
         let token = self.lexer.require_next()?;
         match token.kind {
             token::Kind::OParen => {
                 let expr = self.parse_binary_expr_or_higher(&[token::Kind::CParen])?;
                 self.parse_primary_expr_rest(expr)
-            },
+            }
             token::Kind::Ident => {
                 let ident = tree::Node::Ident(String::from(token.raw));
                 self.parse_primary_expr_rest(ident)
@@ -259,11 +387,12 @@ impl Parser {
             token::Kind::IntLiteral => {
                 let raw = token.raw;
                 let num = match raw.chars().nth(1) {
-                    Some('x') => i128::from_str_radix(&raw[2..], 16).unwrap(),
-                    Some('o') => i128::from_str_radix(&raw[2..], 8).unwrap(),
-                    Some('b') => i128::from_str_radix(&raw[2..], 2).unwrap(),
-                    _ => raw.parse().unwrap(),
-                };
+                    Some('x') => i128::from_str_radix(&raw[2..], 16),
+                    Some('o') => i128::from_str_radix(&raw[2..], 8),
+                    Some('b') => i128::from_str_radix(&raw[2..], 2),
+                    _ => raw.parse(),
+                }
+                .unwrap();
                 Ok(tree::Node::Int(num))
             }
             token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)),
@@ -272,6 +401,15 @@ impl Parser {
         }
     }

+    /// Parse an optional appendix to a primary expression, i.e. an array access
+    /// or function call.
+    /// This can also be chained, for example when dealing with a matrix or a
+    /// function returning another function like this:
+    ///
+    /// ```notrust
+    /// matrix[y][x]
+    /// array_of_functions[index](params)
+    /// function_returning_an_array(params)[index]
+    /// ```
     fn parse_primary_expr_rest(&mut self, start: tree::Node) -> Result<tree::Node, Error> {
         if let Some(Ok(token)) = self.lexer.peek() {
             match token.kind {
@@ -279,7 +417,7 @@ impl Parser {
                     // function call
                     self.lexer.next();
                     let params = self.parse_param_list()?;
-                    Ok(tree::Node::CallExpr {
+                    self.parse_primary_expr_rest(tree::Node::CallExpr {
                         func: Box::new(start),
                         params,
                     })
@@ -288,7 +426,7 @@ impl Parser {
                     // array index
                     self.lexer.next();
                     let index = self.parse_expr(&[token::Kind::CBracket])?;
-                    Ok(tree::Node::ArrayExpr {
+                    self.parse_primary_expr_rest(tree::Node::ArrayExpr {
                         array: Box::new(start),
                         index: Box::new(index),
                     })
@@ -300,6 +438,13 @@ impl Parser {
         }
     }

+    /// ```notrust
+    /// ArrayLiteral
+    ///     : "[" ArrayElements "]"
+    ///
+    /// ArrayElements
+    ///     : Expression [ "," [ ArrayElements ] ]
+    /// ```
     fn parse_array(&mut self) -> Result<tree::Node, Error> {
         let mut elements = Vec::new();
         while let Some(result) = self.lexer.peek() {
@@ -320,18 +465,19 @@ impl Parser {
                 token::Kind::CParen => {
                     self.lexer.next();
                     break;
-                },
+                }
                 _ => {
                     params.push(self.parse_expr(&[token::Kind::Comma, token::Kind::CParen])?);
                     if self.lexer.current().unwrap().kind == token::Kind::CParen {
                         break;
                     }
-                },
+                }
             }
         }
         Ok(params)
     }

+    /// Ensure that the `scope` stack contains a certain scope.
     fn assert_scope(&self, scope: Scope) -> Result<(), Error> {
         if self.scope.contains(&scope) {
             Ok(())
@@ -344,6 +490,7 @@ impl Parser {
         }
     }

+    /// Ensure that the `scope` stack does not contain a certain scope.
     fn assert_scope_not(&self, scope: Scope) -> Result<(), Error> {
         if self.scope.contains(&scope) {
             let token = self.lexer.current().unwrap();
diff --git a/src/ast/tree.rs b/src/ast/tree.rs
index e3ecd49..33e9961 100644
--- a/src/ast/tree.rs
+++ b/src/ast/tree.rs
@@ -48,7 +48,7 @@ pub enum Node {
     },
 }

-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum Operator {
     Eq,
     EqEq,
diff --git a/src/lex/token.rs b/src/lex/token.rs
index 82b63fe..5028be4 100644
--- a/src/lex/token.rs
+++ b/src/lex/token.rs
@@ -88,7 +88,7 @@ impl Kind {
             Kind::Ident => true,
             Kind::IntLiteral => true,
             Kind::StringLiteral => true,
-            _ => false
+            _ => false,
         }
     }

@@ -96,7 +96,9 @@ impl Kind {
         match self {
             k if k.is_start_of_lhs_expr() => true,
             Kind::Minus => true,
-            _ => false
+            Kind::OBracket => true,
+            Kind::OParen => true,
+            _ => false,
         }
     }
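
As an illustration of the precedence-climbing scheme that `parse_binary_expr_or_higher()` and `parse_binary_rhs()` document above, here is a minimal, self-contained sketch. It is not part of the patch: the `Tok`, `Expr`, `P`, and `prec()` names are invented for the example, and unlike the real parser it works on a fixed token slice and panics instead of returning `Error`.

```rust
#[derive(Clone, Copy, Debug)]
enum Tok {
    Int(i64),
    Plus,
    Minus,
    Star,
    Slash,
}

#[derive(Debug)]
enum Expr {
    Int(i64),
    Binary { op: Tok, lhs: Box<Expr>, rhs: Box<Expr> },
}

/// Binding power of a binary operator; higher binds tighter.
/// `None` means the token is not a binary operator at all.
fn prec(tok: Tok) -> Option<u32> {
    match tok {
        Tok::Plus | Tok::Minus => Some(1),
        Tok::Star | Tok::Slash => Some(2),
        Tok::Int(_) => None,
    }
}

struct P<'a> {
    toks: &'a [Tok],
    pos: usize,
}

impl<'a> P<'a> {
    fn peek(&self) -> Option<Tok> {
        self.toks.get(self.pos).copied()
    }

    fn bump(&mut self) -> Option<Tok> {
        let tok = self.peek();
        self.pos += 1;
        tok
    }

    /// Primary expressions are just integer literals in this sketch.
    fn parse_primary(&mut self) -> Expr {
        match self.bump() {
            Some(Tok::Int(n)) => Expr::Int(n),
            other => panic!("expected an integer literal, got {:?}", other),
        }
    }

    /// Left-associative loop, in the spirit of parse_binary_expr_or_higher().
    fn parse_binary(&mut self) -> Expr {
        let mut expr = self.parse_primary();
        while let Some(op) = self.peek().filter(|t| prec(*t).is_some()) {
            self.bump();
            let rhs = self.parse_rhs(prec(op).unwrap());
            expr = Expr::Binary { op, lhs: Box::new(expr), rhs: Box::new(rhs) };
        }
        expr
    }

    /// In the spirit of parse_binary_rhs(): read one primary expression, then
    /// keep recursing for as long as the *next* operator binds tighter than
    /// the operator whose right-hand side we are currently parsing.
    fn parse_rhs(&mut self, min_prec: u32) -> Expr {
        let mut lhs = self.parse_primary();
        while let Some(op) = self.peek().filter(|t| prec(*t) > Some(min_prec)) {
            self.bump();
            let rhs = self.parse_rhs(prec(op).unwrap());
            lhs = Expr::Binary { op, lhs: Box::new(lhs), rhs: Box::new(rhs) };
        }
        lhs
    }
}

fn main() {
    use Tok::*;
    let parse = |toks: &[Tok]| P { toks, pos: 0 }.parse_binary();
    // `1 + 2 - 3` stays left associative: ((1 + 2) - 3)
    println!("{:?}", parse(&[Int(1), Plus, Int(2), Minus, Int(3)]));
    // `1 + 2 * 3` hoists `+` to the root of the tree: (1 + (2 * 3))
    println!("{:?}", parse(&[Int(1), Plus, Int(2), Star, Int(3)]));
}
```

Running it prints the debug form of the two trees from the doc comment: `((1 + 2) - 3)` for the left example and `(1 + (2 * 3))` for the right one.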