diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2614aad..bb57460 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -7,7 +7,9 @@ use crate::lex::Lexer; use std::fs; use std::io; +use crate::ast::tree::Operator; +#[derive(PartialEq)] enum Scope { File, Target, @@ -41,40 +43,48 @@ impl Parser { let mut nodes = Vec::new(); self.scope.push(Scope::File); - while let Some(result) = self.lexer.next() { - let token = result?; - let node = match token.kind { - token::Kind::TargetKeyword => self.parse_target(), - token::Kind::SetKeyword => self.parse_set_expr(), - _ => self.syntax_error(format!("Unexpected token {}", token.kind), &token), - }?; - nodes.push(node); + while self.lexer.peek().is_some() { + nodes.push(self.parse_stmt()?); } + self.scope.pop(); Ok(tree::Node::File { name: self.filename.clone(), content: nodes, }) } - fn parse_target(&mut self) -> Result { + fn parse_stmt(&mut self) -> Result { + let token = self.lexer.peek_or_err()?; + match token.kind { + token::Kind::DependKeyword => self.parse_depend_stmt(), + token::Kind::SetKeyword => self.parse_set_stmt(), + token::Kind::SourceKeyword => self.parse_source_stmt(), + token::Kind::TargetKeyword => self.parse_target_stmt(), + token::Kind::TypeKeyword => self.parse_type_stmt(), + token::Kind::Ident => self.parse_expr_stmt(), + _ => self.syntax_error(format!("Unexpected token {}", token), &token), + } + } + + fn parse_target_stmt(&mut self) -> Result { + self.assert_scope(Scope::File)?; + self.assert_scope_not(Scope::Target)?; self.scope.push(Scope::Target); + + self.lexer.expect_kind(token::Kind::TargetKeyword)?; let name_token = self.lexer.expect_kind(token::Kind::Ident)?; self.lexer.expect_kind(token::Kind::OBrace)?; let mut children = Vec::new(); - while let Some(result) = self.lexer.next() { - let token = result?; - match token.kind { - token::Kind::DependKeyword => children.push(self.parse_depend_expr()?), - token::Kind::TypeKeyword => children.push(self.parse_type_expr()?), - token::Kind::SourceKeyword => children.push(self.parse_source_expr()?), - token::Kind::SetKeyword => children.push(self.parse_set_expr()?), - token::Kind::CBrace => break, - _ => { - return self.syntax_error(format!("Unexpected token \"{}\"", token.raw), &token) + while let Some(result) = self.lexer.peek() { + match result?.kind { + token::Kind::CBrace => { + self.lexer.next(); + break; } + _ => children.push(self.parse_stmt()?), } } @@ -85,32 +95,205 @@ impl Parser { }) } - fn parse_depend_expr(&mut self) -> Result { + fn parse_depend_stmt(&mut self) -> Result { + self.assert_scope(Scope::Target)?; self.scope.push(Scope::DepList); - let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; + self.lexer.expect_kind(token::Kind::DependKeyword)?; + let rvalue = self.parse_expr(&[token::Kind::Semi])?; self.scope.pop(); Ok(tree::Node::DepList(Box::new(rvalue))) } - fn parse_set_expr(&mut self) -> Result { - let lvalue = self.parse_lvalue(&[token::Kind::Eq])?; - let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; - Ok(tree::Node::SetExpr { - name: Box::new(lvalue), - val: Box::new(rvalue), - }) + fn parse_set_stmt(&mut self) -> Result { + self.assert_scope(Scope::File)?; + self.lexer.expect_kind(token::Kind::SetKeyword)?; + let expr = self.parse_expr(&[token::Kind::Semi])?; + match expr { + tree::Node::BinaryExpr { op, lhs, rhs } => { + if op == Operator::Eq { + Ok(tree::Node::SetExpr { + name: lhs, + val: rhs, + }) + } else { + self.syntax_error(format!("Invalid operator"), self.lexer.current().unwrap()) + } + } + _ => self.syntax_error(format!("Expected an assignment"), self.lexer.current().unwrap()) + } } - fn parse_type_expr(&mut self) -> Result { - let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; - Ok(tree::Node::TypeExpr(Box::new(rvalue))) + fn parse_type_stmt(&mut self) -> Result { + self.assert_scope(Scope::Target)?; + self.lexer.expect_kind(token::Kind::TypeKeyword)?; + let expr = self.parse_expr(&[token::Kind::Semi])?; + Ok(tree::Node::TypeExpr(Box::new(expr))) } - fn parse_source_expr(&mut self) -> Result { + fn parse_source_stmt(&mut self) -> Result { + self.assert_scope(Scope::Target)?; + self.lexer.expect_kind(token::Kind::SourceKeyword)?; self.scope.push(Scope::SourceList); - let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; + let source = self.parse_expr(&[token::Kind::Semi])?; self.scope.pop(); - Ok(tree::Node::SourceList(Box::new(rvalue))) + Ok(tree::Node::SourceList(Box::new(source))) + } + + fn parse_expr_stmt(&mut self) -> Result { + self.parse_expr(&[token::Kind::Semi]) + } + + fn parse_expr(&mut self, terminators: &[token::Kind]) -> Result { + self.assert_scope(Scope::File)?; + let expr = if let Some(result) = self.lexer.peek() { + let token = result?; + if !token.kind.is_start_of_expr() { + self.syntax_error(String::from("Expected an expression"), &token) + } else { + self.parse_assignment_expr_or_higher(terminators) + } + } else { + self.syntax_error( + String::from("Unexpected EOF"), + &self.lexer.current().unwrap(), + ) + }; + expr + } + + fn parse_assignment_expr_or_higher(&mut self, terminators: &[token::Kind]) -> Result { + let lhs = self.parse_primary_expr()?; + if let Some(Ok(token)) = self.lexer.peek() { + if token.kind.is_assignment_op() { + let op_token = self.lexer.require_next()?; + let op = Operator::from_token(&op_token)?; + let rhs = self.parse_binary_expr_or_higher(terminators)?; + return Ok(tree::Node::BinaryExpr { + op, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + }); + } else { + self.lexer.expect_kinds(terminators)?; + } + } + Ok(lhs) + } + + /// Binary expressions are generally left associative. + /// However, things get a little more tricky when taking the fact that there + /// are 9 different levels of precedence into account. + fn parse_binary_expr_or_higher(&mut self, terminators: &[token::Kind]) -> Result { + let mut expr = self.parse_unary_expr_or_higher()?; + + while let Some(Ok(token)) = self.lexer.peek() { + if terminators.contains(&token.kind) { + self.lexer.next(); + break; + } + + let op = Operator::from_token(&token)?; + self.lexer.next(); + let precedence = token.kind.binary_op_precedence().unwrap(); + expr = tree::Node::BinaryExpr { + op, + lhs: Box::new(expr), + rhs: Box::new(self.parse_partial_binary_expr(precedence, terminators)?), + }; + } + + Ok(expr) + } + + fn parse_partial_binary_expr(&mut self, precedence: u32, terminators: &[token::Kind]) -> Result { + let mut lhs = self.parse_unary_expr_or_higher()?; + + while let Some(Ok(token)) = self.lexer.peek() { + if let Some(new_precedence) = token.kind.binary_op_precedence() { + if new_precedence > precedence { + let op = Operator::from_token(&token)?; + self.lexer.next(); + lhs = tree::Node::BinaryExpr { + op, + lhs: Box::new(lhs), + rhs: Box::new(self.parse_partial_binary_expr(new_precedence, terminators)?), + }; + } else { + break; + } + } else { + break; + } + } + + Ok(lhs) + } + + fn parse_unary_expr_or_higher(&mut self) -> Result { + if let Some(result) = self.lexer.peek() { + let token = result?; + if token.kind == token::Kind::Bang || token.kind == token::Kind::Minus { + self.lexer.next(); // consume unary operator token + let op = Operator::from_token(&token)?; + let expr = self.parse_primary_expr()?; + return Ok(tree::Node::UnaryExpr { + op, + node: Box::new(expr), + }); + } + } + self.parse_primary_expr() + } + + fn parse_primary_expr(&mut self) -> Result { + let token = self.lexer.require_next()?; + match token.kind { + token::Kind::Ident => { + let ident = tree::Node::Ident(String::from(token.raw)); + self.parse_primary_expr_rest(ident) + } + token::Kind::IntLiteral => { + let raw = token.raw; + let num = match raw.chars().nth(1) { + Some('x') => i128::from_str_radix(&raw[2..], 16).unwrap(), + Some('o') => i128::from_str_radix(&raw[2..], 8).unwrap(), + Some('b') => i128::from_str_radix(&raw[2..], 2).unwrap(), + _ => raw.parse().unwrap(), + }; + Ok(tree::Node::Int(num)) + } + token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)), + token::Kind::OBracket => self.parse_array(), + _ => self.syntax_error(format!("Unexpected token {}", token.kind), &token), + } + } + + fn parse_primary_expr_rest(&mut self, start: tree::Node) -> Result { + if let Some(Ok(token)) = self.lexer.peek() { + match token.kind { + token::Kind::OParen => { + // function call + self.lexer.next(); + let params = self.parse_param_list()?; + Ok(tree::Node::CallExpr { + func: Box::new(start), + params, + }) + } + token::Kind::OBracket => { + // array index + self.lexer.next(); + let index = self.parse_expr(&[token::Kind::CBracket])?; + Ok(tree::Node::ArrayExpr { + array: Box::new(start), + index: Box::new(index), + }) + } + _ => Ok(start), + } + } else { + Ok(start) + } } fn parse_array(&mut self) -> Result { @@ -120,31 +303,53 @@ impl Parser { self.lexer.next(); break; } else { - elements.push(self.parse_rvalue(&[token::Kind::Comma, token::Kind::CBracket])?); + elements.push(self.parse_expr(&[token::Kind::Comma, token::Kind::CBracket])?); } } Ok(tree::Node::Array(elements)) } - fn parse_lvalue(&mut self, terminators: &[token::Kind]) -> Result { - // this will be expanded when we have array accesses and similar stuff - let ident_token = self.lexer.expect_kind(token::Kind::Ident)?; - self.lexer.expect_kinds(terminators)?; - Ok(tree::Node::Ident(ident_token.raw)) + fn parse_param_list(&mut self) -> Result, Error> { + let mut params = Vec::new(); + while let Some(result) = self.lexer.peek() { + match result?.kind { + token::Kind::CParen => { + self.lexer.next(); + break; + }, + _ => { + params.push(self.parse_expr(&[token::Kind::Comma, token::Kind::CParen])?); + if self.lexer.current().unwrap().kind == token::Kind::CParen { + break; + } + }, + } + } + Ok(params) + } + + fn assert_scope(&self, scope: Scope) -> Result<(), Error> { + if self.scope.contains(&scope) { + Ok(()) + } else { + let token = self.lexer.current().unwrap(); + self.syntax_error( + format!("Token {} cannot be used in this context", token), + token, + ) + } } - fn parse_rvalue(&mut self, terminators: &[token::Kind]) -> Result { - // this will also be expanded to support more complex expressions, naturally - let token = self.lexer.require_next()?; - let node = match token.kind { - token::Kind::Ident => Ok(tree::Node::Ident(token.raw)), - token::Kind::IntLiteral => Ok(tree::Node::Int(token.raw.parse().unwrap())), - token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)), - token::Kind::OBracket => self.parse_array(), - k => self.syntax_error(format!("Unexpected {}", k), &token), - }?; - self.lexer.expect_kinds(terminators)?; - Ok(node) + fn assert_scope_not(&self, scope: Scope) -> Result<(), Error> { + if self.scope.contains(&scope) { + let token = self.lexer.current().unwrap(); + self.syntax_error( + format!("Token {} cannot be used in this context", token), + token, + ) + } else { + Ok(()) + } } fn syntax_error(&self, msg: String, token: &Token) -> Result { diff --git a/src/ast/tree.rs b/src/ast/tree.rs index ee4972d..e3ecd49 100644 --- a/src/ast/tree.rs +++ b/src/ast/tree.rs @@ -14,6 +14,16 @@ pub enum Node { Int(i128), String(String), Array(Vec), + ArrayExpr { + // array access + array: Box, + index: Box, + }, + CallExpr { + // function call + func: Box, + params: Vec, + }, UnaryExpr { op: Operator, node: Box, @@ -74,56 +84,6 @@ pub enum Operator { CaretEq, } -impl Operator { - pub const fn precedence(&self) -> u8 { - match self { - Operator::Eq => 2, - Operator::PlusEq => 2, - Operator::MinusEq => 2, - Operator::AsteriskEq => 2, - Operator::AsteriskAsteriskEq => 2, - Operator::SlashEq => 2, - Operator::PercentEq => 2, - Operator::AmpEq => 2, - Operator::PipeEq => 2, - Operator::CaretEq => 2, - Operator::GtGtEq => 2, - Operator::LtLtEq => 2, - - Operator::PipePipe => 3, - - Operator::AmpAmp => 4, - - Operator::EqEq => 5, - Operator::BangEq => 5, - Operator::Lt => 5, - Operator::LtEq => 5, - Operator::Gt => 5, - Operator::GtEq => 5, - - Operator::Pipe => 6, - - Operator::Caret => 7, - - Operator::Amp => 8, - - Operator::GtGt => 9, - Operator::LtLt => 9, - - Operator::Plus => 10, - Operator::Minus => 10, - - Operator::Asterisk => 11, - Operator::Slash => 11, - Operator::Percent => 11, - - Operator::AsteriskAsterisk => 12, - - Operator::Bang => 13, - } - } -} - impl Node { pub fn walk(&self, cb: fn(node: &Node, depth: u32)) { self.visit(cb, 0); @@ -140,6 +100,16 @@ impl Node { node.visit(cb, depth); } } + Node::ArrayExpr { array, index } => { + array.visit(cb, depth); + index.visit(cb, depth); + } + Node::CallExpr { func, params } => { + func.visit(cb, depth); + for p in params { + p.visit(cb, depth); + } + } Node::UnaryExpr { op, node } => node.visit(cb, depth), Node::BinaryExpr { op, lhs, rhs } => { lhs.visit(cb, depth); @@ -182,6 +152,8 @@ impl fmt::Display for Node { Node::DepList(_) => "depend", Node::SourceList(_) => "source", Node::Array(_) => "", + Node::ArrayExpr { array, index } => "", + Node::CallExpr { func, params } => "", Node::UnaryExpr { op, node } => op.raw(), Node::BinaryExpr { op, lhs, rhs } => op.raw(), Node::TypeExpr(_) => "type", diff --git a/src/error.rs b/src/error.rs index 3bcace4..f0462e1 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,4 @@ -use std::{fmt, io}; +use std::fmt; use crate::ast::tree::Type; use crate::lex::token::Position; diff --git a/src/lex/mod.rs b/src/lex/mod.rs index 9a44a88..a39991f 100644 --- a/src/lex/mod.rs +++ b/src/lex/mod.rs @@ -146,6 +146,12 @@ impl Lexer { Some(t) } + pub fn peek_or_err(&mut self) -> Result { + let token = self.require_next()?; + self.prev(); + Ok(token) + } + pub fn prev(&mut self) -> Option<&Token> { if self.offset < self.history.len() - 1 { self.offset += 1; diff --git a/src/lex/token.rs b/src/lex/token.rs index ad9af33..82b63fe 100644 --- a/src/lex/token.rs +++ b/src/lex/token.rs @@ -153,7 +153,7 @@ impl Kind { Kind::Slash => Some(11), Kind::Percent => Some(11), Kind::AsteriskAsterisk => Some(12), - _ => None + _ => None, } } } diff --git a/test.gaybuild b/test.gaybuild index 3cb7f0b..ade8328 100644 --- a/test.gaybuild +++ b/test.gaybuild @@ -9,6 +9,9 @@ set BUILD_PREFIX = "build"; # a target is a single component. # targets can depend on other targets. target kern { + + owo = 1 + 2 * 3 * 4 - 5; + # the type keyword defines whether the target is supposed to be # compiled into an executable binary (exe) or a library (lib). type exe; @@ -23,6 +26,7 @@ target kern { } target libk { + print("hello, world"); type lib; depend arch; source "libk/lib.rs";