From 30273b790209a0fee78046c7a8daa26cdf1c6b43 Mon Sep 17 00:00:00 2001 From: fef Date: Sun, 24 Jul 2022 14:06:09 +0200 Subject: [PATCH] ast: add abstract syntax tree parser This is nowhere near finished, but it's a good start for now. --- src/ast/mod.rs | 153 +++++++++++++++++++++++++++++++++++++++ src/ast/tree.rs | 187 ++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 15 ++-- 3 files changed, 347 insertions(+), 8 deletions(-) create mode 100644 src/ast/mod.rs create mode 100644 src/ast/tree.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs new file mode 100644 index 0000000..32d1687 --- /dev/null +++ b/src/ast/mod.rs @@ -0,0 +1,153 @@ +pub(crate) mod tree; + +use crate::error::Error; +use crate::lex::token; +use crate::lex::token::Token; +use crate::lex::Lexer; + +use std::fs; +use std::io; + +enum Scope { + File, + Module, + DepList, + SourceList, +} + +struct Parser { + lexer: Lexer, + scope: Vec, + filename: String, +} + +pub fn parse(filename: String) -> io::Result> { + let raw: String = fs::read_to_string(filename.clone())?; + let mut p = Parser::new(filename, raw); + Ok(p.parse_file()) +} + +impl Parser { + pub fn new(filename: String, raw: String) -> Parser { + let lexer = Lexer::new(filename.clone(), raw); + Parser { + lexer, + scope: Vec::new(), + filename, + } + } + + pub fn parse_file(&mut self) -> Result { + let mut nodes = Vec::new(); + self.scope.push(Scope::File); + + while let Some(result) = self.lexer.next() { + let token = result?; + let node = match token.kind { + token::Kind::ModuleKeyword => self.parse_module(), + token::Kind::SetKeyword => self.parse_set_expr(), + _ => self.syntax_error(format!("Unexpected token {}", token.kind), &token), + }?; + nodes.push(node); + } + + Ok(tree::Node::File { + name: self.filename.clone(), + content: nodes, + }) + } + + fn parse_module(&mut self) -> Result { + self.scope.push(Scope::Module); + let name_token = self.lexer.expect_kind(token::Kind::Ident)?; + self.lexer.expect_kind(token::Kind::OBrace)?; + + let mut children = Vec::new(); + + while let Some(result) = self.lexer.next() { + let token = result?; + match token.kind { + token::Kind::DependKeyword => children.push(self.parse_depend_expr()?), + token::Kind::TypeKeyword => children.push(self.parse_type_expr()?), + token::Kind::SourceKeyword => children.push(self.parse_source_expr()?), + token::Kind::SetKeyword => children.push(self.parse_set_expr()?), + token::Kind::CBrace => break, + _ => { + return self.syntax_error(format!("Unexpected token \"{}\"", token.raw), &token) + } + } + } + + self.scope.pop(); + Ok(tree::Node::Module { + name: Box::new(tree::Node::Ident(name_token.raw)), + content: children, + }) + } + + fn parse_depend_expr(&mut self) -> Result { + self.scope.push(Scope::DepList); + let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; + self.scope.pop(); + Ok(tree::Node::DepList(Box::new(rvalue))) + } + + fn parse_set_expr(&mut self) -> Result { + let lvalue = self.parse_lvalue(&[token::Kind::Eq])?; + let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; + Ok(tree::Node::SetExpr { + name: Box::new(lvalue), + val: Box::new(rvalue), + }) + } + + fn parse_type_expr(&mut self) -> Result { + let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; + Ok(tree::Node::TypeExpr(Box::new(rvalue))) + } + + fn parse_source_expr(&mut self) -> Result { + self.scope.push(Scope::SourceList); + let rvalue = self.parse_rvalue(&[token::Kind::Semi])?; + self.scope.pop(); + Ok(tree::Node::SourceList(Box::new(rvalue))) + } + + fn parse_array(&mut self) -> Result { + let mut elements = Vec::new(); + while let Some(result) = self.lexer.peek() { + if result?.kind == token::Kind::CBracket { + self.lexer.next(); + break; + } else { + elements.push(self.parse_rvalue(&[token::Kind::Comma, token::Kind::CBracket])?); + } + } + Ok(tree::Node::Array(elements)) + } + + fn parse_lvalue(&mut self, terminators: &[token::Kind]) -> Result { + // this will be expanded when we have array accesses and similar stuff + let ident_token = self.lexer.expect_kind(token::Kind::Ident)?; + self.lexer.expect_kinds(terminators)?; + Ok(tree::Node::Ident(ident_token.raw)) + } + + fn parse_rvalue(&mut self, terminators: &[token::Kind]) -> Result { + // this will also be expanded to support more complex expressions, naturally + let token = self.lexer.require_next()?; + let node = match token.kind { + token::Kind::Ident => Ok(tree::Node::Ident(token.raw)), + token::Kind::IntLiteral => Ok(tree::Node::Int(token.raw.parse().unwrap())), + token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)), + token::Kind::OBracket => self.parse_array(), + k => self.syntax_error(format!("Unexpected {}", k), &token), + }?; + self.lexer.expect_kinds(terminators)?; + Ok(node) + } + + fn syntax_error(&self, msg: String, token: &Token) -> Result { + Err(Error::syntax_error(token.pos.clone(), msg)) + } +} diff --git a/src/ast/tree.rs b/src/ast/tree.rs new file mode 100644 index 0000000..06c965e --- /dev/null +++ b/src/ast/tree.rs @@ -0,0 +1,187 @@ +use std::fmt; +use std::fmt::Formatter; + +use crate::error::Error; + +use crate::lex::token; +use crate::lex::token::Token; + +#[derive(Debug)] +pub enum Node { + Ident(String), + DepList(Box), + SourceList(Box), + Int(i128), + String(String), + Array(Vec), + UnaryExpr { + op: Operator, + node: Box, + }, + BinaryExpr { + op: Operator, + lhs: Box, + rhs: Box, + }, + TypeExpr(Box), + SetExpr { + name: Box, + val: Box, + }, + Module { + name: Box, + content: Vec, + }, + File { + name: String, + content: Vec, + }, +} + +#[derive(Debug)] +pub enum Operator { + Eq, + Plus, + Minus, + Asterisk, + Slash, + Percent, +} + +impl Node { + pub fn walk(&self, cb: fn(node: &Node, depth: u32)) { + self.visit(cb, 0); + } + + fn visit(&self, cb: fn(node: &Node, depth: u32), current_depth: u32) { + cb(self, current_depth); + let depth = current_depth + 1; + match self { + Node::DepList(list) => list.visit(cb, depth), + Node::SourceList(list) => list.visit(cb, depth), + Node::Array(elements) => { + for node in elements { + node.visit(cb, depth); + } + } + Node::UnaryExpr { op, node } => node.visit(cb, depth), + Node::BinaryExpr { op, lhs, rhs } => { + lhs.visit(cb, depth); + rhs.visit(cb, depth); + } + Node::TypeExpr(node) => node.visit(cb, depth), + Node::SetExpr { name, val } => { + name.visit(cb, depth); + val.visit(cb, depth); + } + Node::Module { name, content } => { + name.visit(cb, depth); + for n in content { + n.visit(cb, depth); + } + } + Node::File { name, content } => { + for n in content { + n.visit(cb, depth); + } + } + _ => return, + } + } +} + +impl fmt::Display for Node { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let mut tmp: String; + write!( + f, + "{}", + match self { + Node::Ident(name) => name.as_str(), + Node::Int(i) => { + tmp = format!("{}", i); + tmp.as_str() + } + Node::String(s) => s.as_str(), + Node::DepList(_) => "depend", + Node::SourceList(_) => "source", + Node::Array(_) => "", + Node::UnaryExpr { op, node } => op.raw(), + Node::BinaryExpr { op, lhs, rhs } => op.raw(), + Node::TypeExpr(_) => "type", + Node::SetExpr { name, val } => "set", + Node::Module { name, content } => "module", + Node::File { name, content } => "file", + } + ) + } +} + +impl Operator { + pub fn from_token(token: &Token) -> Result { + match token.kind { + token::Kind::Eq => Ok(Operator::Eq), + token::Kind::Plus => Ok(Operator::Plus), + token::Kind::Minus => Ok(Operator::Minus), + token::Kind::Asterisk => Ok(Operator::Asterisk), + token::Kind::Slash => Ok(Operator::Slash), + token::Kind::Percent => Ok(Operator::Percent), + _ => Err(Error::syntax_error( + token.pos.clone(), + format!("\"{}\" is not an operator", token.raw), + )), + } + } + + pub fn raw(&self) -> &'static str { + match self { + Operator::Eq => "=", + Operator::Plus => "+", + Operator::Minus => "-", + Operator::Asterisk => "*", + Operator::Slash => "/", + Operator::Percent => "%", + } + } +} + +impl fmt::Display for Operator { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.raw()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Type { + /// For identifier tokens (evaluates at runtime) + Unknown, + /// For expressions that don't emit a value + None, + Int, + String, +} + +impl Type { + pub fn from_token(token: &Token) -> Option { + match token.kind { + token::Kind::IntLiteral => Some(Type::Int), + token::Kind::StringLiteral => Some(Type::String), + _ => None, + } + } +} + +impl fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match self { + Type::Unknown => "", + Type::None => "()", + Type::Int => "int", + Type::String => "string", + } + ) + } +} diff --git a/src/main.rs b/src/main.rs index fd3686d..3c49fd4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,13 @@ -use std::fs; - +mod ast; +mod error; mod lex; -use lex::Lexer; -mod error; +use ast::parse; fn main() { - let s = fs::read_to_string("test.gaybuild").unwrap(); - let lexer = Lexer::new(String::from("test.gaybuild"), s); - for token in lexer { - println!("{}", token.unwrap()); + let result = parse(String::from("test.gaybuild")).unwrap(); + match result { + Ok(tree) => tree.walk(|n, d| println!("{}{}", " ".repeat(d as usize), n)), + Err(e) => println!("{:?}", e), } }