ast: add abstract syntax tree parser

This is nowhere near finished, but it's a good
start for now.
main
anna 2 years ago
parent b0e2e405d7
commit 30273b7902
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -0,0 +1,153 @@
pub(crate) mod tree;
use crate::error::Error;
use crate::lex::token;
use crate::lex::token::Token;
use crate::lex::Lexer;
use std::fs;
use std::io;
/// Syntactic context the parser is currently working in.
///
/// The parser keeps these on a stack (`Parser::scope`), pushing one when it
/// starts a construct and popping it when the construct is finished.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Scope {
    /// Pushed by `parse_file` for the top level of a source file.
    File,
    /// Pushed by `parse_module` while a module is being parsed.
    Module,
    /// Pushed by `parse_depend_expr` while its value is being parsed.
    DepList,
    /// Pushed by `parse_source_expr` while its value is being parsed.
    SourceList,
}
/// Parser state for a single source file.
struct Parser {
    /// Token source; built from the file name and raw text in `Parser::new`.
    lexer: Lexer,
    /// Stack of contexts currently being parsed, maintained by the `parse_*` methods.
    scope: Vec<Scope>,
    /// Name of the file being parsed; copied into the resulting `tree::Node::File`.
    filename: String,
}
/// Reads the file at `filename` and parses its contents into a syntax tree.
///
/// # Errors
///
/// The outer `io::Result` is `Err` when the file cannot be read. The inner
/// `Result` is `Err` when `Parser::parse_file` reports an error for the
/// file's contents.
pub fn parse(filename: String) -> io::Result<Result<tree::Node, Error>> {
    // Borrow the path for reading so the owned name can move into the parser
    // without an extra clone.
    let raw = fs::read_to_string(&filename)?;
    Ok(Parser::new(filename, raw).parse_file())
}
impl Parser {
    /// Creates a parser for the text `raw`, which was read from `filename`.
    pub fn new(filename: String, raw: String) -> Parser {
        // The lexer takes its own copy of the name; ours ends up in the `File` node.
        let lexer = Lexer::new(filename.clone(), raw);
        Parser {
            lexer,
            scope: Vec::new(),
            filename,
        }
    }

    /// Parses the entire token stream into a `tree::Node::File`.
    ///
    /// At file level only tokens of kind `ModuleKeyword` and `SetKeyword` may
    /// start an item; anything else is a syntax error.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by the lexer or by any nested parser.
    pub fn parse_file(&mut self) -> Result<tree::Node, Error> {
        let content = self.in_scope(Scope::File, |p| {
            let mut nodes = Vec::new();
            // A `for` loop would keep the lexer borrowed for the whole loop,
            // but the arms below need the parser mutably.
            while let Some(result) = p.lexer.next() {
                let token = result?;
                let node = match token.kind {
                    token::Kind::ModuleKeyword => p.parse_module(),
                    token::Kind::SetKeyword => p.parse_set_expr(),
                    _ => p.syntax_error(format!("Unexpected token {}", token.kind), &token),
                }?;
                nodes.push(node);
            }
            Ok(nodes)
        })?;
        Ok(tree::Node::File {
            name: self.filename.clone(),
            content,
        })
    }

    /// Runs `body` with `scope` pushed onto the scope stack.
    ///
    /// The scope is popped again whether `body` succeeds or fails, so the
    /// stack stays balanced on error paths too.
    fn in_scope<T>(
        &mut self,
        scope: Scope,
        body: impl FnOnce(&mut Self) -> Result<T, Error>,
    ) -> Result<T, Error> {
        self.scope.push(scope);
        let result = body(self);
        self.scope.pop();
        result
    }

    /// Parses a module; the caller has already consumed the `ModuleKeyword` token.
    ///
    /// Expects an identifier, an opening brace, then any number of
    /// depend/type/source/set statements up to the closing brace.
    ///
    /// # Errors
    ///
    /// Fails on an unexpected token, on an error inside a statement, or when
    /// the input ends before the closing brace is seen.
    fn parse_module(&mut self) -> Result<tree::Node, Error> {
        self.in_scope(Scope::Module, |p| {
            let name_token = p.lexer.expect_kind(token::Kind::Ident)?;
            p.lexer.expect_kind(token::Kind::OBrace)?;
            let mut children = Vec::new();
            loop {
                let token = match p.lexer.next() {
                    Some(result) => result?,
                    None => {
                        // There is no token left to point at, so the error is
                        // reported at the module's name instead.
                        return p.syntax_error(
                            format!("Missing closing brace for module \"{}\"", name_token.raw),
                            &name_token,
                        );
                    }
                };
                let child = match token.kind {
                    token::Kind::DependKeyword => p.parse_depend_expr()?,
                    token::Kind::TypeKeyword => p.parse_type_expr()?,
                    token::Kind::SourceKeyword => p.parse_source_expr()?,
                    token::Kind::SetKeyword => p.parse_set_expr()?,
                    token::Kind::CBrace => break,
                    _ => {
                        return p.syntax_error(
                            format!("Unexpected token \"{}\"", token.raw),
                            &token,
                        );
                    }
                };
                children.push(child);
            }
            Ok(tree::Node::Module {
                name: Box::new(tree::Node::Ident(name_token.raw)),
                content: children,
            })
        })
    }

    /// Parses the value of a depend statement, terminated by `Semi`.
    fn parse_depend_expr(&mut self) -> Result<tree::Node, Error> {
        let rvalue = self.in_scope(Scope::DepList, |p| p.parse_rvalue(&[token::Kind::Semi]))?;
        Ok(tree::Node::DepList(Box::new(rvalue)))
    }

    /// Parses `<ident> = <value>;`; the caller has already consumed the `SetKeyword` token.
    fn parse_set_expr(&mut self) -> Result<tree::Node, Error> {
        let lvalue = self.parse_lvalue(&[token::Kind::Eq])?;
        let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
        Ok(tree::Node::SetExpr {
            name: Box::new(lvalue),
            val: Box::new(rvalue),
        })
    }

    /// Parses the value of a type statement, terminated by `Semi`.
    fn parse_type_expr(&mut self) -> Result<tree::Node, Error> {
        let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
        Ok(tree::Node::TypeExpr(Box::new(rvalue)))
    }

    /// Parses the value of a source statement, terminated by `Semi`.
    fn parse_source_expr(&mut self) -> Result<tree::Node, Error> {
        let rvalue =
            self.in_scope(Scope::SourceList, |p| p.parse_rvalue(&[token::Kind::Semi]))?;
        Ok(tree::Node::SourceList(Box::new(rvalue)))
    }

    /// Parses the remainder of an array literal; the opening bracket has
    /// already been consumed.
    ///
    /// Elements are separated by `Comma`; a trailing comma before the closing
    /// bracket is accepted, as is an empty array.
    fn parse_array(&mut self) -> Result<tree::Node, Error> {
        let mut elements = Vec::new();
        while let Some(result) = self.lexer.peek() {
            if result?.kind == token::Kind::CBracket {
                // Empty array, or a trailing comma before the bracket.
                self.lexer.next();
                break;
            }
            elements.push(self.parse_value()?);
            // Read the separator here so we know whether the array just ended;
            // otherwise the token after the closing bracket would be parsed
            // as one more element.
            let separator = self.lexer.require_next()?;
            match separator.kind {
                token::Kind::Comma => continue,
                token::Kind::CBracket => break,
                _ => {
                    return self.syntax_error(
                        format!("Unexpected token \"{}\"", separator.raw),
                        &separator,
                    );
                }
            }
        }
        Ok(tree::Node::Array(elements))
    }

    /// Parses an assignment target followed by one of `terminators`.
    fn parse_lvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
        // this will be expanded when we have array accesses and similar stuff
        let ident_token = self.lexer.expect_kind(token::Kind::Ident)?;
        self.lexer.expect_kinds(terminators)?;
        Ok(tree::Node::Ident(ident_token.raw))
    }

    /// Parses a value followed by one of `terminators`.
    fn parse_rvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
        // this will also be expanded to support more complex expressions, naturally
        let node = self.parse_value()?;
        self.lexer.expect_kinds(terminators)?;
        Ok(node)
    }

    /// Parses a single value (identifier, integer, string or array) without
    /// consuming whatever follows it.
    ///
    /// # Errors
    ///
    /// Fails on any other token kind, and on an integer literal that does not
    /// fit in an `i128`.
    fn parse_value(&mut self) -> Result<tree::Node, Error> {
        let token = self.lexer.require_next()?;
        match token.kind {
            token::Kind::Ident => Ok(tree::Node::Ident(token.raw)),
            // The literal comes from user input, so a failed conversion is a
            // syntax error rather than a panic.
            token::Kind::IntLiteral => match token.raw.parse::<i128>() {
                Ok(value) => Ok(tree::Node::Int(value)),
                Err(e) => self.syntax_error(
                    format!("Invalid integer literal \"{}\": {}", token.raw, e),
                    &token,
                ),
            },
            token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)),
            token::Kind::OBracket => self.parse_array(),
            k => self.syntax_error(format!("Unexpected {}", k), &token),
        }
    }

    /// Builds an `Err` holding a syntax error located at `token`'s position.
    fn syntax_error<T>(&self, msg: String, token: &Token) -> Result<T, Error> {
        Err(Error::syntax_error(token.pos.clone(), msg))
    }
}

@ -0,0 +1,187 @@
use std::fmt;
use std::fmt::Formatter;
use crate::error::Error;
use crate::lex::token;
use crate::lex::token::Token;
/// A node of the abstract syntax tree.
#[derive(Debug)]
pub enum Node {
    /// An identifier, stored as its raw text.
    Ident(String),
    /// A depend statement wrapping its value.
    DepList(Box<Node>),
    /// A source statement wrapping its value.
    SourceList(Box<Node>),
    /// An integer literal.
    Int(i128),
    /// A string literal, stored as the token's raw text.
    String(String),
    /// An array literal with its elements in source order.
    Array(Vec<Node>),
    /// An operator applied to a single operand.
    UnaryExpr {
        op: Operator,
        node: Box<Node>,
    },
    /// An operator applied to a left and a right operand.
    BinaryExpr {
        op: Operator,
        lhs: Box<Node>,
        rhs: Box<Node>,
    },
    /// A type statement wrapping its value.
    TypeExpr(Box<Node>),
    /// A set statement assigning `val` to `name`.
    SetExpr {
        name: Box<Node>,
        val: Box<Node>,
    },
    /// A module: its name node and the statements in its body.
    Module {
        name: Box<Node>,
        content: Vec<Node>,
    },
    /// Root of a parsed file: the file name and its top-level items.
    File {
        name: String,
        content: Vec<Node>,
    },
}
/// Operator carried by unary and binary expression nodes.
///
/// The enum has no payload, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
    /// `=`
    Eq,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Asterisk,
    /// `/`
    Slash,
    /// `%`
    Percent,
}
impl Node {
    /// Calls `cb` for this node and every descendant, parents before children.
    ///
    /// `cb` receives each node together with its depth below `self` (which is
    /// at depth 0). Any closure is accepted, including ones that capture and
    /// mutate their environment.
    pub fn walk(&self, mut cb: impl FnMut(&Node, u32)) {
        self.visit(&mut cb, 0);
    }

    /// Recursive worker behind [`Node::walk`]; `current_depth` is the depth of `self`.
    fn visit(&self, cb: &mut dyn FnMut(&Node, u32), current_depth: u32) {
        cb(self, current_depth);
        let depth = current_depth + 1;
        // Every variant is listed so that adding a new one is a compile error
        // here instead of a silently skipped subtree.
        match self {
            Node::DepList(inner) | Node::SourceList(inner) | Node::TypeExpr(inner) => {
                inner.visit(cb, depth)
            }
            Node::Array(elements) => {
                for element in elements {
                    element.visit(cb, depth);
                }
            }
            Node::UnaryExpr { node, .. } => node.visit(cb, depth),
            Node::BinaryExpr { lhs, rhs, .. } => {
                lhs.visit(cb, depth);
                rhs.visit(cb, depth);
            }
            Node::SetExpr { name, val } => {
                name.visit(cb, depth);
                val.visit(cb, depth);
            }
            Node::Module { name, content } => {
                name.visit(cb, depth);
                for child in content {
                    child.visit(cb, depth);
                }
            }
            // The file name is a plain string, not a node, so only the
            // content is visited.
            Node::File { content, .. } => {
                for child in content {
                    child.visit(cb, depth);
                }
            }
            // Leaves have nothing below them.
            Node::Ident(_) | Node::Int(_) | Node::String(_) => {}
        }
    }
}
impl fmt::Display for Node {
    /// Writes a short label for this node only; children are not rendered.
    ///
    /// Identifiers, strings and integers print their own value, operators
    /// print their symbol, and every other variant prints a fixed keyword.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let label = match self {
            Node::Ident(name) => name.as_str(),
            Node::String(s) => s.as_str(),
            // Integers are formatted straight into `f`; no temporary `String`.
            Node::Int(i) => return write!(f, "{}", i),
            Node::DepList(_) => "depend",
            Node::SourceList(_) => "source",
            Node::Array(_) => "<array>",
            Node::UnaryExpr { op, .. } | Node::BinaryExpr { op, .. } => op.raw(),
            Node::TypeExpr(_) => "type",
            Node::SetExpr { .. } => "set",
            Node::Module { .. } => "module",
            Node::File { .. } => "file",
        };
        f.write_str(label)
    }
}
impl Operator {
pub fn from_token(token: &Token) -> Result<Operator, Error> {
match token.kind {
token::Kind::Eq => Ok(Operator::Eq),
token::Kind::Plus => Ok(Operator::Plus),
token::Kind::Minus => Ok(Operator::Minus),
token::Kind::Asterisk => Ok(Operator::Asterisk),
token::Kind::Slash => Ok(Operator::Slash),
token::Kind::Percent => Ok(Operator::Percent),
_ => Err(Error::syntax_error(
token.pos.clone(),
format!("\"{}\" is not an operator", token.raw),
)),
}
}
pub fn raw(&self) -> &'static str {
match self {
Operator::Eq => "=",
Operator::Plus => "+",
Operator::Minus => "-",
Operator::Asterisk => "*",
Operator::Slash => "/",
Operator::Percent => "%",
}
}
}
impl fmt::Display for Operator {
    /// Writes the operator's symbol, as returned by `raw`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let symbol = self.raw();
        f.write_str(symbol)
    }
}
/// Static type of a value in the syntax tree.
///
/// Note that the `None` variant is unrelated to `Option::None`; it always has
/// to be written as `Type::None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Type {
    /// For identifier tokens (evaluates at runtime)
    Unknown,
    /// For expressions that don't emit a value
    None,
    /// Integer values.
    Int,
    /// String values.
    String,
}
impl Type {
    /// Returns the type of a literal token.
    ///
    /// Integer and string literal tokens map to `Type::Int` and
    /// `Type::String`; every other token kind yields `Option::None`.
    pub fn from_token(token: &Token) -> Option<Type> {
        let ty = match token.kind {
            token::Kind::IntLiteral => Type::Int,
            token::Kind::StringLiteral => Type::String,
            // Not a literal: there is no type to report.
            _ => return None,
        };
        Some(ty)
    }
}
impl fmt::Display for Type {
    /// Writes the type's display name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match *self {
            Type::Unknown => "<unknown>",
            Type::None => "()",
            Type::Int => "int",
            Type::String => "string",
        };
        f.write_str(name)
    }
}

@ -1,14 +1,13 @@
use std::fs;
mod ast;
mod error;
mod lex;
use lex::Lexer;
mod error;
use ast::parse;
fn main() {
let s = fs::read_to_string("test.gaybuild").unwrap();
let lexer = Lexer::new(String::from("test.gaybuild"), s);
for token in lexer {
println!("{}", token.unwrap());
let result = parse(String::from("test.gaybuild")).unwrap();
match result {
Ok(tree) => tree.walk(|n, d| println!("{}{}", " ".repeat(d as usize), n)),
Err(e) => println!("{:?}", e),
}
}

Loading…
Cancel
Save