ast: add abstract syntax tree parser
This is nowhere near finished, but it's a good start for now.
parent
b0e2e405d7
commit
30273b7902
@ -0,0 +1,153 @@
|
|||||||
|
pub(crate) mod tree;
|
||||||
|
|
||||||
|
use crate::error::Error;
|
||||||
|
use crate::lex::token;
|
||||||
|
use crate::lex::token::Token;
|
||||||
|
use crate::lex::Lexer;
|
||||||
|
|
||||||
|
use std::fs;
|
||||||
|
use std::io;
|
||||||
|
|
||||||
|
/// The syntactic construct the parser is currently inside.
///
/// Maintained as a stack (`Parser::scope`): pushed on entry to a
/// construct and popped on exit. Derives added so scopes can be
/// compared and debug-printed while diagnosing the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Scope {
    /// Top level of a build file.
    File,
    /// Inside a `module <name> { ... }` block.
    Module,
    /// Inside the value of a `depend` expression.
    DepList,
    /// Inside the value of a `source` expression.
    SourceList,
}
|
||||||
|
|
||||||
|
/// Recursive-descent parser state for a single build file.
struct Parser {
    // Token source for the file being parsed.
    lexer: Lexer,
    // Stack of the constructs currently being parsed; pushed on entry,
    // popped on exit.
    scope: Vec<Scope>,
    // Name of the file being parsed; stored so the root `File` node can
    // carry it.
    filename: String,
}
|
||||||
|
|
||||||
|
pub fn parse(filename: String) -> io::Result<Result<tree::Node, Error>> {
|
||||||
|
let raw: String = fs::read_to_string(filename.clone())?;
|
||||||
|
let mut p = Parser::new(filename, raw);
|
||||||
|
Ok(p.parse_file())
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Parser {
    /// Creates a parser over `raw`, the full source text of `filename`.
    pub fn new(filename: String, raw: String) -> Parser {
        // The lexer keeps its own copy of the name for token positions.
        let lexer = Lexer::new(filename.clone(), raw);
        Parser {
            lexer,
            scope: Vec::new(),
            filename,
        }
    }

    /// Parses the whole file — a sequence of top-level `module` and `set`
    /// expressions — into a single `File` root node.
    ///
    /// Returns the first lexing or syntax error encountered.
    pub fn parse_file(&mut self) -> Result<tree::Node, Error> {
        let mut nodes = Vec::new();
        // NOTE(review): Scope::File is pushed but never popped. Harmless
        // while the parser is discarded right after this call, but worth
        // revisiting if `scope` is ever inspected afterwards.
        self.scope.push(Scope::File);

        while let Some(result) = self.lexer.next() {
            let token = result?; // propagate lexer errors
            let node = match token.kind {
                token::Kind::ModuleKeyword => self.parse_module(),
                token::Kind::SetKeyword => self.parse_set_expr(),
                _ => self.syntax_error(format!("Unexpected token {}", token.kind), &token),
            }?;
            nodes.push(node);
        }

        Ok(tree::Node::File {
            name: self.filename.clone(),
            content: nodes,
        })
    }

    /// Parses `module <ident> { ... }`; the `module` keyword has already
    /// been consumed by the caller.
    fn parse_module(&mut self) -> Result<tree::Node, Error> {
        self.scope.push(Scope::Module);
        let name_token = self.lexer.expect_kind(token::Kind::Ident)?;
        self.lexer.expect_kind(token::Kind::OBrace)?;

        let mut children = Vec::new();

        // Body: depend/type/source/set expressions until the closing brace.
        while let Some(result) = self.lexer.next() {
            let token = result?;
            match token.kind {
                token::Kind::DependKeyword => children.push(self.parse_depend_expr()?),
                token::Kind::TypeKeyword => children.push(self.parse_type_expr()?),
                token::Kind::SourceKeyword => children.push(self.parse_source_expr()?),
                token::Kind::SetKeyword => children.push(self.parse_set_expr()?),
                token::Kind::CBrace => break,
                _ => {
                    return self.syntax_error(format!("Unexpected token \"{}\"", token.raw), &token)
                }
            }
        }

        self.scope.pop();
        Ok(tree::Node::Module {
            name: Box::new(tree::Node::Ident(name_token.raw)),
            content: children,
        })
    }

    /// Parses the value of a `depend` expression, up to the trailing `;`.
    fn parse_depend_expr(&mut self) -> Result<tree::Node, Error> {
        self.scope.push(Scope::DepList);
        let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
        self.scope.pop();
        Ok(tree::Node::DepList(Box::new(rvalue)))
    }

    /// Parses `set <lvalue> = <rvalue>;` (keyword already consumed).
    fn parse_set_expr(&mut self) -> Result<tree::Node, Error> {
        let lvalue = self.parse_lvalue(&[token::Kind::Eq])?;
        let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
        Ok(tree::Node::SetExpr {
            name: Box::new(lvalue),
            val: Box::new(rvalue),
        })
    }

    /// Parses the value of a `type` expression, up to the trailing `;`.
    fn parse_type_expr(&mut self) -> Result<tree::Node, Error> {
        let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
        Ok(tree::Node::TypeExpr(Box::new(rvalue)))
    }

    /// Parses the value of a `source` expression, up to the trailing `;`.
    fn parse_source_expr(&mut self) -> Result<tree::Node, Error> {
        self.scope.push(Scope::SourceList);
        let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
        self.scope.pop();
        Ok(tree::Node::SourceList(Box::new(rvalue)))
    }

    /// Parses a `[ ... ]` array literal; the opening bracket has already
    /// been consumed. Elements are rvalues terminated by `,` or `]`.
    fn parse_array(&mut self) -> Result<tree::Node, Error> {
        let mut elements = Vec::new();
        while let Some(result) = self.lexer.peek() {
            if result?.kind == token::Kind::CBracket {
                // Consume the closing bracket we just peeked at.
                self.lexer.next();
                break;
            } else {
                elements.push(self.parse_rvalue(&[token::Kind::Comma, token::Kind::CBracket])?);
            }
        }
        Ok(tree::Node::Array(elements))
    }

    /// Parses an assignable expression followed by one of `terminators`.
    fn parse_lvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
        // this will be expanded when we have array accesses and similar stuff
        let ident_token = self.lexer.expect_kind(token::Kind::Ident)?;
        self.lexer.expect_kinds(terminators)?;
        Ok(tree::Node::Ident(ident_token.raw))
    }

    /// Parses a value expression — identifier, int/string literal, or
    /// array — followed by one of `terminators`.
    fn parse_rvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
        // this will also be expanded to support more complex expressions, naturally
        let token = self.lexer.require_next()?;
        let node = match token.kind {
            token::Kind::Ident => Ok(tree::Node::Ident(token.raw)),
            // NOTE(review): `.unwrap()` assumes the lexer only emits
            // IntLiteral tokens that parse as i128 — confirm the lexer
            // rejects oversized literals.
            token::Kind::IntLiteral => Ok(tree::Node::Int(token.raw.parse().unwrap())),
            token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)),
            token::Kind::OBracket => self.parse_array(),
            k => self.syntax_error(format!("Unexpected {}", k), &token),
        }?;
        self.lexer.expect_kinds(terminators)?;
        Ok(node)
    }

    /// Builds a syntax error at `token`'s position, generic over `T` so
    /// it slots into any parse result.
    fn syntax_error<T>(&self, msg: String, token: &Token) -> Result<T, Error> {
        Err(Error::syntax_error(token.pos.clone(), msg))
    }
}
|
@ -0,0 +1,187 @@
|
|||||||
|
use std::fmt;
|
||||||
|
use std::fmt::Formatter;
|
||||||
|
|
||||||
|
use crate::error::Error;
|
||||||
|
|
||||||
|
use crate::lex::token;
|
||||||
|
use crate::lex::token::Token;
|
||||||
|
|
||||||
|
/// A node of the abstract syntax tree.
#[derive(Debug)]
pub enum Node {
    /// An identifier, stored as its raw source text.
    Ident(String),
    /// The value of a `depend` expression.
    DepList(Box<Node>),
    /// The value of a `source` expression.
    SourceList(Box<Node>),
    /// An integer literal.
    Int(i128),
    /// A string literal.
    String(String),
    /// A `[ ... ]` array literal.
    Array(Vec<Node>),
    /// An operator applied to one operand.
    /// NOTE(review): not yet constructed by the visible parser — confirm
    /// intended use before relying on it.
    UnaryExpr {
        op: Operator,
        node: Box<Node>,
    },
    /// An operator applied to two operands.
    /// NOTE(review): not yet constructed by the visible parser.
    BinaryExpr {
        op: Operator,
        lhs: Box<Node>,
        rhs: Box<Node>,
    },
    /// The value of a `type` expression.
    TypeExpr(Box<Node>),
    /// `set <name> = <val>;`
    SetExpr {
        name: Box<Node>,
        val: Box<Node>,
    },
    /// `module <name> { <content> }`
    Module {
        name: Box<Node>,
        content: Vec<Node>,
    },
    /// Root node: one parsed file and its top-level expressions.
    File {
        name: String,
        content: Vec<Node>,
    },
}
|
||||||
|
|
||||||
|
/// Operator kinds recognised by the language.
///
/// Fieldless, so the full set of cheap derives is free; `Copy` lets
/// operators be passed by value without ceremony.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
    /// `=`
    Eq,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Asterisk,
    /// `/`
    Slash,
    /// `%`
    Percent,
}
|
||||||
|
|
||||||
|
impl Node {
|
||||||
|
pub fn walk(&self, cb: fn(node: &Node, depth: u32)) {
|
||||||
|
self.visit(cb, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit(&self, cb: fn(node: &Node, depth: u32), current_depth: u32) {
|
||||||
|
cb(self, current_depth);
|
||||||
|
let depth = current_depth + 1;
|
||||||
|
match self {
|
||||||
|
Node::DepList(list) => list.visit(cb, depth),
|
||||||
|
Node::SourceList(list) => list.visit(cb, depth),
|
||||||
|
Node::Array(elements) => {
|
||||||
|
for node in elements {
|
||||||
|
node.visit(cb, depth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::UnaryExpr { op, node } => node.visit(cb, depth),
|
||||||
|
Node::BinaryExpr { op, lhs, rhs } => {
|
||||||
|
lhs.visit(cb, depth);
|
||||||
|
rhs.visit(cb, depth);
|
||||||
|
}
|
||||||
|
Node::TypeExpr(node) => node.visit(cb, depth),
|
||||||
|
Node::SetExpr { name, val } => {
|
||||||
|
name.visit(cb, depth);
|
||||||
|
val.visit(cb, depth);
|
||||||
|
}
|
||||||
|
Node::Module { name, content } => {
|
||||||
|
name.visit(cb, depth);
|
||||||
|
for n in content {
|
||||||
|
n.visit(cb, depth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::File { name, content } => {
|
||||||
|
for n in content {
|
||||||
|
n.visit(cb, depth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => return,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Node {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
|
let mut tmp: String;
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}",
|
||||||
|
match self {
|
||||||
|
Node::Ident(name) => name.as_str(),
|
||||||
|
Node::Int(i) => {
|
||||||
|
tmp = format!("{}", i);
|
||||||
|
tmp.as_str()
|
||||||
|
}
|
||||||
|
Node::String(s) => s.as_str(),
|
||||||
|
Node::DepList(_) => "depend",
|
||||||
|
Node::SourceList(_) => "source",
|
||||||
|
Node::Array(_) => "<array>",
|
||||||
|
Node::UnaryExpr { op, node } => op.raw(),
|
||||||
|
Node::BinaryExpr { op, lhs, rhs } => op.raw(),
|
||||||
|
Node::TypeExpr(_) => "type",
|
||||||
|
Node::SetExpr { name, val } => "set",
|
||||||
|
Node::Module { name, content } => "module",
|
||||||
|
Node::File { name, content } => "file",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Operator {
|
||||||
|
pub fn from_token(token: &Token) -> Result<Operator, Error> {
|
||||||
|
match token.kind {
|
||||||
|
token::Kind::Eq => Ok(Operator::Eq),
|
||||||
|
token::Kind::Plus => Ok(Operator::Plus),
|
||||||
|
token::Kind::Minus => Ok(Operator::Minus),
|
||||||
|
token::Kind::Asterisk => Ok(Operator::Asterisk),
|
||||||
|
token::Kind::Slash => Ok(Operator::Slash),
|
||||||
|
token::Kind::Percent => Ok(Operator::Percent),
|
||||||
|
_ => Err(Error::syntax_error(
|
||||||
|
token.pos.clone(),
|
||||||
|
format!("\"{}\" is not an operator", token.raw),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn raw(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Operator::Eq => "=",
|
||||||
|
Operator::Plus => "+",
|
||||||
|
Operator::Minus => "-",
|
||||||
|
Operator::Asterisk => "*",
|
||||||
|
Operator::Slash => "/",
|
||||||
|
Operator::Percent => "%",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Operator {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(f, "{}", self.raw())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The static type of an expression's value.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Type {
    /// For identifier tokens (evaluates at runtime)
    Unknown,
    /// For expressions that don't emit a value
    None,
    /// Integer literal type.
    Int,
    /// String literal type.
    String,
}
|
||||||
|
|
||||||
|
impl Type {
|
||||||
|
pub fn from_token(token: &Token) -> Option<Type> {
|
||||||
|
match token.kind {
|
||||||
|
token::Kind::IntLiteral => Some(Type::Int),
|
||||||
|
token::Kind::StringLiteral => Some(Type::String),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Type {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}",
|
||||||
|
match self {
|
||||||
|
Type::Unknown => "<unknown>",
|
||||||
|
Type::None => "()",
|
||||||
|
Type::Int => "int",
|
||||||
|
Type::String => "string",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
@ -1,14 +1,13 @@
|
|||||||
use std::fs;
|
mod ast;
|
||||||
|
mod error;
|
||||||
mod lex;
|
mod lex;
|
||||||
use lex::Lexer;
|
|
||||||
|
|
||||||
mod error;
|
use ast::parse;
|
||||||
|
|
||||||
/// Entry point: parses the test build file and dumps the resulting AST,
/// one node per line indented by tree depth, or prints the syntax error.
fn main() {
    // NOTE(review): reconstructed from the newer side of a garbled diff
    // (the older side lexed and printed tokens directly). `.unwrap()`
    // panics on I/O errors such as a missing file — acceptable for this
    // development driver, but confirm before shipping.
    let result = parse(String::from("test.gaybuild")).unwrap();
    match result {
        Ok(tree) => tree.walk(|n, d| println!("{}{}", " ".repeat(d as usize), n)),
        Err(e) => println!("{:?}", e),
    }
}
|
||||||
|
Loading…
Reference in New Issue