ast: add abstract syntax tree parser
This is nowhere near finished, but it's a good start for now.
main
parent
b0e2e405d7
commit
30273b7902
@ -0,0 +1,153 @@
|
||||
pub(crate) mod tree;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::lex::token;
|
||||
use crate::lex::token::Token;
|
||||
use crate::lex::Lexer;
|
||||
|
||||
use std::fs;
|
||||
use std::io;
|
||||
|
||||
/// Syntactic context the parser is currently inside of.
///
/// The parser keeps a stack of these and pushes one whenever it descends
/// into the corresponding construct.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Scope {
    /// Top level of a source file.
    File,
    /// Body of a `module` block.
    Module,
    /// Value of a `depend` expression.
    DepList,
    /// Value of a `source` expression.
    SourceList,
}
|
||||
|
||||
struct Parser {
|
||||
lexer: Lexer,
|
||||
scope: Vec<Scope>,
|
||||
filename: String,
|
||||
}
|
||||
|
||||
pub fn parse(filename: String) -> io::Result<Result<tree::Node, Error>> {
|
||||
let raw: String = fs::read_to_string(filename.clone())?;
|
||||
let mut p = Parser::new(filename, raw);
|
||||
Ok(p.parse_file())
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new(filename: String, raw: String) -> Parser {
|
||||
let lexer = Lexer::new(filename.clone(), raw);
|
||||
Parser {
|
||||
lexer,
|
||||
scope: Vec::new(),
|
||||
filename,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_file(&mut self) -> Result<tree::Node, Error> {
|
||||
let mut nodes = Vec::new();
|
||||
self.scope.push(Scope::File);
|
||||
|
||||
while let Some(result) = self.lexer.next() {
|
||||
let token = result?;
|
||||
let node = match token.kind {
|
||||
token::Kind::ModuleKeyword => self.parse_module(),
|
||||
token::Kind::SetKeyword => self.parse_set_expr(),
|
||||
_ => self.syntax_error(format!("Unexpected token {}", token.kind), &token),
|
||||
}?;
|
||||
nodes.push(node);
|
||||
}
|
||||
|
||||
Ok(tree::Node::File {
|
||||
name: self.filename.clone(),
|
||||
content: nodes,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_module(&mut self) -> Result<tree::Node, Error> {
|
||||
self.scope.push(Scope::Module);
|
||||
let name_token = self.lexer.expect_kind(token::Kind::Ident)?;
|
||||
self.lexer.expect_kind(token::Kind::OBrace)?;
|
||||
|
||||
let mut children = Vec::new();
|
||||
|
||||
while let Some(result) = self.lexer.next() {
|
||||
let token = result?;
|
||||
match token.kind {
|
||||
token::Kind::DependKeyword => children.push(self.parse_depend_expr()?),
|
||||
token::Kind::TypeKeyword => children.push(self.parse_type_expr()?),
|
||||
token::Kind::SourceKeyword => children.push(self.parse_source_expr()?),
|
||||
token::Kind::SetKeyword => children.push(self.parse_set_expr()?),
|
||||
token::Kind::CBrace => break,
|
||||
_ => {
|
||||
return self.syntax_error(format!("Unexpected token \"{}\"", token.raw), &token)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.scope.pop();
|
||||
Ok(tree::Node::Module {
|
||||
name: Box::new(tree::Node::Ident(name_token.raw)),
|
||||
content: children,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_depend_expr(&mut self) -> Result<tree::Node, Error> {
|
||||
self.scope.push(Scope::DepList);
|
||||
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
|
||||
self.scope.pop();
|
||||
Ok(tree::Node::DepList(Box::new(rvalue)))
|
||||
}
|
||||
|
||||
fn parse_set_expr(&mut self) -> Result<tree::Node, Error> {
|
||||
let lvalue = self.parse_lvalue(&[token::Kind::Eq])?;
|
||||
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
|
||||
Ok(tree::Node::SetExpr {
|
||||
name: Box::new(lvalue),
|
||||
val: Box::new(rvalue),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_type_expr(&mut self) -> Result<tree::Node, Error> {
|
||||
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
|
||||
Ok(tree::Node::TypeExpr(Box::new(rvalue)))
|
||||
}
|
||||
|
||||
fn parse_source_expr(&mut self) -> Result<tree::Node, Error> {
|
||||
self.scope.push(Scope::SourceList);
|
||||
let rvalue = self.parse_rvalue(&[token::Kind::Semi])?;
|
||||
self.scope.pop();
|
||||
Ok(tree::Node::SourceList(Box::new(rvalue)))
|
||||
}
|
||||
|
||||
fn parse_array(&mut self) -> Result<tree::Node, Error> {
|
||||
let mut elements = Vec::new();
|
||||
while let Some(result) = self.lexer.peek() {
|
||||
if result?.kind == token::Kind::CBracket {
|
||||
self.lexer.next();
|
||||
break;
|
||||
} else {
|
||||
elements.push(self.parse_rvalue(&[token::Kind::Comma, token::Kind::CBracket])?);
|
||||
}
|
||||
}
|
||||
Ok(tree::Node::Array(elements))
|
||||
}
|
||||
|
||||
fn parse_lvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
|
||||
// this will be expanded when we have array accesses and similar stuff
|
||||
let ident_token = self.lexer.expect_kind(token::Kind::Ident)?;
|
||||
self.lexer.expect_kinds(terminators)?;
|
||||
Ok(tree::Node::Ident(ident_token.raw))
|
||||
}
|
||||
|
||||
fn parse_rvalue(&mut self, terminators: &[token::Kind]) -> Result<tree::Node, Error> {
|
||||
// this will also be expanded to support more complex expressions, naturally
|
||||
let token = self.lexer.require_next()?;
|
||||
let node = match token.kind {
|
||||
token::Kind::Ident => Ok(tree::Node::Ident(token.raw)),
|
||||
token::Kind::IntLiteral => Ok(tree::Node::Int(token.raw.parse().unwrap())),
|
||||
token::Kind::StringLiteral => Ok(tree::Node::String(token.raw)),
|
||||
token::Kind::OBracket => self.parse_array(),
|
||||
k => self.syntax_error(format!("Unexpected {}", k), &token),
|
||||
}?;
|
||||
self.lexer.expect_kinds(terminators)?;
|
||||
Ok(node)
|
||||
}
|
||||
|
||||
fn syntax_error<T>(&self, msg: String, token: &Token) -> Result<T, Error> {
|
||||
Err(Error::syntax_error(token.pos.clone(), msg))
|
||||
}
|
||||
}
|
@ -0,0 +1,187 @@
|
||||
use std::fmt;
|
||||
use std::fmt::Formatter;
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
use crate::lex::token;
|
||||
use crate::lex::token::Token;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Node {
|
||||
Ident(String),
|
||||
DepList(Box<Node>),
|
||||
SourceList(Box<Node>),
|
||||
Int(i128),
|
||||
String(String),
|
||||
Array(Vec<Node>),
|
||||
UnaryExpr {
|
||||
op: Operator,
|
||||
node: Box<Node>,
|
||||
},
|
||||
BinaryExpr {
|
||||
op: Operator,
|
||||
lhs: Box<Node>,
|
||||
rhs: Box<Node>,
|
||||
},
|
||||
TypeExpr(Box<Node>),
|
||||
SetExpr {
|
||||
name: Box<Node>,
|
||||
val: Box<Node>,
|
||||
},
|
||||
Module {
|
||||
name: Box<Node>,
|
||||
content: Vec<Node>,
|
||||
},
|
||||
File {
|
||||
name: String,
|
||||
content: Vec<Node>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Operators that can appear in unary and binary expressions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
    /// `=`
    Eq,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Asterisk,
    /// `/`
    Slash,
    /// `%`
    Percent,
}
|
||||
|
||||
impl Node {
|
||||
pub fn walk(&self, cb: fn(node: &Node, depth: u32)) {
|
||||
self.visit(cb, 0);
|
||||
}
|
||||
|
||||
fn visit(&self, cb: fn(node: &Node, depth: u32), current_depth: u32) {
|
||||
cb(self, current_depth);
|
||||
let depth = current_depth + 1;
|
||||
match self {
|
||||
Node::DepList(list) => list.visit(cb, depth),
|
||||
Node::SourceList(list) => list.visit(cb, depth),
|
||||
Node::Array(elements) => {
|
||||
for node in elements {
|
||||
node.visit(cb, depth);
|
||||
}
|
||||
}
|
||||
Node::UnaryExpr { op, node } => node.visit(cb, depth),
|
||||
Node::BinaryExpr { op, lhs, rhs } => {
|
||||
lhs.visit(cb, depth);
|
||||
rhs.visit(cb, depth);
|
||||
}
|
||||
Node::TypeExpr(node) => node.visit(cb, depth),
|
||||
Node::SetExpr { name, val } => {
|
||||
name.visit(cb, depth);
|
||||
val.visit(cb, depth);
|
||||
}
|
||||
Node::Module { name, content } => {
|
||||
name.visit(cb, depth);
|
||||
for n in content {
|
||||
n.visit(cb, depth);
|
||||
}
|
||||
}
|
||||
Node::File { name, content } => {
|
||||
for n in content {
|
||||
n.visit(cb, depth);
|
||||
}
|
||||
}
|
||||
_ => return,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Node {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
let mut tmp: String;
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
match self {
|
||||
Node::Ident(name) => name.as_str(),
|
||||
Node::Int(i) => {
|
||||
tmp = format!("{}", i);
|
||||
tmp.as_str()
|
||||
}
|
||||
Node::String(s) => s.as_str(),
|
||||
Node::DepList(_) => "depend",
|
||||
Node::SourceList(_) => "source",
|
||||
Node::Array(_) => "<array>",
|
||||
Node::UnaryExpr { op, node } => op.raw(),
|
||||
Node::BinaryExpr { op, lhs, rhs } => op.raw(),
|
||||
Node::TypeExpr(_) => "type",
|
||||
Node::SetExpr { name, val } => "set",
|
||||
Node::Module { name, content } => "module",
|
||||
Node::File { name, content } => "file",
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Operator {
|
||||
pub fn from_token(token: &Token) -> Result<Operator, Error> {
|
||||
match token.kind {
|
||||
token::Kind::Eq => Ok(Operator::Eq),
|
||||
token::Kind::Plus => Ok(Operator::Plus),
|
||||
token::Kind::Minus => Ok(Operator::Minus),
|
||||
token::Kind::Asterisk => Ok(Operator::Asterisk),
|
||||
token::Kind::Slash => Ok(Operator::Slash),
|
||||
token::Kind::Percent => Ok(Operator::Percent),
|
||||
_ => Err(Error::syntax_error(
|
||||
token.pos.clone(),
|
||||
format!("\"{}\" is not an operator", token.raw),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn raw(&self) -> &'static str {
|
||||
match self {
|
||||
Operator::Eq => "=",
|
||||
Operator::Plus => "+",
|
||||
Operator::Minus => "-",
|
||||
Operator::Asterisk => "*",
|
||||
Operator::Slash => "/",
|
||||
Operator::Percent => "%",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Operator {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.raw())
|
||||
}
|
||||
}
|
||||
|
||||
/// Value type of an expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Type {
    /// For identifier tokens (evaluates at runtime)
    Unknown,
    /// For expressions that don't emit a value
    None,
    /// Integer value.
    Int,
    /// String value.
    String,
}
|
||||
|
||||
impl Type {
|
||||
pub fn from_token(token: &Token) -> Option<Type> {
|
||||
match token.kind {
|
||||
token::Kind::IntLiteral => Some(Type::Int),
|
||||
token::Kind::StringLiteral => Some(Type::String),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Type {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
match self {
|
||||
Type::Unknown => "<unknown>",
|
||||
Type::None => "()",
|
||||
Type::Int => "int",
|
||||
Type::String => "string",
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
@ -1,14 +1,13 @@
|
||||
use std::fs;
|
||||
|
||||
mod ast;
|
||||
mod error;
|
||||
mod lex;
|
||||
use lex::Lexer;
|
||||
|
||||
mod error;
|
||||
use ast::parse;
|
||||
|
||||
fn main() {
|
||||
let s = fs::read_to_string("test.gaybuild").unwrap();
|
||||
let lexer = Lexer::new(String::from("test.gaybuild"), s);
|
||||
for token in lexer {
|
||||
println!("{}", token.unwrap());
|
||||
let result = parse(String::from("test.gaybuild")).unwrap();
|
||||
match result {
|
||||
Ok(tree) => tree.walk(|n, d| println!("{}{}", " ".repeat(d as usize), n)),
|
||||
Err(e) => println!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue