Created
January 6, 2020 16:31
-
-
Save Mr4k/56d84c987a0316172fcd598749ec3288 to your computer and use it in GitHub Desktop.
Toy Lisp Interpreter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::env; | |
const EMPTY_NODE_ID: usize = 0; | |
// an intermediate representation for symbol data during the parse step | |
#[derive(Clone, Debug, PartialEq)] | |
struct ParseSymbol { | |
string: String, | |
debug_program_position: usize, | |
} | |
#[derive(Clone, Debug, PartialEq)] | |
enum ASTIdOrSymbol { | |
ASTId(usize), | |
Symbol(Symbol), | |
} | |
#[derive(Clone, Debug, PartialEq)] | |
struct Symbol { | |
data: Primitives, | |
debug_program_position: usize, | |
} | |
#[derive(Clone, Debug, PartialEq)] | |
enum Primitives { | |
Integer { value: i32 }, | |
AddOp, | |
} | |
#[derive(Clone, Debug, PartialEq)] | |
struct AST { | |
// the AST does not need a debug_program_position because parenthesis are parsed away | |
// and therefore we cannot error at one during runtime | |
node_id: usize, | |
parent_node_id: usize, | |
args: Vec<ASTIdOrSymbol>, | |
} | |
impl AST { | |
pub fn new(node_id: usize, parent_node_id: usize) -> Self { | |
Self { | |
node_id, | |
parent_node_id, | |
args: Vec::new(), | |
} | |
} | |
} | |
fn parse_symbols_from_program_string(program_string: &str, parsed_symbols: &mut Vec<ParseSymbol>) { | |
let mut current_symbol_chars: Vec<char> = Vec::new(); | |
for (pos, chr) in program_string.chars().enumerate() { | |
match chr { | |
'(' | ')' => { | |
// flush any previous symbol | |
if current_symbol_chars.len() > 0 { | |
let symbol_string: String = current_symbol_chars.drain(..).collect(); | |
parsed_symbols.push(ParseSymbol { | |
debug_program_position: pos - symbol_string.len(), | |
string: symbol_string, | |
}); | |
} | |
// push the parenthesis as it's own symbol | |
parsed_symbols.push(ParseSymbol { | |
string: chr.to_string(), | |
debug_program_position: pos, | |
}); | |
} | |
chr if chr.is_whitespace() => { | |
// flush any previous symbol | |
if current_symbol_chars.len() > 0 { | |
let symbol_string: String = current_symbol_chars.drain(..).collect(); | |
parsed_symbols.push(ParseSymbol { | |
debug_program_position: pos - symbol_string.len(), | |
string: symbol_string, | |
}); | |
} | |
} | |
_ => { | |
current_symbol_chars.push(chr); | |
} | |
} | |
} | |
// flush any remaining symbols which have not been completed | |
// note this techinically shouldn't be needed since these programs should end with ')' | |
// but deciding if the program is valid is not the resposibility of this function | |
if current_symbol_chars.len() > 0 { | |
let symbol_string: String = current_symbol_chars.drain(..).collect(); | |
parsed_symbols.push(ParseSymbol { | |
debug_program_position: program_string.len() - symbol_string.len(), | |
string: symbol_string, | |
}); | |
} | |
} | |
fn construct_ast_from_parse_symbols(program_symbols: Vec<ParseSymbol>, program_ast: &mut Vec<AST>) { | |
let mut max_node_id = 0; | |
// index 0 is reserved for the EMPTY_NODE_ID | |
// this sets it aside although maybe not in the best way | |
let mut current_ast_id = 0; | |
program_ast.push(AST::new(EMPTY_NODE_ID, EMPTY_NODE_ID)); | |
for parse_symbol in program_symbols.iter() { | |
let current_tree = program_ast.get_mut(current_ast_id).unwrap(); | |
match &*parse_symbol.string { | |
"(" => { | |
let parent_node_id = current_ast_id; | |
max_node_id += 1; | |
let new_node_id = max_node_id; | |
current_tree.args.push(ASTIdOrSymbol::ASTId(new_node_id)); | |
program_ast.push(AST::new(new_node_id, parent_node_id)); | |
current_ast_id = max_node_id; | |
} | |
")" => { | |
// assert here that we have an open subtree to close | |
if current_ast_id == 0 { | |
panic!(format!( | |
"Parse Error: Invalid Close Parenthesis at position {}", | |
parse_symbol.debug_program_position | |
)); | |
} | |
current_ast_id = current_tree.parent_node_id; | |
} | |
// operatators and keywords | |
"+" => { | |
current_tree.args.push(ASTIdOrSymbol::Symbol(Symbol { | |
debug_program_position: parse_symbol.debug_program_position, | |
data: Primitives::AddOp, | |
})); | |
} | |
// numbers (currently only integers) | |
num if num.parse::<i32>().is_ok() => { | |
current_tree.args.push(ASTIdOrSymbol::Symbol(Symbol { | |
debug_program_position: parse_symbol.debug_program_position, | |
data: Primitives::Integer { | |
value: num.parse::<i32>().unwrap(), | |
}, | |
})); | |
} | |
unknown => panic!(format!( | |
"Parse Error: Invalid Symbol {} at position {}", | |
unknown, parse_symbol.debug_program_position | |
)), | |
} | |
} | |
if current_ast_id != 0 { | |
panic!("Parse Error: Error Parenthesis Left Open at end of program"); | |
} | |
} | |
fn main() { | |
let args: Vec<String> = env::args().collect(); | |
let program = match args.get(1) { | |
Some(arg) => arg, | |
None => "", | |
}; | |
if program == "" { | |
println!("Warning program is empty."); | |
} | |
let mut program_ast: Vec<AST> = Vec::new(); | |
{ | |
println!("Parsing Text..."); | |
let mut program_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(program, &mut program_symbols); | |
println!("Constructing AST..."); | |
construct_ast_from_parse_symbols(program_symbols, &mut program_ast); | |
} | |
println!("AST {:?}", program_ast); | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn parse_symbols_from_program_string_handles_symbols_cutoffs_with_whitespace() { | |
let test_string = "aa bbc d2"; | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
assert_eq!( | |
parse_symbols, | |
vec![ | |
ParseSymbol { | |
string: "aa".to_string(), | |
debug_program_position: 0 | |
}, | |
ParseSymbol { | |
string: "bbc".to_string(), | |
debug_program_position: 3 | |
}, | |
ParseSymbol { | |
string: "d2".to_string(), | |
debug_program_position: 9 | |
}, | |
] | |
); | |
} | |
#[test] | |
fn parse_symbols_from_program_string_handles_symbols_cutoffs_with_parenthesis() { | |
let test_string = "aa(bbc)d2"; | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
assert_eq!( | |
parse_symbols, | |
vec![ | |
ParseSymbol { | |
string: "aa".to_string(), | |
debug_program_position: 0 | |
}, | |
ParseSymbol { | |
string: "(".to_string(), | |
debug_program_position: 2 | |
}, | |
ParseSymbol { | |
string: "bbc".to_string(), | |
debug_program_position: 3 | |
}, | |
ParseSymbol { | |
string: ")".to_string(), | |
debug_program_position: 6 | |
}, | |
ParseSymbol { | |
string: "d2".to_string(), | |
debug_program_position: 7 | |
}, | |
] | |
); | |
} | |
#[test] | |
fn parse_symbols_from_program_string_handles_repeated_parenthesis_correctly() { | |
let test_string = "(()))"; | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
assert_eq!( | |
parse_symbols, | |
vec![ | |
ParseSymbol { | |
string: "(".to_string(), | |
debug_program_position: 0 | |
}, | |
ParseSymbol { | |
string: "(".to_string(), | |
debug_program_position: 1 | |
}, | |
ParseSymbol { | |
string: ")".to_string(), | |
debug_program_position: 2 | |
}, | |
ParseSymbol { | |
string: ")".to_string(), | |
debug_program_position: 3 | |
}, | |
ParseSymbol { | |
string: ")".to_string(), | |
debug_program_position: 4 | |
}, | |
] | |
); | |
} | |
#[test] | |
#[should_panic(expected = "Parse Error: Error Parenthesis Left Open at end of program")] | |
fn construct_ast_from_parse_symbols_errors_on_unmatched_open_parenthesis() { | |
let test_string = "((+ 1 2)"; | |
let mut program_ast: Vec<AST> = Vec::new(); | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast); | |
} | |
#[test] | |
#[should_panic(expected = "Parse Error: Invalid Close Parenthesis at position 11")] | |
fn construct_ast_from_parse_symbols_errors_on_unmatched_closed_parenthesis() { | |
let test_string = "((+ 1 2) 5)) (7))"; | |
let mut program_ast: Vec<AST> = Vec::new(); | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast); | |
} | |
#[test] | |
#[should_panic(expected = "Parse Error: Invalid Symbol $ at position 11")] | |
fn construct_ast_from_parse_symbols_errors_on_invalid_symbol() { | |
let test_string = "(+ (+ 1 2) $ 7)"; | |
let mut program_ast: Vec<AST> = Vec::new(); | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast); | |
} | |
#[test] | |
fn construct_ast_from_parse_symbols_simple_expression_correctly() { | |
let test_string = "(+ 1 2)"; | |
let mut program_ast: Vec<AST> = Vec::new(); | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast); | |
assert_eq!( | |
program_ast, | |
vec![ | |
AST { | |
node_id: 0, | |
parent_node_id: 0, | |
args: vec![ASTIdOrSymbol::ASTId(1)] | |
}, | |
AST { | |
node_id: 1, | |
parent_node_id: 0, | |
args: vec![ | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::AddOp, | |
debug_program_position: 1 | |
}), | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::Integer { value: 1 }, | |
debug_program_position: 3 | |
}), | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::Integer { value: 2 }, | |
debug_program_position: 5 | |
}) | |
] | |
} | |
] | |
); | |
} | |
#[test] | |
fn construct_ast_from_parse_symbols_works_with_simple_subtrees() { | |
let test_string = "(+ (+ 1 3) 2)"; | |
let mut program_ast: Vec<AST> = Vec::new(); | |
let mut parse_symbols: Vec<ParseSymbol> = Vec::new(); | |
parse_symbols_from_program_string(test_string, &mut parse_symbols); | |
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast); | |
assert_eq!( | |
program_ast, | |
vec![ | |
AST { | |
node_id: 0, | |
parent_node_id: 0, | |
args: vec![ASTIdOrSymbol::ASTId(1)] | |
}, | |
AST { | |
node_id: 1, | |
parent_node_id: 0, | |
args: vec![ | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::AddOp, | |
debug_program_position: 1 | |
}), | |
ASTIdOrSymbol::ASTId(2), | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::Integer { value: 2 }, | |
debug_program_position: 11 | |
}) | |
] | |
}, | |
AST { | |
node_id: 2, | |
parent_node_id: 1, | |
args: vec![ | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::AddOp, | |
debug_program_position: 4 | |
}), | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::Integer { value: 1 }, | |
debug_program_position: 6 | |
}), | |
ASTIdOrSymbol::Symbol(Symbol { | |
data: Primitives::Integer { value: 3 }, | |
debug_program_position: 8 | |
}) | |
] | |
} | |
] | |
); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This program currently parses a simple lisp string and turns it into an AST
Example usage:
input:
toy_lisp (+ 1 (+ 2 3))
output:
It will also check for two simple kinds of malformed programs:
Imbalanced Parenthesis:
input:
toy_lisp (+ 1 (+ 2 3)
output:
input:
toy_lisp (+ 1 (+ 2 3)))
output:
Invalid Symbol:
input:
toy_lisp (+ 1 (u 2 3)))
output:
TODO
Actually interpret lisp