Last active
April 7, 2021 22:32
-
-
Save arthurbacci/34ef27b6bd6dd2edbd4d3cd90ab2ceed to your computer and use it in GitHub Desktop.
S-expr parser in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::io::{self, Write}; | |
fn main() { | |
loop { | |
print!("repl>"); | |
io::stdout().flush().expect("Could not flush stdout"); | |
let mut line = String::new(); | |
io::stdin().read_line(&mut line).expect("Could not read from stdin"); | |
let mut lexed = lexer::lex(&line).unwrap(); | |
println!("{:?}", parser::parse(&mut lexed)); | |
} | |
} | |
/// Tokenizer for S-expressions.
mod lexer {
    use std::iter::Peekable;
    use std::str::Chars;

    /// A single lexical token of an S-expression.
    #[derive(Debug, PartialEq, Clone)]
    pub enum Lexed {
        /// A bare token that parses as an `f64`.
        Number(f64),
        /// The contents of a double-quoted string literal, escapes resolved.
        String(String),
        /// Any other bare token.
        Identifier(String),
        /// An opening parenthesis, `(`.
        Open,
        /// A closing parenthesis, `)`.
        Close,
    }

    /// Splits `data` into tokens.
    ///
    /// Tokens are separated by whitespace; `(` and `)` are always tokens of
    /// their own. A `"` starts a string literal that runs to the next
    /// unescaped `"`; whitespace and parentheses inside it are kept verbatim.
    /// A `"` also terminates a bare word, so `abc"def"` lexes as the
    /// identifier `abc` followed by the string `def`.
    ///
    /// Returns `None` when `data` contains no tokens at all (empty or
    /// whitespace-only input), otherwise the tokens in source order.
    pub fn lex(data: &str) -> Option<Vec<Lexed>> {
        let mut tokens = Vec::new();
        let mut chars = data.chars().peekable();

        while let Some(&c) = chars.peek() {
            match c {
                '(' => {
                    chars.next();
                    tokens.push(Lexed::Open);
                }
                ')' => {
                    chars.next();
                    tokens.push(Lexed::Close);
                }
                '"' => {
                    // Skip the opening quote; `read_string` consumes the rest.
                    chars.next();
                    tokens.push(Lexed::String(read_string(&mut chars)));
                }
                c if c.is_whitespace() => {
                    chars.next();
                }
                _ => tokens.push(read_word(&mut chars)),
            }
        }

        if tokens.is_empty() {
            None
        } else {
            Some(tokens)
        }
    }

    /// Reads the body of a string literal whose opening quote has already
    /// been consumed, up to and including the closing quote.
    ///
    /// A backslash makes the following character literal (`\"` yields `"`,
    /// `\\` yields `\`); the backslash itself is dropped. An unterminated
    /// literal extends to the end of the input rather than failing.
    fn read_string(chars: &mut Peekable<Chars<'_>>) -> String {
        let mut text = String::new();
        while let Some(c) = chars.next() {
            match c {
                '"' => break,
                '\\' => {
                    // A trailing lone backslash has nothing to escape and is ignored.
                    if let Some(escaped) = chars.next() {
                        text.push(escaped);
                    }
                }
                other => text.push(other),
            }
        }
        text
    }

    /// Reads a bare word (everything up to whitespace, a parenthesis or a
    /// quote) and classifies it as a number when it parses as `f64`, and as
    /// an identifier otherwise.
    fn read_word(chars: &mut Peekable<Chars<'_>>) -> Lexed {
        let mut word = String::new();
        while let Some(&c) = chars.peek() {
            if c.is_whitespace() || c == '(' || c == ')' || c == '"' {
                break;
            }
            word.push(c);
            chars.next();
        }

        match word.parse::<f64>() {
            Ok(number) => Lexed::Number(number),
            Err(_) => Lexed::Identifier(word),
        }
    }
}
mod parser { | |
use crate::lexer; | |
#[derive(Debug)] | |
pub enum Parsed { | |
Atom(lexer::Lexed), | |
List(Vec<Parsed>), | |
} | |
pub fn parse(data: &mut Vec<lexer::Lexed>) -> Option<Parsed> { | |
match data.remove(0) { | |
lexer::Lexed::Open => { | |
let mut ts = Vec::new(); | |
while data[0] != lexer::Lexed::Close { | |
ts.push(parse(data).unwrap()); | |
} | |
data.remove(0); | |
Some(Parsed::List(ts)) | |
} | |
lexer::Lexed::Close => { | |
println!("Unexpected ')'"); | |
None | |
} | |
a => { | |
Some(Parsed::Atom(a.clone())) | |
} | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment