Last active
March 22, 2020 14:24
-
-
Save alphaKAI/0071f1379089d3d82938286e7331c508 to your computer and use it in GitHub Desktop.
S-Expression Parser in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// A value in the S-expression tree produced by the parser.
///
/// The derived `PartialEq` (`==`) treats objects of different variants as
/// simply unequal; use [`SexpObject::equal`] when a top-level variant
/// mismatch should be reported as an error instead.
#[derive(Debug, Clone, PartialEq)]
pub enum SexpObject {
    /// Numeric literal, stored as an `f64`.
    Float(f64),
    /// Boolean value.
    Bool(bool),
    /// String literal.
    String(String),
    /// Symbol name.
    Symbol(String),
    /// Sequence of nested objects.
    List(Vec<SexpObject>),
}

impl SexpObject {
    /// Structurally compares `lhs` and `rhs`.
    ///
    /// Two lists are equal when they have the same length and every pair of
    /// corresponding elements is equal. A variant mismatch between two
    /// *elements* makes the lists unequal (`Ok(false)`); it is not an error.
    ///
    /// # Errors
    ///
    /// Returns `Err("Type mismatch")` when `lhs` and `rhs` themselves are
    /// different variants.
    fn equal(lhs: &SexpObject, rhs: &SexpObject) -> Result<bool, &'static str> {
        match (lhs, rhs) {
            (SexpObject::Float(l), SexpObject::Float(r)) => Ok(l == r),
            (SexpObject::Bool(l), SexpObject::Bool(r)) => Ok(l == r),
            (SexpObject::String(l), SexpObject::String(r)) => Ok(l == r),
            (SexpObject::Symbol(l), SexpObject::Symbol(r)) => Ok(l == r),
            (SexpObject::List(l), SexpObject::List(r)) => Ok(l.len() == r.len()
                // Only `Ok(true)` counts as a match: both `Ok(false)` and a
                // nested type mismatch make the lists differ.
                && l
                    .iter()
                    .zip(r.iter())
                    .all(|(lv, rv)| SexpObject::equal(lv, rv) == Ok(true))),
            _ => Err("Type mismatch"),
        }
    }
}
/// Finds the end of a parenthesised group.
///
/// `left_offset` is the number of `(` that are already open before `code`
/// starts. The function scans `code` until as many `)` as `(` (including the
/// already-open ones) have been seen and returns the **byte** offset just past
/// that closing `)`, so the result can be used directly to slice `code`.
/// With `left_offset == 0` nothing is open and `0` is returned.
///
/// Every `(` and `)` is counted, including ones that appear inside string
/// literals or comments.
///
/// # Panics
///
/// Panics if `code` ends before the parentheses balance.
fn next_bracket(code: &str, left_offset: usize) -> usize {
    let mut depth = left_offset;
    if depth == 0 {
        return 0;
    }
    for (offset, c) in code.char_indices() {
        match c {
            '(' => depth += 1,
            ')' => depth -= 1,
            _ => {}
        }
        if depth == 0 {
            // `offset` is where `c` starts; step past it in bytes.
            return offset + c.len_utf8();
        }
    }
    panic!("unbalanced parentheses: input ended with {} unclosed '('", depth);
}
#[derive(Debug)] | |
struct ParseResult { | |
parse_result: Option<SexpObject>, | |
read_len: usize, | |
} | |
impl ParseResult { | |
fn new(obj: SexpObject, read_len: usize) -> Self { | |
ParseResult { | |
parse_result: Some(obj), | |
read_len: read_len, | |
} | |
} | |
fn new_len_only(read_len: usize) -> Self { | |
ParseResult { | |
parse_result: None, | |
read_len: read_len, | |
} | |
} | |
} | |
fn parse_list(code: &str) -> ParseResult { | |
let mut list = vec![]; | |
let mut i = 1; | |
let next_bracket_idx = next_bracket(&code[1..], 1); | |
let mut j = 0; | |
let contents = &code[1..next_bracket_idx]; | |
while j < contents.len() { | |
let tmp_result = sexp_parse_expr(&contents[j..]); | |
if tmp_result.parse_result.is_some() { | |
list.push(tmp_result.parse_result.unwrap()); | |
} | |
j += tmp_result.read_len; | |
} | |
i += j; | |
i += 1; // skip final ')' | |
ParseResult::new(SexpObject::List(list), i) | |
} | |
fn skip_line(code: &str) -> ParseResult { | |
let mut code_chars = code.chars(); | |
let mut i = 0; | |
while i < code.len() && code_chars.next().unwrap() != '\n' { | |
i += 1; | |
} | |
ParseResult::new_len_only(i) | |
} | |
fn parse_number(code: &str) -> ParseResult { | |
let mut i = 0; | |
if code.chars().nth(0).unwrap() == '-' { | |
i += 1; | |
} | |
while i < code.len() | |
&& (code.chars().nth(i).unwrap().is_ascii_digit() | |
|| (code.chars().nth(i).unwrap() == '.' | |
&& i + 1 < code.len() | |
&& code.chars().nth(i + 1).unwrap().is_ascii_digit())) | |
{ | |
i += 1; | |
} | |
let tmp_val = code[0..i].parse().unwrap(); | |
ParseResult::new(SexpObject::Float(tmp_val), i) | |
} | |
fn parse_symbol(code: &str) -> ParseResult { | |
let mut i = 0; | |
let symbol_chars: String = String::from("~!@#$%^&*-_=+:/?<>"); | |
while i < code.len() | |
&& (code.chars().nth(i).unwrap().is_ascii_alphabetic() | |
|| symbol_chars.find(code.chars().nth(i).unwrap()).is_some()) | |
{ | |
i += 1; | |
} | |
let tmp_str = code[0..i].to_string(); | |
ParseResult::new(SexpObject::Symbol(tmp_str), i) | |
} | |
fn parse_string(code: &str) -> ParseResult { | |
let str_len = code.len(); | |
let mut i = 1; | |
let mut code_chars = code.chars(); | |
let _ = code_chars.next(); | |
while i < str_len { | |
let c = code_chars.next().unwrap(); | |
if c == '\"' { | |
break; | |
} | |
i += 1; | |
} | |
ParseResult::new(SexpObject::String((code[1..i]).to_string()), i + 1) | |
} | |
fn parse_quote(code: &str) -> ParseResult { | |
let expr_ret = sexp_parse_expr(&code[1..]); | |
ParseResult::new(expr_ret.parse_result.unwrap(), 1 + expr_ret.read_len) | |
} | |
fn sexp_parse_expr(code: &str) -> ParseResult { | |
let mut i = 0; | |
let symbol_chars: String = String::from("~!@#$%^&*-_=+:/?<>"); | |
while i < code.len() { | |
let c = code.chars().nth(i).unwrap(); | |
if c == ' ' || c == '\n' || c == '\r' || c == '\t' { | |
i += 1; | |
continue; | |
} | |
if c == ';' { | |
i += skip_line(&code[i..]).read_len; | |
continue; | |
} | |
if c.is_ascii_digit() { | |
let result = parse_number(&code[i..]); | |
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i); | |
} | |
if c == '-' && i + 1 < code.len() && code.chars().nth(i + 1).unwrap().is_ascii_digit() { | |
let result = parse_number(&code[i..]); | |
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i); | |
} | |
if c.is_ascii_alphabetic() || symbol_chars.find(c).is_some() { | |
let result = parse_symbol(&code[i..]); | |
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i); | |
} | |
if c == '\"' { | |
let result = parse_string(&code[i..]); | |
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i); | |
} | |
if c == '(' { | |
let result = parse_list(&code[i..]); | |
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i); | |
} | |
if c == '\'' { | |
let result = parse_quote(&code[i..]); | |
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i); | |
} | |
} | |
panic!("should not reach here"); | |
} | |
fn sexp_parse(code: &str) -> Vec<SexpObject> { | |
let mut ret = vec![]; | |
let mut i = 0; | |
while i < code.len() { | |
let result = sexp_parse_expr(&code[i..]); | |
if result.parse_result.is_some() { | |
ret.push(result.parse_result.unwrap()); | |
} | |
i += result.read_len; | |
} | |
ret | |
} | |
fn main() { | |
println!("Hello, world!"); | |
let codes = vec![ | |
"(\"ABC\")", | |
"(123)", | |
"(123 456 789 -123)", | |
"(println \"Hello, world\")", | |
]; | |
for code in codes.iter() { | |
let r = sexp_parse_expr(code); | |
println!("{} -> : {:?}", code, r.parse_result); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment