Skip to content

Instantly share code, notes, and snippets.

@alphaKAI
Last active March 22, 2020 14:24
Show Gist options
  • Save alphaKAI/0071f1379089d3d82938286e7331c508 to your computer and use it in GitHub Desktop.
Save alphaKAI/0071f1379089d3d82938286e7331c508 to your computer and use it in GitHub Desktop.
S-Expression Parser in Rust
/// A value produced by the S-expression parser.
#[derive(Debug)]
pub enum SexpObject {
    /// Floating-point number.
    Float(f64),
    /// Boolean value.
    Bool(bool),
    /// String value.
    String(String),
    /// Symbol, identified by its name.
    Symbol(String),
    /// Ordered sequence of nested objects.
    List(Vec<SexpObject>),
}

impl SexpObject {
    /// Structurally compares two objects of the same variant.
    ///
    /// Scalars compare by value. Two lists are equal when they have the same
    /// length and every pair of corresponding elements compares as
    /// `Ok(true)`; an element pair that is unequal *or* of differing variants
    /// makes the lists unequal (`Ok(false)`).
    ///
    /// # Errors
    ///
    /// Returns `Err("Type mismatch")` when `lhs` and `rhs` themselves are
    /// different variants.
    fn equal(lhs: &SexpObject, rhs: &SexpObject) -> Result<bool, &'static str> {
        match (lhs, rhs) {
            (SexpObject::Float(l), SexpObject::Float(r)) => Ok(l == r),
            (SexpObject::Bool(l), SexpObject::Bool(r)) => Ok(l == r),
            (SexpObject::String(l), SexpObject::String(r)) => Ok(l == r),
            (SexpObject::Symbol(l), SexpObject::Symbol(r)) => Ok(l == r),
            (SexpObject::List(l), SexpObject::List(r)) => {
                // Only `Ok(true)` counts as a match: both an unequal element
                // (`Ok(false)`) and a variant mismatch (`Err`) make the lists differ.
                let same = l.len() == r.len()
                    && l.iter()
                        .zip(r.iter())
                        .all(|(lv, rv)| SexpObject::equal(lv, rv) == Ok(true));
                Ok(same)
            }
            _ => Err("Type mismatch"),
        }
    }
}
/// Locates the `)` that closes the bracket nesting `code` is already inside.
///
/// `left_offset` is the number of `(` that were opened before `code` begins.
/// The scan stops once as many `)` as `(` (including `left_offset`) have been
/// seen, and the returned value is the byte offset just past that closing
/// `)`. With `left_offset == 0` nothing is scanned and `0` is returned.
///
/// Brackets that appear between double quotes are treated as string contents
/// and do not affect the nesting depth. Comments are not recognised here.
///
/// The result is a *byte* offset so that callers can use it directly to
/// slice `code`, even when it contains multi-byte characters.
///
/// # Panics
///
/// Panics if `code` ends before the brackets are balanced.
fn next_bracket(code: &str, left_offset: usize) -> usize {
    if left_offset == 0 {
        return 0;
    }
    let mut depth = left_offset;
    let mut in_string = false;
    for (pos, c) in code.char_indices() {
        match c {
            '"' => in_string = !in_string,
            '(' if !in_string => depth += 1,
            ')' if !in_string => {
                depth -= 1;
                if depth == 0 {
                    // `)` is a single byte, so the position after it is `pos + 1`.
                    return pos + 1;
                }
            }
            _ => {}
        }
    }
    panic!("unbalanced brackets: input ended before the closing ')' was found");
}
/// Outcome of a single parsing step: what was produced and how much input
/// the step consumed.
#[derive(Debug)]
struct ParseResult {
    /// The parsed object, or `None` when the step produced no object.
    parse_result: Option<SexpObject>,
    /// Amount of input consumed by the step.
    read_len: usize,
}

impl ParseResult {
    /// Creates a result that carries `obj` and reports `read_len` as consumed.
    fn new(obj: SexpObject, read_len: usize) -> Self {
        let parse_result = Some(obj);
        Self {
            parse_result,
            read_len,
        }
    }

    /// Creates a result with no object that only reports `read_len` as consumed.
    fn new_len_only(read_len: usize) -> Self {
        Self {
            parse_result: None,
            read_len,
        }
    }
}
/// Parses a parenthesised list; `code` must begin with the opening `(`.
///
/// The matching `)` is located with `next_bracket`, everything between the
/// two brackets is parsed expression by expression with `sexp_parse_expr`,
/// and steps that yield no object are skipped.
///
/// Returns a `SexpObject::List` of the parsed elements. The reported read
/// length runs from the opening `(` through the matching `)` inclusive; it is
/// taken from the bracket position rather than from the sum of the element
/// lengths, so an element that over-reports its length cannot push the
/// result past the end of the list.
fn parse_list(code: &str) -> ParseResult {
    // `next_bracket` scans `code[1..]` and returns the offset just past the
    // closing ')' relative to that slice, which is exactly the index of the
    // ')' within `code`.
    let close_idx = next_bracket(&code[1..], 1);
    let contents = &code[1..close_idx];

    let mut list = Vec::new();
    let mut consumed = 0;
    while consumed < contents.len() {
        let step = sexp_parse_expr(&contents[consumed..]);
        if let Some(obj) = step.parse_result {
            list.push(obj);
        }
        consumed += step.read_len;
    }

    // +1 accounts for the closing ')'.
    ParseResult::new(SexpObject::List(list), close_idx + 1)
}
/// Measures the remainder of the current line (used for `;` comments).
///
/// Returns a result with no object whose read length is the byte offset of
/// the first `'\n'` in `code`, or `code.len()` when there is no newline. The
/// newline itself is not consumed.
///
/// The length is a byte offset (not a character count) so that callers can
/// add it to a slice index even when the line contains multi-byte characters.
fn skip_line(code: &str) -> ParseResult {
    let line_len = code.find('\n').unwrap_or(code.len());
    ParseResult::new_len_only(line_len)
}
/// Parses a number at the start of `code` into a `SexpObject::Float`.
///
/// An optional leading `-` is accepted, followed by ASCII digits; a `.` is
/// consumed only when it is directly followed by a digit. The read length is
/// the number of bytes that make up the literal.
///
/// The scan works on bytes in a single pass; every accepted character is
/// ASCII, so the byte index is always a valid slice boundary.
///
/// # Panics
///
/// Panics if the scanned text is not valid `f64` syntax (for example an
/// empty input, a lone `-`, or a literal with two decimal points such as
/// `1.2.3`).
fn parse_number(code: &str) -> ParseResult {
    let bytes = code.as_bytes();
    let mut end = if bytes.first() == Some(&b'-') { 1 } else { 0 };

    while end < bytes.len() {
        let current = bytes[end];
        // A '.' belongs to the number only when a digit follows it.
        let is_fraction_dot = current == b'.'
            && bytes
                .get(end + 1)
                .map_or(false, |next| next.is_ascii_digit());
        if !(current.is_ascii_digit() || is_fraction_dot) {
            break;
        }
        end += 1;
    }

    let value: f64 = code[..end]
        .parse()
        .expect("scanned number literal is not valid f64 syntax");
    ParseResult::new(SexpObject::Float(value), end)
}
/// Parses a symbol at the start of `code` into a `SexpObject::Symbol`.
///
/// A symbol is the longest leading run of ASCII letters and the punctuation
/// characters `~!@#$%^&*-_=+:/?<>`. Digits are not symbol characters, so the
/// symbol ends in front of the first digit. The read length is the length of
/// that run (which may be zero).
fn parse_symbol(code: &str) -> ParseResult {
    const SYMBOL_CHARS: &str = "~!@#$%^&*-_=+:/?<>";

    // Every accepted character is ASCII (one byte), so the number of accepted
    // characters is also the byte length of the symbol.
    let len = code
        .chars()
        .take_while(|&c| c.is_ascii_alphabetic() || SYMBOL_CHARS.contains(c))
        .count();

    ParseResult::new(SexpObject::Symbol(code[..len].to_string()), len)
}
/// Parses a double-quoted string; `code` must begin with the opening `"`.
///
/// The string runs up to the next `"`; there is no escape handling. Returns a
/// `SexpObject::String` holding the text between the quotes, with a read
/// length covering both quotes.
///
/// If no closing quote exists, the rest of `code` becomes the string and the
/// read length is `code.len()`, so the result never claims more input than is
/// available.
///
/// The closing quote is located by byte offset, so string contents may
/// contain multi-byte characters.
fn parse_string(code: &str) -> ParseResult {
    // Skip the opening quote (a single byte).
    let body = &code[1..];
    match body.find('"') {
        Some(close) => {
            // +2 accounts for the opening and closing quotes.
            ParseResult::new(SexpObject::String(body[..close].to_string()), close + 2)
        }
        None => ParseResult::new(SexpObject::String(body.to_string()), code.len()),
    }
}
fn parse_quote(code: &str) -> ParseResult {
let expr_ret = sexp_parse_expr(&code[1..]);
ParseResult::new(expr_ret.parse_result.unwrap(), 1 + expr_ret.read_len)
}
fn sexp_parse_expr(code: &str) -> ParseResult {
let mut i = 0;
let symbol_chars: String = String::from("~!@#$%^&*-_=+:/?<>");
while i < code.len() {
let c = code.chars().nth(i).unwrap();
if c == ' ' || c == '\n' || c == '\r' || c == '\t' {
i += 1;
continue;
}
if c == ';' {
i += skip_line(&code[i..]).read_len;
continue;
}
if c.is_ascii_digit() {
let result = parse_number(&code[i..]);
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i);
}
if c == '-' && i + 1 < code.len() && code.chars().nth(i + 1).unwrap().is_ascii_digit() {
let result = parse_number(&code[i..]);
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i);
}
if c.is_ascii_alphabetic() || symbol_chars.find(c).is_some() {
let result = parse_symbol(&code[i..]);
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i);
}
if c == '\"' {
let result = parse_string(&code[i..]);
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i);
}
if c == '(' {
let result = parse_list(&code[i..]);
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i);
}
if c == '\'' {
let result = parse_quote(&code[i..]);
return ParseResult::new(result.parse_result.unwrap(), result.read_len + i);
}
}
panic!("should not reach here");
}
fn sexp_parse(code: &str) -> Vec<SexpObject> {
let mut ret = vec![];
let mut i = 0;
while i < code.len() {
let result = sexp_parse_expr(&code[i..]);
if result.parse_result.is_some() {
ret.push(result.parse_result.unwrap());
}
i += result.read_len;
}
ret
}
/// Demo entry point: prints a greeting, then parses a few sample
/// expressions and prints each input next to its parsed form.
fn main() {
    println!("Hello, world!");

    let samples = [
        "(\"ABC\")",
        "(123)",
        "(123 456 789 -123)",
        "(println \"Hello, world\")",
    ];

    for sample in &samples {
        let parsed = sexp_parse_expr(sample);
        println!("{} -> : {:?}", sample, parsed.parse_result);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment