Created
November 3, 2015 11:05
-
-
Save goyox86/71fdfa48f6bf80fdfd80 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(str_char)] | |
use std::io::prelude::*; | |
use std::io::BufReader; | |
use std::fs::File; | |
/// Sentinel byte stored (as a `char`) in `Lexer::c` once the input is exhausted.
///
/// A literal NUL character inside the input is therefore indistinguishable
/// from end of input and stops lexing early.
const EOF: u8 = 0;

/// A lexical token produced by `Lexer::next_token`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// A word starting with an uppercase ASCII letter.
    Constant(String),
    /// A word starting with a lowercase ASCII letter that is not a keyword.
    Identifier(String),
    /// A word found in the lexer's keyword list.
    Keyword(String),
    /// A numeric literal, kept as its source text (at most one `.`).
    Number(String),
    /// The contents of a double-quoted string literal, without the quotes.
    String(String),
    /// Start of a deeper block; carries the new indentation width.
    Indent(usize),
    /// Return to a shallower block; carries the new indentation width.
    Dedent(usize),
    /// A line break that keeps the current indentation.
    Newline,
    /// Never produced by the lexer itself: inline whitespace is skipped.
    Whitespace,
    /// End of input.
    Eof,
    /// A character no rule recognises, as a one-character string.
    Unknown(String),
}

/// A hand-written, indentation-aware lexer over a borrowed source string.
struct Lexer<'a> {
    /// The complete source text.
    input: &'a str,
    /// Byte offset of `c` inside `input`.
    p: usize,
    /// The current character, or `EOF as char` once the input is exhausted.
    c: char,
    /// Words reported as `Token::Keyword` instead of `Token::Identifier`.
    keywords: Vec<String>,
    /// Indentation width of the block currently being lexed.
    cur_indent: usize,
    /// Indentation widths of the enclosing blocks, outermost first.
    indent_stack: Vec<usize>,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned on the first character of `input`.
    ///
    /// An empty `input` is valid and yields `Token::Eof` immediately.
    fn new(input: &'a str) -> Lexer<'a> {
        let keywords = ["def", "class", "if", "true", "false", "nil"]
            .iter()
            .map(|kw| String::from(*kw))
            .collect();
        Lexer {
            input,
            p: 0,
            c: input.chars().next().unwrap_or(EOF as char),
            keywords,
            cur_indent: 0,
            indent_stack: Vec::new(),
        }
    }

    /// Returns `true` when the current character is the end-of-input sentinel.
    fn at_eof(&self) -> bool {
        self.c == (EOF as char)
    }

    /// Advances to the next character, or to the `EOF` sentinel at end of input.
    fn consume(&mut self) {
        if self.p >= self.input.len() {
            self.c = EOF as char;
            return;
        }
        // Step by the encoded width of the current character so that `p`
        // always stays on a UTF-8 character boundary.
        self.p += self.c.len_utf8();
        self.c = self.input[self.p..].chars().next().unwrap_or(EOF as char);
    }

    /// Consumes the current character if it equals `x`.
    ///
    /// # Panics
    ///
    /// Panics when the current character is not `x`.
    fn matches(&mut self, x: char) {
        if self.c == x {
            self.consume();
        } else {
            panic!("expected {} found {}", x, self.c);
        }
    }

    /// Returns the next token, or `Token::Eof` once the input is exhausted.
    ///
    /// Spaces and tabs between tokens are skipped. An unrecognised character
    /// is consumed and returned as `Token::Unknown`, so repeated calls always
    /// make progress.
    ///
    /// # Panics
    ///
    /// Panics on malformed input: a `:` not followed by a newline, invalid
    /// indentation, an unclosed string literal, or a number with several dots.
    fn next_token(&mut self) -> Token {
        while !self.at_eof() {
            match self.c {
                'A'..='Z' => return self.constant(),
                'a'..='z' => return self.identifier(),
                '0'..='9' => return self.number(),
                '"' => {
                    self.consume();
                    return self.string();
                }
                ':' => {
                    // A colon opens a block and must be directly followed by
                    // a newline and a deeper indentation.
                    self.consume();
                    self.matches('\n');
                    return self.indent();
                }
                '\n' => {
                    self.consume();
                    return self.newline();
                }
                ' ' | '\t' => self.whitespace(),
                other => {
                    self.consume();
                    return Token::Unknown(other.to_string());
                }
            }
        }
        Token::Eof
    }

    /// Consumes characters while `keep` accepts them and returns them.
    fn scan_while<F>(&mut self, keep: F) -> String
    where
        F: Fn(char) -> bool,
    {
        let mut buffer = String::new();
        while !self.at_eof() && keep(self.c) {
            buffer.push(self.c);
            self.consume();
        }
        buffer
    }

    /// Lexes a run of alphanumerics and underscores as a `Token::Constant`.
    fn constant(&mut self) -> Token {
        Token::Constant(self.scan_while(|ch| ch.is_alphanumeric() || ch == '_'))
    }

    /// Lexes a run of alphanumerics and underscores as a keyword or identifier.
    fn identifier(&mut self) -> Token {
        let word = self.scan_while(|ch| ch.is_alphanumeric() || ch == '_');
        if self.keywords.contains(&word) {
            Token::Keyword(word)
        } else {
            Token::Identifier(word)
        }
    }

    /// Lexes a run of digits and dots as a `Token::Number`.
    ///
    /// # Panics
    ///
    /// Panics when the literal contains more than one `.`.
    fn number(&mut self) -> Token {
        let literal = self.scan_while(|ch| ch.is_numeric() || ch == '.');
        if literal.matches('.').count() > 1 {
            panic!("invalid numeric literal: {}", literal);
        }
        Token::Number(literal)
    }

    /// Lexes the body of a string literal; the opening quote is already consumed.
    ///
    /// # Panics
    ///
    /// Panics when the input ends before the closing quote.
    fn string(&mut self) -> Token {
        let mut buffer = String::new();
        loop {
            if self.c == '"' {
                self.consume();
                break;
            }
            if self.at_eof() {
                panic!("unclosed string literal: {}", buffer);
            }
            buffer.push(self.c);
            self.consume();
        }
        Token::String(buffer)
    }

    /// Consumes leading spaces and tabs and returns how many there were.
    ///
    /// Each space and each tab counts as one column.
    fn measure_indent(&mut self) -> usize {
        let mut width = 0;
        while self.c == ' ' || self.c == '\t' {
            width += 1;
            self.consume();
        }
        width
    }

    /// Lexes the indentation that opens a new block (after `:` and a newline).
    ///
    /// # Panics
    ///
    /// Panics when the new line is not indented deeper than the current block.
    fn indent(&mut self) -> Token {
        let indent_size = self.measure_indent();
        if indent_size <= self.cur_indent {
            println!("indent_size: {}", indent_size);
            println!("cur_indent: {}", self.cur_indent);
            panic!("indentation is expected to be bigger");
        }
        // Remember the enclosing level so a later dedent can return to it.
        self.indent_stack.push(self.cur_indent);
        self.cur_indent = indent_size;
        Token::Indent(indent_size)
    }

    /// Lexes the indentation after a plain newline.
    ///
    /// Returns `Token::Newline` when the indentation is unchanged, or a single
    /// `Token::Dedent` carrying the new width when it drops back to an
    /// enclosing block (possibly closing several blocks at once).
    ///
    /// Note that an empty line counts as indentation zero, so a blank line
    /// inside an indented block is treated as a dedent.
    ///
    /// # Panics
    ///
    /// Panics when the indentation grows without a preceding `:` or when it
    /// does not match any enclosing block.
    fn newline(&mut self) -> Token {
        let indent_size = self.measure_indent();
        if indent_size > self.cur_indent {
            panic!("indentation is not supposed to increase");
        }
        if indent_size == self.cur_indent {
            return Token::Newline;
        }
        // Unwind enclosing levels (innermost first) until one matches.
        while let Some(level) = self.indent_stack.pop() {
            if level == indent_size {
                self.cur_indent = level;
                return Token::Dedent(indent_size);
            }
            if level < indent_size {
                break;
            }
        }
        panic!("mismatched indentation");
    }

    /// Skips inline whitespace, stopping at a newline.
    ///
    /// The newline is left in place so `next_token` can still report
    /// `Newline` or `Dedent` for it.
    fn whitespace(&mut self) {
        while self.c.is_whitespace() && self.c != '\n' {
            self.consume();
        }
    }
}
fn main() { | |
let mut f = File::open("./src/example.awm").ok().expect("failed to open file"); | |
let mut reader = BufReader::new(f); | |
let mut contents = String::new(); | |
reader.read_line(&mut contents); | |
let mut l = Lexer::new(&contents); | |
//let mut l = Lexer::new("class Awesome:\n def does_it_work:\n \"yeah!\""); | |
loop { | |
match l.next_token() { | |
Token::Constant(constant) => println!("Constant: {}", constant), | |
Token::Identifier(identifier) => println!("Identifier: {}", identifier), | |
Token::Keyword(keyword) => println!("Keyword: {}", keyword), | |
Token::Number(number) => println!("Number Literal: {}", number), | |
Token::String(string) => println!("String Literal: {}", string), | |
Token::Indent(size) => println!("Indent: {}", size), | |
Token::Dedent(size) => println!("Dedent: {}", size), | |
Token::Whitespace => println!("Whitespace"), | |
Token::Newline => println!("Newline"), | |
Token::Unknown(token) => panic!("Unknown Token: {}", token), | |
Token::Eof => break | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment