Skip to content

Instantly share code, notes, and snippets.

@goyox86
Created November 3, 2015 11:05
Show Gist options
  • Save goyox86/71fdfa48f6bf80fdfd80 to your computer and use it in GitHub Desktop.
Save goyox86/71fdfa48f6bf80fdfd80 to your computer and use it in GitHub Desktop.
#![feature(str_char)]
use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;
/// Sentinel byte marking end of input: the NUL byte, cast to `char` wherever
/// the lexer compares it against the current character.
const EOF: u8 = b'\0';
/// A lexical token produced by `Lexer::next_token`.
///
/// `Debug`, `Clone`, `PartialEq` and `Eq` are derived so tokens can be
/// printed with `{:?}`, duplicated, and compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// A word starting with an ASCII uppercase letter.
    Constant(String),
    /// A word starting with an ASCII lowercase letter that is not a keyword.
    Identifier(String),
    /// A lowercase word found in the lexer's keyword list.
    Keyword(String),
    /// The raw text of a numeric literal (digits with at most one `.`).
    Number(String),
    /// The text between a pair of double quotes, quotes excluded.
    String(String),
    /// Start of a block; carries the number of spaces/tabs that follow `:\n`.
    Indent(usize),
    /// A line indented less than the current block; carries the new width.
    Dedent(usize),
    /// A line break that leaves the indentation unchanged.
    Newline,
    /// Whitespace. `Lexer::next_token` skips whitespace instead of returning
    /// this variant.
    Whitespace,
    /// End of input.
    Eof,
    /// A character no lexer rule matched.
    Unknown(String),
}
/// Lexer state: the borrowed input plus cursor and indentation bookkeeping.
struct Lexer<'a> {
    /// The text being tokenized.
    input: &'a str,
    /// Offset into `input` of the current character.
    p: usize,
    /// The current character; holds the `EOF` sentinel once the input is
    /// exhausted.
    c: char,
    /// Reserved words; a lowercase word found here is reported as a keyword.
    keywords: Vec<String>,
    /// Width of the indentation currently in effect.
    cur_indent: usize,
    /// Indentation widths recorded each time a deeper block is opened.
    indent_stack: Vec<usize>,
}
impl<'a> Lexer<'a> {
    /// Builds a lexer over `input`, positioned on its first character.
    ///
    /// An empty `input` is accepted: the lexer starts at end of input and the
    /// first call to `next_token` returns `Token::Eof`.
    ///
    /// Only stable `str` APIs are used, so the unstable `str_char` feature
    /// (`str::char_at`) is no longer required by this impl.
    fn new(input: &'a str) -> Lexer<'a> {
        let keywords = ["def", "class", "if", "true", "false", "nil"]
            .iter()
            .map(|kw| kw.to_string())
            .collect();
        Lexer {
            input,
            p: 0,
            // `chars().next()` is `None` for an empty string, where indexing
            // the first character would panic.
            c: input.chars().next().unwrap_or(EOF as char),
            keywords,
            cur_indent: 0,
            indent_stack: Vec::new(),
        }
    }

    /// Returns the character that starts at byte offset `self.p`, or `None`
    /// when the cursor is at the end of the input.
    fn char_at_cursor(&self) -> Option<char> {
        self.input[self.p..].chars().next()
    }

    /// Advances the cursor by one character and refreshes `self.c`.
    ///
    /// The cursor moves by the UTF-8 width of the character it leaves, so it
    /// always rests on a character boundary (stepping one byte at a time
    /// would panic on multi-byte characters). At end of input the cursor
    /// stays put and `self.c` becomes the `EOF` sentinel.
    fn consume(&mut self) {
        if let Some(current) = self.char_at_cursor() {
            self.p += current.len_utf8();
        }
        self.c = self.char_at_cursor().unwrap_or(EOF as char);
    }

    /// Consumes the current character if it equals `x`.
    ///
    /// # Panics
    ///
    /// Panics when the current character is not `x`.
    fn matches(&mut self, x: char) {
        if self.c == x {
            self.consume();
        } else {
            panic!("expected {} found {}", x, self.c);
        }
    }

    /// Returns the next token, or `Token::Eof` once the input is exhausted.
    ///
    /// Spaces and tabs between tokens are skipped. A `:` must be directly
    /// followed by a line break and opens an indented block. A character no
    /// rule matches is consumed and returned as `Token::Unknown`, so repeated
    /// calls always make progress.
    ///
    /// # Panics
    ///
    /// Panics on malformed input: a `:` not followed by `\n`, an unclosed
    /// string literal, a number with more than one `.`, or inconsistent
    /// indentation.
    fn next_token(&mut self) -> Token {
        // A literal NUL in the input is indistinguishable from end of input.
        while self.c != (EOF as char) {
            match self.c {
                'A'..='Z' => return self.constant(),
                'a'..='z' => return self.identifier(),
                '0'..='9' => return self.number(),
                '"' => {
                    self.consume();
                    return self.string();
                }
                ':' => {
                    self.consume();
                    self.matches('\n');
                    return self.indent();
                }
                '\n' => {
                    self.consume();
                    return self.newline();
                }
                ' ' | '\t' => self.whitespace(),
                unknown => {
                    // Step past the offending character; otherwise every
                    // later call would report the same token forever.
                    self.consume();
                    return Token::Unknown(unknown.to_string());
                }
            }
        }
        Token::Eof
    }

    /// Consumes a run of alphanumeric characters and underscores and returns
    /// it.
    fn word(&mut self) -> String {
        let mut buffer = String::new();
        while self.c.is_alphanumeric() || self.c == '_' {
            buffer.push(self.c);
            self.consume();
        }
        buffer
    }

    /// Consumes the spaces and tabs at the cursor and returns how many there
    /// were. Each space or tab counts as one unit of indentation.
    fn leading_blanks(&mut self) -> usize {
        let mut count = 0;
        while self.c == ' ' || self.c == '\t' {
            count += 1;
            self.consume();
        }
        count
    }

    /// Lexes a constant: a word that `next_token` saw start with `A`-`Z`.
    fn constant(&mut self) -> Token {
        Token::Constant(self.word())
    }

    /// Lexes a lowercase word, reporting it as a keyword when it appears in
    /// the keyword list and as an identifier otherwise.
    fn identifier(&mut self) -> Token {
        let buffer = self.word();
        if self.keywords.contains(&buffer) {
            Token::Keyword(buffer)
        } else {
            Token::Identifier(buffer)
        }
    }

    /// Lexes a numeric literal made of numeric characters and dots.
    ///
    /// # Panics
    ///
    /// Panics when the literal contains more than one `.`.
    fn number(&mut self) -> Token {
        let mut buffer = String::new();
        let mut dot_count = 0;
        while self.c.is_numeric() || self.c == '.' {
            if self.c == '.' {
                dot_count += 1;
            }
            buffer.push(self.c);
            self.consume();
        }
        if dot_count > 1 {
            panic!("invalid numeric literal: {}", buffer);
        }
        Token::Number(buffer)
    }

    /// Lexes the body of a string literal; the opening quote has already been
    /// consumed by `next_token`. The closing quote is consumed but not stored.
    ///
    /// # Panics
    ///
    /// Panics when the input ends before the closing quote.
    fn string(&mut self) -> Token {
        let mut buffer = String::new();
        while self.c != '"' {
            if self.c == (EOF as char) {
                panic!("unclosed string literal: {}", buffer);
            }
            buffer.push(self.c);
            self.consume();
        }
        self.consume();
        Token::String(buffer)
    }

    /// Handles the indentation of the line that follows `:\n`, which must be
    /// deeper than the current one. Records the new level and returns
    /// `Token::Indent` with its width.
    ///
    /// # Panics
    ///
    /// Panics when the new line is not indented more than the current block.
    fn indent(&mut self) -> Token {
        let indent_size = self.leading_blanks();
        if indent_size <= self.cur_indent {
            println!("indent_size: {}", indent_size);
            println!("cur_indent: {}", self.cur_indent);
            panic!("indentation is expected to be bigger");
        }
        self.cur_indent = indent_size;
        self.indent_stack.push(indent_size);
        Token::Indent(indent_size)
    }

    /// Handles the indentation of a line that follows a plain line break.
    ///
    /// Returns `Token::Newline` when the indentation is unchanged. When the
    /// line is indented less, every open level deeper than the new width is
    /// closed and a single `Token::Dedent` carrying the new width is returned,
    /// even if several levels were closed at once.
    ///
    /// # Panics
    ///
    /// Panics when the indentation increases (only `:` may open a block) or
    /// when the new width does not match any enclosing level.
    fn newline(&mut self) -> Token {
        let indent_size = self.leading_blanks();
        if indent_size > self.cur_indent {
            panic!("indentation is not supposed to increase");
        }
        if indent_size == self.cur_indent {
            return Token::Newline;
        }
        // The top of the stack is the level being left, not the one being
        // returned to: discard every deeper level first, then compare the new
        // width against the level that remains (0 when none is open).
        while self
            .indent_stack
            .last()
            .map_or(false, |&open| open > indent_size)
        {
            self.indent_stack.pop();
        }
        let enclosing = self.indent_stack.last().copied().unwrap_or(0);
        if enclosing != indent_size {
            panic!("mismatched indentation");
        }
        self.cur_indent = indent_size;
        Token::Dedent(indent_size)
    }

    /// Skips a run of whitespace.
    ///
    // NOTE(review): `is_whitespace` is also true for '\n', so a line break
    // that directly follows a space or tab is skipped here and never reaches
    // `newline`. Confirm whether that is intended.
    fn whitespace(&mut self) {
        while self.c.is_whitespace() {
            self.consume();
        }
    }
}
/// Lexes `./src/example.awm` and prints one line per token.
///
/// The whole file is read before lexing: indentation tokens depend on the
/// lines that follow a `:`, so feeding the lexer only the first line cannot
/// work for block-structured input.
///
/// # Panics
///
/// Panics when the file cannot be opened or read, when the lexer rejects the
/// input, or when an unknown token is encountered.
fn main() {
    let file = File::open("./src/example.awm").expect("failed to open file");
    let mut reader = BufReader::new(file);
    let mut contents = String::new();
    reader
        .read_to_string(&mut contents)
        .expect("failed to read file");
    let mut l = Lexer::new(&contents);
    //let mut l = Lexer::new("class Awesome:\n def does_it_work:\n \"yeah!\"");
    loop {
        match l.next_token() {
            Token::Constant(constant) => println!("Constant: {}", constant),
            Token::Identifier(identifier) => println!("Identifier: {}", identifier),
            Token::Keyword(keyword) => println!("Keyword: {}", keyword),
            Token::Number(number) => println!("Number Literal: {}", number),
            Token::String(string) => println!("String Literal: {}", string),
            Token::Indent(size) => println!("Indent: {}", size),
            Token::Dedent(size) => println!("Dedent: {}", size),
            Token::Whitespace => println!("Whitespace"),
            Token::Newline => println!("Newline"),
            Token::Unknown(token) => panic!("Unknown Token: {}", token),
            Token::Eof => break,
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment