Skip to content

Instantly share code, notes, and snippets.

@garlicnation
Created March 30, 2020 04:39
Show Gist options
  • Save garlicnation/6d46878abe86c5f43495517653e76428 to your computer and use it in GitHub Desktop.
use std::fs::File;
use std::io::{BufReader, Read};
use std::iter::Peekable;
use std::str::Chars;
use std::string::String;
use std::vec::Vec;
/// A position within the source text, as tracked by the scanner.
#[derive(Debug, PartialEq, Copy, Clone)]
pub struct Location {
    /// Number of characters consumed from the start of the input.
    offset: u64,
    /// 0-based line number; advances on the character following each '\n'.
    line: u64,
    /// Column on the current line (becomes 1 on the first character of a line).
    col: u64,
}
/// The kinds of lexemes produced by the scanner.
#[derive(PartialEq, Debug)]
pub enum Token {
    LeftParen,
    RightParen,
    Plus,
    Minus,
    Star,
    Slash,
    // NOTE(review): no scanning rule for string literals is visible in this
    // file; this variant appears to be reserved for a future rule — confirm.
    StringLiteral(String),
    /// A run of ASCII digits parsed as an `i64`.
    IntegerLiteral(i64),
    /// Sentinel appended once the input is exhausted.
    EOF,
}
/// A token together with its source span and the exact text it was built from.
#[derive(Debug, PartialEq)]
pub struct TokenValue {
    /// Scanner position just before the token's first character.
    start: Location,
    /// Scanner position just after the token's last character.
    end: Location,
    /// The token kind (and payload, if any).
    value: Token,
    /// The raw characters accumulated while scanning this token.
    text: String,
}
/// Descriptive information for a single tokenization failure.
#[derive(Debug)]
pub struct TokenizeErrorDetails {
    /// Human-readable description of the problem.
    msg: String,
    /// Name of the input the error occurred in.
    filename: String,
    /// Scanner position at which the error was recorded.
    location: Location,
}
/// Top-level error type returned by `scan`.
#[derive(Debug)]
pub enum TokenizeError {
    /// The underlying reader failed before the input could be tokenized.
    IO(std::io::Error),
    /// The input was read, but contained text that could not be tokenized.
    ErrorTokenizing(TokenizeErrorDetails),
}
/// Convenience constructor for a `TokenizeError::ErrorTokenizing` value.
///
/// `msg` describes the problem, `filename` names the input being scanned,
/// and `location` is where the scanner was when the error was detected.
fn error_tokenizing(msg: &str, filename: &str, location: Location) -> TokenizeError {
    TokenizeError::ErrorTokenizing(TokenizeErrorDetails {
        msg: msg.to_owned(),
        filename: filename.to_owned(),
        // Field-init shorthand (was the redundant `location: location`).
        location,
    })
}
/// Stateful tokenizer over a borrowed source string.
struct Scanner<'a> {
    /// Tokens produced so far.
    tokens: Vec<TokenValue>,
    /// Remaining input, with one character of lookahead.
    contents: Peekable<Chars<'a>>,
    /// Name of the input, used when reporting errors.
    filename: String,
    /// Characters consumed so far.
    offset: u64,
    /// 0-based current line number.
    line: u64,
    /// Column on the current line.
    col: u64,
    /// Set when a '\n' is consumed; the line counter advances on the next character.
    newline: bool,
    /// Start of the token currently being scanned.
    start: Location,
    /// Raw text of the token currently being scanned.
    current_token: String,
    /// True once any error has been recorded.
    has_error: bool,
    /// Details for each recorded error.
    errors: Vec<TokenizeErrorDetails>,
}
impl<'a> Scanner<'a> {
    /// Creates a scanner over `contents`, attributing errors to `filename`.
    ///
    /// (Takes `&str` rather than `&String`; existing `&String` call sites
    /// continue to work via deref coercion.)
    pub fn new(contents: &'a str, filename: &str) -> Scanner<'a> {
        Scanner {
            tokens: Vec::new(),
            contents: contents.chars().peekable(),
            col: 0,
            line: 0,
            offset: 0,
            filename: filename.to_owned(),
            newline: false,
            start: Location {
                col: 0,
                line: 0,
                offset: 0,
            },
            current_token: String::new(),
            has_error: false,
            errors: Vec::new(),
        }
    }

    /// Snapshot of the scanner's current position.
    pub fn loc(&self) -> Location {
        Location {
            offset: self.offset,
            line: self.line,
            col: self.col,
        }
    }

    /// Consumes and returns the next character, updating position tracking and
    /// appending the character to the in-progress token text.
    ///
    /// Line/column bookkeeping is deferred: consuming '\n' only sets a flag,
    /// and the line counter advances when the *next* character is consumed, so
    /// the newline itself is still reported on the line it terminates.
    pub fn advance(&mut self) -> Option<char> {
        let next = self.contents.next();
        // Fix: only update position counters when a character was actually
        // consumed. Previously offset/col advanced even at end of input,
        // skewing end-of-input locations by one per call.
        if let Some(c) = next {
            self.offset += 1;
            if self.newline {
                self.line += 1;
                self.col = 0;
                self.newline = false;
            }
            self.col += 1;
            if c == '\n' {
                self.newline = true;
            }
            self.current_token.push(c);
        }
        next
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.contents.peek().copied()
    }

    /// Finalizes the in-progress token: records it with its span and raw text,
    /// then resets the token-start position and text buffer.
    pub fn push_token(&mut self, t: Token) {
        // `mem::take` hands the accumulated text to the token and leaves an
        // empty (reusable) String behind, avoiding the previous copy + clear.
        let text = std::mem::take(&mut self.current_token);
        self.tokens.push(TokenValue {
            start: self.start,
            end: self.loc(),
            value: t,
            text,
        });
        self.start = self.loc();
    }

    /// Discards the in-progress token text (used for whitespace and errors)
    /// and restarts token tracking at the current position.
    pub fn reset_token(&mut self) {
        self.start = self.loc();
        self.current_token.clear();
    }

    /// Consumes the remaining digits of an integer literal; the first digit
    /// has already been consumed by the caller. Returns `None` (after
    /// recording an error) if the digits do not parse — e.g. on i64 overflow.
    pub fn scan_number(&mut self) -> Option<Token> {
        while let Some(c) = self.peek() {
            if !c.is_ascii_digit() {
                break;
            }
            self.advance();
        }
        match self.current_token.parse() {
            Ok(n) => Some(Token::IntegerLiteral(n)),
            Err(e) => {
                self.add_error(&e.to_string());
                None
            }
        }
    }

    /// Records a tokenization error at the current position and marks the
    /// scanner as failed.
    pub fn add_error(&mut self, msg: &str) {
        self.has_error = true;
        self.errors.push(TokenizeErrorDetails {
            msg: msg.to_owned(),
            filename: self.filename.to_owned(),
            location: self.loc(),
        });
    }
}
/// Tokenizes the entire contents of `source`, attributing errors to `file`.
///
/// On success returns the token list, terminated by `Token::EOF`. Returns
/// `TokenizeError::IO` if the reader fails, or `TokenizeError::ErrorTokenizing`
/// if any unexpected character or malformed literal was seen (individual
/// problems are also reported on stderr as they are found).
pub fn scan<R: std::io::Read>(
    source: &mut BufReader<R>,
    file: &str,
) -> Result<Vec<TokenValue>, TokenizeError> {
    let mut buffer = String::new();
    // Propagate read failures with `?` instead of the match-and-ignore dance.
    source
        .read_to_string(&mut buffer)
        .map_err(TokenizeError::IO)?;
    let mut scanner = Scanner::new(&buffer, file);
    while let Some(c) = scanner.advance() {
        // Qualified paths throughout: the original `use lexer::Token::*;`
        // assumed an enclosing `lexer` module and does not resolve standalone.
        let token = match c {
            '+' => Some(Token::Plus),
            '-' => Some(Token::Minus),
            '*' => Some(Token::Star),
            '/' => Some(Token::Slash),
            '(' => Some(Token::LeftParen),
            ')' => Some(Token::RightParen),
            // Whitespace separates tokens but produces none. Newlines were
            // previously rejected as "unexpected" even though the scanner
            // tracks line numbers; treat them as whitespace too.
            ' ' | '\t' | '\r' | '\n' => None,
            '0'..='9' => scanner.scan_number(),
            _ => {
                eprintln!("unexpected character '{}' at {:?}", c, scanner.loc());
                scanner.has_error = true;
                None
            }
        };
        match token {
            Some(t) => scanner.push_token(t),
            None => scanner.reset_token(),
        }
    }
    if scanner.has_error {
        return Err(error_tokenizing(
            "unable to tokenize, check logs",
            file,
            scanner.loc(),
        ));
    }
    scanner.push_token(Token::EOF);
    Ok(scanner.tokens)
}
/// End-to-end check of `scan`: the input exercises every single-character
/// token plus multi-digit and single-digit integer literals.
#[test]
fn test_scanner() {
    let mut input = BufReader::new("+ 376 4 5 (+ 2 3) / +****".as_bytes());
    let tokens = scan(&mut input, "filename.txt")
        .expect("tokenizing the sample input should succeed");
    // Assert the exact token sequence instead of merely checking `is_ok`.
    let kinds: Vec<&Token> = tokens.iter().map(|t| &t.value).collect();
    assert_eq!(
        kinds,
        vec![
            &Token::Plus,
            &Token::IntegerLiteral(376),
            &Token::IntegerLiteral(4),
            &Token::IntegerLiteral(5),
            &Token::LeftParen,
            &Token::Plus,
            &Token::IntegerLiteral(2),
            &Token::IntegerLiteral(3),
            &Token::RightParen,
            &Token::Slash,
            &Token::Plus,
            &Token::Star,
            &Token::Star,
            &Token::Star,
            &Token::Star,
            &Token::EOF,
        ]
    );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment