use std::fs::File;
use std::io::{BufReader, Read};
use std::iter::Peekable;
use std::str::Chars;
#[derive(Debug, PartialEq, Copy, Clone)]
pub struct Location {
    offset: u64,
    line: u64,
    col: u64,
}
#[derive(PartialEq, Debug)]
pub enum Token {
    LeftParen,
    RightParen,
    Plus,
    Minus,
    Star,
    Slash,
    StringLiteral(String),
    IntegerLiteral(i64),
    EOF,
}
#[derive(Debug, PartialEq)]
pub struct TokenValue {
    start: Location,
    end: Location,
    value: Token,
    text: String,
}

#[derive(Debug)]
pub struct TokenizeErrorDetails {
    msg: String,
    filename: String,
    location: Location,
}

#[derive(Debug)]
pub enum TokenizeError {
    IO(std::io::Error),
    ErrorTokenizing(TokenizeErrorDetails),
}
fn error_tokenizing(msg: &str, filename: &str, location: Location) -> TokenizeError {
    TokenizeError::ErrorTokenizing(TokenizeErrorDetails {
        msg: msg.to_owned(),
        filename: filename.to_owned(),
        location,
    })
}
struct Scanner<'a> {
    tokens: Vec<TokenValue>,
    contents: Peekable<Chars<'a>>,
    filename: String,
    offset: u64,
    line: u64,
    col: u64,
    // Set when the previous character was '\n'; the next advance() bumps `line`.
    newline: bool,
    // Start of the current token
    start: Location,
    // Current token contents
    current_token: String,
    has_error: bool,
    errors: Vec<TokenizeErrorDetails>,
}
impl<'a> Scanner<'a> {
    pub fn new(contents: &'a str, filename: &str) -> Scanner<'a> {
        Scanner {
            tokens: Vec::new(),
            contents: contents.chars().peekable(),
            col: 0,
            line: 0,
            offset: 0,
            filename: filename.to_owned(),
            newline: false,
            start: Location {
                col: 0,
                line: 0,
                offset: 0,
            },
            current_token: String::new(),
            has_error: false,
            errors: Vec::new(),
        }
    }
    pub fn loc(&self) -> Location {
        Location {
            offset: self.offset,
            line: self.line,
            col: self.col,
        }
    }
    // Consume the next character, updating offset/line/col bookkeeping and
    // appending it to the current token text. Position counters only move
    // when a character was actually consumed, so loc() stays accurate at EOF.
    pub fn advance(&mut self) -> Option<char> {
        let next = self.contents.next();
        if let Some(c) = next {
            self.offset += 1;
            if self.newline {
                self.line += 1;
                self.col = 0;
                self.newline = false;
            }
            self.col += 1;
            if c == '\n' {
                self.newline = true;
            }
            self.current_token.push(c);
        }
        next
    }
    pub fn peek(&mut self) -> Option<char> {
        // Copy out of the peeked reference
        self.contents.peek().copied()
    }
    pub fn push_token(&mut self, t: Token) {
        self.tokens.push(TokenValue {
            start: self.start,
            end: self.loc(),
            value: t,
            text: self.current_token.to_owned(),
        });
        self.start = self.loc();
        self.current_token.clear();
    }

    pub fn reset_token(&mut self) {
        self.start = self.loc();
        self.current_token.clear();
    }
    // Consume digits with maximal munch, then parse the accumulated text.
    // The first digit has already been consumed by the caller.
    pub fn scan_number(&mut self) -> Option<Token> {
        while let Some(c) = self.peek() {
            if !c.is_ascii_digit() {
                break;
            }
            self.advance();
        }
        match self.current_token.parse() {
            Ok(n) => Some(Token::IntegerLiteral(n)),
            Err(e) => {
                self.add_error(&format!("{}", e));
                None
            }
        }
    }
    pub fn add_error(&mut self, msg: &str) {
        self.has_error = true;
        self.errors.push(TokenizeErrorDetails {
            msg: msg.to_owned(),
            filename: self.filename.to_owned(),
            location: self.loc(),
        });
    }
}
pub fn scan<R: Read>(
    source: &mut BufReader<R>,
    file: &str,
) -> Result<Vec<TokenValue>, TokenizeError> {
    let mut buffer = String::new();
    if let Err(e) = source.read_to_string(&mut buffer) {
        return Err(TokenizeError::IO(e));
    }
    let mut scanner = Scanner::new(&buffer, file);
    loop {
        let c = match scanner.advance() {
            None => break,
            Some(c) => c,
        };
        use Token::*;
        let token = match c {
            '+' => Some(Plus),
            '-' => Some(Minus),
            '*' => Some(Star),
            '/' => Some(Slash),
            '(' => Some(LeftParen),
            ')' => Some(RightParen),
            // Skip whitespace; advance() already tracks newlines for line/col.
            ' ' | '\t' | '\r' | '\n' => None,
            '0'..='9' => scanner.scan_number(),
            _ => {
                eprintln!("unexpected character '{}' at {:?}", c, scanner.loc());
                scanner.has_error = true;
                None
            }
        };
        match token {
            Some(t) => scanner.push_token(t),
            None => scanner.reset_token(),
        }
    }
    if scanner.has_error {
        return Err(error_tokenizing(
            "unable to tokenize, check logs",
            file,
            scanner.loc(),
        ));
    }
    scanner.push_token(Token::EOF);
    Ok(scanner.tokens)
}
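
// A minimal usage sketch (not part of the original gist): it shows how one
// might feed a real file to `scan` via `File::open` and `BufReader::new`.
// The `scan_file` helper and its error mapping are illustrative assumptions.
#[allow(dead_code)]
fn scan_file(path: &str) -> Result<Vec<TokenValue>, TokenizeError> {
    let file = File::open(path).map_err(TokenizeError::IO)?;
    let mut reader = BufReader::new(file);
    scan(&mut reader, path)
}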
#[test]
fn test_scanner() {
    let mut input = BufReader::new("+ 376 4 5 (+ 2 3) / +****".as_bytes());
    let scanned = scan(&mut input, "filename.txt");
    assert!(scanned.is_ok(), "expected scan to succeed: {:?}", scanned);
    for token in scanned.unwrap() {
        println!("{:?}", token);
    }
}
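
// A small additional check (not in the original gist), sketching the
// maximal-munch behavior of scan_number: consecutive digits become a
// single IntegerLiteral token rather than one token per digit.
#[test]
fn test_scan_number_maximal_munch() {
    let mut input = BufReader::new("376".as_bytes());
    let tokens = scan(&mut input, "test.txt").unwrap();
    assert_eq!(tokens[0].value, Token::IntegerLiteral(376));
    assert_eq!(tokens[0].text, "376");
    // The final token is always EOF.
    assert_eq!(tokens[1].value, Token::EOF);
}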