DarinM223 · May 25, 2017 09:39
diff --git a/precedence.rs b/precedence.rs
 #[macro_use]
 extern crate lazy_static;
 extern crate regex;

 use regex::{CaptureMatches, Regex};
 use std::collections::HashMap;
 use std::num::ParseIntError;
 use std::result;
 use std::str::FromStr;

 #[derive(Debug)]
 pub enum TokenErr {
    InvalidToken,
    ParseIntError(ParseIntError),
 }

 impl From<ParseIntError> for TokenErr {
    fn from(err: ParseIntError) -> TokenErr {
        TokenErr::ParseIntError(err)
    }
 }

 #[derive(Debug)]
 pub enum ParseErr {
    UnexpectedEnd,
    ExpectedValue,
    UnmatchedParen,
    TokenErr(TokenErr),
 }

 impl From<TokenErr> for ParseErr {
    fn from(err: TokenErr) -> ParseErr {
        ParseErr::TokenErr(err)
    }
 }

 pub type TokenResult<T> = result::Result<T, TokenErr>;
 pub type ParseResult<T> = result::Result<T, ParseErr>;

 /// Determines whether similar precedence operators
 /// are grouped from left to right or right to left.
 ///
 /// Left association example:
 /// 1 + 2 + 3 => (1 + 2) + 3
 ///
 /// Right association example:
 /// 2 ^ 3 ^ 4 => 2 ^ (3 ^ 4)
 #[derive(Clone, Copy, PartialEq, Eq)]
 enum Assoc {
    Left,
    Right,
 }

 /// A parsable operator. Right now restricted
 /// to basic math operations and exponent.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub enum Op {
    Plus,
    Minus,
    Times,
    Divide,
    Exponent,
 }

 impl Op {
    pub fn from_str<'a>(s: &'a str) -> TokenResult<Op> {
        match s {
            "+" => Ok(Op::Plus),
            "-" => Ok(Op::Minus),
            "*" => Ok(Op::Times),
            "/" => Ok(Op::Divide),
            "^" => Ok(Op::Exponent),
            _ => Err(TokenErr::InvalidToken),
        }
    }
 }

 /// A token returned from the tokenizer.
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub enum Token {
    BinOp(Op),
    Num(i32),
    LeftParen,
    RightParen,
 }

 /// An expression returned from the parser.
 /// Because the parser is only for simple number operations,
 /// an expression can only be an integer number or a binary operation.
 #[derive(Debug, PartialEq)]
 pub enum Expr {
    Value(i32),
    BinOp(Op, Box<Expr>, Box<Expr>),
 }

 struct OpInfo {
    prec: i32,
    assoc: Assoc,
 }

 /// Global "constants".
 lazy_static! {
    /// Regex used by the tokenizer to parse numbers and operators.
    static ref REGEX: Regex = Regex::new(r###"\s*(?:(\d+)|(.))"###).unwrap();
    /// Table of operator to the operator's precedence and association.
    static ref OP_INFO: HashMap<Op, OpInfo> = {
        let mut map = HashMap::new();
        map.insert(Op::Plus,  OpInfo { prec: 1, assoc: Assoc::Left });
        map.insert(Op::Minus, OpInfo { prec: 1, assoc: Assoc::Left });
        map.insert(Op::Times, OpInfo { prec: 2, assoc: Assoc::Left });
        map.insert(Op::Divide, OpInfo { prec: 2, assoc: Assoc::Left });
        map.insert(Op::Exponent, OpInfo { prec: 3, assoc: Assoc::Right });

        map
    };
 }

 /// A cursor through valid tokens in a string that moves from left to right.
 /// Example: 1 + (2 * 3)
 ///
 /// Gets split into tokens and the current position moves to the right
 /// starting from an empty invalid position.
 ///
 ///     [1] [+] [(] [2] [*] [3] [)]
 ///  ^
 ///  |
 /// curr -------------------------->
 pub struct Tokenizer<'a> {
    curr_token: Option<Token>,
    tokens: CaptureMatches<'static, 'a>,
 }

 impl<'a> Tokenizer<'a> {
    pub fn new(text: &'a str) -> Tokenizer<'a> {
        Tokenizer {
            curr_token: None,
            tokens: REGEX.captures_iter(text),
        }
    }

    /// Returns the token the tokenizer is currently on.
    pub fn curr_token(&self) -> Option<Token> {
        self.curr_token
    }

    /// Moves the position to the next token.
    pub fn get_next_token(&mut self) -> TokenResult<()> {
        if let Some(token) = self.tokens.next() {
            let token = match (token.get(1), token.get(2)) {
                (Some(num), _) => Token::Num(i32::from_str(num.as_str())?),
                (_, Some(op)) if op.as_str() == "(" => Token::LeftParen,
                (_, Some(op)) if op.as_str() == ")" => Token::RightParen,
                (_, Some(op)) if Op::from_str(op.as_str()).is_ok() => {
                    let op = Op::from_str(op.as_str())?;
                    Token::BinOp(op)
                }
                _ => return Err(TokenErr::InvalidToken),
            };

            self.curr_token = Some(token);
        }

        Ok(())
    }
 }


 /// A precedence climbing parser for
 /// parsing simple mathmatical expressions.
 pub struct Parser<'a> {
    tokenizer: Tokenizer<'a>,
 }

 impl<'a> Parser<'a> {
    pub fn new(text: &'a str) -> ParseResult<Parser<'a>> {
        let mut tokenizer = Tokenizer::new(text);
        tokenizer.get_next_token()?;
        Ok(Parser { tokenizer: tokenizer })
    }

    /// Parses a value which can be on either side of an operator expression,
    /// and moves the tokenizer position to the token after the value.
    ///
    /// Examples:
    ///
    /// 1 * 2 + 3   => 1 * 2 + 3   , returns Expr::value(1)
    /// ^                ^
    /// |                |
    /// token pos       token pos
    ///
    /// 1 + (2 * 3) + 5   =>   1 + (2 * 3) + 5     , returns Expr::BinOp(
    ///     ^                              ^                     Op::Times,
    ///     |                              |                     Box::new(Expr::Value(2)),
    ///   token pos                      token pos               Box::new(Expr::Value(3))
    ///                                                      )
    pub fn parse_value(&mut self) -> ParseResult<Expr> {
        match self.tokenizer.curr_token() {
            Some(Token::LeftParen) => {
                self.tokenizer.get_next_token()?;

                let value_expr = self.parse_expr(1)?;
                if self.tokenizer.curr_token() != Some(Token::RightParen) {
                    return Err(ParseErr::UnmatchedParen);
                }

                self.tokenizer.get_next_token()?;
                Ok(value_expr)
            }
            Some(Token::Num(num)) => {
                self.tokenizer.get_next_token()?;
                Ok(Expr::Value(num))
            }
            Some(_) => Err(ParseErr::ExpectedValue),
            None => Err(ParseErr::UnexpectedEnd),
        }
    }

    /// Parses an binary operator expression.
    /// 
    /// One reason why parsing operators with precedence looks tricky is
    /// because at a certain position you can't know if there's an operator
    /// further ahead with a higher precedence.
    /// 
    /// For example, parsing 1 + 2 * 3 + 5 while being at the '+' position:
    /// ```
    /// 1 + 2 * 3 + 5 <--- how to know if theres an operator further ahead
    ///   ^                of the '+' that has higher precedence?
    /// ```
    /// 
    /// Recursion is the natural solution to this problem. At the '+' you don't
    /// care about the right hand side, you expect the recursive call to properly
    /// handle the precedence.
    /// 
    /// ```
    /// 1 + (recursive function yielding an expression) ......(extra stuff)
    ///   ^
    /// ```
    /// 
    /// In this case the recursive function should yield (2 * 3) because (2 * 3) has higher
    /// precedence and '+' is left associative so 1 has to be added to (2 * 3) before 5 is added.
    /// 
    /// The new expression 1 + (2 * 3) has proper precedence but there is still the + 5 left.
    /// So (1 + (2 * 3)) is made the new left value and '+' the new operator and it repeats:
    /// 
    /// ```
    /// (1 + (2 * 3)) + (recursive function yielding an expression) ....(extra stuff)
    /// ```
    /// 
    /// This repeats until there is no more valid binary operators.
    /// 
    /// This function needs to take the minimum precedence so that the recursive calls can
    /// know when to return early. The minimum precedence starts at 1 in the base call and
    /// increases in the recursive calls based on the operator encountered.
    pub fn parse_expr(&mut self, min_prec: i32) -> ParseResult<Expr> {
        let mut left_value = self.parse_value()?;

        // Continually link the left hand side to the recursively computed
        // right hand side until there is a precedence conflict or an invalid token.
        loop {
            let token = self.tokenizer.curr_token();
            let op = match token {
                Some(Token::BinOp(ref op)) if OP_INFO[op].prec < min_prec => break,
                Some(Token::BinOp(op)) => op,
                _ => break,
            };

            let OpInfo { prec, assoc } = OP_INFO[&op];
            // Left associative operators will pass in precedence + 1 in order
            // to prevent same precedence operators on the right from beating them.
            // Example: Precedence for '+' is 1. 1 + 1 = 2 which only allows '*', '/', and '^'
            // to beat it since they are >= 2.
            let next_min_prec = if assoc == Assoc::Left { prec + 1 } else { prec };

            // Recursively compute the right hand expression and finish the binop expression.
            self.tokenizer.get_next_token()?;
            let right_value = self.parse_expr(next_min_prec)?;
            left_value = Expr::BinOp(op, Box::new(left_value), Box::new(right_value));
        }

        Ok(left_value)
    }
 }
	#[macro_use]
	extern crate lazy_static;
	extern crate regex;

	use regex::{CaptureMatches, Regex};
	use std::collections::HashMap;
	use std::num::ParseIntError;
	use std::result;
	use std::str::FromStr;

	#[derive(Debug)]
	pub enum TokenErr {
	InvalidToken,
	ParseIntError(ParseIntError),
	}

	impl From<ParseIntError> for TokenErr {
	fn from(err: ParseIntError) -> TokenErr {
	TokenErr::ParseIntError(err)
	}
	}

	#[derive(Debug)]
	pub enum ParseErr {
	UnexpectedEnd,
	ExpectedValue,
	UnmatchedParen,
	TokenErr(TokenErr),
	}

	impl From<TokenErr> for ParseErr {
	fn from(err: TokenErr) -> ParseErr {
	ParseErr::TokenErr(err)
	}
	}

	pub type TokenResult<T> = result::Result<T, TokenErr>;
	pub type ParseResult<T> = result::Result<T, ParseErr>;

	/// Determines whether similar precedence operators
	/// are grouped from left to right or right to left.
	///
	/// Left association example:
	/// 1 + 2 + 3 => (1 + 2) + 3
	///
	/// Right association example:
	/// 2 ^ 3 ^ 4 => 2 ^ (3 ^ 4)
	#[derive(Clone, Copy, PartialEq, Eq)]
	enum Assoc {
	Left,
	Right,
	}

	/// A parsable operator. Right now restricted
	/// to basic math operations and exponent.
	#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
	pub enum Op {
	Plus,
	Minus,
	Times,
	Divide,
	Exponent,
	}

	impl Op {
	pub fn from_str<'a>(s: &'a str) -> TokenResult<Op> {
	match s {
	"+" => Ok(Op::Plus),
	"-" => Ok(Op::Minus),
	"*" => Ok(Op::Times),
	"/" => Ok(Op::Divide),
	"^" => Ok(Op::Exponent),
	_ => Err(TokenErr::InvalidToken),
	}
	}
	}

	/// A token returned from the tokenizer.
	#[derive(Clone, Copy, Debug, PartialEq)]
	pub enum Token {
	BinOp(Op),
	Num(i32),
	LeftParen,
	RightParen,
	}

	/// An expression returned from the parser.
	/// Because the parser is only for simple number operations,
	/// an expression can only be an integer number or a binary operation.
	#[derive(Debug, PartialEq)]
	pub enum Expr {
	Value(i32),
	BinOp(Op, Box<Expr>, Box<Expr>),
	}

	struct OpInfo {
	prec: i32,
	assoc: Assoc,
	}

	/// Global "constants".
	lazy_static! {
	/// Regex used by the tokenizer to parse numbers and operators.
	static ref REGEX: Regex = Regex::new(r###"\s*(?:(\d+)\|(.))"###).unwrap();
	/// Table of operator to the operator's precedence and association.
	static ref OP_INFO: HashMap<Op, OpInfo> = {
	let mut map = HashMap::new();
	map.insert(Op::Plus, OpInfo { prec: 1, assoc: Assoc::Left });
	map.insert(Op::Minus, OpInfo { prec: 1, assoc: Assoc::Left });
	map.insert(Op::Times, OpInfo { prec: 2, assoc: Assoc::Left });
	map.insert(Op::Divide, OpInfo { prec: 2, assoc: Assoc::Left });
	map.insert(Op::Exponent, OpInfo { prec: 3, assoc: Assoc::Right });

	map
	};
	}

	/// A cursor through valid tokens in a string that moves from left to right.
	/// Example: 1 + (2 * 3)
	///
	/// Gets split into tokens and the current position moves to the right
	/// starting from an empty invalid position.
	///
	/// [1] [+] [(] [2] [*] [3] [)]
	/// ^
	/// \|
	/// curr -------------------------->
	pub struct Tokenizer<'a> {
	curr_token: Option<Token>,
	tokens: CaptureMatches<'static, 'a>,
	}

	impl<'a> Tokenizer<'a> {
	pub fn new(text: &'a str) -> Tokenizer<'a> {
	Tokenizer {
	curr_token: None,
	tokens: REGEX.captures_iter(text),
	}
	}

	/// Returns the token the tokenizer is currently on.
	pub fn curr_token(&self) -> Option<Token> {
	self.curr_token
	}

	/// Moves the position to the next token.
	pub fn get_next_token(&mut self) -> TokenResult<()> {
	if let Some(token) = self.tokens.next() {
	let token = match (token.get(1), token.get(2)) {
	(Some(num), _) => Token::Num(i32::from_str(num.as_str())?),
	(_, Some(op)) if op.as_str() == "(" => Token::LeftParen,
	(_, Some(op)) if op.as_str() == ")" => Token::RightParen,
	(_, Some(op)) if Op::from_str(op.as_str()).is_ok() => {
	let op = Op::from_str(op.as_str())?;
	Token::BinOp(op)
	}
	_ => return Err(TokenErr::InvalidToken),
	};

	self.curr_token = Some(token);
	}

	Ok(())
	}
	}


	/// A precedence climbing parser for
	/// parsing simple mathmatical expressions.
	pub struct Parser<'a> {
	tokenizer: Tokenizer<'a>,
	}

	impl<'a> Parser<'a> {
	pub fn new(text: &'a str) -> ParseResult<Parser<'a>> {
	let mut tokenizer = Tokenizer::new(text);
	tokenizer.get_next_token()?;
	Ok(Parser { tokenizer: tokenizer })
	}

	/// Parses a value which can be on either side of an operator expression,
	/// and moves the tokenizer position to the token after the value.
	///
	/// Examples:
	///
	/// 1 * 2 + 3 => 1 * 2 + 3 , returns Expr::value(1)
	/// ^ ^
	/// \| \|
	/// token pos token pos
	///
	/// 1 + (2 * 3) + 5 => 1 + (2 * 3) + 5 , returns Expr::BinOp(
	/// ^ ^ Op::Times,
	/// \| \| Box::new(Expr::Value(2)),
	/// token pos token pos Box::new(Expr::Value(3))
	/// )
	pub fn parse_value(&mut self) -> ParseResult<Expr> {
	match self.tokenizer.curr_token() {
	Some(Token::LeftParen) => {
	self.tokenizer.get_next_token()?;

	let value_expr = self.parse_expr(1)?;
	if self.tokenizer.curr_token() != Some(Token::RightParen) {
	return Err(ParseErr::UnmatchedParen);
	}

	self.tokenizer.get_next_token()?;
	Ok(value_expr)
	}
	Some(Token::Num(num)) => {
	self.tokenizer.get_next_token()?;
	Ok(Expr::Value(num))
	}
	Some(_) => Err(ParseErr::ExpectedValue),
	None => Err(ParseErr::UnexpectedEnd),
	}
	}

	/// Parses an binary operator expression.
	///
	/// One reason why parsing operators with precedence looks tricky is
	/// because at a certain position you can't know if there's an operator
	/// further ahead with a higher precedence.
	///
	/// For example, parsing 1 + 2 * 3 + 5 while being at the '+' position:
	/// ```
	/// 1 + 2 * 3 + 5 <--- how to know if theres an operator further ahead
	/// ^ of the '+' that has higher precedence?
	/// ```
	///
	/// Recursion is the natural solution to this problem. At the '+' you don't
	/// care about the right hand side, you expect the recursive call to properly
	/// handle the precedence.
	///
	/// ```
	/// 1 + (recursive function yielding an expression) ......(extra stuff)
	/// ^
	/// ```
	///
	/// In this case the recursive function should yield (2 * 3) because (2 * 3) has higher
	/// precedence and '+' is left associative so 1 has to be added to (2 * 3) before 5 is added.
	///
	/// The new expression 1 + (2 * 3) has proper precedence but there is still the + 5 left.
	/// So (1 + (2 * 3)) is made the new left value and '+' the new operator and it repeats:
	///
	/// ```
	/// (1 + (2 * 3)) + (recursive function yielding an expression) ....(extra stuff)
	/// ```
	///
	/// This repeats until there is no more valid binary operators.
	///
	/// This function needs to take the minimum precedence so that the recursive calls can
	/// know when to return early. The minimum precedence starts at 1 in the base call and
	/// increases in the recursive calls based on the operator encountered.
	pub fn parse_expr(&mut self, min_prec: i32) -> ParseResult<Expr> {
	let mut left_value = self.parse_value()?;

	// Continually link the left hand side to the recursively computed
	// right hand side until there is a precedence conflict or an invalid token.
	loop {
	let token = self.tokenizer.curr_token();
	let op = match token {
	Some(Token::BinOp(ref op)) if OP_INFO[op].prec < min_prec => break,
	Some(Token::BinOp(op)) => op,
	_ => break,
	};

	let OpInfo { prec, assoc } = OP_INFO[&op];
	// Left associative operators will pass in precedence + 1 in order
	// to prevent same precedence operators on the right from beating them.
	// Example: Precedence for '+' is 1. 1 + 1 = 2 which only allows '*', '/', and '^'
	// to beat it since they are >= 2.
	let next_min_prec = if assoc == Assoc::Left { prec + 1 } else { prec };

	// Recursively compute the right hand expression and finish the binop expression.
	self.tokenizer.get_next_token()?;
	let right_value = self.parse_expr(next_min_prec)?;
	left_value = Expr::BinOp(op, Box::new(left_value), Box::new(right_value));
	}

	Ok(left_value)
	}
	}