Created
February 21, 2015 17:54
-
-
Save jimtla/acbc4ce933dd47514f1d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(collections)] | |
#![feature(box_patterns)] | |
#[cfg(feature = "simple")] | |
pub fn matches(regex : &str, target : &str) -> bool { | |
match (regex.slice_shift_char(), target.slice_shift_char()) { | |
// Both strings are emtpy, so we've got a match. | |
(None, None) => true, | |
// We're looking at _*, match the kleene star. | |
(Some((repeater, reg)), _) | |
if reg.len() > 0 && reg.char_at(0) == '*' => { | |
// a*rest is equivalent to (aa*rest)|(rest). r is aa*rest, and then | |
// we check rest separately | |
let mut r = String::from_str(regex); | |
r.insert(0, repeater); | |
matches(&r[..], target) || matches(®[1..], target) | |
}, | |
// . matches any single character | |
(Some(('.', reg)), Some((_, tar))) => matches(reg, tar), | |
// Anything else mathces only an identical character | |
(Some((c, reg)), Some((tc, tar))) if c == tc => matches(reg, tar), | |
// If nothing matches, we're done. | |
_ => false, | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum Regex { | |
Term(RegexTerm), | |
Or(RegexTerm, Box<Regex>), | |
} | |
impl Regex { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match self { | |
&Term(ref term) => term.matches(target, matches_rest), | |
&Or(ref term, box ref r) => | |
term.matches(target, matches_rest) || r.matches(target, matches_rest), | |
} | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum RegexTerm { | |
Empty, | |
Cons(RegexFactor, Box<RegexTerm>), | |
} | |
impl RegexTerm { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match self { | |
&Empty => matches_rest(target), | |
&Cons(ref factor, box ref term) => { | |
factor.matches(target, &|rest| { | |
term.matches(rest, matches_rest) | |
}) | |
} | |
} | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum RegexFactor { | |
Star(RegexBase), | |
Plus(RegexBase), | |
Once(RegexBase), | |
} | |
impl RegexFactor { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match self { | |
&Star(ref base) => { | |
// b*rest == bb*rest|rest | |
matches_rest(target) || | |
base.matches(target, &|rest| { | |
Star(base.clone()).matches(rest, matches_rest) | |
}) | |
}, | |
&Plus(ref base) => { | |
// b+ == bb* | |
base.matches(target, &|rest| { | |
Star(base.clone()).matches(rest, matches_rest) | |
}) | |
}, | |
&Once(ref base) => base.matches(target, matches_rest), | |
} | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum RegexBase { | |
Paren(Box<Regex>), | |
Char(char), | |
Any(String), | |
Dot, | |
} | |
impl RegexBase { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match (self, target.slice_shift_char()) { | |
(&Paren(box ref regex), _) => | |
regex.matches(target, matches_rest), | |
(&Char(c), Some((tc, rest))) if tc == c => matches_rest(rest), | |
(&Any(ref chars), Some((tc, rest))) | |
if chars.contains_char(tc) => matches_rest(rest), | |
(&Dot, Some((_, rest))) => matches_rest(rest), | |
_ => false | |
} | |
} | |
} | |
use Regex::{Term, Or}; | |
use RegexTerm::{Empty, Cons}; | |
use RegexFactor::{Star, Plus, Once}; | |
use RegexBase::{Char, Dot, Any, Paren}; | |
fn parse_regex(regex : &str) -> (Regex, &str) { | |
let (term, rest) = parse_term(regex); | |
if rest.len() > 0 && rest.char_at(0) == '|' { | |
let (r, rest2) = parse_regex(&rest[0..]); | |
(Or(term, Box::new(r)), rest2) | |
} else if rest.len() > 0 { | |
panic!("Malformed regex {}", rest) | |
} else { | |
(Term(term), rest) | |
} | |
} | |
fn parse_term(term : &str) -> (RegexTerm, &str) { | |
if term.len() == 0 { | |
(Empty, "") | |
} else { | |
let (factor, rest) = parse_factor(term); | |
let (term, rest2) = parse_term(rest); | |
(Cons(factor, Box::new(term)), rest2) | |
} | |
} | |
fn parse_factor(factor : &str) -> (RegexFactor, &str) { | |
let (base, rest) = parse_base(factor); | |
match rest.slice_shift_char() { | |
Some(('*', _)) => (Star(base), &rest[1..]), | |
Some(('+', _)) => (Plus(base), &rest[1..]), | |
_ => (Once(base), rest), | |
} | |
} | |
fn parse_base(factor : &str) -> (RegexBase, &str) { | |
match factor.slice_shift_char() { | |
Some(('.', rest)) => { | |
(Dot, rest) | |
}, | |
Some(('[', rest)) => { | |
let ind = rest.find(']').expect("Mismatched '['"); | |
(Any(String::from_str(&rest[..ind])), &rest[ind+1..]) | |
}, | |
Some(('(', rest)) => { | |
let (x, r) = parse_regex(rest); | |
(Paren(Box::new(x)), r) | |
}, | |
Some((c, rest)) => { | |
(Char(c), rest) | |
}, | |
None => panic!("Malformed regex... parsing empty as base"), | |
} | |
} | |
#[cfg(feature = "parsed")] | |
pub fn matches(regex : &str, target : &str) -> bool { | |
let (regex, rest) = parse_regex(regex); | |
if rest != "" { | |
panic!("Malformed regex {:?}", regex) | |
} | |
regex.matches(target, &(|rest| rest.len() == 0)) | |
} | |
#[test] | |
fn test_matches() { | |
assert!(matches("asdf", "asdf")); | |
assert!(!matches("asdf", "asdfg")); | |
assert!(!matches("asdf", "")); | |
assert!(!matches("asdf", "foo")); | |
assert!(!matches("b", "a")); | |
assert!(matches(".", "f")); | |
assert!(matches(".", "a")); | |
assert!(matches(".", "a")); | |
assert!(matches(".*", "")); | |
assert!(matches(".*", "anything")); | |
assert!(matches("a*", "aaa")); | |
assert!(matches("a*", "aaaaaa")); | |
assert!(matches("a*", "a")); | |
assert!(matches("a*", "")); | |
assert!(matches("b*", "bbbb")); | |
assert!(matches("aab*", "aabbbb")); | |
assert!(matches("aab*", "aa")); | |
assert!(matches(".*@.*", "[email protected]")); | |
assert!(matches(".*@.*", "@")); | |
assert!(matches("a.c", "abc")); | |
assert!(matches("a.c", "a1c")); | |
assert!(!matches("a.c", "ac")); | |
assert!(matches("a*a", "a")); | |
assert!(matches("a*a", "aa")); | |
assert!(matches("a*a", "aaaaaaa")); | |
assert!(!matches("a*a", "")); | |
assert!(matches(".*abc.*", "ababc")); | |
assert!(matches(".*abc.*", "111abc222")); | |
assert!(matches(".*abc.*", "abcab")); | |
} | |
#[test] | |
#[cfg(feature = "parsed")] | |
fn test_extended_features() { | |
assert!(matches("[abc]", "a")); | |
assert!(matches("[abc]", "b")); | |
assert!(matches("[abc]*", "baccab")); | |
assert!(matches("[abc]+", "baccab")); | |
assert!(!matches("[abc]+", "")); | |
assert!(matches("[abc]+xx", "abaxx")); | |
assert!(matches("(abc)+", "abc")); | |
assert!(matches("(abc)+", "abcabc")); | |
assert!(matches("(ab+c)+", "abbbbcabbc")); | |
assert!(matches("abc|d", "abc")); | |
assert!(matches("abc|d", "d")); | |
assert!(matches("(a*|d)e", "de")); | |
assert!(matches("(a*|d)e", "aaae")); | |
assert!(!matches("(a*|d)e", "aaade")); | |
assert!(!matches("(a*|d)e", "aaa")); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment