Skip to content

Instantly share code, notes, and snippets.

@xbeta
Last active August 29, 2015 13:57
Show Gist options
  • Save xbeta/9511420 to your computer and use it in GitHub Desktop.
Save xbeta/9511420 to your computer and use it in GitHub Desktop.
Various simple regex parsers
-module(regexp).
-export([run/2]).
-include_lib("eunit/include/eunit.hrl").
-type regexp() :: emptych
| {char, integer()}
| {seq, [regexp()]}
| {star, regexp()}.
run(S, Regexp) ->
case match(S, Regexp, 0) of
{match, _, Pos} -> {match, [{0, Pos}]};
nomatch -> nomatch
end.
match(S, empty, Pos) ->
{match, S, Pos};
match([C|Cs], {char, C}, Pos) ->
{match, Cs, Pos + 1};
match(_, {char, _}, _) ->
nomatch;
match(S, {seq, []}, Pos) ->
{match, S, Pos};
match(S, {seq, [Regexp|Regexps]}, Pos) ->
case match(S, Regexp, Pos) of
{match, S2, Pos2} -> match(S2, {seq, Regexps}, Pos2);
nomatch -> nomatch
end;
match(_, {seq, _}, _) ->
nomatch;
match(S, {star, Regexp}, Pos) ->
case match(S, Regexp, Pos) of
{match, S2, Pos2} -> match_star(S2, Regexp, Pos2);
nomatch -> nomatch
end.
match_star(S, {seq, Regexp}, Pos) ->
case match(S, {seq, Regexp}, Pos) of
{match, S, Pos} -> {match, S, Pos};
nomatch -> {match, S, Pos};
{match, S2, Pos2} -> match_star(S2, {seq, Regexp}, Pos2)
end;
match_star(S, {char, C}, Pos) ->
case match(S, {char, C}, Pos) of
{match, S, Pos} -> {match, S, Pos};
nomatch -> {match, S, Pos};
{match, S2, Pos2} -> match_star(S2, {char, C}, Pos2)
end.
char_test() ->
{match, [{0, 1}]} = run("a", {char, $a}),
nomatch = run("b", {char, $a}).
seq_test() ->
{match, [{0, 2}]} = run("ab", {seq, [{char, $a}, {char, $b}]}).
star_test() ->
{match, [{0, 2}]} = run("ab", {star, {seq, [{char, $a}, {char, $b}]}}).
{-
http://jcreigh.blogspot.com/2006/12/simple-regex-engine-in-haskell.html
-}
import Text.ParserCombinators.Parsec
data Match = MkMatch String String deriving (Show, Eq)
type Matcher = Match -> [Match]
matchOne :: (Char -> Bool) -> Matcher
matchOne _ (MkMatch _ "") = []
matchOne f (MkMatch xs (y:ys))
| f y = [MkMatch (xs ++ [y]) ys]
| otherwise = []
rechar :: Char -> Matcher
rechar c = matchOne (==c)
dot :: Matcher
dot = matchOne (const True)
badstar :: Matcher -> Matcher
badstar f x = (f x >>= badstar f) ++ [x]
star :: Matcher -> Matcher
star f x = (filter (/=x) (f x) >>= star f) ++ [x]
alt :: [Matcher] -> Matcher
alt fs x = concatMap ($ x) fs
ordered :: [Matcher] -> Matcher
ordered fs x = foldl (>>=) [x] fs
plus :: Matcher -> Matcher
plus f = ordered [ f, star f ]
reoptional :: Matcher -> Matcher
reoptional f = alt [ f, return ]
literalchar :: Parser Char
literalchar = letter <|> digit <|> oneOf " ,-_;:'\""
parseregex :: Parser Matcher
parseregex = do
xs <- sepBy1 (many1 atomWithMod) (char '|')
return (alt (map ordered xs))
parsechar :: Parser Matcher
parsechar = literalchar >>= (return . rechar)
atomWithMod :: Parser Matcher
atomWithMod = do
a <- atom
choice (map (\(c, f) -> char c >> return (f a)) mods) <|> return a
where
mods = [('+', plus), ('*', star), ('?', reoptional)]
atom :: Parser Matcher
atom = subexpression <|> charclass <|> wildcard <|> parsechar
subexpression :: Parser Matcher
subexpression = do
char '('
expr <- parseregex
char ')'
return expr
charclass :: Parser Matcher
charclass = do
char '['
xs <- many1 ((try range) <|> parsechar)
char ']'
return (alt xs)
range :: Parser Matcher
range = do
from <- literalchar
char '-'
to <- literalchar
return (matchOne (`elem` [from..to]))
wildcard :: Parser Matcher
wildcard = char '.' >> return dot
matches :: String -> String -> [Match]
matches re str = [MkMatch "" str] >>= (runparser parseregex re)
runparser :: GenParser tok () a -> [tok] -> a
runparser p input = case (parse p "" input) of
Right x -> x
Left err -> error (show err)
public class RegexBuilder {
public boolean isMatch(String s, String p) {
if(p.length() == 0)
return s.length() == 0;
//p's length 1 is special case
if(p.length() == 1 || p.charAt(1) != '*'){
if(s.length() < 1 || (p.charAt(0) != '.' && s.charAt(0) != p.charAt(0)))
return false;
return isMatch(s.substring(1), p.substring(1));
}else{
int len = s.length();
int i = -1;
while(i<len && (i < 0 || p.charAt(0) == '.' || p.charAt(0) == s.charAt(i))){
if(isMatch(s.substring(i+1), p.substring(2)))
return true;
i++;
}
return false;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment