Skip to content

Instantly share code, notes, and snippets.

@awagner83
Created April 4, 2012 02:34
Show Gist options
  • Save awagner83/2297262 to your computer and use it in GitHub Desktop.
Save awagner83/2297262 to your computer and use it in GitHub Desktop.
Simple NFA regex engine in Haskell
import Control.Monad (join)
data Match = LiteralChar Char | AnyChar deriving (Eq)
data Regex = Step Match Regex
| Split Regex Regex
| MatchEnd
deriving (Eq)
-- | Does string match given regular expression?
matches :: String -> Regex -> Bool
matches s r = go s [r]
where go [] rs = any (MatchEnd ==) rs
go _ [] = False
go (x:xs) rs = go xs (advanceState x rs)
-- | Advance one step in concurrent tracking of states
advanceState :: Char -> [Regex] -> [Regex]
advanceState c = join . map go
where go (Step m r) | m `matchesChar` c = [r]
| otherwise = []
go (Split l r) = join [go l, go r]
go MatchEnd = []
matchesChar :: Match -> Char -> Bool
matchesChar (LiteralChar x) c = x == c
matchesChar AnyChar _ = True
-- | Build repeating regex sequence
repeating :: (Regex -> Regex) -> Regex -> Regex
repeating left right = let r = Split (left r) right in r
-- | Literal regex match
literal :: String -> Regex -> Regex
literal (x:[]) r = Step (LiteralChar x) r
literal (x:xs) r = Step (LiteralChar x) (literal xs r)
{- Since we don't have a regex compiler yet, here are some prebuild expressions
to play with -}
matchChar = Step . LiteralChar
-- regex: ab+a
m1 = matchChar 'a' $ repeating (matchChar 'b') $ matchChar 'a' $ MatchEnd
-- regex: a(bb)+a
m2 = matchChar 'a'
$ repeating (matchChar 'b' . matchChar 'b') $ matchChar 'a' $ MatchEnd
-- regex: bob|fred
m3 = Split (literal "bob" $ MatchEnd) (literal "fred" $ MatchEnd)
m4 = repeating (Step AnyChar) $ literal ".txt" $ MatchEnd
test = [ "a" `matches` (matchChar 'a' $ MatchEnd)
, not $ "aa" `matches` (matchChar 'a' $ MatchEnd)
, "aba" `matches` m1
, "abba" `matches` m2
, "bob" `matches` m3
, "fred" `matches` m3
, not $ "frank" `matches` m3
, not $ "freddy" `matches` m3
, "foo.txt" `matches` m4
, not $ "foo.html" `matches` m4]
main = print test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment