Skip to content

Instantly share code, notes, and snippets.

@Agnishom
Created June 25, 2021 03:18
Show Gist options
  • Save Agnishom/baa2c9150873a42b9bb0b57fa2dbc894 to your computer and use it in GitHub Desktop.
Save Agnishom/baa2c9150873a42b9bb0b57fa2dbc894 to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wall #-}
module Regex.Glushkov where
import Control.Monad.State
import Data.Aeson.Encode.Pretty (encodePretty)
import qualified Data.Text as T
import qualified Data.ByteString.Lazy as BSL
import Regex.Types
import MNRL.Utils
import MNRL.Types (MNRL)
linearize :: Regex a -> Regex (Int, a)
linearize re = evalState (go re) 0 where
go :: Regex a -> State Int (Regex (Int, a))
go Empty = pure Empty
go Epsilon = pure Epsilon
go (Char x) = (get <* modify (+1)) >>= \i -> pure (Char (i, x))
go (Star e) = Star <$> go e
go (Concat es) = Concat <$> traverse go es
go (Union es) = Union <$> traverse go es
chars :: Regex a -> [a]
chars Empty = []
chars Epsilon = []
chars (Char x) = [x]
chars (Concat es) = concatMap chars es
chars (Union es) = concatMap chars es
chars (Star e) = chars e
hasEpsilon :: Regex a -> Bool
hasEpsilon Empty = False
hasEpsilon Epsilon = True
hasEpsilon (Char _) = False
hasEpsilon (Star _) = True
hasEpsilon (Union es) = or $ map hasEpsilon es
hasEpsilon (Concat es) = and $ map hasEpsilon es
firsts :: Regex a -> [a]
firsts Empty = []
firsts Epsilon = []
firsts (Char x) = [x]
firsts (Star e) = firsts e
firsts (Union es) = concatMap firsts es
firsts (Concat []) = []
firsts (Concat (e:es))
| hasEpsilon e = firsts e ++ firsts (Concat es)
| otherwise = firsts e
lasts :: Regex a -> [a]
lasts Empty = []
lasts Epsilon = []
lasts (Char x) = [x]
lasts (Star e) = lasts e
lasts (Union es) = concatMap lasts es
lasts (Concat []) = []
lasts (Concat es)
| hasEpsilon r = lasts r ++ lasts (Concat rs)
| otherwise = lasts r
where
r = last es
rs = init es
fac2s :: Regex a -> [(a, a)]
fac2s Empty = []
fac2s Epsilon = []
fac2s (Char _) = []
fac2s (Union es) = concatMap fac2s es
fac2s (Star e) = fac2s e ++ [(a, b) | a <- lasts e, b <- firsts e]
fac2s (Concat es) = concatMap fac2s es ++ cross es
where
cross :: [Regex a] -> [(a, a)]
cross [] = []
cross [_] = []
cross l = concat $ zipWith (\x y -> [(a, b) | a <- lasts x, b <- firsts y]) l (tail l)
toId :: Int -> T.Text
toId = T.pack . show
statify :: Int -> T.Text -> MNRL -> MNRL
statify i c = addComponent (toId i) (mkHStateComp c)
transify :: (Int, a) -> (Int, a) -> MNRL -> MNRL
transify (i1, _) (i2, _) = addHS2HS (toId i1) (toId i2)
fromRegex :: Regex T.Text -> MNRL
fromRegex reg = addTransitions . addStates $ defaultMNRL
where
lreg = linearize reg
ch = chars lreg
fs = fac2s lreg
addStates = foldr1 (.) $ map (uncurry statify) ch
addTransitions = foldr1 (.) $ map (uncurry transify) fs
rAB, rABstar, rAABstar, rAABstarstar, rBAstar, rExample :: Regex T.Text
rAB = Concat [Char "a", Char "b"]
rABstar = Star rAB
rAABstar = Concat [Char "a", rABstar]
rAABstarstar = Star rAABstar
rBAstar = Star $ Concat [Char "b", Char "a"]
rExample = Union [rAABstarstar, rBAstar]
runExample :: IO ()
runExample = BSL.writeFile "example.mnrl" $ encodePretty (fromRegex rExample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment