Created
June 25, 2021 03:18
-
-
Save Agnishom/baa2c9150873a42b9bb0b57fa2dbc894 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
{-# OPTIONS_GHC -Wall #-} | |
module Regex.Glushkov where | |
import Control.Monad.State | |
import Data.Aeson.Encode.Pretty (encodePretty) | |
import qualified Data.Text as T | |
import qualified Data.ByteString.Lazy as BSL | |
import Regex.Types | |
import MNRL.Utils | |
import MNRL.Types (MNRL) | |
linearize :: Regex a -> Regex (Int, a) | |
linearize re = evalState (go re) 0 where | |
go :: Regex a -> State Int (Regex (Int, a)) | |
go Empty = pure Empty | |
go Epsilon = pure Epsilon | |
go (Char x) = (get <* modify (+1)) >>= \i -> pure (Char (i, x)) | |
go (Star e) = Star <$> go e | |
go (Concat es) = Concat <$> traverse go es | |
go (Union es) = Union <$> traverse go es | |
chars :: Regex a -> [a] | |
chars Empty = [] | |
chars Epsilon = [] | |
chars (Char x) = [x] | |
chars (Concat es) = concatMap chars es | |
chars (Union es) = concatMap chars es | |
chars (Star e) = chars e | |
hasEpsilon :: Regex a -> Bool | |
hasEpsilon Empty = False | |
hasEpsilon Epsilon = True | |
hasEpsilon (Char _) = False | |
hasEpsilon (Star _) = True | |
hasEpsilon (Union es) = or $ map hasEpsilon es | |
hasEpsilon (Concat es) = and $ map hasEpsilon es | |
firsts :: Regex a -> [a] | |
firsts Empty = [] | |
firsts Epsilon = [] | |
firsts (Char x) = [x] | |
firsts (Star e) = firsts e | |
firsts (Union es) = concatMap firsts es | |
firsts (Concat []) = [] | |
firsts (Concat (e:es)) | |
| hasEpsilon e = firsts e ++ firsts (Concat es) | |
| otherwise = firsts e | |
lasts :: Regex a -> [a] | |
lasts Empty = [] | |
lasts Epsilon = [] | |
lasts (Char x) = [x] | |
lasts (Star e) = lasts e | |
lasts (Union es) = concatMap lasts es | |
lasts (Concat []) = [] | |
lasts (Concat es) | |
| hasEpsilon r = lasts r ++ lasts (Concat rs) | |
| otherwise = lasts r | |
where | |
r = last es | |
rs = init es | |
fac2s :: Regex a -> [(a, a)] | |
fac2s Empty = [] | |
fac2s Epsilon = [] | |
fac2s (Char _) = [] | |
fac2s (Union es) = concatMap fac2s es | |
fac2s (Star e) = fac2s e ++ [(a, b) | a <- lasts e, b <- firsts e] | |
fac2s (Concat es) = concatMap fac2s es ++ cross es | |
where | |
cross :: [Regex a] -> [(a, a)] | |
cross [] = [] | |
cross [_] = [] | |
cross l = concat $ zipWith (\x y -> [(a, b) | a <- lasts x, b <- firsts y]) l (tail l) | |
toId :: Int -> T.Text | |
toId = T.pack . show | |
statify :: Int -> T.Text -> MNRL -> MNRL | |
statify i c = addComponent (toId i) (mkHStateComp c) | |
transify :: (Int, a) -> (Int, a) -> MNRL -> MNRL | |
transify (i1, _) (i2, _) = addHS2HS (toId i1) (toId i2) | |
fromRegex :: Regex T.Text -> MNRL | |
fromRegex reg = addTransitions . addStates $ defaultMNRL | |
where | |
lreg = linearize reg | |
ch = chars lreg | |
fs = fac2s lreg | |
addStates = foldr1 (.) $ map (uncurry statify) ch | |
addTransitions = foldr1 (.) $ map (uncurry transify) fs | |
rAB, rABstar, rAABstar, rAABstarstar, rBAstar, rExample :: Regex T.Text | |
rAB = Concat [Char "a", Char "b"] | |
rABstar = Star rAB | |
rAABstar = Concat [Char "a", rABstar] | |
rAABstarstar = Star rAABstar | |
rBAstar = Star $ Concat [Char "b", Char "a"] | |
rExample = Union [rAABstarstar, rBAstar] | |
runExample :: IO () | |
runExample = BSL.writeFile "example.mnrl" $ encodePretty (fromRegex rExample) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment