Last active
July 25, 2024 09:48
-
-
Save noughtmare/f8ce3a601d30d975d5c6d5ab82ec6728 to your computer and use it in GitHub Desktop.
Parsing data dependent grammars based on https://www.well-typed.com/blog/2020/06/fix-ing-regular-expressions/#references
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE GHC2021, GADTs, DataKinds #-} | |
import Control.Monad (ap, (>=>), replicateM_, guard) | |
import Control.Applicative | |
import Prelude hiding (or) | |
import Data.Kind | |
import Data.Char | |
asumMap :: Alternative f => (a -> f b) -> [a] -> f b | |
asumMap f x = asum (map f x) | |
type Ctx :: Type | |
type Ctx = [Type] | |
type Var :: [k] -> k -> Type | |
data Var ctx a where | |
Here :: Var (a : ctx) a | |
There :: Var ctx a -> Var (c : ctx) a | |
deriving instance Show (Var ctx a) | |
varInt :: Var ctx a -> Int | |
varInt Here = 0 | |
varInt (There v) = 1 + varInt v | |
showVar :: Var ctx a -> String | |
showVar = show . varInt | |
elimVar :: ((x ~ a) => c) -> (Var ctx a -> c) -> Var (x : ctx) a -> c | |
elimVar here _ Here = here | |
elimVar _ there (There x) = there x | |
absurdVar :: Var '[] a -> c | |
absurdVar x = case x of {} | |
-- semantics of functions | |
type Fun a b = a -> [b] | |
type Gram :: Ctx -> Type -> Type | |
data Gram ctx a | |
= Fail | |
| Pure a | |
| Char Char (Gram ctx a) | |
| Gram ctx a :|: Gram ctx a | |
| forall x y. Var (Var ctx (Fun x y)) x (y -> Gram ctx a) | |
| forall x y. Fix (x -> Gram (Fun x y : ctx) y) x (y -> Gram ctx a) | |
deriving instance Functor (Gram ctx) | |
instance Applicative (Gram ctx) where | |
pure = Pure | |
(<*>) = ap | |
instance Alternative (Gram ctx) where | |
empty = Fail | |
Fail <|> q = q | |
-- p <|> Fail = p | |
p <|> q = p :|: q | |
instance Monad (Gram ctx) where | |
Fail >>= _ = Fail | |
Pure x >>= k = k x | |
Char c k >>= k' = Char c (k >>= k') | |
Var v x k >>= k' = Var v x (k >=> k') | |
(p :|: q) >>= k = (p >>= k) :|: (q >>= k) | |
Fix p x k >>= k' = Fix p x (k >=> k') | |
var :: Var ctx (a -> [b]) -> a -> Gram ctx b | |
var v x = Var v x Pure | |
char :: Char -> Gram ctx () | |
char c = Char c (Pure ()) | |
fix :: ((x -> Gram (Fun x y : ctx) y) -> x -> Gram (Fun x y : ctx) y) -> x -> Gram ctx y | |
fix f x = Fix (f (var Here)) x Pure | |
wkGram :: (forall x. Var ctx x -> Var ctx' x) -> Gram ctx a -> Gram ctx' a | |
wkGram f = subst (var . f) | |
-- wkGram _ Fail = Fail | |
-- wkGram _ (Pure x) = Pure x | |
-- wkGram f (Char c k) = Char c (wkGram f k) | |
-- wkGram f (Var v x k) = Var (f v) x (wkGram f . k) | |
-- wkGram f (g1 :|: g2) = (wkGram f g1 :|: wkGram f g2) | |
-- wkGram f (Fix g x k) = Fix (wkGram (elimVar Here (There . f)) . g) x (wkGram f . k) | |
-- Technically it should be possible to do without this substitution function | |
-- because it is only used in computing the identity transformation over | |
-- grammars. That should only reuqire renaming as implemented by wkGram. But | |
-- formulating it this way is slightly easier for now. | |
subst :: (forall x y. Var ctx (Fun x y) -> x -> Gram ctx' y) -> Gram ctx a -> Gram ctx' a | |
subst _ Fail = Fail | |
subst _ (Pure x) = Pure x | |
subst k' (Char c k) = Char c (subst k' k) | |
subst k' (Var v x k) = k' v x >>= subst k' . k | |
subst k' (g1 :|: g2) = subst k' g1 <|> subst k' g2 | |
subst k' (Fix g x0 k) = Fix (subst (elimVar (\x -> Var Here x Pure) (\v -> wkGram There . k' v)) . g) x0 (subst k' . k) | |
type DEv :: Ctx -> Type -> Type | |
data DEv ctx a = DEv { ev_n :: [a], ev_d :: Char -> Gram ctx a, ev_i :: Gram ctx a } | |
deriving Functor | |
instance Applicative (DEv ctx) where | |
pure x = DEv [x] (const empty) (pure x) | |
(<*>) = ap | |
instance Alternative (DEv ctx) where | |
empty = DEv [] (const empty) empty | |
DEv a b c <|> DEv x y z = DEv (a <|> x) (liftA2 (<|>) b y) (c <|> z) | |
instance Monad (DEv ctx) where | |
DEv x y z >>= k = | |
DEv | |
(x >>= ev_n . k) | |
(\c -> (y c >>= ev_i . k) <|> asumMap (\x' -> ev_d (k x') c) x) | |
(z >>= ev_i . k) | |
wkDEv :: (forall x. Var ctx x -> Var ctx' x) -> DEv ctx a -> DEv ctx' a | |
wkDEv f (DEv n d i) = DEv n (wkGram f . d) (wkGram f i) | |
-- The main workhorse | |
sem :: forall ctx ctx' a. (forall x y. Var ctx (Fun x y) -> x -> DEv ctx' y) -> Gram ctx a -> DEv ctx' a | |
sem _ Fail = DEv [] (const Fail) Fail | |
sem _ (Pure x) = DEv [x] (const Fail) (Pure x) | |
sem ev (Char c k) = | |
DEv [] (\c' -> guard (c == c')) (char c) >> sem ev k | |
sem ev (Var v x k) = ev v x >>= sem ev . k | |
sem ev (g1 :|: g2) = sem ev g1 <|> sem ev g2 | |
sem ev (Fix @_ @_ @x @y g x0 k) = | |
let | |
f :: (forall z. Var ctx' z -> Var ctx'' z) -> (Char -> Gram ctx'' y) -> x -> DEv ctx'' y | |
f e p x = DEv | |
(ev_n $ sem (elimVar (\_ -> DEv [] (const Fail) Fail) ev) (g x)) | |
p | |
(wkGram e $ subst (\v -> ev_i . ev v) (Fix g x Pure)) | |
in | |
f id (\c -> | |
Fix | |
(\x -> | |
ev_d (sem (elimVar (\x' -> f There (\_ -> Var Here x' Pure) x') (\v -> wkDEv There . ev v)) (g x)) c) | |
x0 | |
Pure) | |
x0 | |
>>= sem ev . k | |
-- Derived functions | |
nullable :: Gram '[] a -> [a] | |
nullable = ev_n . sem absurdVar | |
derivative :: Char -> Gram '[] a -> Gram '[] a | |
derivative c x = ev_d (sem absurdVar x) c | |
parse :: String -> Gram '[] a -> [a] | |
parse = foldr (\c k -> k . derivative c) nullable | |
-- Examples | |
data Exp = Atom Int | Add Exp Exp | Mul Exp Exp deriving Show | |
digit :: Gram ctx Int | |
digit = asumMap (\i -> i <$ char (intToDigit i)) [0..9] | |
number :: Gram ctx Int | |
number = fix (\p _ -> ((\x d -> 10 * x + d) <$> p () <*> digit) <|> digit) () | |
ex :: Gram ctx Exp | |
ex = fix (\p (a,m) -> Add <$ guard a <*> p (False,True) <* char '+' <*> p (True,True) | |
<|> Mul <$ guard m <*> p (False,False) <* char '*' <*> p (False,True) | |
<|> Atom <$> number | |
<|> char '(' *> p (True,True) <* char ')' | |
) (True, True) | |
ndots :: Gram ctx () | |
--ndots = number >>= \n -> replicateM_ n (char '.') | |
ndots = number >>= | |
fix (\p i -> guard (i > 0) *> char '.' *> p (i - 1) | |
<|> guard (i == 0)) | |
main = do | |
print (parse "1+2*3" ex) | |
print (parse "5....." ndots) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment