Last active
July 19, 2024 22:41
-
-
Save noughtmare/11d1823d8c46fd731bd3490ac2da5c06 to your computer and use it in GitHub Desktop.
Parsing context free grammars based on https://www.well-typed.com/blog/2020/06/fix-ing-regular-expressions/#references
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE GHC2021, GADTs, DataKinds #-} | |
import Control.Monad (ap, (>=>)) | |
import Control.Applicative | |
import Prelude hiding (or) | |
import Data.Kind | |
import Data.Char | |
type Ctx :: Type | |
type Ctx = [Type] | |
type Var :: Ctx -> Type -> Type | |
data Var ctx a where | |
Here :: Var (a : ctx) a | |
There :: Var ctx a -> Var (b : ctx) a | |
deriving instance Show a => Show (Var ctx a) | |
elimVar :: (b ~ c => a) -> (Var ctx b -> a) -> Var (c : ctx) b -> a | |
elimVar here _ Here = here | |
elimVar _ there (There x) = there x | |
absurdVar :: Var '[] b -> a | |
absurdVar x = case x of {} | |
type Gram :: Ctx -> Type -> Type | |
data Gram ctx a | |
= Fail | |
| Pure a | |
| Char Char (Gram ctx a) | |
| forall x. Var (Var ctx x) (x -> Gram ctx a) | |
| Gram ctx a :|: Gram ctx a | |
| forall x. Fix (Gram (x : ctx) x) (x -> Gram ctx a) | |
deriving instance Functor (Gram ctx) | |
instance Applicative (Gram ctx) where | |
pure = Pure | |
(<*>) = ap | |
instance Alternative (Gram ctx) where | |
empty = Fail | |
Fail <|> q = q | |
p <|> Fail = p | |
p <|> q = p :|: q | |
instance Monad (Gram ctx) where | |
Fail >>= _ = Fail | |
Pure x >>= k = k x | |
Char c k >>= k' = Char c (k >>= k') | |
Var v k >>= k' = Var v (k >=> k') | |
(p :|: q) >>= k = (p >>= k) :|: (q >>= k) | |
Fix p k >>= k' = Fix p (k >=> k') | |
fix' :: Gram (a : ctx) a -> Gram ctx a | |
fix' Fail = Fail | |
fix' (Pure x) = Pure x | |
-- fix' (Char c k) = Char c k | |
-- fix' (Var (There x) k) = Var x | |
fix' x = | |
case trim x of | |
Nothing -> Fix x Pure | |
Just x' -> x' | |
where | |
trim :: Gram (b : ctx) c -> Maybe (Gram ctx c) | |
trim Fail = Just Fail | |
trim (Pure x) = Just (Pure x) | |
trim (Char c k) = Char c <$> trim k | |
trim (Var Here k) = Nothing | |
trim (p :|: q) = (:|:) <$> trim p <*> trim q | |
-- sadly we can't inspect our grammars enough to optimize these cases: | |
trim (Var (There v) k) = Nothing -- Var v <$> trim k | |
trim (Fix p k) = Nothing -- Fix <$> trim p <*> trim k | |
class Mono f where | |
wk :: (forall x. Var ctx x -> Var ctx' x) -> f ctx a -> f ctx' a | |
instance Mono Var where | |
wk f = f | |
instance Mono Gram where | |
wk _ Fail = Fail | |
wk _ (Pure x) = Pure x | |
wk f (Char c k) = Char c (wk f k) | |
wk f (Var v k) = Var (f v) (wk f . k) | |
wk f (g1 :|: g2) = (wk f g1 :|: wk f g2) | |
wk f (Fix g k) = Fix (wk (elimVar Here (There . f)) g) (wk f . k) | |
subst :: (forall x. Var ctx x -> Gram ctx' x) -> Gram ctx a -> Gram ctx' a | |
subst _ Fail = Fail | |
subst _ (Pure x) = Pure x | |
subst k' (Char c k) = Char c (subst k' k) | |
subst k' (Var x k) = k' x >>= subst k' . k | |
subst k' (g1 :|: g2) = (subst k' g1 <|> subst k' g2) | |
subst k' (Fix g k) = fix' (subst (elimVar (Var Here Pure) (wk There . k')) g) >>= (subst k' . k) | |
-- assumes the only change is that elements are added to the list. | |
listfix :: ([a] -> [a]) -> [a] -> [a] | |
listfix f xs = | |
let | |
!n = length xs | |
xs' = f xs | |
in if length xs' > n | |
then listfix f xs' | |
else xs' | |
n :: (forall x. Var ctx x -> [x]) -> Gram ctx a -> [a] | |
n _ Fail = [] | |
n _ (Pure x) = [x] | |
n _ Char{} = [] | |
n ev (Var x k) = ev x >>= n ev . k | |
n ev (g1 :|: g2) = n ev g1 ++ n ev g2 | |
n ev (Fix g k) = listfix (\xs -> n (elimVar xs ev) g) [] >>= n ev . k | |
nullable :: Gram '[] a -> [a] | |
nullable = n absurdVar | |
type DEv :: Ctx -> Type -> Type | |
data DEv ctx a = DEv { ev_n :: [a], ev_d :: Gram ctx a, ev_i :: Gram ctx a } | |
instance Mono DEv where | |
wk f (DEv n d i) = DEv n (wk f d) (wk f i) | |
-- derivative | |
d :: (forall a. Var ctx a -> DEv ctx' a) -> Char -> Gram ctx a -> Gram ctx' a | |
d _ _ Fail = Fail | |
d _ _ Pure{} = Fail | |
d ev c1 (Char c2 k) | |
| c1 == c2 = subst (ev_i . ev) k | |
| otherwise = Fail | |
d ev c (Var x k) = (ev_d (ev x) >>= subst (ev_i . ev) . k) | |
<|> foldr (<|>) Fail (map (d ev c . k) (ev_n (ev x))) | |
d ev c (g1 :|: g2) = d ev c g1 <|> d ev c g2 | |
d ev c (Fix g k) = | |
let xs = n (ev_n . ev) (Fix g Pure) in | |
(fix' ( | |
d (elimVar | |
(DEv | |
xs | |
(Var Here Pure) | |
(fix' | |
(subst | |
(elimVar (Var Here Pure) (wk (There . There) . ev_i . ev)) | |
g))) | |
(wk There . ev)) | |
c | |
g) | |
>>= subst (ev_i . ev) . k) | |
<|> foldr (<|>) Fail (map (d ev c . k) xs) | |
derivative :: Char -> Gram '[] a -> Gram '[] a | |
derivative c x = d absurdVar c x | |
derivatives :: String -> Gram '[] a -> Gram '[] a | |
derivatives s g0 = go s g0 where | |
go [] g = g | |
go (c:cs) g = go cs (derivative c g) | |
parse :: String -> Gram '[] a -> [a] | |
parse s g0 = nullable (derivatives s g0) | |
gfix :: (forall ctx. Var ctx a -> Gram ctx a) -> Gram ctx a | |
gfix f = Fix (f (Here)) Pure | |
data Exp = Atom | Add Exp Exp deriving Show | |
ex :: Gram '[] Exp | |
ex = gfix $ \x -> Var x (\l -> Char '+' (Var x (\r -> Pure (Add l r)))) | |
<|> Char 'x' (Pure Atom) | |
<|> Char '(' (Var x (\z -> Char ')' (Pure z))) | |
var :: Var ctx a -> Gram ctx a | |
var v = Var v Pure | |
char :: Char -> Gram ctx () | |
char c = Char c (Pure ()) | |
digit :: Gram ctx Int | |
digit = foldr (<|>) Fail (map (\i -> i <$ char (intToDigit i)) [0..9]) | |
number :: Gram ctx Int | |
number = fix' $ ((\n d -> 10 * n + d) <$> var Here <*> digit) <|> digit |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment