Created
September 13, 2020 15:03
-
-
Save yangzhixuan/06e341e2e1e2c3851228387e40f804c0 to your computer and use it in GitHub Desktop.
Higher order parser combinator langauge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {-# LANGUAGE GADTs, TypeFamilies, DataKinds, TypeOperators, RankNTypes, PolyKinds #-} | |
| module HiParser where | |
| import Control.Monad.Trans.State.Lazy | |
| import GHC.Types | |
| -- All types of the parser language: | |
| -- BaseTy: every Haskell type is a base type of the parser language | |
| -- Arrow: function types of the parser language | |
| data Tys = BaseTy GHC.Types.Type | Arrow Tys Tys | |
| -- Typing context of the parser language | |
| data Ctx = Nil | Cons Tys Ctx | |
| -- |Idx env t| evidences that |t| is in |env| | |
| data Idx (env :: Ctx) (t :: Tys) where | |
| ZeroIdx :: Idx (Cons t ts) t | |
| SuccIdx :: Idx ts t -> Idx (Cons s ts) t | |
| instance Show (Idx env t) where | |
| show = show . idxToInt | |
| idxToInt :: Idx env t -> Int | |
| idxToInt ZeroIdx = 0 | |
| idxToInt (SuccIdx n) = idxToInt n + 1 | |
| -- Syntax of the parser language | |
| -- It's a simply typed lambda calculus with some parsing primitives. | |
| -- Well-typedness is enforced using GADTs and well-scopedness is enforced with De Bruijn index. | |
| data Parser (env :: Ctx) (a :: Tys) where | |
| Base :: a -> Parser env (BaseTy a) | |
| (:<*>:) :: Parser e (BaseTy (a -> b)) -> Parser e (BaseTy a) -> Parser e (BaseTy b) | |
| Branch :: Parser env (BaseTy Bool) -> Parser env a -> Parser env a -> Parser env a | |
| Chr :: Parser env (BaseTy Char) | |
| Var :: Idx env t -> Parser env t | |
| Lam :: Parser (Cons a env) b -> Parser env (Arrow a b) | |
| Bind :: Parser env a -> Parser env (Arrow a b) -> Parser env b | |
| Then :: Parser env a -> Parser env b -> Parser env b | |
| -- Semantics | |
| -------------------- | |
| -- We interpret the semantics of the language with (coarse-grain) call-by-value semantics. | |
| -- Thus every type |t :: Tys| of the language is interpreted as the set |interpT t| of 'values' of the type, | |
| -- and every term |a :: Parser env t| is interpreted as a Kleisli arrow |interpE env -> M (interpT t)| | |
| -- (See Paul Levy's book _Call-By-Push-Value - A Functional/Imperative Synthesis_ for details) | |
| -- Underlying monad for the parser semantics | |
| -- (Maybe the correct monad for parser combinators should be |MaybeT (State String)|???) | |
| type M a = StateT String Maybe a | |
| -- Interpretation of types | |
| type family InterpT (a :: Tys) :: * where | |
| InterpT (BaseTy a) = a | |
| InterpT (Arrow a b) = InterpT a -> M (InterpT b) | |
| -- Interpretation of environments | |
| type family InterpE (a :: Ctx) :: * where | |
| InterpE Nil = () | |
| InterpE (Cons a b) = (InterpT a, InterpE b) | |
| -- Interpretation of terms | |
| runParser :: Parser e a -> InterpE e -> M (InterpT a) | |
| runParser (Base b) _ = return b | |
| runParser Chr _ = StateT f where | |
| f [] = Nothing | |
| f (s:ss) = Just (s, ss) | |
| runParser (Var idx) env = return (prj idx env) | |
| runParser (Lam l) env = return (\a -> runParser l (a, env)) | |
| runParser (Bind x fc) env = do v <- runParser x env | |
| f <- runParser fc env | |
| f v | |
| runParser (Then x y) env = runParser x env >> runParser y env | |
| runParser (Branch bc x y) env = do b <- runParser bc env | |
| if b then runParser x env | |
| else runParser y env | |
| runParser (f :<*>: a) env = runParser f env <*> runParser a env | |
| prj :: Idx e v -> InterpE e -> InterpT v | |
| prj ZeroIdx e = fst e | |
| prj (SuccIdx n) e = prj n (snd e) | |
| -- |CParser t| is a closed parser of (parser-)type |t| | |
| type CParser = Parser Nil | |
| -- |ValParser t| is a closed parser that is capable of parsing Haskell |x| values. | |
| type ValParser x = Parser Nil (BaseTy x) | |
| runValParser :: ValParser x -> String -> Maybe (x, String) | |
| runValParser p s = runStateT (runParser p ()) s | |
| -- p0 is a parser function: if the argument is not '0' it simply returns it back. | |
| -- Otherwise it reads another character and returns it. | |
| p0 :: Parser env (Arrow (BaseTy Char) (BaseTy Char)) | |
| p0 = Lam ( -- a new variable is introduced by de brujin index |ZeroIdx| | |
| Branch ((Base (== '0')) :<*>: (Var ZeroIdx)) | |
| Chr | |
| (Var ZeroIdx)) | |
| -- Parser |p1| reads a character and if it is '0' it then reads another character | |
| p1 :: ValParser Char | |
| p1 = Bind Chr p0 | |
| -- runValParser p1 "" = Nothing | |
| -- runValParser p1 "abcd" = Just ('a', "bcd") | |
| -- runValParser p1 "0bcd" = Just ('b', "cd") | |
| type (a :-> b) = Arrow a b | |
| infixr 0 :-> | |
| -- Composition of functions | |
| pComp :: Parser env ((b :-> c) :-> (a :-> b) :-> a :-> c) | |
| pComp = Lam (Lam (Lam (Bind (Bind (Var ZeroIdx) (Var (SuccIdx ZeroIdx))) | |
| (Var (SuccIdx (SuccIdx ZeroIdx)))))) | |
| -- p0 after p0 | |
| p2 :: ValParser Char | |
| p2 = Bind Chr (Bind p0 (Bind p0 pComp)) | |
| -- runValParser p2 "" = Nothing | |
| -- runValParser p2 "abcd" = Just ('a', "bcd") | |
| -- runValParser p2 "0bcd" = Just ('b', "cd") | |
| -- runValParser p2 "00cd" = Just ('c',"d") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment