Last active
June 23, 2016 00:15
-
-
Save lambdageek/d397252f07304578a9fc to your computer and use it in GitHub Desktop.
Symmetric version of CommaSep
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE RankNTypes #-} | |
{-# LANGUAGE PolyKinds, TypeOperators, TypeFamilies, KindSignatures, DataKinds, GADTs #-} | |
{-# OPTIONS_GHC -Wall #-} | |
-- | The C Pre-Processor, modeled. | |
-- | |
-- I wanted to understand what happens in the C pre-processor when you put groups of tokens together. | |
-- It turns out that since C99, the pre-processor has "variadic macros" which essentially allow you to treat | |
-- comma-separated sequences of token ("parameter packs" in C spec terms) as some sort of grouping construct. | |
-- | |
-- Initially, parameter packs seem pretty uninspiring... all you can do is capture a variable number of macro arguments and | |
-- then expand them all in a single place. | |
-- | |
-- Clever people, however, figure out that with only a modicum of trickery you can write a macro that will expand | |
-- to a number corresponding to the number of comma-separated groups in a parameter pack (e.g. COUNT(x,y,,z) will expand to 4). | |
-- This turns out to be enough to write a map-like FOREACH (ACTION, ...) macro that will expand the ACTION macro to each | |
-- argument with spaces between each expansion. | |
-- | |
-- It seems like FOREACH is just 'map', but if ACTION expands to an empty token, or if it expands to something containing commas, | |
-- you can get a varying number of things back out. | |
-- | |
-- So this Gist is all about trying to model the C pre-processor in order to understand how FOREACH works... | |
module CommaSep where | |
-- classify tokens as either the empty token, or not | |
data P = Empty | NonEmpty | |
-- token values a with a kind index tracking whether they're populated | |
data Token a :: P -> * where | |
-- the empty token | |
EmptyT :: Token a 'Empty | |
NET :: NonEmptyToken a -> Token a 'NonEmpty | |
-- one or more non-empty tokens | |
data NonEmptyToken a = OneT !a | !(NonEmptyToken a) :*: !(NonEmptyToken a) | |
infixl 7 :*: | |
-- classify groups by whether at lest one comma occurs in them or not | |
data Comma = NoComma | Comma | |
-- comma separate groups of a with kind indices tracking whether the | |
-- leading and trailing tokens are empty, and whether the group has at | |
-- least one comma. | |
data Group a :: P -> Comma -> P -> * where | |
-- the empty group | |
EmptyG :: Group a 'Empty 'NoComma 'Empty | |
-- singleton group of a non-empty token | |
OneG :: !(Token a 'NonEmpty) -> Group a 'NonEmpty 'NoComma 'NonEmpty | |
-- (possibly empty) token comma separated from a comma-separated token group. | |
-- we don't care what happens in the middle (m1 and m2) only at the outer fringes of the tree | |
(:+:) :: !(Group a l c1 m1) -> !(Group a m2 c2 r) -> Group a l 'Comma r | |
infixl 6 :+: | |
-- | join in the "is populated" lattice | |
-- | |
-- One may wonder why there are seemingly redundant cases here, after all the first two | |
-- cases ought to cover all the possibilities for x and therefore the latter two cases seem redundant. | |
-- But this is not so! In the course of typechecking, every even a closed kind like P actually has a third | |
-- possibility, namely "a type variable which we don't know whether it will unify with Empty or NonEmpty". | |
-- We need the latter two cases because in some of the case analysis in the functions below we'll know things | |
-- about 'y' while 'x' is unknown, or vice versa. | |
type family (x :: P) \/ (y :: P) :: P where | |
'Empty \/ a = a | |
'NonEmpty \/ b = 'NonEmpty | |
a \/ 'NonEmpty = 'NonEmpty | |
a \/ 'Empty = a | |
-- when adjoining a token to a group, what happens at the far end of the group | |
-- depends on whether the group has a comma - the comma absorbs any modifications | |
-- T <| [empty group] = T | |
-- T <| left , ...rest... end = T * left , ... rest ... end | |
type family Absorb c old new where | |
Absorb 'Comma old new = old | |
Absorb 'NoComma old new = new | |
-- existential quantification over the rightmost index of a type constructor | |
data ExR (f :: k -> *) :: * where | |
SomeR :: f r -> ExR f | |
-- place a token next to a group with a space between them | |
(<|) :: Token a l -> Group a m c r -> Group a (l \/ m) c (Absorb c r (l \/ r)) | |
EmptyT <| EmptyG = EmptyG | |
t@NET {} <| EmptyG = OneG t | |
EmptyT <| g@OneG {} = g | |
t <| (g1 :+: g2) = (t <| g1) :+: g2 | |
NET t1 <| OneG (NET t2) = OneG (NET (t1 :*: t2)) | |
(|>) :: Group a l c m -> Token a r -> Group a (Absorb c l (r \/ l)) c (r \/ m) | |
EmptyG |> EmptyT = EmptyG | |
EmptyG |> t@NET {} = OneG t | |
OneG (NET t1) |> NET t2 = OneG (NET (t1 :*: t2)) | |
g@OneG {} |> EmptyT = g | |
(g1 :+: g2) |> t = g1 :+: g2 |> t | |
infixl 6 |> | |
infixr 6 <| | |
-- | Join in the comma lattice. | |
type family JoinC c1 c2 where | |
JoinC 'NoComma c2 = c2 | |
JoinC 'Comma c2 = 'Comma | |
JoinC c1 'NoComma = c1 | |
-- | g1 |><| g2 models what happens when you place one group next to another with a space between them. | |
(|><|) :: Group a l c1 m1 -> Group a m2 c2 r -> Group a (Absorb c1 l (m2 \/ l)) (JoinC c1 c2) (Absorb c2 r (m1 \/ r)) | |
EmptyG |><| EmptyG = EmptyG | |
OneG (t@NET{}) |><| g = t <| g | |
g |><| OneG (t@NET{}) = g |> t | |
g@(_ :+: _) |><| EmptyG = g | |
EmptyG |><| g@(_ :+: _) = g | |
(gl :+: g1) |><| (g2 :+: gr) = gl :+: (g1 |><| g2) :+: gr | |
-- | Existentially hide all the indices on a group | |
data SomeGroup a where | |
SomeGroup :: Group a l c r -> SomeGroup a | |
-- | This is the C pre-processor FOREACH (ACTION, ...) macro that applies | |
-- ACTION to each argument in ... and smashes them all together with spaces in between. | |
foreach :: (ExR (Token a) -> SomeGroup b) -> Group a l c r -> SomeGroup b | |
foreach _f EmptyG = SomeGroup EmptyG | |
foreach f (OneG t) = f (SomeR t) | |
foreach f (gl :+: gr) = case foreach f gl of | |
SomeGroup gl' -> case foreach f gr of | |
SomeGroup gr' -> SomeGroup (gl' |><| gr') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment