lambdageek · June 23, 2016 00:15
diff --git a/CommaSep.hs b/CommaSep.hs
 {-# LANGUAGE RankNTypes #-}
 {-# LANGUAGE PolyKinds, TypeOperators, TypeFamilies, KindSignatures, DataKinds, GADTs #-}
 {-# OPTIONS_GHC -Wall #-}
 -- | The C Pre-Processor, modeled.
 --
 -- I wanted to understand what happens in the C pre-processor when you put groups of tokens together.
 -- It turns out that since C99, the pre-processor has "variadic macros" which essentially allow you to treat
 -- comma-separated sequences of token ("parameter packs" in C spec terms) as some sort of grouping construct.
 --
 -- Initially, parameter packs seem pretty uninspiring... all you can do is capture a variable number of macro arguments and
 -- then expand them all in a single place.
 --
 -- Clever people, however, figure out that with only a modicum of trickery you can write a macro that will expand
 -- to a number corresponding to the number of comma-separated groups in a parameter pack (e.g. COUNT(x,y,,z) will expand to 4).
 -- This turns out to be enough to write a map-like FOREACH (ACTION, ...) macro that will expand the ACTION macro to each
 -- argument with spaces between each expansion.
 --
 -- It seems like FOREACH is just 'map', but if ACTION expands to an empty token, or if it expands to something containing commas,
 -- you can get a varying number of things back out.
 --
 -- So this Gist is all about trying to model the C pre-processor in order to understand how FOREACH works...
 module CommaSep where

 -- classify tokens as either the empty token, or not 
 data P = Empty | NonEmpty

 -- token values a with a kind index tracking whether they're populated
 data Token a :: P -> * where
  -- the empty token
  EmptyT :: Token a 'Empty
  NET :: NonEmptyToken a -> Token a 'NonEmpty

 -- one or more non-empty tokens
 data NonEmptyToken a = OneT !a | !(NonEmptyToken a) :*: !(NonEmptyToken a)

 infixl 7 :*:

 -- classify groups by whether at lest one comma occurs in them or not
 data Comma = NoComma | Comma

 -- comma separate groups of a with kind indices tracking whether the
 -- leading and trailing tokens are empty, and whether the group has at
 -- least one comma.
 data Group a :: P -> Comma -> P -> * where
  -- the empty group
  EmptyG :: Group a 'Empty 'NoComma 'Empty
  -- singleton group of a non-empty token
  OneG :: !(Token a 'NonEmpty) -> Group a 'NonEmpty 'NoComma 'NonEmpty
  -- (possibly empty) token comma separated from a comma-separated token group.
  -- we don't care what happens in the middle (m1 and m2) only at the outer fringes of the tree
  (:+:) :: !(Group a l c1 m1) -> !(Group a m2 c2 r) -> Group a l 'Comma r

 infixl 6 :+:

 -- | join in the "is populated" lattice
 --
 -- One may wonder why there are seemingly redundant cases here, after all the first two
 -- cases ought to cover all the possibilities for x and therefore the latter two cases seem redundant.
 -- But this is not so!  In the course of typechecking, every even a closed kind like P actually has a third
 -- possibility, namely "a type variable which we don't know whether it will unify with Empty or NonEmpty".
 -- We need the latter two cases because in some of the case analysis in the functions below we'll know things
 -- about 'y' while 'x' is unknown, or vice versa. 
 type family (x :: P) \/ (y :: P) :: P where
  'Empty \/ a = a
  'NonEmpty \/ b = 'NonEmpty
  a \/ 'NonEmpty = 'NonEmpty
  a \/ 'Empty = a

 -- when adjoining a token to a group, what happens at the far end of the group
 -- depends on whether the group has a comma - the comma absorbs any modifications
 --   T <| [empty group] = T
 --   T <| left , ...rest... end =  T * left , ... rest ... end
 type family Absorb c old new where
  Absorb 'Comma old new = old
  Absorb 'NoComma old new = new

 -- existential quantification over the rightmost index of a type constructor 
 data ExR (f :: k -> *) :: * where
  SomeR :: f r -> ExR f


 -- place a token next to a group with a space between them
 (<|) :: Token a l -> Group a m c r -> Group a (l \/ m) c (Absorb c r (l \/ r))
 EmptyT   <| EmptyG        = EmptyG
 t@NET {} <| EmptyG        = OneG t
 EmptyT   <| g@OneG {}     = g
 t        <| (g1 :+: g2)   = (t <| g1) :+: g2
 NET t1   <| OneG (NET t2) = OneG (NET (t1 :*: t2))

 (|>) :: Group a l c m -> Token a r -> Group a (Absorb c l (r \/ l)) c (r \/ m)
 EmptyG |> EmptyT = EmptyG
 EmptyG |> t@NET {} = OneG t
 OneG (NET t1) |> NET t2 = OneG (NET (t1 :*: t2))
 g@OneG {} |> EmptyT = g
 (g1 :+: g2) |> t = g1 :+: g2 |> t

 infixl 6 |>
 infixr 6 <|

 -- | Join in the comma lattice.
 type family JoinC c1 c2 where
  JoinC 'NoComma c2 = c2
  JoinC 'Comma c2 = 'Comma
  JoinC c1 'NoComma = c1

 -- | g1 |><| g2 models what happens when you place one group next to another with a space between them.
 (|><|) :: Group a l c1 m1 -> Group a m2 c2 r -> Group a (Absorb c1 l (m2 \/ l)) (JoinC c1 c2) (Absorb c2 r (m1 \/ r))
 EmptyG |><| EmptyG = EmptyG
 OneG (t@NET{}) |><| g = t <| g
 g |><| OneG (t@NET{}) = g |> t
 g@(_ :+: _) |><| EmptyG = g
 EmptyG |><| g@(_ :+: _) = g
 (gl :+: g1) |><| (g2 :+: gr) = gl :+: (g1 |><| g2) :+: gr

 -- | Existentially hide all the indices on a group
 data SomeGroup a where
  SomeGroup :: Group a l c r -> SomeGroup a


 -- | This is the C pre-processor FOREACH (ACTION, ...) macro that applies
 -- ACTION to each argument in ... and smashes them all together with spaces in between.
 foreach :: (ExR (Token a) -> SomeGroup b) -> Group a l c r -> SomeGroup b
 foreach _f EmptyG = SomeGroup EmptyG
 foreach f (OneG t) = f (SomeR t)
 foreach f (gl :+: gr) = case foreach f gl of
   SomeGroup gl' -> case foreach f gr of
     SomeGroup gr' -> SomeGroup (gl' |><| gr')
	{-# LANGUAGE RankNTypes #-}
	{-# LANGUAGE PolyKinds, TypeOperators, TypeFamilies, KindSignatures, DataKinds, GADTs #-}
	{-# OPTIONS_GHC -Wall #-}
	-- \| The C Pre-Processor, modeled.
	--
	-- I wanted to understand what happens in the C pre-processor when you put groups of tokens together.
	-- It turns out that since C99, the pre-processor has "variadic macros" which essentially allow you to treat
	-- comma-separated sequences of token ("parameter packs" in C spec terms) as some sort of grouping construct.
	--
	-- Initially, parameter packs seem pretty uninspiring... all you can do is capture a variable number of macro arguments and
	-- then expand them all in a single place.
	--
	-- Clever people, however, figure out that with only a modicum of trickery you can write a macro that will expand
	-- to a number corresponding to the number of comma-separated groups in a parameter pack (e.g. COUNT(x,y,,z) will expand to 4).
	-- This turns out to be enough to write a map-like FOREACH (ACTION, ...) macro that will expand the ACTION macro to each
	-- argument with spaces between each expansion.
	--
	-- It seems like FOREACH is just 'map', but if ACTION expands to an empty token, or if it expands to something containing commas,
	-- you can get a varying number of things back out.
	--
	-- So this Gist is all about trying to model the C pre-processor in order to understand how FOREACH works...
	module CommaSep where

	-- classify tokens as either the empty token, or not
	data P = Empty \| NonEmpty

	-- token values a with a kind index tracking whether they're populated
	data Token a :: P -> * where
	-- the empty token
	EmptyT :: Token a 'Empty
	NET :: NonEmptyToken a -> Token a 'NonEmpty

	-- one or more non-empty tokens
	data NonEmptyToken a = OneT !a \| !(NonEmptyToken a) :*: !(NonEmptyToken a)

	infixl 7 :*:

	-- classify groups by whether at lest one comma occurs in them or not
	data Comma = NoComma \| Comma

	-- comma separate groups of a with kind indices tracking whether the
	-- leading and trailing tokens are empty, and whether the group has at
	-- least one comma.
	data Group a :: P -> Comma -> P -> * where
	-- the empty group
	EmptyG :: Group a 'Empty 'NoComma 'Empty
	-- singleton group of a non-empty token
	OneG :: !(Token a 'NonEmpty) -> Group a 'NonEmpty 'NoComma 'NonEmpty
	-- (possibly empty) token comma separated from a comma-separated token group.
	-- we don't care what happens in the middle (m1 and m2) only at the outer fringes of the tree
	(:+:) :: !(Group a l c1 m1) -> !(Group a m2 c2 r) -> Group a l 'Comma r

	infixl 6 :+:

	-- \| join in the "is populated" lattice
	--
	-- One may wonder why there are seemingly redundant cases here, after all the first two
	-- cases ought to cover all the possibilities for x and therefore the latter two cases seem redundant.
	-- But this is not so! In the course of typechecking, every even a closed kind like P actually has a third
	-- possibility, namely "a type variable which we don't know whether it will unify with Empty or NonEmpty".
	-- We need the latter two cases because in some of the case analysis in the functions below we'll know things
	-- about 'y' while 'x' is unknown, or vice versa.
	type family (x :: P) \/ (y :: P) :: P where
	'Empty \/ a = a
	'NonEmpty \/ b = 'NonEmpty
	a \/ 'NonEmpty = 'NonEmpty
	a \/ 'Empty = a

	-- when adjoining a token to a group, what happens at the far end of the group
	-- depends on whether the group has a comma - the comma absorbs any modifications
	-- T <\| [empty group] = T
	-- T <\| left , ...rest... end = T * left , ... rest ... end
	type family Absorb c old new where
	Absorb 'Comma old new = old
	Absorb 'NoComma old new = new

	-- existential quantification over the rightmost index of a type constructor
	data ExR (f :: k -> ) :: where
	SomeR :: f r -> ExR f


	-- place a token next to a group with a space between them
	(<\|) :: Token a l -> Group a m c r -> Group a (l \/ m) c (Absorb c r (l \/ r))
	EmptyT <\| EmptyG = EmptyG
	t@NET {} <\| EmptyG = OneG t
	EmptyT <\| g@OneG {} = g
	t <\| (g1 :+: g2) = (t <\| g1) :+: g2
	NET t1 <\| OneG (NET t2) = OneG (NET (t1 :*: t2))

	(\|>) :: Group a l c m -> Token a r -> Group a (Absorb c l (r \/ l)) c (r \/ m)
	EmptyG \|> EmptyT = EmptyG
	EmptyG \|> t@NET {} = OneG t
	OneG (NET t1) \|> NET t2 = OneG (NET (t1 :*: t2))
	g@OneG {} \|> EmptyT = g
	(g1 :+: g2) \|> t = g1 :+: g2 \|> t

	infixl 6 \|>
	infixr 6 <\|

	-- \| Join in the comma lattice.
	type family JoinC c1 c2 where
	JoinC 'NoComma c2 = c2
	JoinC 'Comma c2 = 'Comma
	JoinC c1 'NoComma = c1

	-- \| g1 \|><\| g2 models what happens when you place one group next to another with a space between them.
	(\|><\|) :: Group a l c1 m1 -> Group a m2 c2 r -> Group a (Absorb c1 l (m2 \/ l)) (JoinC c1 c2) (Absorb c2 r (m1 \/ r))
	EmptyG \|><\| EmptyG = EmptyG
	OneG (t@NET{}) \|><\| g = t <\| g
	g \|><\| OneG (t@NET{}) = g \|> t
	g@(_ :+: _) \|><\| EmptyG = g
	EmptyG \|><\| g@(_ :+: _) = g
	(gl :+: g1) \|><\| (g2 :+: gr) = gl :+: (g1 \|><\| g2) :+: gr

	-- \| Existentially hide all the indices on a group
	data SomeGroup a where
	SomeGroup :: Group a l c r -> SomeGroup a


	-- \| This is the C pre-processor FOREACH (ACTION, ...) macro that applies
	-- ACTION to each argument in ... and smashes them all together with spaces in between.
	foreach :: (ExR (Token a) -> SomeGroup b) -> Group a l c r -> SomeGroup b
	foreach _f EmptyG = SomeGroup EmptyG
	foreach f (OneG t) = f (SomeR t)
	foreach f (gl :+: gr) = case foreach f gl of
	SomeGroup gl' -> case foreach f gr of
	SomeGroup gr' -> SomeGroup (gl' \|><\| gr')