Skip to content

Instantly share code, notes, and snippets.

@suhailshergill
Last active August 29, 2015 14:06
Show Gist options
  • Save suhailshergill/94ad1260fcb69598c01e to your computer and use it in GitHub Desktop.
haskell: TTFI
-- The illustration of the Boehm-Berarducci encoding
-- This file is the baseline: using ordinary algebraic
-- data type and operations of constructions, deconstructions
-- and transformation.
-- We use the running example of the Exp data type from
-- tagless-final lectures
module BB_ADT where
-- Sample data type of expressions
data Exp = Lit Int
         | Neg Exp
         | Add Exp Exp

-- Constructing a sample expression: 8 + (-(1 + 2))
ti1 = Add (Lit 8) (Neg (Add (Lit 1) (Lit 2)))
-- A sample consumer: render an expression in fully-parenthesized form.
-- It is structurally recursive.
view :: Exp -> String
view expr = case expr of
  Lit n     -> show n
  Neg e     -> "(-" ++ view e ++ ")"
  Add e1 e2 -> "(" ++ view e1 ++ " + " ++ view e2 ++ ")"

ti1_view = view ti1
-- "(8 + (-(1 + 2)))"
-- Transformer
-- * Pushing the negation down
-- Previously, expressions were constructed according to this grammar:
-- * General grammar of expressions
-- * e ::= int | neg e | add e e
-- *
-- * Restricted grammar:
-- * e ::= factor | add e e
-- * factor ::= int | neg int
-- Now, only integer literals can be negated, and only once.
-- Normalize to the restricted grammar: only literals are negated, once.
-- This function is NOT structurally recursive: the Neg-of-Add clause
-- recurses on (Neg e1)/(Neg e2), which are not subterms of the input.
push_neg :: Exp -> Exp
push_neg e@Lit{} = e
push_neg e@(Neg (Lit _)) = e
push_neg (Neg (Neg e)) = push_neg e
push_neg (Neg (Add e1 e2)) = Add (push_neg (Neg e1)) (push_neg (Neg e2))
push_neg (Add e1 e2) = Add (push_neg e1) (push_neg e2)
-- A few sample transformations and their results
ti1_norm = push_neg ti1
ti1_norm_view = view ti1_norm
-- "(8 + ((-1) + (-2)))"
-- Add an extra negation: the whole term is negated, then normalized
ti1n_norm_view = view (push_neg (Neg ti1))
-- "((-8) + (1 + 2))"
{-# LANGUAGE GADTs #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
-- {-# LANGUAGE ImpredicativeTypes #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
-- {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE NoImplicitPrelude #-}
-- {-# LANGUAGE OverlappingInstances #-}
module BB_ExpSYM where
import Control.Monad.IO.Class
import Control.Monad.Identity
import Control.Monad.Free
import Control.Monad.State
import Control.Monad.Writer
import Prelude ((.), ($), ($!), Bool(..), (&&), String, (++), Show(..))
import Control.Applicative
import qualified Prelude as P
--------------------------------------------------------------------------------
-- let's start with final semantics parameterizing on evaluation strategy
-- Higher-order abstract syntax for the lambda fragment: object-language
-- functions are represented by host-language functions on 'repr'.
class LamSym repr where
  lam :: (repr a -> repr b) -> repr (a -> b)
  app :: repr (a -> b) -> repr a -> repr b

-- A convenient abbreviation: object-level let, expressed via lam/app
let_ :: LamSym repr => repr a -> (repr a -> repr b) -> repr b
let_ x y = (lam y) `app` x

-- Apply a curried two-argument object function to two arguments
app3 :: LamSym repr
        => repr (a -> b -> c)
        -> repr a -> repr b -> repr c
app3 x = app . (app x)

-- Build a curried two-argument object function from a host binary function
lam3 :: LamSym repr
        => (repr a -> repr b -> repr c)
        -> repr (a -> b -> c)
lam3 x = lam (lam . x)
-- An (uninhabited) object-language arrow type label
data a :-> b
infixr 5 :->

-- Boehm-Berarducci booleans: a boolean is its own case analysis
newtype BoolBB = BoolBB {
  unBoolBB :: forall a. (a -> a -> a)
}

-- Final-style syntax of boolean expressions
class BoolSym repr where
  bool :: Bool -> repr BoolBB
  not :: repr BoolBB -> repr BoolBB
  true :: repr BoolBB
  false :: repr BoolBB
  and :: repr BoolBB -> repr BoolBB -> repr BoolBB

-- Interpretation of object types as host types, relative to a monad m
type family Sem (m :: * -> *) a :: *
type instance Sem m BoolBB = Bool
type instance Sem m (a -> b) = m (Sem m a) -> m (Sem m b)

-- 'S' isn't really a monad, but it is up to Sem interpretation. so we can
-- define a Monad instance which simply applies 'unS' and 'S' as needed. hmm
-- it's not as straightforward because we need some way to convert a type 'a'
-- into 'Sem m a', whatever that may mean.
newtype S l m a = S { unS :: m (Sem m a) }
-- The direct, pure interpretation of booleans: the phantom-typed wrapper
-- simply carries a host Bool.
newtype BoolSem a = BoolSem { unBoolSem :: Bool }

instance BoolSym BoolSem where
  bool b = BoolSem b
  true   = bool True
  false  = bool False
  not b  = BoolSem (P.not (unBoolSem b))
  and (BoolSem p) (BoolSem q) = BoolSem (p && q)

-- Inject the pure boolean interpretation into any monad
-- (recall Sem m BoolBB = Bool).
withBoolSem :: (Monad m) => BoolSem BoolBB -> m (Sem m BoolBB)
withBoolSem b = return (unBoolSem b)
-- the reason we need to give below definition in terms of 'S' is because that
-- is what ties 'repr BoolBB' to 'Bool'. if we had another way of connecting the
-- two, then we could possibly define BoolSym in terms of (Monad repr). so one
-- way of connecting the two would be using the instance constraint, not sure if
-- this works however
-- Boolean interpretation instrumented with a State String trace: each
-- operation appends a log entry before delegating to the pure semantics
-- via withBoolSem.
instance BoolSym (S l (State String)) where
  -- treating 'bool' as compile-time 'constant', so no logging
  bool = S . withBoolSem . bool
  true = S $ do
    modify (++ "True" ++ "; ")
    withBoolSem true
  false = S $ do
    modify (++ "False" ++ "; ")
    withBoolSem false
  not x = S $ do
    arg <- unS x                 -- run the argument first, logging its effects
    modify (++ "(not " ++ show arg ++ "); ")
    withBoolSem . not . BoolSem $ arg
  and x y = S $ do
    a <- unS x                   -- left argument's effects precede the right's
    b <- unS y
    modify (++ "(and " ++ show a ++ " " ++ show b ++ "); ")
    withBoolSem $ (BoolSem a) `and` (BoolSem b)
-- Evaluation-order label: call-by-name
data Name

instance LamSym (S Name (State String)) where
  -- lam :: (S a -> S b) -> S (a -> b)
  -- lam f = S . return $ (unS . f . S)
  -- The argument computation 'x' is passed unevaluated into the body.
  lam f = S $ do
    return $ \x -> unS(f(S x))
  -- app :: S (a -> b) -> S a -> S b
  -- app x y = S $ unS x >>= ($ (unS y))
  app x y = S $ do
    a <- unS x
    a (unS y)

runName :: S Name m a -> m (Sem m a)
runName = unS

-- Run with an initial trace; yields (result, accumulated trace)
viewName x = (runState . runName) x "Trace :: "
-- Evaluation-order label: call-by-value
data Value
-- We reuse most of EDSL (S Name) except for lam.
-- vn/nv only relabel the phantom tag; the representation is unchanged.
vn :: S Value m x -> S Name m x
vn = S . unS
nv :: S Name m x -> S Value m x
nv = S . unS

instance LamSym (S Value (State String)) where
  -- lam :: S a -> S b -> S (a -> b)
  -- lam f = S . return $ (\x -> x >>= unS . f . S . return)
  -- S (a -> b) = S{ m(Sem m (a -> b)) }
  -- Sem m (a -> b) = m(Sem m a) -> m(Sem m b)
  -- S (a -> b) = S{ m( m(Sem m a) -> m(Sem m b) ) }
  -- S (B -> (B -> (B -> B)))
  -- S{ m( m(Sem m B) -> m(Sem m (B -> (B -> B))) ) }
  -- S{ m( m(Sem m B) -> m( m(Sem m B) -> m(Sem m (B -> B)) ) ) }
  -- S{ m( m(B) -> m( m(B) -> m( m(B) -> m(B) ) ) ) }
  -- The argument is evaluated (y <- x) before the body sees it:
  -- the body receives the already-computed value, re-wrapped with return.
  lam f = S $ do
    return $ \x -> do
      y <- x
      unS(f(S(return y)))
  -- app :: S (a -> b) -> S a -> S b
  app x y = nv $ app (vn x) (vn y)

runValue :: S Value m a -> m (Sem m a)
runValue = unS

viewValue x = (runState . runValue) x "Trace :: "
-- so this is just plain wrong. it works, but evaluation hasn't been
-- parameterized
-- t = (and2 (not2 true2) (not2 true2))
--
-- this doesn't work because we're passing in arguments twice
-- explicitly. basically this needs a CSE pass, which can be done in final style
-- by building sharing DAG from bottom up during interpretation
t0 = lam (lam . and)
     `app` (lam not `app` true)
     `app` (lam not `app` true)
-- Same computation, but sharing (not x) through an object-level let
t1 = lam (\x -> let_ (not x)
                $ \y -> y `and` y) `app` true
-- An unused first argument: whether it is evaluated depends on the
-- chosen evaluation order (Name vs Value)
t2 = lam (\_ -> lam (\x -> let_ (not x)
                           $ \y -> y `and` y))
     `app` false
     `app` true
{-# LANGUAGE Rank2Types #-}
-- The illustration of the Boehm-Berarducci encoding
-- We re-write an Algebraic Data Type example in BB_ADT.hs
-- using Boehm-Berarducci encoding to represent the
-- data type with lambda-terms.
-- Boehm-Berarducci encoding represents an inductive data type
-- as a non-recursive but higher-rank data type
module BB_LAM where
import BB_ADT (Exp(..))
-- We define the Boehm-Berarducci encoding of the data type
-- Exp from BB_ADT.hs in two steps.
-- First we clarify the signature of the Exp algebra: the
-- types of the algebra constructors. We represent the signature
-- as a Haskell record. It is NOT recursive.
-- The signature of the Exp algebra as a Haskell record: the types of the
-- three algebra constructors. It is NOT recursive.
data ExpDict a = ExpDict {
    dlit :: Int -> a
  , dneg :: a -> a
  , dadd :: a -> a -> a
}
-- We can define (a version of) Boehm-Berarducci encoding as
type ExpBB1 = forall a. (ExpDict a -> a)
-- To use only applications and abstractions, we curry the record
-- argument ExpDict a.
-- The wrapper newtype is needed for the first-class polymorphism
-- (actually we can go some way without newtype wrapper but eventually
-- GHC becomes confused. After all, the source language of GHC is not
-- system F.)
newtype ExpBB
  = ExpBB {
      unExpBB :: forall a.
                 (Int -> a)         -- interprets Lit
                 -> (a -> a)        -- interprets Neg
                 -> (a -> a -> a)   -- interprets Add
                 -> a
  }
-- A data type declaration defines type, constructors, deconstructors
-- and the induction principle (fold). We already have a new type ExpBB,
-- which represents fold, as will become clear soon. We need constructors:
-- Constructors for the encoded Exp: each abstracts over the three algebra
-- operations and applies the appropriate one to (the folds of) its parts.
lit :: Int -> ExpBB
lit n = ExpBB $ \onLit _onNeg _onAdd -> onLit n

neg :: ExpBB -> ExpBB
neg e = ExpBB $ \onLit onNeg onAdd -> onNeg (unExpBB e onLit onNeg onAdd)

add :: ExpBB -> ExpBB -> ExpBB
add l r = ExpBB $ \onLit onNeg onAdd ->
  onAdd (unExpBB l onLit onNeg onAdd) (unExpBB r onLit onNeg onAdd)

-- A sample expression: our first running example
tbb1 = add (lit 8) (neg (add (lit 1) (lit 2)))
-- A sample consumer, implemented directly via the induction principle
-- carried by ExpBB itself. This definition is not recursive!
viewBB :: ExpBB -> String
viewBB e = unExpBB e show showNeg showAdd
  where
    showNeg s     = "(-" ++ s ++ ")"
    showAdd s1 s2 = "(" ++ s1 ++ " + " ++ s2 ++ ")"

tbb1v = viewBB tbb1
-- "(8 + (-(1 + 2)))"
-- "(8 + (-(1 + 2)))"
-- How to define a deconstructor?
-- To give an intuition, we define the following functions roll and unroll.
-- The names are meant to evoke the terms witnessing the isomorphism between
-- an iso-recursive type and its one-step unrolling.
-- The deconstructor will use a form of these functions.
{- The following shows that we can replace type-level induction
(in BB_ADT.Exp) with a term-level induction.
The following definitions of roll and unroll are actually present
in Boehm-Berarducci's paper -- but in a very indirect way.
The crucial part is Defn 7.1: sort of self-interpreter: v cons nil ~= v
Informally, it shows that roll . unroll should be equivalent to the
identity. The equivalence relation (~=) is strictly speaking not
the equality; in modern terms, (~=) is contextual equivalence.
-}
-- First we re-write the signature of the Exp algebra, ExpDic,
-- in the form of a strictly-positive functor
-- The signature of the Exp algebra re-written as a strictly-positive functor
data ExpF a = FLit Int
            | FNeg a
            | FAdd a a

instance Functor ExpF where
  fmap f (FLit n) = FLit n
  fmap f (FNeg e) = FNeg (f e)
  fmap f (FAdd e1 e2) = FAdd (f e1) (f e2)

-- One-step roll: build an encoded term from a shallow layer
roll :: ExpF ExpBB -> ExpBB
roll (FLit n) = lit n
roll (FNeg e) = neg e
roll (FAdd e1 e2) = add e1 e2

-- The occurrence of roll below is the source of major inefficiency
-- as we do repeated pattern-matching.
unroll :: ExpBB -> ExpF ExpBB
unroll e = unExpBB e dlit dneg dadd
  where
    dlit :: Int -> ExpF ExpBB
    dlit n = FLit n
    dneg :: ExpF ExpBB -> ExpF ExpBB
    dneg e = FNeg (roll e)
    dadd :: ExpF ExpBB -> ExpF ExpBB -> ExpF ExpBB
    dadd e1 e2 = FAdd (roll e1) (roll e2)
-- Compare with the signature for ExpBB!
-- Informally, ExpBB unfolds all the way whereas ExpD unfolds only one step
newtype ExpD =
  ExpD {
    unED :: forall w.
            (Int -> w)
            -> (ExpBB -> w)
            -> (ExpBB -> ExpBB -> w)
            -> w
  }

-- One-step deconstructor: case-analysis on the top constructor only,
-- re-assembling the immediate subterms back into full ExpBB values
-- (via lit/neg/add) so the caller can keep working on them.
decon :: ExpBB -> ExpD
decon e = unExpBB e dlit dneg dadd
  where
    dlit n = ExpD $ \onlit onneg onadd -> onlit n
    dneg e = ExpD $ \onlit onneg onadd -> onneg (unED e lit neg add)
    dadd e1 e2 = ExpD $ \onlit onneg onadd ->
      onadd (unED e1 lit neg add) (unED e2 lit neg add)
-- We redo viewBB, this time by (one-step) pattern-matching plus
-- explicit recursion, as one would with an ordinary data type.
viewBBd :: ExpBB -> String
viewBBd e =
  unED (decon e)
       show
       (\sub -> "(-" ++ viewBBd sub ++ ")")
       (\l r -> "(" ++ viewBBd l ++ " + " ++ viewBBd r ++ ")")

tbb1vd = viewBBd tbb1
-- "(8 + (-(1 + 2)))"
-- So, we can do any pattern-matching!
-- And do non-structurally--recursive processing on ExpBB
{-
push_neg :: Exp -> Exp
push_neg e@Lit{} = e
push_neg e@(Neg (Lit _)) = e
push_neg (Neg (Neg e)) = push_neg e
push_neg (Neg (Add e1 e2)) = Add (push_neg (Neg e1)) (push_neg (Neg e2))
push_neg (Add e1 e2) = Add (push_neg e1) (push_neg e2)
-}
-- The negation-pushing transformer on the encoded type: same logic as
-- BB_ADT.push_neg, written with the one-step deconstructor. The nested
-- deconstruction in dneg mirrors the nested patterns (Neg (Lit _)) etc.
push_negBB :: ExpBB -> ExpBB
push_negBB e = unED (decon e) dlit dneg dadd
  where
    dlit _ = e                      -- bare literal: unchanged
    dneg e2 = unED (decon e2) dlit2 dneg2 dadd2
      where
        dlit2 n = e                 -- Neg (Lit _): already in normal form
        dneg2 e = push_negBB e      -- double negation cancels
        dadd2 e1 e2 = add (push_negBB (neg e1)) (push_negBB (neg e2))
    dadd e1 e2 = add (push_negBB e1) (push_negBB e2)

tbb1_norm = push_negBB tbb1
tbb1_norm_viewBB = viewBBd tbb1_norm
-- "(8 + ((-1) + (-2)))"
-- Add an extra negation
tbb1n_norm_viewBB = viewBBd (push_negBB (neg tbb1))
-- "((-8) + (1 + 2))"
-- "((-8) + (1 + 2))"
-- ------------------------------------------------------------------------
-- A bit of algebra
-- The types (ExpF a -> a) and ExpDict a are isomorphic.
-- Hence, the operations of an Exp algebra with the carrier U
-- could be specified either in the form of a value of the type ExpDict U,
-- or in the form of a function ExpF U -> U.
-- The isomorphism (ExpF a -> a) <-> ExpDict a, in both directions.
sigma_dict :: (ExpF a -> a) -> ExpDict a
sigma_dict sigma = ExpDict
  { dlit = sigma . FLit
  , dneg = sigma . FNeg
  , dadd = \x y -> sigma (FAdd x y)
  }

dict_sigma :: ExpDict a -> (ExpF a -> a)
dict_sigma dict layer = case layer of
  FLit n     -> dlit dict n
  FNeg e     -> dneg dict e
  FAdd e1 e2 -> dadd dict e1 e2
-- Recall ExpBB1 as an uncurried form of the Boehm-Berarducci encoding.
-- It follows then the encoding can be written in the equivalent form
-- The Boehm-Berarducci encoding in functor form: fold over (ExpF a -> a)
newtype ExpC = ExpC {
  unExpC :: forall a.
            (ExpF a -> a)
            -> a
}
-- ExpC is indeed fully equivalent to ExpBB, and inter-convertible.
-- h runs an ExpBB fold with the operations drawn from a dictionary.
h :: ExpDict a -> ExpBB -> a
h dict e = unExpBB e (dlit dict) (dneg dict) (dadd dict)
exp_BB_C :: ExpBB -> ExpC
exp_BB_C e = ExpC $ \f -> h (sigma_dict f) e
exp_C_BB :: ExpC -> ExpBB
exp_C_BB e = ExpBB $ \dlit dneg dadd ->
  unExpC e (dict_sigma (ExpDict dlit dneg dadd))
-- Let us write the constructors explicitly: the initial-algebra structure
-- map sigma_expC :: ExpF ExpC -> ExpC, one clause per constructor.
sigma_expC :: ExpF ExpC -> ExpC
sigma_expC (FLit n) = ExpC $ \f -> f (FLit n)
sigma_expC (FNeg e) = ExpC $ \f -> f (FNeg (unExpC e f))
sigma_expC (FAdd e1 e2) = ExpC $ \f -> f (FAdd (unExpC e1 f) (unExpC e2 f))
-- Sample term
t1c = exp_BB_C tbb1
-- The advantage of ExpC is that functions roll and unroll take
-- a particularly elegant form
rollC :: ExpF ExpC -> ExpC
rollC = sigma_expC
unrollC :: ExpC -> ExpF ExpC
unrollC e = unExpC e (fmap sigma_expC)
-- (ExpC, sigma_ExpC) is the initial algebra of the functor ExpF.
-- (the same holds for ExpBB since it is isomorphic to ExpC)
-- Indeed, consider an arbitrary ExpF algebra with the carrier U
-- and the operations sigma :: ExpF U -> U.
-- We show that there exists a unique homomorphism
-- (hc sigma) :: ExpC -> U such that the following holds
-- hc sigma . sigma_ExpC = sigma . fmap (hc sigma)
-- Consider x = FLit n :: ExpF ExpC.
-- Then (sigma . fmap (hc sigma)) x = sigma (FLit n)
-- Thus we obtain
-- hc sigma (ExpC $ \f -> f (FLit n)) = sigma (FLit n)
-- from which we get
-- hc sigma e = unExpC e sigma
-- Consider x = FNeg e :: ExpF ExpC.
-- Then (sigma . fmap (hc sigma)) x = sigma (FNeg (hc sigma e))
-- Thus we obtain
-- hc sigma (ExpC $ \f -> f (FNeg (unExpC e f))) = sigma (FNeg (hc sigma e))
-- Again, the only way to satisfy the equality for all sigma and e is to set
-- hc sigma e = unExpC e sigma
-- The case of x = FAdd e1 e2 :: ExpF ExpC is similar.
-- Thus we get the unique morphism from (ExpC, sigma_ExpC) to
-- any other ExpF algebra.
-- The unique ExpF-algebra homomorphism out of the initial algebra ExpC:
-- folding with the given operations is just applying the encoded term.
hc :: (ExpF a -> a) -> ExpC -> a
hc alg e = unExpC e alg

-- An ExpF algebra with carrier String: one rendering step per layer.
sigma_view :: ExpF String -> String
sigma_view layer = case layer of
  FLit n     -> show n
  FNeg e     -> "(-" ++ e ++ ")"
  FAdd e1 e2 -> "(" ++ e1 ++ " + " ++ e2 ++ ")"

-- We immediately get the function to view the values of ExpC type:
viewC :: ExpC -> String
viewC = hc sigma_view

t1c_view = viewC t1c
-- "(8 + (-(1 + 2)))"
-- "(8 + (-(1 + 2)))"
-- (ExpBB, sigma_ExpBB) is also initial algebra: there is a unique
-- homomorphism from it to any other Exp algebra. Here is this morphism
-- h dict . roll = (dict_sigma dict) . fmap (h dict)
How to take a TAIL of a functional stream
Or how to obtain the first few elements of a functional (success/failure
continuation stream) without first converting it to the regular
Cons/Nil stream. The latter conversion is unreliable: it diverges for
a monadic functional stream over a strict monad. Our technique works
even in the latter case.
$Id: car-fstream.lhs,v 1.3 2012/04/04 03:40:11 oleg Exp oleg $
> {-# LANGUAGE Rank2Types #-}
> module Streams where
> import Control.Monad.Identity
The most common way to represent (a potentially infinite) stream is as
a data structure of the following _recursive_ type:
> data Stream a = Nil | Cons a (() -> Stream a)

The thunking is explicit so the definition applies to strict
languages too. We can also represent a stream pure functionally.
The following is _not_ a recursive type, but a higher-ranked one.

> newtype FStream a = SFK (forall ans. SK a ans -> FK ans -> ans)
> type FK ans = () -> ans             -- failure continuation
> type SK a ans = a -> FK ans -> ans  -- success continuation
> unSFK (SFK a) = a
If we ignore the newtype tagging (needed to properly deal with the
higher-ranked types in Haskell), an FStream is a function of two
continuations, success and failure continuations. The latter is
invoked if the stream has no elements. The stream invokes the success
continuation if the stream has at least one element. The continuation
receives that element and another failure continuation, to ask for
more elements. As an example of such a functional stream, see Ralf
Hinze's ``Deriving Backtracking Monad Transformers'' (ICFP'00).
Obviously there is a correspondence between the two
representations. For example:
The two directions of the correspondence:

> fstream_to_stream fstream = unSFK fstream sk fk
>   where fk () = Nil
>         sk a fk' = Cons a fk'
>
> stream_to_fstream Nil = SFK( \sk fk -> fk () )
> stream_to_fstream (Cons a rest) =
>     SFK(\sk fk ->
>          sk a (\ () -> unSFK (stream_to_fstream (rest ())) sk fk))

A test data stream

> s1 = Cons 'a' (\ () -> Cons 'b' (\ () -> Cons 'c' (\ () -> Nil)))
> f1 = stream_to_fstream s1
> s1' = fstream_to_stream f1
>
> test1 = unSFK f1 (\a k -> a:(k())) (\() -> [])
*Streams> test1
"abc"
Mitch Wand and D.Vaillancourt (see ICFP'04) have described the
correspondence in more formal terms.
There is an obvious difference between the two representations
however. It is easy to find the first two elements of a Stream:
The Nil case was missing in the original, so asking for more elements
than a finite stream contains crashed with a pattern-match failure;
now the available prefix is returned instead.

> stream_take:: Int -> Stream a -> [a]
> stream_take 0 _ = []
> stream_take _ Nil = []
> stream_take n (Cons a r) = a : (stream_take (n-1) $ r ())
It seems all but impossible to find the first two elements of an
FStream without first converting it all to Stream. That conversion
becomes problematic if we had a monadic functional stream:
> newtype MFStream m a = MSFK (forall ans. MSK m a ans -> MFK m ans -> m ans)
> type MFK m ans = m ans                      -- failure continuation
> type MSK m a ans = a -> MFK m ans -> m ans  -- success continuation
> unMSFK (MSFK a) = a
>
>

Note how sk builds the whole rest of the stream (fk' is run eagerly via
>>=) before returning the Cons cell.

> mfstream_to_stream:: (Monad m) => MFStream m a -> m (Stream a)
> mfstream_to_stream mfstream = unMSFK mfstream sk fk
>   where fk = return Nil
>         sk a fk' = fk' >>= (\rest -> return (Cons a (\ () -> rest)))
if m is a strict monad and fstream is infinite, the conversion to Stream
diverges. Indeed,
An infinite functional stream of the naturals:

> nats = nats' 0 where
>     nats' n = MSFK (\sk fk -> sk n (unMSFK (nats' (n+1)) sk fk))
>
> test2 = runIdentity (mfstream_to_stream nats >>= (return . stream_take 5))

*Streams> test2
[0,1,2,3,4]

but for a strict monad like IO:

> test3 = mfstream_to_stream nats >>= (print . stream_take 5)
it diverges without printing anything. Indeed, before we can print
even the first element from nats, we must convert it entirely to a
stream.
It turns out, we can split fstream as if it were just a data
structure. The following function returns an FStream of at most one item.
The item is a pair of the head of the original stream and of the
_rest stream_
> split_fstream :: FStream a -> FStream (a, FStream a)
> split_fstream fstream = unSFK fstream ssk caseB
>   where caseB () = SFK(\a b -> b ())
>         ssk v fk = SFK(\a b ->
>                      a (v, (SFK (\s' f' ->
>                               unSFK (fk ())
>                                 (\ (v'',x) _ -> s' v''
>                                      (\ () -> unSFK x s' f'))
>                                 f')))
>                        (\ () -> b ()))
>
> f11 = split_fstream f1
> e1 = unSFK f11 (\ (a,_) _ -> a) undefined
> f1r = unSFK f11 (\ (_,r) _ -> r) undefined
> f1rs = split_fstream f1r
> e2 = unSFK f1rs (\ (a,_) _ -> a) undefined
Indeed, we can obtain the first two elements from the finite stream f1
*Streams> e1
'a'
*Streams> e2
'b'
Monadic case (quoted almost directly from the paper with Chung-chieh Shan,
Amr A. Sabry, Daniel P. Friedman)
> split_mfstream :: Monad m => MFStream m a -> m (MFStream m (a, MFStream m a))
> split_mfstream m = unMSFK m ssk caseB
>   where caseB = return $ MSFK(\a b -> b)
>         ssk v fk = return $ MSFK(\a b ->
>                      a (v, (MSFK (\s' f' ->
>                               fk >>= (\r -> unMSFK r
>                                         (\ (v'',x) _ -> s' v'' (unMSFK x s' f'))
>                                         f'))))
>                        b)
The original passed 'undefined' as the failure continuation, so taking
more elements than a finite stream has crashed. The recursion is moved
inside the success continuation so that the failure continuation can
simply contribute an empty remainder, yielding the available prefix.
For infinite or long-enough streams the behaviour is unchanged.

> mfstream_take:: Monad m => Int -> MFStream m a -> m [a]
> mfstream_take 0 _ = return []
> mfstream_take n m = do
>   m' <- split_mfstream m
>   unMSFK m'
>     (\ (v,rs) _ -> do
>         rl <- mfstream_take (n-1) rs
>         return (v:rl))
>     (return [])

> test2' = runIdentity (mfstream_take 5 nats)

*Streams> test2'
[0,1,2,3,4]

> test3' = mfstream_take 5 nats >>= print
But now also
*Streams> test3'
[0,1,2,3,4]
{-# LANGUAGE TypeFamilies, EmptyDataDecls, TypeOperators #-}
{-# LANGUAGE FlexibleInstances #-}
-- Embedding a higher-order domain-specific language (simply-typed
-- lambda-calculus with constants) with a selectable evaluation order:
-- Call-by-value, call-by-name, call-by-need in the same Final Tagless framework
module CB where
import Data.IORef
import Control.Monad.IO.Class
-- Our EDSL is typed. EDSL types are built from the following two
-- type constructors:
data IntT
data a :-> b
infixr 5 :->
-- We could have used Haskell's type Int and the arrow -> constructor.
-- We would like to emphasize however that EDSL types need not be identical
-- to the host language types. To give the type system to EDSL, we merely
-- need `labels' -- which is what IntT and :-> are

-- The (higher-order abstract) syntax of our DSL
class EDSL exp where
  -- if we were using (->) instead of (:->) this would be
  -- (f a -> f b) -> f (a -> b)
  lam :: (exp a -> exp b) -> exp (a :-> b)
  -- can we make all of this work even if we use (->) instead of (:->) and
  -- ($) instead of (app)?
  app :: exp (a :-> b) -> exp a -> exp b
  int :: Int -> exp IntT -- Integer literal
  add :: exp IntT -> exp IntT -> exp IntT
  sub :: exp IntT -> exp IntT -> exp IntT

-- A convenient abbreviation: object-level let via lam/app
let_ :: EDSL exp => exp a -> (exp a -> exp b) -> exp b
let_ x y = (lam y) `app` x

-- A sample EDSL term: (\x -> let y = x+x in y+y) 10
t :: EDSL exp => exp IntT
t = lam (\x -> let_ (x `add` x)
               $ \y -> y `add` y) `app` int 10
-- typed BB style booleans?
-- Church-style booleans in the typed EDSL: a boolean selects one of
-- two alternatives of the same type.
true :: EDSL exp => exp (a :-> a :-> a)
true = lam (\onTrue -> lam (\_ -> onTrue))

false :: EDSL exp => exp (a :-> a :-> a)
false = lam (\_ -> lam (\onFalse -> onFalse))
-- not2 :: EDSL exp => exp ((a :-> a :-> a) :-> (b :-> b :-> b))
-- not2 = lam (\x -> x `app` true) `app` _foo
-- Interpretation of EDSL types as host language types
-- The type interpretation function Sem is parameterized by 'm',
-- which is assumed to be a Monad.
--
-- HMM: why do we need 'm' to be a monad? right now, 'm' is simply a type
-- constructor taking one parameter
-- - if we wanted to use an associated type (family?) then whom would we
-- associate this with? hmm with 'S l m' perhaps and turn it into a class
-- (instead of newtype)?
type family Sem (m :: * -> *) a :: *
type instance Sem m IntT = Int
-- if we wanted to map (->) to (->) (instead of (:->) this is where we'd do it
type instance Sem m (a :-> b) = m (Sem m a) -> m (Sem m b)
-- Interpretation of EDSL expressions as values of the host language (Haskell)
-- An EDSL expression of the type a is interpreted as a Haskell value
-- of the type S l m a, where m is a Monad (the parameter of the interpretation)
-- and l is the label for the evaluation order (one of Name, Value, or Lazy).
-- (S l m) is not quite a monad -- only up to the Sem interpretation
--
-- HMM: can we constrain 'l' somehow? and what would be the benefit of doing
-- that? that we disallow certain other type tags?
newtype S l m a = S { unS :: m (Sem m a) }
-- Call-by-name
data Name
-- Call-by-name interpreter; add/sub log via putStrLn so sharing (or its
-- absence) is observable in the output.
instance MonadIO m => EDSL (S Name m) where
  int = S . return
  add x y = S $ do a <- unS x
                   b <- unS y
                   liftIO $ putStrLn "Adding"
                   return (a + b)
  sub x y = S $ do a <- unS x
                   b <- unS y
                   liftIO $ putStrLn "Subtracting"
                   return (a - b)
  -- (S . return) :: (m (Sem m a) -> m (Sem m b)) -> S Name m (a :-> b)
  -- (unS . f . S) :: m (Sem m a) -> m (Sem m b)
  -- S :: m (Sem m a) -> S Name m a
  -- f :: S Name m a -> S Name m b
  -- unS :: S Name m b -> m (Sem m b)
  -- The argument computation is passed in unevaluated: call-by-name.
  lam f = S . return $ (unS . f . S)
  -- hmm (app) actually carries out (>>=), so probably can't map it to ($)
  app x y = S $ unS x >>= ($ (unS y))
-- Tests
runName :: S Name m a -> m (Sem m a)
runName = unS
-- Evaluating:
-- t = (lam $ \x -> let_ (x `add` x)
--                  $ \y -> y `add` y) `app` int 10
-- The addition (x `add` x) is performed twice because y is bound
-- to a computation, and y is evaluated twice
t0SN :: IO ()
t0SN = runName t >>= print
{-
Adding
Adding
Adding
40
-}
-- A more elaborate example
t1 :: EDSL exp => exp IntT
t1 = lam (\x -> let_ (x `add` x)
                $ \y -> lam $ \z ->
                          z `add` (z `add` (y `add` y))) `app` (int 10 `sub` int 5)
     `app` (int 20 `sub` int 10)
t1SN :: IO ()
t1SN = runName t1 >>= print
{-
*CB> t1SN
Subtracting
Subtracting
Subtracting
Subtracting
Adding
Subtracting
Subtracting
Adding
Adding
Adding
Adding
40
-}
-- A better example
t2 :: EDSL exp => exp IntT
t2 = lam (\_ -> lam $ \x -> let_ (x `add` x)
                            $ \y -> y `add` y)
     `app` (int 100 `sub` int 10)
     `app` (int 5 `add` int 5)
-- The result of subtraction was not needed, and so it was not performed
-- OTH, (int 5 `add` int 5) was computed four times
t2SN :: IO ()
t2SN = runName t2 >>= print
{-
*CB> t2SN
Adding
Adding
Adding
Adding
Adding
Adding
Adding
40
-}
-- Call-by-value
-- Call-by-value
data Value
-- We reuse most of EDSL (S Name) except for lam.
-- vn/nv only relabel the evaluation-order tag; the representation is same.
vn :: S Value m x -> S Name m x
vn = S . unS
nv :: S Name m x -> S Value m x
nv = S . unS
instance MonadIO m => EDSL (S Value m) where
  int = nv . int
  add x y = nv $ add (vn x) (vn y)
  sub x y = nv $ sub (vn x) (vn y)
  -- the good news is that the magic doesn't happen in (app), it's really
  -- (lam) which differentiate the various CB schemes
  app x y = nv $ app (vn x) (vn y)
  -- This is the only difference between CBN and CBV:
  -- lam first evaluates its argument, no matter what
  -- This is the definition of CBV after all
  lam f = S . return $ (\x -> x >>= unS . f . S . return)
runValue :: S Value m a -> m (Sem m a)
runValue = unS
-- We now evaluate the previously written tests t, t1, t2
-- under the new interpretation
t0SV :: IO ()
t0SV = runValue t >>= print
{-
*CB> t0SV
Adding
Adding
40
-}
t1SV :: IO ()
t1SV = runValue t1 >>= print
{-
*CB> t1SV
Subtracting
Adding
Subtracting
Adding
Adding
Adding
40
-}
-- Although the result of subtraction was not needed, it was still performed
-- OTH, (int 5 `add` int 5) was computed only once
t2SV :: IO ()
t2SV = runValue t2 >>= print
{-
*CB> t2SV
Subtracting
Adding
Adding
Adding
40
-}
-- Call-by-need
-- Memoize a computation: the returned action runs 'm' at most once and
-- caches the result in an IORef; later runs return the cached value.
-- NOTE(review): not safe under concurrent use of the returned action --
-- two threads could both run 'm' before the write; confirm single-threaded
-- use is intended.
share :: MonadIO m => m a -> m (m a)
share m = do
  r <- liftIO $ newIORef (False,m)   -- (computed?, computation-or-cached-value)
  let ac = do
        (f,m) <- liftIO $ readIORef r
        if f then m                  -- already computed: cached 'return v'
          else do -- forget effect (from `app`?) + record it
            v <- m
            liftIO $ writeIORef r (True,return v)
            return v
  return ac
-- Call-by-need
data Lazy
-- We reuse most of EDSL (S Name) except for lam.
ln :: S Lazy m x -> S Name m x
ln = S . unS
nl :: S Name m x -> S Lazy m x
nl = S . unS
instance MonadIO m => EDSL (S Lazy m) where
  int = nl . int
  add x y = nl $ add (ln x) (ln y)
  sub x y = nl $ sub (ln x) (ln y)
  app x y = nl $ app (ln x) (ln y)
  -- This is the only difference between CBN and CBNeed
  -- lam shares its argument, no matter what
  -- This is the definition of CBNeed after all
  lam f = S . return $ (\x -> share x >>= unS . f . S)
runLazy :: S Lazy m a -> m (Sem m a)
runLazy = unS
-- We now evaluate the previously written tests t, t1, t2
-- under the new interpretation
-- Here, Lazy is just as efficient as CBV
t0SL :: IO ()
t0SL = runLazy t >>= print
{-
*CB> t0SL
Adding
Adding
40
-}
-- Ditto
t1SL :: IO ()
t1SL = runLazy t1 >>= print
{-
*CB> t1SL
Subtracting
Subtracting
Adding
Adding
Adding
Adding
40
-}
-- Now, Lazy is better than both CBN and CBV: subtraction was not needed,
-- and it was not performed.
-- All other expressions were needed, and evaluated once.
t2SL :: IO ()
t2SL = runLazy t2 >>= print
{-
*CB> t2SL
Adding
Adding
Adding
40
-}
-- Establishing a bijection between the values of the type a and integers, with
-- the operations to retrieve the value given its key,
-- to find the key for the existing value, and to extend the
-- bijection with a new association.
-- The type 'a' of values should at least permit equality comparison;
-- In the present implementation, we require 'a' to be a member
-- of Ord.
-- There are many ways to implement bi-maps, for example, using hash tables,
-- or maps.
-- Our implementation uses Data.Map and Data.IntMap to record
-- both parts of the association.
module CSE_BiMap (
BiMap, empty,
lookup_key,
lookup_val,
insert,
size,
)
where
import qualified Data.Map as M
import qualified Data.IntMap as IM
-- A bijection between values of type 'a' and Int keys, stored in both
-- directions so lookups either way are efficient.
data BiMap a = BiMap (M.Map a Int) (IM.IntMap a)

-- | Find the key for a value, if the value is already associated.
lookup_key :: Ord a => a -> BiMap a -> Maybe Int
lookup_key v (BiMap val2key _) = M.lookup v val2key

-- | Find the value for a key. Partial: the key must be present.
lookup_val :: Int -> BiMap a -> a
lookup_val k (BiMap _ key2val) = key2val IM.! k

-- | Insert a value, allocating the next key (the current size), and
-- return the new key together with the extended bijection.
-- Alas, the Map interface has no single operation to insert and find the
-- index at the same time (although such an operation is easily possible).
insert :: Ord a => a -> BiMap a -> (Int, BiMap a)
insert v (BiMap val2key key2val) =
  let newKey = IM.size key2val
  in (newKey, BiMap (M.insert v newKey val2key) (IM.insert newKey v key2val))

-- | The empty bijection.
empty :: BiMap a
empty = BiMap M.empty IM.empty

instance Show a => Show (BiMap a) where
  show (BiMap _ key2val) = "BiMap" ++ show (IM.toList key2val)

-- | Number of associations in the bijection.
size :: BiMap a -> Int
size (BiMap _ key2val) = IM.size key2val
{-# LANGUAGE NoMonomorphismRestriction #-}
-- * Implicit sharing in the tagless-final style, or:
-- * seemingly pure hash-consing
-- The imperative details of hash-consing are hidden better in a
-- final-tagless style of the DSL embedding, described next.
module CSE_ExpF (Exp(..),
ExpI(..),
R(..),
N(..),NodeId, DAG(..),
run_expN,
do_bench,
mul,
) where
import Control.Monad.State
import CSE_BiMap
import qualified CSE_ExpI as ExpI (Exp(..), sklansky)
-- * //
-- * Tagless-final EDSL embedding
-- In the tagless-final approach, embedded DSL
-- expressions are built with `constructor functions' such as |constant|,
-- |variable|, |add| rather than the data constructors |Constant|,
-- |Variable|, |Add|. The constructor functions yield a representation
-- for the DSL expression being built. The representation could be a
-- string (for pretty-printing), an integer (for evaluator), etc. Since
-- the same DSL expression may be concretely represented in several ways,
-- the constructor functions are polymorphic, parameterized by the
-- representation |repr|. In other words, the constructor functions are
-- the members of the type class
-- Tagless-final syntax: constructor functions, polymorphic in the
-- representation 'repr' of the expression being built.
class Exp repr where
  constant :: Int -> repr Int
  variable :: String -> repr Int
  add :: repr Int -> repr Int -> repr Int
-- The data type Exp from ExpI.hs becomes a type class
-- Our expressions here are of only one type, Int. We could have dropped
-- Int and made 'repr' to be a type variable of the kind *.
-- We keep the 'repr Int' notation nevertheless, for consistency
-- with the tagless final paper, and to allow for extensions (e.g.,
-- the addition of booleans).
-- Sample expressions (as in ExpI.hs, but with lower-case constructors)
exp_a = add (constant 10) (variable "i1")
exp_b = add exp_a (variable "i2")

-- Multiplication by a known Int via repeated doubling, building an
-- expression with O(log n) additions.
-- NOTE(review): a negative n never reaches a base case and diverges.
mul :: Exp repr => Int -> repr Int -> repr Int
mul 0 _ = constant 0
mul 1 x = x
mul n x
  | n `mod` 2 == 0 = mul (n `div` 2) (add x x)
  | otherwise      = add x (mul (n - 1) x)

exp_mul4 = mul 4 (variable "i1")
exp_mul8 = mul 8 (variable "i1")
-- * Interpreters for our DSL: instances of Exp
-- The first interpreter interprets a tagless-final expression
-- as ExpI.Exp data type, converting the DSL expressions here into
-- the so-called `initial form' of ExpI.hs
-- This is one way to print the tagless-final expressions, since
-- we have derived a Show instance for ExpI.Exp
-- Interpreting a tagless-final term as the initial (data-type) AST from
-- CSE_ExpI; the phantom parameter t aligns the kind with 'repr :: * -> *'.
newtype ExpI t = ExpI (ExpI.Exp)
-- * Question: why do we need the wrapper?
instance Exp ExpI where
  constant = ExpI . ExpI.Constant
  variable = ExpI . ExpI.Variable
  add (ExpI x) (ExpI y) = ExpI (ExpI.Add x y)
-- Unwrap to get a showable initial-style AST
test_shb = case exp_b of ExpI e -> e
-- Add (Add (Constant 10) (Variable "i1")) (Variable "i2")
test_sh4 = case exp_mul4 of ExpI e -> e
{-
Add (Add (Variable "i1") (Variable "i1"))
(Add (Variable "i1") (Variable "i1"))
-}
-- The conversion to ExpI takes the form of an instance
-- of the class Exp that provides the interpretation for the expression
-- primitives, as the values for the domain ExpI.
-- * Another interpreter: the evaluator
-- That interpretation of tagless-final expressions is not
-- the only one possible. We can write an evaluator, interpreting
-- each expression as an element of the domain R
-- | Evaluation environment: values for the free variables of an expression.
type REnv = [(String,Int)]

-- | The evaluator's semantic domain: a function from the environment to
-- a value — a Reader monad, in effect.
newtype R t = R{unR :: REnv -> t}

instance Exp R where
  constant x = R (\_ -> x)
  -- An unbound variable is a hard error; acceptable for this teaching
  -- interpreter, where the caller supplies the full environment.
  variable x = R (\env -> maybe (error $ "no var: " ++ x) id $
                          lookup x env)
  add e1 e2 = R (\env -> unR e1 env + unR e2 env)

-- | Evaluate exp_mul4 with i1 = 5; the result is 20.
test_val4 = unR exp_mul4 [("i1",5)] -- 20
-- * Evaluating sample expressions is good for debugging
-- * Write once (exp_mul4), interpret many times
-- We are using exactly the same exp_mul4 as in
-- test_sh4. We are evaluating it differently. The gist of
-- the final tagless approach is to write an expression once
-- and evaluate it many times.
-- * //
-- ------------------------------------------------------------------------
-- * Detecting implicit sharing (common subexpression elimination)
-- * Goal: detect structurally equal subexpressions and share them,
-- * converting an expression tree into a DAG
-- * Idea: rather than convert an ASTree to ASDag, build the
-- * ASDag to begin with.
-- * Representing a DAG
-- a collection of Nodes identified by NodeIds,
-- | Identity of a node within the DAG.
type NodeId = Int

-- We stress: Node is NOT a recursive data structure: children are
-- referenced by NodeId, so comparing two Node values takes constant time.
data Node
  = NConst Int
  | NVar String
  | NAdd !NodeId !NodeId
  deriving (Eq, Ord, Show)

-- | The DAG itself: a bijection between nodes and their ids.  One could
-- use several bimaps for different operations (addition, subtraction, etc).
newtype DAG = DAG (BiMap Node) deriving Show
-- * BiMap -- (partial) bijection a <-> Int
-- The mapping between |Node|s and |NodeId|s is realized
-- through a BiMap interface
-- BiMap a establishes a bijection between the values of the type |a|
-- and integers, with the operations to retrieve the value given its key,
-- to find the key for the existing value, and to extend the bijection
-- with a new association.
-- * //
-- * Bottom-up DAG construction
-- As we construct a node for a subexpression, we check if the DAG already
-- has the equal node. If so, we return its |NodeId|; otherwise, we add the node
-- to the DAG.
-- The computation is stateful (we are using the State monad)
-- Hash-consing proper: insert Node into the DAG if it isn't
-- there already, and return its hash code.
-- | Hash-consing proper: return the id of an equal node already present
-- in the DAG, or insert the node and return its freshly allocated id.
hashcons :: Node -> State DAG NodeId
hashcons node = do
  DAG table <- get
  case lookup_key node table of
    Just nid -> return nid
    Nothing  ->
      let (nid, table') = insert node table
      in put (DAG table') >> return nid
-- * Bottom-up DAG construction: tagless-final helps
-- The bottom-up DAG construction maps well to computing a representation
-- for a tagless-final expression, which is also evaluated bottom-up. The
-- DAG construction can therefore be written as a tagless-final
-- interpreter, an instance of the type class Exp. The interpreter maps
-- a tagless-final expression to the concrete representation
-- * Interpreting Exp as a Node in the current DAG
-- | DAG-construction interpreter: an expression denotes the stateful
-- computation that enters its nodes into the DAG and yields the root's id.
newtype N t = N{unN :: State DAG NodeId}

instance Exp N where
  constant x = N(hashcons $ NConst x)
  variable x = N(hashcons $ NVar x)
  -- Children are hash-consed first (bottom-up), then the Add node itself.
  add e1 e2 = N(do
    h1 <- unN e1
    h2 <- unN e2
    hashcons $ NAdd h1 h2)
-- * The state is hidden behind the tagless-final veneer
-- run the DAG-construction interpreter and the node,
-- as a reference within a DAG.
-- | Run the DAG-construction interpreter from an empty DAG, returning the
-- root node's id together with the completed DAG.
run_expN :: N t -> (NodeId, DAG)
run_expN (N m) = runState m (DAG empty)
-- We re-interpret exp_mul4 differently this time.
-- The DAG-representation makes the sharing patent
-- A DAG is printed as the list of |(NodeId,Node)| associations. The
-- sharing of the left and right summands below is patent
-- | exp_mul4 re-interpreted as a DAG; the sharing is explicit.
test_sm4 = run_expN exp_mul4
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)])

-- | exp_mul8 as a DAG: one node more than exp_mul4.
test_sm8 = run_expN exp_mul8
-- (3,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1),(3,NAdd 2 2)])
-- * We have constructed netlists, topologically sorted
-- a netlist is a low-level representation of a circuit listing the gates and
-- their connections, used in circuit manufacturing. Since our BiMap allocated
-- monotonically increasing NodeIds, the resulting netlist comes out
-- topologically sorted. Therefore, we can straightforwardly generate machine
-- code after the standard register allocation.
-- We retain all the information about exp_mul4. In addition, all
-- sharing is fully explicit. As we can see, the evaluation process finds
-- common subexpressions automatically.
-- * //
-- ------------------------------------------------------------------------
-- * Superficially `effectless' common sub-expression elimination
-- sklansky example by Matthew Naylor, with further credit to
-- Chalmers folk, Mary Sheeran and Emil Axelsson.
-- It is said to be similar to scanl1, but contains more parallelism
-- The function sklansky is defined in ExpI.hs
-- * To remind what sklansky produces:
-- | Run sklansky over strings, to display the shape of the running sums
-- it produces (each sum rendered as a parenthesized expression).
test_sklansky_o n = ExpI.sklansky addition xs
  where
    addition x y = "(" ++ x ++ "+" ++ y ++ ")"
    xs = Prelude.map (("v"++) . show) [1..n]
-- (v1+v2) is used three times
{-
test_sklansky_o 4
["v1","(v1+v2)","((v1+v2)+v3)","((v1+v2)+(v3+v4))"]
-}
-- (v1+v2) is used seven times
{-
test_sklansky_o 8
["v1","(v1+v2)",
"((v1+v2)+v3)",
"((v1+v2)+(v3+v4))",
"(((v1+v2)+(v3+v4))+v5)",
"(((v1+v2)+(v3+v4))+(v5+v6))",
"(((v1+v2)+(v3+v4))+((v5+v6)+v7))",
"(((v1+v2)+(v3+v4))+((v5+v6)+(v7+v8)))"]
-}
-- * sklansky challenge: sharing across expressions, not within an expression
-- * We re-write in the tagless-final style
-- * scratch that: we use sklansky as it was
-- :t ExpI.sklansky
-- ExpI.sklansky :: (a -> a -> a) -> [a] -> [a]
-- Actually, there is no re-write
-- We use Matthew Naylor's code, in pure Haskell, as it was
-- * //
-- We run it differently though
-- | Run sklansky, unchanged, under the DAG-construction interpreter:
-- the common sub-expressions across all the produced sums are detected
-- and shared automatically.  Returns the roots and the final DAG.
test_sklansky n = runState sk (DAG empty)
  where
    -- mapM replaces the equivalent 'sequence (map ...)' spelling
    sk = mapM unN (ExpI.sklansky add xs)
    xs = map (variable . show) [1..n]
-- Implicit sharing works: hash code 2 (for v1+v2) is used three times
{-
*ExpF> test_sklansky 4
([0,2,4,7],
DAG BiMap[(0,NVar "1"),(1,NVar "2"),
(2,NAdd 0 1),(3,NVar "3"),
(4,NAdd 2 3),(5,NVar "4"),
(6,NAdd 3 5),(7,NAdd 2 6)])
-}
-- We indeed obtained the set of nodes all pointing within the same DAG
-- The stateful nature was well-hidden till the very end (running)
-- We see the deep sharing: hash code 2, which is v1+v2, is used twice.
-- Then code 7, which is (v1+v2)+(v3+v4), is used 4 times
-- after being computed
{-
*ExpF> test_sklansky 8
([0,2,4,7,9,12,15,19],
DAG BiMap[(0,NVar "1"),(1,NVar "2"),
(2,NAdd 0 1),(3,NVar "3"),
(4,NAdd 2 3),(5,NVar "4"),
(6,NAdd 3 5),(7,NAdd 2 6), -- just as it was for test_sklansky 4
(8,NVar "5"),(9,NAdd 7 8),
(10,NVar "6"),(11,NAdd 8 10),
(12,NAdd 7 11),(13,NVar "7"),
(14,NAdd 11 13),(15,NAdd 7 14),
(16,NVar "8"),(17,NAdd 13 16),
(18,NAdd 11 17),(19,NAdd 7 18)])
-}
-- * //
-- ------------------------------------------------------------------------
-- * Performance is still a problem
-- We have demonstrated the sharing detection technique that represents
-- a DSL program as a DAG, eliminating multiple occurrences of common
-- subexpressions. Alas, to find all these common subexpressions we have
-- to examine the entire expression _tree_, which may take long time for large
-- programs (large circuits).
-- See ExpLet.hs for the exponential speed-up.
-- To force the evaluation
-- | Force the DAG to be built and report its size (number of nodes).
do_bench :: (a,DAG) -> Int
do_bench (_,DAG d) = size d

-- | Size of the DAG for constant multiplication by n.  It takes effort to
-- find the common subexpressions: the whole tree must be traversed.
bench_mul n = do_bench $
              run_expN (mul n (variable "i"))

bench12 = bench_mul (2^12)
bench13 = bench_mul (2^13)
{-
-- Interpreted code
*ExpF> bench_mul (2^11)
12
(0.06 secs, 4688952 bytes)
*ExpF> bench_mul (2^12)
13
(0.09 secs, 5344760 bytes)
*ExpF> bench_mul (2^13)
14
(0.20 secs, 9791092 bytes)
-- compiled with -O2
Prelude ExpF> bench_mul (2^20)
21
(0.44 secs, 80752592 bytes)
Prelude ExpF> bench_mul (2^21)
22
(0.87 secs, 160434744 bytes)
-}
{-
*ExpF> test_sklansky 128
([0,2,4,7,9,12,15,19,21, ... 560,567,575], ...)
(0.37 secs, 13650184 bytes)
-}
-- Not so bad... The implicit sharing detection is quite fast
bench_skl n = do_bench $ test_sklansky n
{-
-- Interpreted code
*ExpF> bench_skl 128
576
(0.33 secs, 10854396 bytes)
*ExpF> bench_skl 256
1280
(1.79 secs, 38952968 bytes)
-- compiled with -O2
Prelude ExpF> bench_skl 128
576
(0.02 secs, 2163436 bytes)
Prelude ExpF> bench_skl 256
1280
(0.08 secs, 4333380 bytes)
Prelude ExpF> bench_skl 512
2816
(0.37 secs, 13908376 bytes)
-}
-- | Exercise every interpreter defined in this module.
main = do
  print test_shb
  print test_sh4
  print test_val4
  print test_sm4
  print test_sm8
  print $ test_sklansky_o 4
  print $ test_sklansky 4
-- Implicit sharing (common sub-expression elimination) in
-- an embedded DSL for arithmetic expressions
-- The DSL is embedded as a data type
--
-- This file is based on the message by Tom Hawkins
-- posted on Haskell-Cafe on Feb 8, 2008
-- http://www.haskell.org/pipermail/haskell-cafe/2008-February/039339.html
-- We fill the missing details so to run the example and exhibit
-- the problem.
module CSE_ExpI (Exp(..), sklansky, bench_mul, bench_skl, main) where
import CSE_BiMap
-- Tom Hawkins:
-- My DSLs invariably define a datatype to capture expressions; something
-- like this:
-- * //
-- * A sub-DSL of arithmetic expressions, embedded as a datatype
-- | A sub-DSL of arithmetic expressions, embedded as a plain datatype
-- (the `initial' encoding).
data Exp
  = Add Exp Exp -- one can add Sub, Neg, etc.
  | Variable String
  | Constant Int
  deriving (Eq, Ord, Show)

-- | Sample expression 10 + i1.
exp_a = Add (Constant 10) (Variable "i1")

-- | Sample expression (10 + i1) + i2.
exp_b = Add exp_a (Variable "i2")
-- We can indeed generate assembly or Verilog from Exp after
-- a topological sorting
-- But first we have to detect identical sub-expressions and
-- share them, converting an ASTree to a DAG
-- * Common sub-expressions are abundant and easy to create
-- * Two running examples, abstracted from real life
-- * Example 1: multiplication by a known integer constant
-- Our DSL does not have the multiplication
-- operation (many 8-bit CPUs don't have it). We can generate
-- code for constant multiplication by repeated addition.
-- | Generate code for multiplication by a known integer constant using
-- repeated addition (the DSL, like many 8-bit CPUs, lacks multiplication).
mul :: Int -> Exp -> Exp
mul n x
  | n == 0    = Constant 0
  | n == 1    = x
  | even n    = mul (n `div` 2) (Add x x)
  | otherwise = Add x (mul (n - 1) x)
-- Examples of duplication
-- | i1 * 4 — note the duplicated sub-trees in the result:
exp_mul4 = mul 4 (Variable "i1")
{-
Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1"))
-}
-- | i1 * 8 — twice as much duplication.
exp_mul8 = mul 8 (Variable "i1")
-- Twice as much duplication
{-
Add
(Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1")))
(Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1")))
-}
-- * Question: why do we see all this duplication?
-- * Wasn't the multiplication algorithm supposed to be optimal?
-- When we apply something like exp_mul8 to an integer 5, do
-- we keep repeating 5+5 four times in a row?
-- * //
-- The other running example:
-- * sklansky by Matthew Naylor, with further credit to
-- * Chalmers folk, Mary Sheeran and Emil Axelsson.
-- * sklansky add [e1,e2, ... en]
-- * ===> [e1, e1+e2, ... e1+e2+...en]
-- It produces a running sum, with more parallelism and smaller latency.
-- It is somewhat in the spirit of the optimal multiplication mul above
-- The original code, from Matthew Naylor's message:
-- http://www.haskell.org/pipermail/haskell-cafe/2008-February/039671.html
-- | Parallel-prefix network (Sklansky):
--   sklansky op [e1, e2, ..., en] = [e1, e1`op`e2, ..., e1`op`...`op`en]
-- A running "sum" with more parallelism and smaller latency than scanl1.
sklansky :: (a -> a -> a) -> [a] -> [a]
sklansky _  []  = []
sklansky _  [x] = [x]
sklansky op xs  = lhs ++ [op (last lhs) r | r <- rhs]
  where
    (ls, rs) = splitAt (length xs `div` 2) xs
    lhs      = sklansky op ls   -- prefix sums of the left half
    rhs      = sklansky op rs   -- prefix sums of the right half
-- | sklansky over initial-style expressions with variables "1".."n".
test_sklansky_i n = sklansky Add xs
  where
    xs = Prelude.map (Variable . show) [1..n]
-- * //
-- * test_sklansky_i 4
-- (v1+v2) is repeated three times
{-
[Variable "1",
Add (Variable "1") (Variable "2"),
Add (Add (Variable "1") (Variable "2")) (Variable "3"),
Add (Add (Variable "1") (Variable "2")) (Add (Variable "3") (Variable "4"))]
-}
-- * Common sub-expressions appear across independent expressions
{-
Tom Hawkins wrote (Haskell-Cafe on Feb 8, 2008)
The problem comes when I want to generate efficient code from an
Exp (ie. to C or some other target language). The method I use
involves converting the tree of subexpressions into an acyclic graphic
to eliminate common subexpressions. The nodes are then topologically
ordered and assigned an instruction, or statement for each node....
The process of converting an expression tree to a graph uses either Eq
or Ord (either derived or a custom instance) to search and build a set
of unique nodes to be ordered for execution. In this case "a", then
"b", then "c". The problem is expressions often have shared,
equivalent subnodes, which dramatically grows the size of the tree.
As these trees grow in size, the equality comparison in graph
construction quickly becomes the bottleneck for DSL compilation.
What's worse, the phase transition from tractable to intractable is
very sharp. In one of my DSL programs, I made a seemingly small
change, and compilation time went from milliseconds to
not-in-a-million-years.
-}
-- Tom Hawkins was about to give up on Haskell
-- His message was entitled: ``I love purity, but it's killing me''
-- *
-- Tom Hawkins's message did not show the algorithm for converting
-- a tree of sub-expressions into a DAG.
-- The message does say that the algorithm uses Eq or Ord instances
-- of Exp
-- Probably Tom Hawkins adds all sub-expressions of an expression
-- into a DAG, ensuring that duplicates are not added
-- | A DAG as a (partial) bijection between expressions and node ids.
newtype DAG = DAG (BiMap Exp) deriving Show

-- | Enter an expression and all of its sub-expressions into the DAG,
-- skipping duplicates.  Children are inserted bottom-up before their
-- parent; the initial guard stops as soon as the whole expression is
-- already present.
exp_to_dag :: Exp -> DAG -> DAG
exp_to_dag e d@(DAG m) | Just _ <- lookup_key e m = d
exp_to_dag e@(Add e1 e2) d =
  let d1      = exp_to_dag e1 d
      d2      = exp_to_dag e2 d1
      DAG m2  = d2
      (_, m') = insert e m2  -- the allocated key itself is not needed
  in DAG m'
-- Variables and constants have no sub-expressions; the guard above
-- already handled the case where they are in the DAG, but a fresh leaf
-- is deliberately NOT added here (only Add nodes are recorded).
exp_to_dag _ d = d
-- There are only two expressions to compute
-- | exp_mul4 converted to a DAG: only two distinct Add nodes remain.
dag_4 = exp_to_dag exp_mul4 (DAG empty)
{-
DAG BiMap[(0,Add (Variable "i1") (Variable "i1")),
          (1,Add (Add (Variable "i1") (Variable "i1"))
                 (Add (Variable "i1") (Variable "i1")))]
-}
dag_8 = exp_to_dag exp_mul8 (DAG empty)

-- | Benchmark: size of the DAG built from (mul n (Variable "i")).
-- The tree being traversed has O(n) leaves, hence the slowdown.
bench_mul n =
  case exp_to_dag (mul n (Variable "i")) (DAG empty) of DAG d -> size d

bench20 = bench_mul (2^20)
bench21 = bench_mul (2^21)
-- The size of the DAG increased by 1 node; yet it took twice as
-- much time...
{-
-- Interpreted code
*ExpI> bench20
20
(4.51 secs, 1585728 bytes)
*ExpI> bench21
21
(8.76 secs, 530728 bytes)
-- compiled with -O2
Prelude ExpI> bench_mul (2^20)
20
(0.06 secs, 1053460 bytes)
Prelude ExpI> bench_mul (2^21)
21
(0.11 secs, 1062568 bytes)
Prelude ExpI> bench_mul (2^22)
22
(0.22 secs, 1600568 bytes)
Prelude ExpI> bench_mul (2^23)
23
(0.43 secs, 1068432 bytes)
-}
-- | Fold all the sklansky outputs (width 4) into one shared DAG.
test_skl_dag = foldl (flip exp_to_dag) (DAG empty) (test_sklansky_i 4)

-- | Benchmark: DAG size for the sklansky network of width n.
bench_skl n =
  case foldl (flip exp_to_dag) (DAG empty) (test_sklansky_i n) of
    DAG d -> size d
{-
-- Interpreted code
*ExpI> bench_skl 128
448
(0.32 secs, 1637380 bytes)
*ExpI> bench_skl 256
1024
(1.58 secs, 2234148 bytes)
-- compiled with -O2
Prelude ExpI> bench_skl 128
448
(0.03 secs, 1053144 bytes)
Prelude ExpI> bench_skl 256
1024
(0.12 secs, 2297932 bytes)
Prelude ExpI> bench_skl 512
2304
(0.56 secs, 3419484 bytes)
-}
-- | Demonstrate the initial encoding, its duplication, and the CSE pass.
main = do
  print exp_b
  print exp_mul4
  print exp_mul8
  print $ test_sklansky_i 4
  print dag_8
  print $ bench_skl 128
{-# LANGUAGE NoMonomorphismRestriction #-}
-- * Adding explicit sharing to our DSL
module ExpLet where
import ExpF
-- * Why do we even need explicit sharing
-- * Help the human reader (cf. Principia Mathematica)
-- * Can't we just use Haskell's let?
-- Haskell's let is certainly explicit and so clearly
-- helps the reader. Does it help CSE eliminators?
-- Does it help tagless interpreters?
-- The multiplication-by-4 example written explicitly
-- * exp_mul4 = mul 4 (variable "i1")
-- | The multiplication-by-4 example written with Haskell's own let,
-- making the intended sharing explicit to the human reader.
exp_mul4 =
  let x = variable "i1" in
  let y = add x x in
  add y y
-- * Question: Does Haskell guarantee sharing?
-- * Question: can we rely on memoization?
-- | Interpreted as a tree, the Haskell-let sharing is lost: the terms
-- are duplicated in the output.
tree_mul4 = case exp_mul4 of ExpI t -> t
{-
Add (Add (Variable "i1") (Variable "i1"))
    (Add (Variable "i1") (Variable "i1"))
-}

-- | Interpreted as a DAG, the common sub-terms are recovered by hash-consing.
dag_mul4 = run_expN exp_mul4
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)])
-- * //
-- * Haskell's let does work, sometimes!
-- The question about Haskell let is non-trivial because
-- it sometimes works.
-- To demonstrate how Haskell let helps some interpreters
-- but not the others, we define two tagless-final
-- interpreters.
-- One computes the size of an expression (the number of the
-- constructors)
-- * Haskell's let does help some DSL interpreters
-- * The size interpreter
-- | Interpreter computing the size of an expression (constructor count).
newtype Size t = Size Int

instance Exp Size where
  constant _ = Size 1
  variable _ = Size 1
  add (Size x) (Size y) = Size (x+y+1)

-- Haskell's let DOES help this interpreter: the shared Size values are
-- computed once, so even huge expressions are sized instantly.
Size size_mul4 = exp_mul4 -- 7
Size size_large = mul (2^30) (variable "i1")
-- 2147483647
-- Nearly instantaneous: only 0.01 secs
-- * The print interpreter
-- Another interpreter will print the expression
-- | Interpreter that prints the expression as a side effect.
newtype Print t = Print (IO ())

instance Exp Print where
  constant = Print . putStr . show
  variable = Print . putStr
  add (Print x) (Print y) = Print (x >> putStr " + " >> y)

-- Here Haskell's let does NOT help: the shared IO action is re-run for
-- every occurrence, so printing duplicates the effort.
Print print_mul4 = exp_mul4
-- i1 + i1 + i1 + i1
-- We see the duplication in the output, thus the duplication
-- of the effort to print.
-- * Print pm30 = mul (2^30) (variable "i1")
-- Here, running will take a long time, even if we print to /dev/null
-- So, here Haskell's let did not help!
-- We thus need a sharing form in the DSL itself
-- * //
-- * Adding a new form let_ to our DSL
-- We extend the DSL language with a new expression form, let_,
-- to indicate the explicit sharing
-- Tagless-final interpreters are easily extensible
-- | Extension of the DSL with a sharing form.  Tagless-final languages
-- are extended simply by declaring another class.
class ExpLet repr where
  let_ :: repr a -> (repr a -> repr b) -> repr b

-- | exp_mul4 re-written with sharing made explicit in the DSL itself.
exp_mul4' =
  let_ (variable "i1") (\x ->
  let_ (add x x)       (\y ->
  add y y))
-- * Extending the existing interpreters with let_
-- As one might expect, let_ is basically the reverse application
-- | For the evaluator, let_ is simply reverse application.
instance ExpLet R where
  let_ x f = f x

-- Sharing is an optimization: with and without it the results agree.
val_mul4 = unR exp_mul4 [("i1",5)] -- 20
val_mul4' = unR exp_mul4' [("i1",5)] -- 20
-- * How to see sharing
-- We would like to `see' the sharing.
-- We have to define a show-like function then, an interpreter
-- of tagless-final expressions as strings.
-- Actually, we need a bit more than strings: to show sharing
-- (as let-expressions) we need a way to generate `pointers',
-- or local variable names.
-- | Counter used to generate fresh let-bound variable names v0, v1, ...
type LetVarCount = Int

-- | Show-interpreter: an expression denotes a string, given the next
-- free variable-counter value.
newtype S t = S{unS :: LetVarCount -> String}

instance Exp S where
  constant = S . const . show
  variable = S . const
  add e1 e2 = S(\c -> unS e1 c ++ " + " ++ unS e2 c)

instance ExpLet S where
  -- Render the shared expression once, bind it to a fresh name, and
  -- substitute that name in the body.
  let_ e f = S(\c -> let vname = "v" ++ show c
                     in unwords ["let",vname,"=",unS e c,
                                 "in",unS (f (S (const vname))) (succ c)])

-- | Run the show-interpreter starting from counter 0.
run_expS :: S t -> String
run_expS (S m) = m 0

sh_mul4 = run_expS exp_mul4
-- "i1 + i1 + i1 + i1"
sh_mul4' = run_expS exp_mul4'
-- "let v0 = i1 in let v1 = v0 + v0 in v1 + v1"
-- * //
-- * Payoff: extending the N interpreter to handle explicit sharing
-- Sharing a computation means performing a computation and sharing
-- result. The code below says exactly that.
-- | Sharing under the DAG interpreter: perform the computation once and
-- share its resulting NodeId.
instance ExpLet N where
  let_ e f = N(do
    x <- unN e
    unN $ f (N (return x)))

-- | exp_mul4' as a DAG — identical to dag_mul4.
dag_mul4' = run_expN exp_mul4'
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)])
-- * Benchmarking
-- To really see the difference, we need bigger multiplications
-- We don't want to write the expressions like |exp_mul4'| by hand,
-- we want to generate them.
-- We rewrite the |mul| generator to use the explicit sharing.
-- The difference from |mul| is on the last-but-one line.
-- | The multiplication generator re-written with explicit sharing: the
-- doubled operand is let_-bound, so each halving step shares its input.
mul' :: (ExpLet repr, Exp repr) => Int -> repr Int -> repr Int
mul' n x
  | n == 0    = constant 0
  | n == 1    = x
  | even n    = let_ x (\shared -> mul' (n `div` 2) (add shared shared))
  | otherwise = add x (mul' (n - 1) x)
-- Is there another way to write mul'?
-- mul' n x | n `mod` 2 == 0 = let_ (add x x) (\y -> mul' (n `div` 2) y)
-- "let v0 = i1 + i1 in let v1 = v0 + v0 in v1"
-- | Show mul' 4 with its explicit sharing rendered as let-bindings.
sh_mul4'' = run_expS (mul' 4 (variable "i1"))
-- "let v0 = i1 in let v1 = v0 + v0 in v1 + v1"
-- * //
-- * Benchmarking. Compare with bench12 and bench13 from ExpF.hs
-- | Benchmark of the sharing-aware generator; compare with bench12 and
-- bench13 from ExpF.hs — the speed-up is exponential.
bench_mul' n = do_bench $
               run_expN (mul' n (variable "i"))

bench12 = bench_mul' (2^12)
bench13 = bench_mul' (2^13)
-- The exponential speed-up is apparent
{-
*ExpLet> bench12
13
(0.02 secs, 3171764 bytes)
*ExpLet> bench13
14
(0.01 secs, 534548 bytes)
-}
-- The mul (2^30) is the expression tree with 2^30 leaves!
{-
*ExpLet> bench_mul' (2^20)
21
(0.01 secs, 1585752 bytes)
*ExpLet> bench_mul' (2^30)
31
(0.01 secs, 1055932 bytes)
-}
-- * Some sharing is left to discover:
{-
*ExpLet> run_expS (mul' 15 (variable "i"))
"i + let v0 = i in v0 + v0 +
let v1 = v0 + v0 in v1 + v1 + let v2 = v1 + v1 in v2 + v2"
-}
-- It is found and eliminated
{-
*ExpLet> run_expN (mul' 15 (variable "i"))
(6,DAG BiMap[
(0,NVar "i"),
(1,NAdd 0 0),
(2,NAdd 1 1),
(3,NAdd 2 2),
(4,NAdd 2 3),
(5,NAdd 1 4),
(6,NAdd 0 5)])
-}
-- rather quickly:
{-
*ExpLet> bench_mul' (2^30-1)
59
(0.01 secs, 1078232 bytes)
-}
-- | Demonstrate explicit sharing under every interpreter of this module.
main = do
  print tree_mul4
  print dag_mul4
  print val_mul4
  print val_mul4'
  print sh_mul4'
  print sh_mul4''
  print bench13
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
module ExpSYMFinal where
import Control.Monad
-- typeclass representing final encoding of the Exp dsl
-- 'repr' is the semantic domain
-- | Final (tagless) encoding of the expression DSL; 'repr' is the
-- semantic domain in which expressions are interpreted.
class ExpSYM repr where
  lit :: Int -> repr
  neg :: repr -> repr
  add :: repr -> repr -> repr
-- interpret Exp AST in the Int semantic domain
-- | Interpret expressions in the Int domain: an expression simply
-- denotes its value.
instance ExpSYM Int where
  lit = id
  neg = negate
  add = (+)

-- | Run the Int interpreter (the representation already IS the answer).
eval :: Int -> Int
eval = id
-- interpret Exp AST in the String semantic domain
-- | Interpret expressions in the String domain: pretty-printing.
instance ExpSYM String where
  lit n = show n
  neg e = "(-" ++ e ++ ")"
  add e1 e2 = "(" ++ e1 ++ " + " ++ e2 ++ ")"

-- | Run the String interpreter.
view :: String -> String
view = id

-- | Sample term 8 + (-(1 + 2)), polymorphic in the interpreter.
tf1 :: forall repr. ExpSYM repr => repr
tf1 = add (lit 8) (neg (add (lit 1) (lit 2)))
-- add multiplication operation to the Exp dsl
-- | Language extension: multiplication, added as a separate class so
-- existing terms and interpreters are untouched.
class MulSYM repr where
  mul :: repr -> repr -> repr

instance MulSYM Int where
  mul e1 e2 = e1 * e2

instance MulSYM String where
  mul e1 e2 = "(" ++ e1 ++ " * " ++ e2 ++ ")"

-- | Sample term using both the base language and the extension.
tfm1 :: forall repr. (MulSYM repr, ExpSYM repr) => repr
tfm1 = add (lit 7) (neg (mul (lit 1) (lit 2)))
-- serialization problem
-- | A generic wire format for serialization (an s-expression-like tree).
data Tree = Leaf String
          | Node String [Tree]
          deriving (Eq, Read, Show)

-- | Serializer: interpret expressions as wire-format trees.
instance ExpSYM Tree where
  lit n = Node "Lit" [Leaf $ show n]
  neg e = Node "Neg" [e]
  add e1 e2 = Node "Add" [e1,e2]

-- | Run the Tree interpreter.
toTree :: Tree -> Tree
toTree = id

tf1_tree :: Tree
tf1_tree = toTree tf1 -- with the type sig the call to toTree is optional
-- | Errors are reported as plain strings in this module.
type ErrMsg = String

-- | A total replacement for 'read': succeeds only when the whole input
-- parses unambiguously, otherwise reports the offending string.
safeRead :: Read a => String -> Either ErrMsg a
safeRead s
  | [(v, "")] <- reads s = Right v
  | otherwise            = Left $ "Read error " ++ s
-- deserializer, closed recursion style
-- | Deserializer in closed-recursion style: rebuild a tagless-final
-- expression from the Tree wire format, reporting malformed input.
fromTree :: (ExpSYM repr) => Tree -> Either ErrMsg repr
fromTree (Node "Lit" [Leaf n]) = liftM lit $ safeRead n
fromTree (Node "Neg" [e]) = liftM neg $ fromTree e
fromTree (Node "Add" [e1, e2]) = liftM2 add (fromTree e1) (fromTree e2)
fromTree e = Left $ "Invalid tree: " ++ show e
-- | Deserialize tf1_tree and evaluate it.  The deserialized value is
-- used monomorphically: a second interpretation is impossible here.
tf1_eval :: IO ()
tf1_eval = case fromTree tf1_tree of
  Left e -> putStrLn $ "Error: " ++ e
  Right x -> do
    print $ eval x
    -- below doesn't work! x has been pinned to Int by the eval above
    -- print $ view x
-- duplicating interpreter, creates monomorphic copies
-- | Duplicating interpreter: interprets an expression simultaneously in
-- two domains, producing monomorphic copies that can be consumed separately.
instance (ExpSYM repr, ExpSYM repr') => ExpSYM (repr,repr') where
  lit x = (lit x, lit x)
  neg (e1,e2) = (neg e1, neg e2)
  add (e11,e12) (e21, e22) = (add e11 e21, add e12 e22)

-- | Run the duplicating interpreter (identity at the pair type).
duplicate :: (ExpSYM repr, ExpSYM repr') => (repr,repr') -> (repr,repr')
duplicate = id
-- error reporting for deserializer
-- | Error reporting for the deserializer: print the error, or hand the
-- successfully parsed payload to the consumer.  (The consumer is unused
-- on the error path, hence the wildcard.)
check_consume _ (Left e)  = putStrLn $ "Error: " ++ e
check_consume f (Right x) = f x

-- | Interpret one copy of x with ev and print it, returning the other,
-- still-interpretable copy.  Places no restrictions on the type of x2.
dup_consume ev x = print (ev x1) >> return x2
  where (x1,x2) = duplicate x

-- | Consume one expression three times (eval, view, toTree) by
-- duplicating it |interpreters| - 1 times — hence the name.
-- TODO: make them use a common copy?
thrice x = dup_consume eval x >>= dup_consume view >>= print . toTree

tf1_int3 = check_consume thrice . fromTree $ tf1_tree
-- deserializer, open recursion style. replace all self-recursive calls to
-- applications of the explicit reference to the *-version of itself (this will
-- be filled in later using Y-combinator)
-- this is needed for the deserializer to be extensible
-- | Deserializer in open-recursion style: every self-recursive call goes
-- through the explicit 'self' argument, to be tied later with a fixpoint.
-- This is what makes the deserializer extensible.
fromTree' :: (ExpSYM repr)
          => (Tree -> Either ErrMsg repr)
          -> (Tree -> Either ErrMsg repr)
fromTree' self (Node "Lit" [Leaf n]) = liftM lit $ safeRead n
fromTree' self (Node "Neg" [e]) = liftM neg $ self e
fromTree' self (Node "Add" [e1, e2]) = liftM2 add (self e1) (self e2)
fromTree' self e = Left $ "Invalid tree: " ++ show e
-- now tie the knot explicitly using the Y-combinator
-- if f takes a first argument which is the *-version of itself then fill that
-- argument's place
-- | The usual fixpoint combinator, used to close the open recursion.
fix f = f (fix f)

-- | The closed deserializer: fromTree' with the knot tied.
fromTree'' = fix fromTree'

tf1E_int3 = check_consume thrice . fromTree'' $ tf1_tree

-- Deserializing an ill-formed tree reports an error, as expected.
tfxE_int3 = check_consume thrice . fromTree'' $
            Node "Lit" [Leaf "1", Leaf "2"]
-- let's extend the deserializer now to include multiplication
-- 1] extend wire format for multiplication
-- 1] extend the wire format with multiplication
instance MulSYM Tree where
  mul e1 e2 = Node "Mul" [e1,e2]

-- 2] extend the duplicator with multiplication
instance (MulSYM repr, MulSYM repr') => MulSYM (repr, repr') where
  mul (e11, e12) (e21, e22) = (mul e11 e21, mul e12 e22)

-- 3] extend the deserializer: handle "Mul" nodes, delegate everything
-- else to the open-recursive base deserializer.
fromTreeExt self (Node "Mul" [e1,e2]) = liftM2 mul (self e1) (self e2)
fromTreeExt self e = fromTree' self e

-- | Close the extended deserializer with the fixpoint.
fromTreeExt' = fix fromTreeExt

-- The same 'check_consume'/'thrice' machinery works with the extension.
tf1'_int3 = check_consume thrice . fromTreeExt' $ tf1_tree
tfm1_int3 = check_consume thrice . fromTreeExt' $ toTree tfm1
-- make the context on which the operation depends. in this case it depends on
-- the sign
-- | The context the transformation depends on: the sign surrounding the
-- current sub-expression.
data Ctx = Pos | Neg

-- | Interpreter transformer: interpret expressions as functions from the
-- sign context, pushing negation down to the literals.
instance ExpSYM repr => ExpSYM (Ctx -> repr) where
  -- Literals absorb the context: negate them when under a Neg.
  lit n Pos = lit n
  lit n Neg = neg (lit n)
  -- Addition is a homomorphism wrt push_neg: it propagates the context.
  add e1 e2 ctx = add (e1 ctx) (e2 ctx)
  -- Negation flips the context instead of producing a Neg node.
  neg e Pos = e Neg
  neg e Neg = e Pos

-- | Run the negation-pushing transformer by supplying the base context.
push_neg :: forall t. (Ctx -> t) -> t
push_neg e = e Pos
-- extend the transformer to include multiplication
-- | Extend the transformer to multiplication.  Multiplication is NOT a
-- homomorphism wrt negation, so the context is matched explicitly: a
-- surrounding negation is pushed into exactly one factor.
instance MulSYM repr => MulSYM (Ctx -> repr) where
  mul e1 e2 Pos = mul (e1 Pos) (e2 Pos)
  mul e1 e2 Neg = mul (e1 Pos) (e2 Neg)
-- | Entry point: run the deserialize-and-evaluate demonstration.
main :: IO ()
main = tf1_eval
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment