Skip to content

Instantly share code, notes, and snippets.

@suhailshergill
Last active August 29, 2015 14:06
Show Gist options
  • Save suhailshergill/94ad1260fcb69598c01e to your computer and use it in GitHub Desktop.
haskell: TTFI
-- The illustration of the Boehm-Berarducci encoding
-- This file is the baseline: using ordinary algebraic
-- data type and operations of constructions, deconstructions
-- and transformation.
-- We use the running example of the Exp data type from
-- tagless-final lectures
module BB_ADT where
-- Sample data type of expressions
data Exp = Lit Int
         | Neg Exp
         | Add Exp Exp

-- Constructing a sample expression: 8 + (-(1 + 2))
ti1 = Add (Lit 8) (Neg (Add (Lit 1) (Lit 2)))
-- A sample consumer: render an expression in fully-parenthesized form.
-- It is structurally recursive.
view :: Exp -> String
view expr = case expr of
  Lit n     -> show n
  Neg e     -> "(-" ++ view e ++ ")"
  Add e1 e2 -> "(" ++ view e1 ++ " + " ++ view e2 ++ ")"

ti1_view = view ti1
-- "(8 + (-(1 + 2)))"
-- Transformer
-- * Pushing the negation down
-- Previously, expressions were constructed according to this grammar:
-- * General grammar of expressions
-- * e ::= int | neg e | add e e
-- *
-- * Restricted grammar:
-- * e ::= factor | add e e
-- * factor ::= int | neg int
-- Now, only integer literals can be negated, and only once.
-- Normalize to the restricted grammar: only literals are negated, once.
-- This function is NOT structurally recursive: the Neg-of-Add clause
-- recurses on (Neg e1)/(Neg e2), which are not subterms of the input.
push_neg :: Exp -> Exp
push_neg e@Lit{} = e
push_neg e@(Neg (Lit _)) = e
push_neg (Neg (Neg e)) = push_neg e
push_neg (Neg (Add e1 e2)) = Add (push_neg (Neg e1)) (push_neg (Neg e2))
push_neg (Add e1 e2) = Add (push_neg e1) (push_neg e2)
-- A few sample transformations and their results
ti1_norm = push_neg ti1
ti1_norm_view = view ti1_norm
-- "(8 + ((-1) + (-2)))"
-- Add an extra negation: the whole term is negated, then normalized
ti1n_norm_view = view (push_neg (Neg ti1))
-- "((-8) + (1 + 2))"
{-# LANGUAGE GADTs #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
-- {-# LANGUAGE ImpredicativeTypes #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
-- {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE NoImplicitPrelude #-}
-- {-# LANGUAGE OverlappingInstances #-}
module BB_ExpSYM where
import Control.Monad.IO.Class
import Control.Monad.Identity
import Control.Monad.Free
import Control.Monad.State
import Control.Monad.Writer
import Prelude ((.), ($), ($!), Bool(..), (&&), String, (++), Show(..))
import Control.Applicative
import qualified Prelude as P
--------------------------------------------------------------------------------
-- let's start with final semantics parameterizing on evaluation strategy
-- Higher-order abstract syntax for the lambda fragment: object-language
-- functions are represented by host-language functions on 'repr'.
class LamSym repr where
  lam :: (repr a -> repr b) -> repr (a -> b)
  app :: repr (a -> b) -> repr a -> repr b

-- A convenient abbreviation: object-level let, expressed via lam/app
let_ :: LamSym repr => repr a -> (repr a -> repr b) -> repr b
let_ x y = (lam y) `app` x

-- Apply a curried two-argument object function to two arguments
app3 :: LamSym repr
        => repr (a -> b -> c)
        -> repr a -> repr b -> repr c
app3 x = app . (app x)

-- Build a curried two-argument object function from a host binary function
lam3 :: LamSym repr
        => (repr a -> repr b -> repr c)
        -> repr (a -> b -> c)
lam3 x = lam (lam . x)
-- An (uninhabited) object-language arrow type label
data a :-> b
infixr 5 :->

-- Boehm-Berarducci booleans: a boolean is its own case analysis
newtype BoolBB = BoolBB {
  unBoolBB :: forall a. (a -> a -> a)
}

-- Final-style syntax of boolean expressions
class BoolSym repr where
  bool :: Bool -> repr BoolBB
  not :: repr BoolBB -> repr BoolBB
  true :: repr BoolBB
  false :: repr BoolBB
  and :: repr BoolBB -> repr BoolBB -> repr BoolBB

-- Interpretation of object types as host types, relative to a monad m
type family Sem (m :: * -> *) a :: *
type instance Sem m BoolBB = Bool
type instance Sem m (a -> b) = m (Sem m a) -> m (Sem m b)

-- 'S' isn't really a monad, but it is up to Sem interpretation. so we can
-- define a Monad instance which simply applies 'unS' and 'S' as needed. hmm
-- it's not as straightforward because we need some way to convert a type 'a'
-- into 'Sem m a', whatever that may mean.
newtype S l m a = S { unS :: m (Sem m a) }
-- The direct, pure interpretation of booleans: the phantom-typed wrapper
-- simply carries a host Bool.
newtype BoolSem a = BoolSem { unBoolSem :: Bool }

instance BoolSym BoolSem where
  bool b = BoolSem b
  true   = bool True
  false  = bool False
  not b  = BoolSem (P.not (unBoolSem b))
  and (BoolSem p) (BoolSem q) = BoolSem (p && q)

-- Inject the pure boolean interpretation into any monad
-- (recall Sem m BoolBB = Bool).
withBoolSem :: (Monad m) => BoolSem BoolBB -> m (Sem m BoolBB)
withBoolSem b = return (unBoolSem b)
-- the reason we need to give below definition in terms of 'S' is because that
-- is what ties 'repr BoolBB' to 'Bool'. if we had another way of connecting the
-- two, then we could possibly define BoolSym in terms of (Monad repr). so one
-- way of connecting the two would be using the instance constraint, not sure if
-- this works however
-- Boolean interpretation instrumented with a State String trace: each
-- operation appends a log entry before delegating to the pure semantics
-- via withBoolSem.
instance BoolSym (S l (State String)) where
  -- treating 'bool' as compile-time 'constant', so no logging
  bool = S . withBoolSem . bool
  true = S $ do
    modify (++ "True" ++ "; ")
    withBoolSem true
  false = S $ do
    modify (++ "False" ++ "; ")
    withBoolSem false
  not x = S $ do
    arg <- unS x                 -- run the argument first, logging its effects
    modify (++ "(not " ++ show arg ++ "); ")
    withBoolSem . not . BoolSem $ arg
  and x y = S $ do
    a <- unS x                   -- left argument's effects precede the right's
    b <- unS y
    modify (++ "(and " ++ show a ++ " " ++ show b ++ "); ")
    withBoolSem $ (BoolSem a) `and` (BoolSem b)
-- Evaluation-order label: call-by-name
data Name

instance LamSym (S Name (State String)) where
  -- lam :: (S a -> S b) -> S (a -> b)
  -- lam f = S . return $ (unS . f . S)
  -- The argument computation 'x' is passed unevaluated into the body.
  lam f = S $ do
    return $ \x -> unS(f(S x))
  -- app :: S (a -> b) -> S a -> S b
  -- app x y = S $ unS x >>= ($ (unS y))
  app x y = S $ do
    a <- unS x
    a (unS y)

runName :: S Name m a -> m (Sem m a)
runName = unS

-- Run with an initial trace; yields (result, accumulated trace)
viewName x = (runState . runName) x "Trace :: "
-- Evaluation-order label: call-by-value
data Value
-- We reuse most of EDSL (S Name) except for lam.
-- vn/nv only relabel the phantom tag; the representation is unchanged.
vn :: S Value m x -> S Name m x
vn = S . unS
nv :: S Name m x -> S Value m x
nv = S . unS

instance LamSym (S Value (State String)) where
  -- lam :: S a -> S b -> S (a -> b)
  -- lam f = S . return $ (\x -> x >>= unS . f . S . return)
  -- S (a -> b) = S{ m(Sem m (a -> b)) }
  -- Sem m (a -> b) = m(Sem m a) -> m(Sem m b)
  -- S (a -> b) = S{ m( m(Sem m a) -> m(Sem m b) ) }
  -- S (B -> (B -> (B -> B)))
  -- S{ m( m(Sem m B) -> m(Sem m (B -> (B -> B))) ) }
  -- S{ m( m(Sem m B) -> m( m(Sem m B) -> m(Sem m (B -> B)) ) ) }
  -- S{ m( m(B) -> m( m(B) -> m( m(B) -> m(B) ) ) ) }
  -- The argument is evaluated (y <- x) before the body sees it:
  -- the body receives the already-computed value, re-wrapped with return.
  lam f = S $ do
    return $ \x -> do
      y <- x
      unS(f(S(return y)))
  -- app :: S (a -> b) -> S a -> S b
  app x y = nv $ app (vn x) (vn y)

runValue :: S Value m a -> m (Sem m a)
runValue = unS

viewValue x = (runState . runValue) x "Trace :: "
-- so this is just plain wrong. it works, but evaluation hasn't been
-- parameterized
-- t = (and2 (not2 true2) (not2 true2))
--
-- this doesn't work because we're passing in arguments twice
-- explicitly. basically this needs a CSE pass, which can be done in final style
-- by building sharing DAG from bottom up during interpretation
t0 = lam (lam . and)
     `app` (lam not `app` true)
     `app` (lam not `app` true)
-- Same computation, but sharing (not x) through an object-level let
t1 = lam (\x -> let_ (not x)
                $ \y -> y `and` y) `app` true
-- An unused first argument: whether it is evaluated depends on the
-- chosen evaluation order (Name vs Value)
t2 = lam (\_ -> lam (\x -> let_ (not x)
                           $ \y -> y `and` y))
     `app` false
     `app` true
{-# LANGUAGE Rank2Types #-}
-- The illustration of the Boehm-Berarducci encoding
-- We re-write an Algebraic Data Type example in BB_ADT.hs
-- using Boehm-Berarducci encoding to represent the
-- data type with lambda-terms.
-- Boehm-Berarducci encoding represents an inductive data type
-- as a non-recursive but higher-rank data type
module BB_LAM where
import BB_ADT (Exp(..))
-- We define the Boehm-Berarducci encoding of the data type
-- Exp from BB_ADT.hs in two steps.
-- First we clarify the signature of the Exp algebra: the
-- types of the algebra constructors. We represent the signature
-- as a Haskell record. It is NOT recursive.
-- The signature of the Exp algebra as a Haskell record: the types of the
-- three algebra constructors. It is NOT recursive.
data ExpDict a = ExpDict {
    dlit :: Int -> a
  , dneg :: a -> a
  , dadd :: a -> a -> a
}
-- We can define (a version of) Boehm-Berarducci encoding as
type ExpBB1 = forall a. (ExpDict a -> a)
-- To use only applications and abstractions, we curry the record
-- argument ExpDict a.
-- The wrapper newtype is needed for the first-class polymorphism
-- (actually we can go some way without newtype wrapper but eventually
-- GHC becomes confused. After all, the source language of GHC is not
-- system F.)
newtype ExpBB
  = ExpBB {
      unExpBB :: forall a.
                 (Int -> a)         -- interprets Lit
                 -> (a -> a)        -- interprets Neg
                 -> (a -> a -> a)   -- interprets Add
                 -> a
  }
-- A data type declaration defines type, constructors, deconstructors
-- and the induction principle (fold). We already have a new type ExpBB,
-- which represents fold, as will become clear soon. We need constructors:
-- Constructors for the encoded Exp: each abstracts over the three algebra
-- operations and applies the appropriate one to (the folds of) its parts.
lit :: Int -> ExpBB
lit n = ExpBB $ \onLit _onNeg _onAdd -> onLit n

neg :: ExpBB -> ExpBB
neg e = ExpBB $ \onLit onNeg onAdd -> onNeg (unExpBB e onLit onNeg onAdd)

add :: ExpBB -> ExpBB -> ExpBB
add l r = ExpBB $ \onLit onNeg onAdd ->
  onAdd (unExpBB l onLit onNeg onAdd) (unExpBB r onLit onNeg onAdd)

-- A sample expression: our first running example
tbb1 = add (lit 8) (neg (add (lit 1) (lit 2)))
-- A sample consumer, implemented directly via the induction principle
-- carried by ExpBB itself. This definition is not recursive!
viewBB :: ExpBB -> String
viewBB e = unExpBB e show showNeg showAdd
  where
    showNeg s     = "(-" ++ s ++ ")"
    showAdd s1 s2 = "(" ++ s1 ++ " + " ++ s2 ++ ")"

tbb1v = viewBB tbb1
-- "(8 + (-(1 + 2)))"
-- "(8 + (-(1 + 2)))"
-- How to define a deconstructor?
-- To give an intuition, we define the following functions roll and unroll.
-- The names are meant to evoke the terms witnessing the isomorphism between
-- an iso-recursive type and its one-step unrolling.
-- The deconstructor will use a form of these functions.
{- The following shows that we can replace type-level induction
(in BB_ADT.Exp) with a term-level induction.
The following definitions of roll and unroll are actually present
in Boehm-Berarducci's paper -- but in a very indirect way.
The crucial part is Defn 7.1: sort of self-interpreter: v cons nil ~= v
Informally, it shows that roll . unroll should be equivalent to the
identity. The equivalence relation (~=) is strictly speaking not
the equality; in modern terms, (~=) is contextual equivalence.
-}
-- First we re-write the signature of the Exp algebra, ExpDic,
-- in the form of a strictly-positive functor
-- The signature of the Exp algebra re-written as a strictly-positive functor
data ExpF a = FLit Int
            | FNeg a
            | FAdd a a

instance Functor ExpF where
  fmap f (FLit n) = FLit n
  fmap f (FNeg e) = FNeg (f e)
  fmap f (FAdd e1 e2) = FAdd (f e1) (f e2)

-- One-step roll: build an encoded term from a shallow layer
roll :: ExpF ExpBB -> ExpBB
roll (FLit n) = lit n
roll (FNeg e) = neg e
roll (FAdd e1 e2) = add e1 e2

-- The occurrence of roll below is the source of major inefficiency
-- as we do repeated pattern-matching.
unroll :: ExpBB -> ExpF ExpBB
unroll e = unExpBB e dlit dneg dadd
  where
    dlit :: Int -> ExpF ExpBB
    dlit n = FLit n
    dneg :: ExpF ExpBB -> ExpF ExpBB
    dneg e = FNeg (roll e)
    dadd :: ExpF ExpBB -> ExpF ExpBB -> ExpF ExpBB
    dadd e1 e2 = FAdd (roll e1) (roll e2)
-- Compare with the signature for ExpBB!
-- Informally, ExpBB unfolds all the way whereas ExpD unfolds only one step
newtype ExpD =
  ExpD {
    unED :: forall w.
            (Int -> w)
            -> (ExpBB -> w)
            -> (ExpBB -> ExpBB -> w)
            -> w
  }

-- One-step deconstructor: case-analysis on the top constructor only,
-- re-assembling the immediate subterms back into full ExpBB values
-- (via lit/neg/add) so the caller can keep working on them.
decon :: ExpBB -> ExpD
decon e = unExpBB e dlit dneg dadd
  where
    dlit n = ExpD $ \onlit onneg onadd -> onlit n
    dneg e = ExpD $ \onlit onneg onadd -> onneg (unED e lit neg add)
    dadd e1 e2 = ExpD $ \onlit onneg onadd ->
      onadd (unED e1 lit neg add) (unED e2 lit neg add)
-- We redo viewBB, this time by (one-step) pattern-matching plus
-- explicit recursion, as one would with an ordinary data type.
viewBBd :: ExpBB -> String
viewBBd e =
  unED (decon e)
       show
       (\sub -> "(-" ++ viewBBd sub ++ ")")
       (\l r -> "(" ++ viewBBd l ++ " + " ++ viewBBd r ++ ")")

tbb1vd = viewBBd tbb1
-- "(8 + (-(1 + 2)))"
-- So, we can do any pattern-matching!
-- And do non-structurally--recursive processing on ExpBB
{-
push_neg :: Exp -> Exp
push_neg e@Lit{} = e
push_neg e@(Neg (Lit _)) = e
push_neg (Neg (Neg e)) = push_neg e
push_neg (Neg (Add e1 e2)) = Add (push_neg (Neg e1)) (push_neg (Neg e2))
push_neg (Add e1 e2) = Add (push_neg e1) (push_neg e2)
-}
-- The negation-pushing transformer on the encoded type: same logic as
-- BB_ADT.push_neg, written with the one-step deconstructor. The nested
-- deconstruction in dneg mirrors the nested patterns (Neg (Lit _)) etc.
push_negBB :: ExpBB -> ExpBB
push_negBB e = unED (decon e) dlit dneg dadd
  where
    dlit _ = e                      -- bare literal: unchanged
    dneg e2 = unED (decon e2) dlit2 dneg2 dadd2
      where
        dlit2 n = e                 -- Neg (Lit _): already in normal form
        dneg2 e = push_negBB e      -- double negation cancels
        dadd2 e1 e2 = add (push_negBB (neg e1)) (push_negBB (neg e2))
    dadd e1 e2 = add (push_negBB e1) (push_negBB e2)

tbb1_norm = push_negBB tbb1
tbb1_norm_viewBB = viewBBd tbb1_norm
-- "(8 + ((-1) + (-2)))"
-- Add an extra negation
tbb1n_norm_viewBB = viewBBd (push_negBB (neg tbb1))
-- "((-8) + (1 + 2))"
-- "((-8) + (1 + 2))"
-- ------------------------------------------------------------------------
-- A bit of algebra
-- The types (ExpF a -> a) and ExpDict a are isomorphic.
-- Hence, the operations of an Exp algebra with the carrier U
-- could be specified either in the form of a value of the type ExpDict U,
-- or in the form of a function ExpF U -> U.
-- The isomorphism (ExpF a -> a) <-> ExpDict a, in both directions.
sigma_dict :: (ExpF a -> a) -> ExpDict a
sigma_dict sigma = ExpDict
  { dlit = sigma . FLit
  , dneg = sigma . FNeg
  , dadd = \x y -> sigma (FAdd x y)
  }

dict_sigma :: ExpDict a -> (ExpF a -> a)
dict_sigma dict layer = case layer of
  FLit n     -> dlit dict n
  FNeg e     -> dneg dict e
  FAdd e1 e2 -> dadd dict e1 e2
-- Recall ExpBB1 as an uncurried form of the Boehm-Berarducci encoding.
-- It follows then the encoding can be written in the equivalent form
-- The Boehm-Berarducci encoding in functor form: fold over (ExpF a -> a)
newtype ExpC = ExpC {
  unExpC :: forall a.
            (ExpF a -> a)
            -> a
}
-- ExpC is indeed fully equivalent to ExpBB, and inter-convertible.
-- h runs an ExpBB fold with the operations drawn from a dictionary.
h :: ExpDict a -> ExpBB -> a
h dict e = unExpBB e (dlit dict) (dneg dict) (dadd dict)
exp_BB_C :: ExpBB -> ExpC
exp_BB_C e = ExpC $ \f -> h (sigma_dict f) e
exp_C_BB :: ExpC -> ExpBB
exp_C_BB e = ExpBB $ \dlit dneg dadd ->
  unExpC e (dict_sigma (ExpDict dlit dneg dadd))
-- Let us write the constructors explicitly: the initial-algebra structure
-- map sigma_expC :: ExpF ExpC -> ExpC, one clause per constructor.
sigma_expC :: ExpF ExpC -> ExpC
sigma_expC (FLit n) = ExpC $ \f -> f (FLit n)
sigma_expC (FNeg e) = ExpC $ \f -> f (FNeg (unExpC e f))
sigma_expC (FAdd e1 e2) = ExpC $ \f -> f (FAdd (unExpC e1 f) (unExpC e2 f))
-- Sample term
t1c = exp_BB_C tbb1
-- The advantage of ExpC is that functions roll and unroll take
-- a particularly elegant form
rollC :: ExpF ExpC -> ExpC
rollC = sigma_expC
unrollC :: ExpC -> ExpF ExpC
unrollC e = unExpC e (fmap sigma_expC)
-- (ExpC, sigma_ExpC) is the initial algebra of the functor ExpF.
-- (the same holds for ExpBB since it is isomorphic to ExpC)
-- Indeed, consider an arbitrary ExpF algebra with the carrier U
-- and the operations sigma :: ExpF U -> U.
-- We show that there exists a unique homomorphism
-- (hc sigma) :: ExpC -> U such that the following holds
-- hc sigma . sigma_ExpC = sigma . fmap (hc sigma)
-- Consider x = FLit n :: ExpF ExpC.
-- Then (sigma . fmap (hc sigma)) x = sigma (FLit n)
-- Thus we obtain
-- hc sigma (ExpC $ \f -> f (FLit n)) = sigma (FLit n)
-- from which we get
-- hc sigma e = unExpC e sigma
-- Consider x = FNeg e :: ExpF ExpC.
-- Then (sigma . fmap (hc sigma)) x = sigma (FNeg (hc sigma e))
-- Thus we obtain
-- hc sigma (ExpC $ \f -> f (FNeg (unExpC e f))) = sigma (FNeg (hc sigma e))
-- Again, the only way to satisfy the equality for all sigma and e is to set
-- hc sigma e = unExpC e sigma
-- The case of x = FAdd e1 e2 :: ExpF ExpC is similar.
-- Thus we get the unique morphism from (ExpC, sigma_ExpC) to
-- any other ExpF algebra.
-- The unique ExpF-algebra homomorphism out of the initial algebra ExpC:
-- folding with the given operations is just applying the encoded term.
hc :: (ExpF a -> a) -> ExpC -> a
hc alg e = unExpC e alg

-- An ExpF algebra with carrier String: one rendering step per layer.
sigma_view :: ExpF String -> String
sigma_view layer = case layer of
  FLit n     -> show n
  FNeg e     -> "(-" ++ e ++ ")"
  FAdd e1 e2 -> "(" ++ e1 ++ " + " ++ e2 ++ ")"

-- We immediately get the function to view the values of ExpC type:
viewC :: ExpC -> String
viewC = hc sigma_view

t1c_view = viewC t1c
-- "(8 + (-(1 + 2)))"
-- "(8 + (-(1 + 2)))"
-- (ExpBB, sigma_ExpBB) is also initial algebra: there is a unique
-- homomorphism from it to any other Exp algebra. Here is this morphism
-- h dict . roll = (dict_sigma dict) . fmap (h dict)
How to take a TAIL of a functional stream
Or how to obtain the first few elements of a functional (success/failure
continuation stream) without first converting it to the regular
Cons/Nil stream. The latter conversion is unreliable: it diverges for
a monadic functional stream over a strict monad. Our technique works
even in the latter case.
$Id: car-fstream.lhs,v 1.3 2012/04/04 03:40:11 oleg Exp oleg $
> {-# LANGUAGE Rank2Types #-}
> module Streams where
> import Control.Monad.Identity
The most common way to represent (a potentially infinite) stream is as
a data structure of the following _recursive_ type:
> data Stream a = Nil | Cons a (() -> Stream a)

The thunking is explicit so the definition applies to strict
languages too. We can also represent a stream pure functionally.
The following is _not_ a recursive type, but a higher-ranked one.

> newtype FStream a = SFK (forall ans. SK a ans -> FK ans -> ans)
> type FK ans = () -> ans             -- failure continuation
> type SK a ans = a -> FK ans -> ans  -- success continuation
> unSFK (SFK a) = a
If we ignore the newtype tagging (needed to properly deal with the
higher-ranked types in Haskell), an FStream is a function of two
continuations, success and failure continuations. The latter is
invoked if the stream has no elements. The stream invokes the success
continuation if the stream has at least one element. The continuation
receives that element and another failure continuation, to ask for
more elements. As an example of such a functional stream, see Ralf
Hinze's ``Deriving Backtracking Monad Transformers'' (ICFP'00).
Obviously there is a correspondence between the two
representations. For example:
The two directions of the correspondence:

> fstream_to_stream fstream = unSFK fstream sk fk
>   where fk () = Nil
>         sk a fk' = Cons a fk'
>
> stream_to_fstream Nil = SFK( \sk fk -> fk () )
> stream_to_fstream (Cons a rest) =
>     SFK(\sk fk ->
>          sk a (\ () -> unSFK (stream_to_fstream (rest ())) sk fk))

A test data stream

> s1 = Cons 'a' (\ () -> Cons 'b' (\ () -> Cons 'c' (\ () -> Nil)))
> f1 = stream_to_fstream s1
> s1' = fstream_to_stream f1
>
> test1 = unSFK f1 (\a k -> a:(k())) (\() -> [])
*Streams> test1
"abc"
Mitch Wand and D.Vaillancourt (see ICFP'04) have described the
correspondence in more formal terms.
There is an obvious difference between the two representations
however. It is easy to find the first two elements of a Stream:
The Nil case was missing in the original, so asking for more elements
than a finite stream contains crashed with a pattern-match failure;
now the available prefix is returned instead.

> stream_take:: Int -> Stream a -> [a]
> stream_take 0 _ = []
> stream_take _ Nil = []
> stream_take n (Cons a r) = a : (stream_take (n-1) $ r ())
It seems all but impossible to find the first two elements of an
FStream without first converting it all to Stream. That conversion
becomes problematic if we had a monadic functional stream:
> newtype MFStream m a = MSFK (forall ans. MSK m a ans -> MFK m ans -> m ans)
> type MFK m ans = m ans                      -- failure continuation
> type MSK m a ans = a -> MFK m ans -> m ans  -- success continuation
> unMSFK (MSFK a) = a
>
>

Note how sk builds the whole rest of the stream (fk' is run eagerly via
>>=) before returning the Cons cell.

> mfstream_to_stream:: (Monad m) => MFStream m a -> m (Stream a)
> mfstream_to_stream mfstream = unMSFK mfstream sk fk
>   where fk = return Nil
>         sk a fk' = fk' >>= (\rest -> return (Cons a (\ () -> rest)))
if m is a strict monad and fstream is infinite, the conversion to Stream
diverges. Indeed,
An infinite functional stream of the naturals:

> nats = nats' 0 where
>     nats' n = MSFK (\sk fk -> sk n (unMSFK (nats' (n+1)) sk fk))
>
> test2 = runIdentity (mfstream_to_stream nats >>= (return . stream_take 5))

*Streams> test2
[0,1,2,3,4]

but for a strict monad like IO:

> test3 = mfstream_to_stream nats >>= (print . stream_take 5)
it diverges without printing anything. Indeed, before we can print
even the first element from nats, we must convert it entirely to a
stream.
It turns out, we can split fstream as if it were just a data
structure. The following function returns an FStream of at most one item.
The item is a pair of the head of the original stream and of the
_rest stream_
> split_fstream :: FStream a -> FStream (a, FStream a)
> split_fstream fstream = unSFK fstream ssk caseB
>   where caseB () = SFK(\a b -> b ())
>         ssk v fk = SFK(\a b ->
>                      a (v, (SFK (\s' f' ->
>                               unSFK (fk ())
>                                 (\ (v'',x) _ -> s' v''
>                                      (\ () -> unSFK x s' f'))
>                                 f')))
>                        (\ () -> b ()))
>
> f11 = split_fstream f1
> e1 = unSFK f11 (\ (a,_) _ -> a) undefined
> f1r = unSFK f11 (\ (_,r) _ -> r) undefined
> f1rs = split_fstream f1r
> e2 = unSFK f1rs (\ (a,_) _ -> a) undefined
Indeed, we can obtain the first two elements from the finite stream f1
*Streams> e1
'a'
*Streams> e2
'b'
Monadic case (quoted almost directly from the paper with Chung-chieh Shan,
Amr A. Sabry, Daniel P. Friedman)
> split_mfstream :: Monad m => MFStream m a -> m (MFStream m (a, MFStream m a))
> split_mfstream m = unMSFK m ssk caseB
>   where caseB = return $ MSFK(\a b -> b)
>         ssk v fk = return $ MSFK(\a b ->
>                      a (v, (MSFK (\s' f' ->
>                               fk >>= (\r -> unMSFK r
>                                         (\ (v'',x) _ -> s' v'' (unMSFK x s' f'))
>                                         f'))))
>                        b)
The original passed 'undefined' as the failure continuation, so taking
more elements than a finite stream has crashed. The recursion is moved
inside the success continuation so that the failure continuation can
simply contribute an empty remainder, yielding the available prefix.
For infinite or long-enough streams the behaviour is unchanged.

> mfstream_take:: Monad m => Int -> MFStream m a -> m [a]
> mfstream_take 0 _ = return []
> mfstream_take n m = do
>   m' <- split_mfstream m
>   unMSFK m'
>     (\ (v,rs) _ -> do
>         rl <- mfstream_take (n-1) rs
>         return (v:rl))
>     (return [])

> test2' = runIdentity (mfstream_take 5 nats)

*Streams> test2'
[0,1,2,3,4]

> test3' = mfstream_take 5 nats >>= print
But now also
*Streams> test3'
[0,1,2,3,4]
{-# LANGUAGE TypeFamilies, EmptyDataDecls, TypeOperators #-}
{-# LANGUAGE FlexibleInstances #-}
-- Embedding a higher-order domain-specific language (simply-typed
-- lambda-calculus with constants) with a selectable evaluation order:
-- Call-by-value, call-by-name, call-by-need in the same Final Tagless framework
module CB where
import Data.IORef
import Control.Monad.IO.Class
-- Our EDSL is typed. EDSL types are built from the following two
-- type constructors:
data IntT
data a :-> b
infixr 5 :->
-- We could have used Haskell's type Int and the arrow -> constructor.
-- We would like to emphasize however that EDSL types need not be identical
-- to the host language types. To give the type system to EDSL, we merely
-- need `labels' -- which is what IntT and :-> are

-- The (higher-order abstract) syntax of our DSL
class EDSL exp where
  -- if we were using (->) instead of (:->) this would be
  -- (f a -> f b) -> f (a -> b)
  lam :: (exp a -> exp b) -> exp (a :-> b)
  -- can we make all of this work even if we use (->) instead of (:->) and
  -- ($) instead of (app)?
  app :: exp (a :-> b) -> exp a -> exp b
  int :: Int -> exp IntT -- Integer literal
  add :: exp IntT -> exp IntT -> exp IntT
  sub :: exp IntT -> exp IntT -> exp IntT

-- A convenient abbreviation: object-level let via lam/app
let_ :: EDSL exp => exp a -> (exp a -> exp b) -> exp b
let_ x y = (lam y) `app` x

-- A sample EDSL term: (\x -> let y = x+x in y+y) 10
t :: EDSL exp => exp IntT
t = lam (\x -> let_ (x `add` x)
               $ \y -> y `add` y) `app` int 10
-- typed BB style booleans?
-- Church-style booleans in the typed EDSL: a boolean selects one of
-- two alternatives of the same type.
true :: EDSL exp => exp (a :-> a :-> a)
true = lam (\onTrue -> lam (\_ -> onTrue))

false :: EDSL exp => exp (a :-> a :-> a)
false = lam (\_ -> lam (\onFalse -> onFalse))
-- not2 :: EDSL exp => exp ((a :-> a :-> a) :-> (b :-> b :-> b))
-- not2 = lam (\x -> x `app` true) `app` _foo
-- Interpretation of EDSL types as host language types
-- The type interpretation function Sem is parameterized by 'm',
-- which is assumed to be a Monad.
--
-- HMM: why do we need 'm' to be a monad? right now, 'm' is simply a type
-- constructor taking one parameter
-- - if we wanted to use an associated type (family?) then whom would we
-- associate this with? hmm with 'S l m' perhaps and turn it into a class
-- (instead of newtype)?
type family Sem (m :: * -> *) a :: *
type instance Sem m IntT = Int
-- if we wanted to map (->) to (->) (instead of (:->) this is where we'd do it
type instance Sem m (a :-> b) = m (Sem m a) -> m (Sem m b)
-- Interpretation of EDSL expressions as values of the host language (Haskell)
-- An EDSL expression of the type a is interpreted as a Haskell value
-- of the type S l m a, where m is a Monad (the parameter of the interpretation)
-- and l is the label for the evaluation order (one of Name, Value, or Lazy).
-- (S l m) is not quite a monad -- only up to the Sem interpretation
--
-- HMM: can we constrain 'l' somehow? and what would be the benefit of doing
-- that? that we disallow certain other type tags?
newtype S l m a = S { unS :: m (Sem m a) }
-- Call-by-name
data Name
-- Call-by-name interpreter; add/sub log via putStrLn so sharing (or its
-- absence) is observable in the output.
instance MonadIO m => EDSL (S Name m) where
  int = S . return
  add x y = S $ do a <- unS x
                   b <- unS y
                   liftIO $ putStrLn "Adding"
                   return (a + b)
  sub x y = S $ do a <- unS x
                   b <- unS y
                   liftIO $ putStrLn "Subtracting"
                   return (a - b)
  -- (S . return) :: (m (Sem m a) -> m (Sem m b)) -> S Name m (a :-> b)
  -- (unS . f . S) :: m (Sem m a) -> m (Sem m b)
  -- S :: m (Sem m a) -> S Name m a
  -- f :: S Name m a -> S Name m b
  -- unS :: S Name m b -> m (Sem m b)
  -- The argument computation is passed in unevaluated: call-by-name.
  lam f = S . return $ (unS . f . S)
  -- hmm (app) actually carries out (>>=), so probably can't map it to ($)
  app x y = S $ unS x >>= ($ (unS y))
-- Tests
runName :: S Name m a -> m (Sem m a)
runName = unS
-- Evaluating:
-- t = (lam $ \x -> let_ (x `add` x)
--                  $ \y -> y `add` y) `app` int 10
-- The addition (x `add` x) is performed twice because y is bound
-- to a computation, and y is evaluated twice
t0SN :: IO ()
t0SN = runName t >>= print
{-
Adding
Adding
Adding
40
-}
-- A more elaborate example
t1 :: EDSL exp => exp IntT
t1 = lam (\x -> let_ (x `add` x)
                $ \y -> lam $ \z ->
                          z `add` (z `add` (y `add` y))) `app` (int 10 `sub` int 5)
     `app` (int 20 `sub` int 10)
t1SN :: IO ()
t1SN = runName t1 >>= print
{-
*CB> t1SN
Subtracting
Subtracting
Subtracting
Subtracting
Adding
Subtracting
Subtracting
Adding
Adding
Adding
Adding
40
-}
-- A better example
t2 :: EDSL exp => exp IntT
t2 = lam (\_ -> lam $ \x -> let_ (x `add` x)
                            $ \y -> y `add` y)
     `app` (int 100 `sub` int 10)
     `app` (int 5 `add` int 5)
-- The result of subtraction was not needed, and so it was not performed
-- OTH, (int 5 `add` int 5) was computed four times
t2SN :: IO ()
t2SN = runName t2 >>= print
{-
*CB> t2SN
Adding
Adding
Adding
Adding
Adding
Adding
Adding
40
-}
-- Call-by-value
-- Call-by-value
data Value
-- We reuse most of EDSL (S Name) except for lam.
-- vn/nv only relabel the evaluation-order tag; the representation is same.
vn :: S Value m x -> S Name m x
vn = S . unS
nv :: S Name m x -> S Value m x
nv = S . unS
instance MonadIO m => EDSL (S Value m) where
  int = nv . int
  add x y = nv $ add (vn x) (vn y)
  sub x y = nv $ sub (vn x) (vn y)
  -- the good news is that the magic doesn't happen in (app), it's really
  -- (lam) which differentiate the various CB schemes
  app x y = nv $ app (vn x) (vn y)
  -- This is the only difference between CBN and CBV:
  -- lam first evaluates its argument, no matter what
  -- This is the definition of CBV after all
  lam f = S . return $ (\x -> x >>= unS . f . S . return)
runValue :: S Value m a -> m (Sem m a)
runValue = unS
-- We now evaluate the previously written tests t, t1, t2
-- under the new interpretation
t0SV :: IO ()
t0SV = runValue t >>= print
{-
*CB> t0SV
Adding
Adding
40
-}
t1SV :: IO ()
t1SV = runValue t1 >>= print
{-
*CB> t1SV
Subtracting
Adding
Subtracting
Adding
Adding
Adding
40
-}
-- Although the result of subtraction was not needed, it was still performed
-- OTH, (int 5 `add` int 5) was computed only once
t2SV :: IO ()
t2SV = runValue t2 >>= print
{-
*CB> t2SV
Subtracting
Adding
Adding
Adding
40
-}
-- Call-by-need
-- Memoize a computation: the returned action runs 'm' at most once and
-- caches the result in an IORef; later runs return the cached value.
-- NOTE(review): not safe under concurrent use of the returned action --
-- two threads could both run 'm' before the write; confirm single-threaded
-- use is intended.
share :: MonadIO m => m a -> m (m a)
share m = do
  r <- liftIO $ newIORef (False,m)   -- (computed?, computation-or-cached-value)
  let ac = do
        (f,m) <- liftIO $ readIORef r
        if f then m                  -- already computed: cached 'return v'
          else do -- forget effect (from `app`?) + record it
            v <- m
            liftIO $ writeIORef r (True,return v)
            return v
  return ac
-- Call-by-need
data Lazy
-- We reuse most of EDSL (S Name) except for lam.
ln :: S Lazy m x -> S Name m x
ln = S . unS
nl :: S Name m x -> S Lazy m x
nl = S . unS
instance MonadIO m => EDSL (S Lazy m) where
  int = nl . int
  add x y = nl $ add (ln x) (ln y)
  sub x y = nl $ sub (ln x) (ln y)
  app x y = nl $ app (ln x) (ln y)
  -- This is the only difference between CBN and CBNeed
  -- lam shares its argument, no matter what
  -- This is the definition of CBNeed after all
  lam f = S . return $ (\x -> share x >>= unS . f . S)
runLazy :: S Lazy m a -> m (Sem m a)
runLazy = unS
-- We now evaluate the previously written tests t, t1, t2
-- under the new interpretation
-- Here, Lazy is just as efficient as CBV
t0SL :: IO ()
t0SL = runLazy t >>= print
{-
*CB> t0SL
Adding
Adding
40
-}
-- Ditto
t1SL :: IO ()
t1SL = runLazy t1 >>= print
{-
*CB> t1SL
Subtracting
Subtracting
Adding
Adding
Adding
Adding
40
-}
-- Now, Lazy is better than both CBN and CBV: subtraction was not needed,
-- and it was not performed.
-- All other expressions were needed, and evaluated once.
t2SL :: IO ()
t2SL = runLazy t2 >>= print
{-
*CB> t2SL
Adding
Adding
Adding
40
-}
-- Establishing a bijection between the values of the type a and integers, with
-- the operations to retrieve the value given its key,
-- to find the key for the existing value, and to extend the
-- bijection with a new association.
-- The type 'a' of values should at least permit equality comparison;
-- In the present implementation, we require 'a' to be a member
-- of Ord.
-- There are many ways to implement bi-maps, for example, using hash tables,
-- or maps.
-- Our implementation uses Data.Map and Data.IntMap to record
-- both parts of the association.
module CSE_BiMap (
BiMap, empty,
lookup_key,
lookup_val,
insert,
size,
)
where
import qualified Data.Map as M
import qualified Data.IntMap as IM
-- A bijection between values of type 'a' and Int keys, stored in both
-- directions so lookups either way are efficient.
data BiMap a = BiMap (M.Map a Int) (IM.IntMap a)

-- | Find the key for a value, if the value is already associated.
lookup_key :: Ord a => a -> BiMap a -> Maybe Int
lookup_key v (BiMap val2key _) = M.lookup v val2key

-- | Find the value for a key. Partial: the key must be present.
lookup_val :: Int -> BiMap a -> a
lookup_val k (BiMap _ key2val) = key2val IM.! k

-- | Insert a value, allocating the next key (the current size), and
-- return the new key together with the extended bijection.
-- Alas, the Map interface has no single operation to insert and find the
-- index at the same time (although such an operation is easily possible).
insert :: Ord a => a -> BiMap a -> (Int, BiMap a)
insert v (BiMap val2key key2val) =
  let newKey = IM.size key2val
  in (newKey, BiMap (M.insert v newKey val2key) (IM.insert newKey v key2val))

-- | The empty bijection.
empty :: BiMap a
empty = BiMap M.empty IM.empty

instance Show a => Show (BiMap a) where
  show (BiMap _ key2val) = "BiMap" ++ show (IM.toList key2val)

-- | Number of associations in the bijection.
size :: BiMap a -> Int
size (BiMap _ key2val) = IM.size key2val
{-# LANGUAGE NoMonomorphismRestriction #-}
-- * Implicit sharing in the tagless-final style, or:
-- * seemingly pure hash-consing
-- The imperative details of hash-consing are hidden better in a
-- final-tagless style of the DSL embedding, described next.
module CSE_ExpF (Exp(..),
ExpI(..),
R(..),
N(..),NodeId, DAG(..),
run_expN,
do_bench,
mul,
) where
import Control.Monad.State
import CSE_BiMap
import qualified CSE_ExpI as ExpI (Exp(..), sklansky)
-- * //
-- * Tagless-final EDSL embedding
-- In the tagless-final approach, embedded DSL
-- expressions are built with `constructor functions' such as |constant|,
-- |variable|, |add| rather than the data constructors |Constant|,
-- |Variable|, |Add|. The constructor functions yield a representation
-- for the DSL expression being built. The representation could be a
-- string (for pretty-printing), an integer (for evaluator), etc. Since
-- the same DSL expression may be concretely represented in several ways,
-- the constructor functions are polymorphic, parameterized by the
-- representation |repr|. In other words, the constructor functions are
-- the members of the type class
-- Tagless-final syntax: constructor functions, polymorphic in the
-- representation 'repr' of the expression being built.
class Exp repr where
  constant :: Int -> repr Int
  variable :: String -> repr Int
  add :: repr Int -> repr Int -> repr Int
-- The data type Exp from ExpI.hs becomes a type class
-- Our expressions here are of only one type, Int. We could have dropped
-- Int and made 'repr' to be a type variable of the kind *.
-- We keep the 'repr Int' notation nevertheless, for consistency
-- with the tagless final paper, and to allow for extensions (e.g.,
-- the addition of booleans).
-- Sample expressions (as in ExpI.hs, but with lower-case constructors)
exp_a = add (constant 10) (variable "i1")
exp_b = add exp_a (variable "i2")

-- Multiplication by a known Int via repeated doubling, building an
-- expression with O(log n) additions.
-- NOTE(review): a negative n never reaches a base case and diverges.
mul :: Exp repr => Int -> repr Int -> repr Int
mul 0 _ = constant 0
mul 1 x = x
mul n x
  | n `mod` 2 == 0 = mul (n `div` 2) (add x x)
  | otherwise      = add x (mul (n - 1) x)

exp_mul4 = mul 4 (variable "i1")
exp_mul8 = mul 8 (variable "i1")
-- * Interpreters for our DSL: instances of Exp
-- The first interpreter interprets a tagless-final expression
-- as ExpI.Exp data type, converting the DSL expressions here into
-- the so-called `initial form' of ExpI.hs
-- This is one way to print the tagless-final expressions, since
-- we have derived a Show instance for ExpI.Exp
-- Interpreting a tagless-final term as the initial (data-type) AST from
-- CSE_ExpI; the phantom parameter t aligns the kind with 'repr :: * -> *'.
newtype ExpI t = ExpI (ExpI.Exp)
-- * Question: why do we need the wrapper?
instance Exp ExpI where
  constant = ExpI . ExpI.Constant
  variable = ExpI . ExpI.Variable
  add (ExpI x) (ExpI y) = ExpI (ExpI.Add x y)
-- Unwrap to get a showable initial-style AST
test_shb = case exp_b of ExpI e -> e
-- Add (Add (Constant 10) (Variable "i1")) (Variable "i2")
test_sh4 = case exp_mul4 of ExpI e -> e
{-
Add (Add (Variable "i1") (Variable "i1"))
(Add (Variable "i1") (Variable "i1"))
-}
-- The conversion to ExpI takes the form of an instance
-- of the class Exp that provides the interpretation for the expression
-- primitives, as the values for the domain ExpI.
-- * Another interpreter: the evaluator
-- That interpretation of tagless-final expressions is not
-- the only one possible. We can write an evaluator, interpreting
-- each expression as an element of the domain R
-- | Evaluation environment: values for the free variables of an expression.
type REnv = [(String,Int)]

-- | The evaluator's semantic domain: a function from the environment to
-- a value — a Reader monad, in effect.
newtype R t = R{unR :: REnv -> t}

instance Exp R where
  constant x = R (\_ -> x)
  -- An unbound variable is a hard error; acceptable for this teaching
  -- interpreter, where the caller supplies the full environment.
  variable x = R (\env -> maybe (error $ "no var: " ++ x) id $
                          lookup x env)
  add e1 e2 = R (\env -> unR e1 env + unR e2 env)

-- | Evaluate exp_mul4 with i1 = 5; the result is 20.
test_val4 = unR exp_mul4 [("i1",5)] -- 20
-- * Evaluating sample expressions is good for debugging
-- * Write once (exp_mul4), interpret many times
-- We are using exactly the same exp_mul4 as in
-- test_sh4. We are evaluating it differently. The gist of
-- the final tagless approach is to write an expression once
-- and evaluate it many times.
-- * //
-- ------------------------------------------------------------------------
-- * Detecting implicit sharing (common subexpression elimination)
-- * Goal: detect structurally equal subexpressions and share them,
-- * converting an expression tree into a DAG
-- * Idea: rather than convert an ASTree to ASDag, build the
-- * ASDag to begin with.
-- * Representing a DAG
-- a collection of Nodes identified by NodeIds,
-- | Identity of a node within the DAG.
type NodeId = Int

-- We stress: Node is NOT a recursive data structure: children are
-- referenced by NodeId, so comparing two Node values takes constant time.
data Node
  = NConst Int
  | NVar String
  | NAdd !NodeId !NodeId
  deriving (Eq, Ord, Show)

-- | The DAG itself: a bijection between nodes and their ids.  One could
-- use several bimaps for different operations (addition, subtraction, etc).
newtype DAG = DAG (BiMap Node) deriving Show
-- * BiMap -- (partial) bijection a <-> Int
-- The mapping between |Node|s and |NodeId|s is realized
-- through a BiMap interface
-- BiMap a establishes a bijection between the values of the type |a|
-- and integers, with the operations to retrieve the value given its key,
-- to find the key for the existing value, and to extend the bijection
-- with a new association.
-- * //
-- * Bottom-up DAG construction
-- As we construct a node for a subexpression, we check if the DAG already
-- has the equal node. If so, we return its |NodeId|; otherwise, we add the node
-- to the DAG.
-- The computation is stateful (we are using the State monad)
-- Hash-consing proper: insert Node into the DAG if it isn't
-- there already, and return its hash code.
-- | Hash-consing proper: return the id of an equal node already present
-- in the DAG, or insert the node and return its freshly allocated id.
hashcons :: Node -> State DAG NodeId
hashcons node = do
  DAG table <- get
  case lookup_key node table of
    Just nid -> return nid
    Nothing  ->
      let (nid, table') = insert node table
      in put (DAG table') >> return nid
-- * Bottom-up DAG construction: tagless-final helps
-- The bottom-up DAG construction maps well to computing a representation
-- for a tagless-final expression, which is also evaluated bottom-up. The
-- DAG construction can therefore be written as a tagless-final
-- interpreter, an instance of the type class Exp. The interpreter maps
-- a tagless-final expression to the concrete representation
-- * Interpreting Exp as a Node in the current DAG
-- | DAG-construction interpreter: an expression denotes the stateful
-- computation that enters its nodes into the DAG and yields the root's id.
newtype N t = N{unN :: State DAG NodeId}

instance Exp N where
  constant x = N(hashcons $ NConst x)
  variable x = N(hashcons $ NVar x)
  -- Children are hash-consed first (bottom-up), then the Add node itself.
  add e1 e2 = N(do
    h1 <- unN e1
    h2 <- unN e2
    hashcons $ NAdd h1 h2)
-- * The state is hidden behind the tagless-final veneer
-- run the DAG-construction interpreter and the node,
-- as a reference within a DAG.
-- | Run the DAG-construction interpreter from an empty DAG, returning the
-- root node's id together with the completed DAG.
run_expN :: N t -> (NodeId, DAG)
run_expN (N m) = runState m (DAG empty)
-- We re-interpret exp_mul4 differently this time.
-- The DAG-representation makes the sharing patent
-- A DAG is printed as the list of |(NodeId,Node)| associations. The
-- sharing of the left and right summands below is patent
-- | exp_mul4 re-interpreted as a DAG; the sharing is explicit.
test_sm4 = run_expN exp_mul4
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)])

-- | exp_mul8 as a DAG: one node more than exp_mul4.
test_sm8 = run_expN exp_mul8
-- (3,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1),(3,NAdd 2 2)])
-- * We have constructed netlists, topologically sorted
-- a netlist is a low-level representation of a circuit listing the gates and
-- their connections, used in circuit manufacturing. Since our BiMap allocated
-- monotonically increasing NodeIds, the resulting netlist comes out
-- topologically sorted. Therefore, we can straightforwardly generate machine
-- code after the standard register allocation.
-- We retain all the information about exp_mul4. In addition, all
-- sharing is fully explicit. As we can see, the evaluation process finds
-- common subexpressions automatically.
-- * //
-- ------------------------------------------------------------------------
-- * Superficially `effectless' common sub-expression elimination
-- sklansky example by Matthew Naylor, with further credit to
-- Chalmers folk, Mary Sheeran and Emil Axelsson.
-- It is said to be similar to scanl1, but contains more parallelism
-- The function sklansky is defined in ExpI.hs
-- * To remind what sklansky produces:
-- | Run sklansky over strings, to display the shape of the running sums
-- it produces (each sum rendered as a parenthesized expression).
test_sklansky_o n = ExpI.sklansky addition xs
  where
    addition x y = "(" ++ x ++ "+" ++ y ++ ")"
    xs = Prelude.map (("v"++) . show) [1..n]
-- (v1+v2) is used three times
{-
test_sklansky_o 4
["v1","(v1+v2)","((v1+v2)+v3)","((v1+v2)+(v3+v4))"]
-}
-- (v1+v2) is used seven times
{-
test_sklansky_o 8
["v1","(v1+v2)",
"((v1+v2)+v3)",
"((v1+v2)+(v3+v4))",
"(((v1+v2)+(v3+v4))+v5)",
"(((v1+v2)+(v3+v4))+(v5+v6))",
"(((v1+v2)+(v3+v4))+((v5+v6)+v7))",
"(((v1+v2)+(v3+v4))+((v5+v6)+(v7+v8)))"]
-}
-- * sklansky challenge: sharing across expressions, not within an expression
-- * We re-write in the tagless-final style
-- * scratch that: we use sklansky as it was
-- :t ExpI.sklansky
-- ExpI.sklansky :: (a -> a -> a) -> [a] -> [a]
-- Actually, there is no re-write
-- We use Matthew Naylor's code, in pure Haskell, as it was
-- * //
-- We run it differently though
-- | Run sklansky, unchanged, under the DAG-construction interpreter:
-- the common sub-expressions across all the produced sums are detected
-- and shared automatically.  Returns the roots and the final DAG.
test_sklansky n = runState sk (DAG empty)
  where
    -- mapM replaces the equivalent 'sequence (map ...)' spelling
    sk = mapM unN (ExpI.sklansky add xs)
    xs = map (variable . show) [1..n]
-- Implicit sharing works: hash code 2 (for v1+v2) is used three times
{-
*ExpF> test_sklansky 4
([0,2,4,7],
DAG BiMap[(0,NVar "1"),(1,NVar "2"),
(2,NAdd 0 1),(3,NVar "3"),
(4,NAdd 2 3),(5,NVar "4"),
(6,NAdd 3 5),(7,NAdd 2 6)])
-}
-- We indeed obtained the set of nodes all pointing within the same DAG
-- The stateful nature was well-hidden till the very end (running)
-- We see the deep sharing: hash code 2, which is v1+v2, is used twice.
-- Then code 7, which is (v1+v2)+(v3+v4), is used 4 times
-- after being computed
{-
*ExpF> test_sklansky 8
([0,2,4,7,9,12,15,19],
DAG BiMap[(0,NVar "1"),(1,NVar "2"),
(2,NAdd 0 1),(3,NVar "3"),
(4,NAdd 2 3),(5,NVar "4"),
(6,NAdd 3 5),(7,NAdd 2 6), -- just as it was for test_sklansky 4
(8,NVar "5"),(9,NAdd 7 8),
(10,NVar "6"),(11,NAdd 8 10),
(12,NAdd 7 11),(13,NVar "7"),
(14,NAdd 11 13),(15,NAdd 7 14),
(16,NVar "8"),(17,NAdd 13 16),
(18,NAdd 11 17),(19,NAdd 7 18)])
-}
-- * //
-- ------------------------------------------------------------------------
-- * Performance is still a problem
-- We have demonstrated the sharing detection technique that represents
-- a DSL program as a DAG, eliminating multiple occurrences of common
-- subexpressions. Alas, to find all these common subexpressions we have
-- to examine the entire expression _tree_, which may take long time for large
-- programs (large circuits).
-- See ExpLet.hs for the exponential speed-up.
-- To force the evaluation
-- | Force the DAG to be built and report its size (number of nodes).
do_bench :: (a,DAG) -> Int
do_bench (_,DAG d) = size d

-- | Size of the DAG for constant multiplication by n.  It takes effort to
-- find the common subexpressions: the whole tree must be traversed.
bench_mul n = do_bench $
              run_expN (mul n (variable "i"))

bench12 = bench_mul (2^12)
bench13 = bench_mul (2^13)
{-
-- Interpreted code
*ExpF> bench_mul (2^11)
12
(0.06 secs, 4688952 bytes)
*ExpF> bench_mul (2^12)
13
(0.09 secs, 5344760 bytes)
*ExpF> bench_mul (2^13)
14
(0.20 secs, 9791092 bytes)
-- compiled with -O2
Prelude ExpF> bench_mul (2^20)
21
(0.44 secs, 80752592 bytes)
Prelude ExpF> bench_mul (2^21)
22
(0.87 secs, 160434744 bytes)
-}
{-
*ExpF> test_sklansky 128
([0,2,4,7,9,12,15,19,21, ... 560,567,575], ...)
(0.37 secs, 13650184 bytes)
-}
-- Not so bad... The implicit sharing detection is quite fast
bench_skl n = do_bench $ test_sklansky n
{-
-- Interpreted code
*ExpF> bench_skl 128
576
(0.33 secs, 10854396 bytes)
*ExpF> bench_skl 256
1280
(1.79 secs, 38952968 bytes)
-- compiled with -O2
Prelude ExpF> bench_skl 128
576
(0.02 secs, 2163436 bytes)
Prelude ExpF> bench_skl 256
1280
(0.08 secs, 4333380 bytes)
Prelude ExpF> bench_skl 512
2816
(0.37 secs, 13908376 bytes)
-}
-- | Exercise every interpreter defined in this module.
main = do
  print test_shb
  print test_sh4
  print test_val4
  print test_sm4
  print test_sm8
  print $ test_sklansky_o 4
  print $ test_sklansky 4
-- Implicit sharing (common sub-expression elimination) in
-- an embedded DSL for arithmetic expressions
-- The DSL is embedded as a data type
--
-- This file is based on the message by Tom Hawkins
-- posted on Haskell-Cafe on Feb 8, 2008
-- http://www.haskell.org/pipermail/haskell-cafe/2008-February/039339.html
-- We fill the missing details so to run the example and exhibit
-- the problem.
module CSE_ExpI (Exp(..), sklansky, bench_mul, bench_skl, main) where
import CSE_BiMap
-- Tom Hawkins:
-- My DSLs invariably define a datatype to capture expressions; something
-- like this:
-- * //
-- * A sub-DSL of arithmetic expressions, embedded as a datatype
-- | A sub-DSL of arithmetic expressions, embedded as a plain datatype
-- (the `initial' encoding).
data Exp
  = Add Exp Exp -- one can add Sub, Neg, etc.
  | Variable String
  | Constant Int
  deriving (Eq, Ord, Show)

-- | Sample expression 10 + i1.
exp_a = Add (Constant 10) (Variable "i1")

-- | Sample expression (10 + i1) + i2.
exp_b = Add exp_a (Variable "i2")
-- We can indeed generate assembly or Verilog from Exp after
-- a topological sorting
-- But first we have to detect identical sub-expressions and
-- share them, converting an ASTree to a DAG
-- * Common sub-expressions are abundant and easy to create
-- * Two running examples, abstracted from real life
-- * Example 1: multiplication by a known integer constant
-- Our DSL does not have the multiplication
-- operation (many 8-bit CPUs don't have it). We can generate
-- code for constant multiplication by repeated addition.
-- | Generate code for multiplication by a known integer constant using
-- repeated addition (the DSL, like many 8-bit CPUs, lacks multiplication).
mul :: Int -> Exp -> Exp
mul n x
  | n == 0    = Constant 0
  | n == 1    = x
  | even n    = mul (n `div` 2) (Add x x)
  | otherwise = Add x (mul (n - 1) x)
-- Examples of duplication
-- | i1 * 4 — note the duplicated sub-trees in the result:
exp_mul4 = mul 4 (Variable "i1")
{-
Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1"))
-}
-- | i1 * 8 — twice as much duplication.
exp_mul8 = mul 8 (Variable "i1")
-- Twice as much duplication
{-
Add
(Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1")))
(Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1")))
-}
-- * Question: why do we see all this duplication?
-- * Wasn't the multiplication algorithm supposed to be optimal?
-- When we apply something like exp_mul8 to an integer 5, do
-- we keep repeating 5+5 four times in a row?
-- * //
-- The other running example:
-- * sklansky by Matthew Naylor, with further credit to
-- * Chalmers folk, Mary Sheeran and Emil Axelsson.
-- * sklansky add [e1,e2, ... en]
-- * ===> [e1, e1+e2, ... e1+e2+...en]
-- It produces a running sum, with more parallelism and smaller latency.
-- It is somewhat in the spirit of the optimal multiplication mul above
-- The original code, from Matthew Naylor's message:
-- http://www.haskell.org/pipermail/haskell-cafe/2008-February/039671.html
-- | Parallel-prefix network (Sklansky):
--   sklansky op [e1, e2, ..., en] = [e1, e1`op`e2, ..., e1`op`...`op`en]
-- A running "sum" with more parallelism and smaller latency than scanl1.
sklansky :: (a -> a -> a) -> [a] -> [a]
sklansky _  []  = []
sklansky _  [x] = [x]
sklansky op xs  = lhs ++ [op (last lhs) r | r <- rhs]
  where
    (ls, rs) = splitAt (length xs `div` 2) xs
    lhs      = sklansky op ls   -- prefix sums of the left half
    rhs      = sklansky op rs   -- prefix sums of the right half
-- | sklansky over initial-style expressions with variables "1".."n".
test_sklansky_i n = sklansky Add xs
  where
    xs = Prelude.map (Variable . show) [1..n]
-- * //
-- * test_sklansky_i 4
-- (v1+v2) is repeated three times
{-
[Variable "1",
Add (Variable "1") (Variable "2"),
Add (Add (Variable "1") (Variable "2")) (Variable "3"),
Add (Add (Variable "1") (Variable "2")) (Add (Variable "3") (Variable "4"))]
-}
-- * Common sub-expressions appear across independent expressions
{-
Tom Hawkins wrote (Haskell-Cafe on Feb 8, 2008)
The problem comes when I want to generate efficient code from an
Exp (ie. to C or some other target language). The method I use
involves converting the tree of subexpressions into an acyclic graphic
to eliminate common subexpressions. The nodes are then topologically
ordered and assigned an instruction, or statement for each node....
The process of converting an expression tree to a graph uses either Eq
or Ord (either derived or a custom instance) to search and build a set
of unique nodes to be ordered for execution. In this case "a", then
"b", then "c". The problem is expressions often have shared,
equivalent subnodes, which dramatically grows the size of the tree.
As these trees grow in size, the equality comparison in graph
construction quickly becomes the bottleneck for DSL compilation.
What's worse, the phase transition from tractable to intractable is
very sharp. In one of my DSL programs, I made a seemingly small
change, and compilation time went from milliseconds to
not-in-a-million-years.
-}
-- Tom Hawkins was about to give up on Haskell
-- His message was entitled: ``I love purity, but it's killing me''
-- *
-- Tom Hawkins's message did not show the algorithm for converting
-- a tree of sub-expressions into a DAG.
-- The message does say that the algorithm uses Eq or Ord instances
-- of Exp
-- Probably Tom Hawkins adds all sub-expressions of an expression
-- into a DAG, ensuring that duplicates are not added
-- | A DAG as a (partial) bijection between expressions and node ids.
newtype DAG = DAG (BiMap Exp) deriving Show

-- | Enter an expression and all of its sub-expressions into the DAG,
-- skipping duplicates.  Children are inserted bottom-up before their
-- parent; the initial guard stops as soon as the whole expression is
-- already present.
exp_to_dag :: Exp -> DAG -> DAG
exp_to_dag e d@(DAG m) | Just _ <- lookup_key e m = d
exp_to_dag e@(Add e1 e2) d =
  let d1      = exp_to_dag e1 d
      d2      = exp_to_dag e2 d1
      DAG m2  = d2
      (_, m') = insert e m2  -- the allocated key itself is not needed
  in DAG m'
-- Variables and constants have no sub-expressions; the guard above
-- already handled the case where they are in the DAG, but a fresh leaf
-- is deliberately NOT added here (only Add nodes are recorded).
exp_to_dag _ d = d
-- There are only two expressions to compute
-- | exp_mul4 converted to a DAG: only two distinct Add nodes remain.
dag_4 = exp_to_dag exp_mul4 (DAG empty)
{-
DAG BiMap[(0,Add (Variable "i1") (Variable "i1")),
          (1,Add (Add (Variable "i1") (Variable "i1"))
                 (Add (Variable "i1") (Variable "i1")))]
-}
dag_8 = exp_to_dag exp_mul8 (DAG empty)

-- | Benchmark: size of the DAG built from (mul n (Variable "i")).
-- The tree being traversed has O(n) leaves, hence the slowdown.
bench_mul n =
  case exp_to_dag (mul n (Variable "i")) (DAG empty) of DAG d -> size d

bench20 = bench_mul (2^20)
bench21 = bench_mul (2^21)
-- The size of the DAG increased by 1 node; yet it took twice as
-- much time...
{-
-- Interpreted code
*ExpI> bench20
20
(4.51 secs, 1585728 bytes)
*ExpI> bench21
21
(8.76 secs, 530728 bytes)
-- compiled with -O2
Prelude ExpI> bench_mul (2^20)
20
(0.06 secs, 1053460 bytes)
Prelude ExpI> bench_mul (2^21)
21
(0.11 secs, 1062568 bytes)
Prelude ExpI> bench_mul (2^22)
22
(0.22 secs, 1600568 bytes)
Prelude ExpI> bench_mul (2^23)
23
(0.43 secs, 1068432 bytes)
-}
-- | Fold all the sklansky outputs (width 4) into one shared DAG.
test_skl_dag = foldl (flip exp_to_dag) (DAG empty) (test_sklansky_i 4)

-- | Benchmark: DAG size for the sklansky network of width n.
bench_skl n =
  case foldl (flip exp_to_dag) (DAG empty) (test_sklansky_i n) of
    DAG d -> size d
{-
-- Interpreted code
*ExpI> bench_skl 128
448
(0.32 secs, 1637380 bytes)
*ExpI> bench_skl 256
1024
(1.58 secs, 2234148 bytes)
-- compiled with -O2
Prelude ExpI> bench_skl 128
448
(0.03 secs, 1053144 bytes)
Prelude ExpI> bench_skl 256
1024
(0.12 secs, 2297932 bytes)
Prelude ExpI> bench_skl 512
2304
(0.56 secs, 3419484 bytes)
-}
-- | Demonstrate the initial encoding, its duplication, and the CSE pass.
main = do
  print exp_b
  print exp_mul4
  print exp_mul8
  print $ test_sklansky_i 4
  print dag_8
  print $ bench_skl 128
{-# LANGUAGE NoMonomorphismRestriction #-}
-- * Adding explicit sharing to our DSL
module ExpLet where
import ExpF
-- * Why do we even need explicit sharing
-- * Help the human reader (cf. Principia Mathematica)
-- * Can't we just use Haskell's let?
-- Haskell's let is certainly explicit and so clearly
-- helps the reader. Does it help CSE eliminators?
-- Does it help tagless interpreters?
-- The multiplication-by-4 example written explicitly
-- * exp_mul4 = mul 4 (variable "i1")
-- | The multiplication-by-4 example written with Haskell's own let,
-- making the intended sharing explicit to the human reader.
exp_mul4 =
  let x = variable "i1" in
  let y = add x x in
  add y y
-- * Question: Does Haskell guarantee sharing?
-- * Question: can we rely on memoization?
-- | Interpreted as a tree, the Haskell-let sharing is lost: the terms
-- are duplicated in the output.
tree_mul4 = case exp_mul4 of ExpI t -> t
{-
Add (Add (Variable "i1") (Variable "i1"))
    (Add (Variable "i1") (Variable "i1"))
-}

-- | Interpreted as a DAG, the common sub-terms are recovered by hash-consing.
dag_mul4 = run_expN exp_mul4
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)])
-- * //
-- * Haskell's let does work, sometimes!
-- The question about Haskell let is non-trivial because
-- it sometimes works.
-- To demonstrate how Haskell let helps some interpreters
-- but not the others, we define two tagless-final
-- interpreters.
-- One computes the size of an expression (the number of the
-- constructors)
-- * Haskell's let does help some DSL interpreters
-- * The size interpreter
-- | Interpreter computing the size of an expression (constructor count).
newtype Size t = Size Int

instance Exp Size where
  constant _ = Size 1
  variable _ = Size 1
  add (Size x) (Size y) = Size (x+y+1)

-- Haskell's let DOES help this interpreter: the shared Size values are
-- computed once, so even huge expressions are sized instantly.
Size size_mul4 = exp_mul4 -- 7
Size size_large = mul (2^30) (variable "i1")
-- 2147483647
-- Nearly instantaneous: only 0.01 secs
-- * The print interpreter
-- Another interpreter will print the expression
-- | Interpreter that prints the expression as a side effect.
newtype Print t = Print (IO ())

instance Exp Print where
  constant = Print . putStr . show
  variable = Print . putStr
  add (Print x) (Print y) = Print (x >> putStr " + " >> y)

-- Here Haskell's let does NOT help: the shared IO action is re-run for
-- every occurrence, so printing duplicates the effort.
Print print_mul4 = exp_mul4
-- i1 + i1 + i1 + i1
-- We see the duplication in the output, thus the duplication
-- of the effort to print.
-- * Print pm30 = mul (2^30) (variable "i1")
-- Here, running will take a long time, even if we print to /dev/null
-- So, here Haskell's let did not help!
-- We thus need a sharing form in the DSL itself
-- * //
-- * Adding a new form let_ to our DSL
-- We extend the DSL language with a new expression form, let_,
-- to indicate the explicit sharing
-- Tagless-final interpreters are easily extensible
-- | Extension of the DSL with a sharing form.  Tagless-final languages
-- are extended simply by declaring another class.
class ExpLet repr where
  let_ :: repr a -> (repr a -> repr b) -> repr b

-- | exp_mul4 re-written with sharing made explicit in the DSL itself.
exp_mul4' =
  let_ (variable "i1") (\x ->
  let_ (add x x)       (\y ->
  add y y))
-- * Extending the existing interpreters with let_
-- As one might expect, let_ is basically the reverse application
-- | For the evaluator, let_ is simply reverse application.
instance ExpLet R where
  let_ x f = f x

-- Sharing is an optimization: with and without it the results agree.
val_mul4 = unR exp_mul4 [("i1",5)] -- 20
val_mul4' = unR exp_mul4' [("i1",5)] -- 20
-- * How to see sharing
-- We would like to `see' the sharing.
-- We have to define a show-like function then, an interpreter
-- of tagless-final expressions as strings.
-- Actually, we need a bit more than strings: to show sharing
-- (as let-expressions) we need a way to generate `pointers',
-- or local variable names.
-- | Counter used to generate fresh let-bound variable names v0, v1, ...
type LetVarCount = Int

-- | Show-interpreter: an expression denotes a string, given the next
-- free variable-counter value.
newtype S t = S{unS :: LetVarCount -> String}

instance Exp S where
  constant = S . const . show
  variable = S . const
  add e1 e2 = S(\c -> unS e1 c ++ " + " ++ unS e2 c)

instance ExpLet S where
  -- Render the shared expression once, bind it to a fresh name, and
  -- substitute that name in the body.
  let_ e f = S(\c -> let vname = "v" ++ show c
                     in unwords ["let",vname,"=",unS e c,
                                 "in",unS (f (S (const vname))) (succ c)])

-- | Run the show-interpreter starting from counter 0.
run_expS :: S t -> String
run_expS (S m) = m 0

sh_mul4 = run_expS exp_mul4
-- "i1 + i1 + i1 + i1"
sh_mul4' = run_expS exp_mul4'
-- "let v0 = i1 in let v1 = v0 + v0 in v1 + v1"
-- * //
-- * Payoff: extending the N interpreter to handle explicit sharing
-- Sharing a computation means performing a computation and sharing
-- result. The code below says exactly that.
-- | Sharing under the DAG interpreter: perform the computation once and
-- share its resulting NodeId.
instance ExpLet N where
  let_ e f = N(do
    x <- unN e
    unN $ f (N (return x)))

-- | exp_mul4' as a DAG — identical to dag_mul4.
dag_mul4' = run_expN exp_mul4'
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)])
-- * Benchmarking
-- To really see the difference, we need bigger multiplications
-- We don't want to write the expressions like |exp_mul4'| by hand,
-- we want to generate them.
-- We rewrite the |mul| generator to use the explicit sharing.
-- The difference from |mul| is on the last-but-one line.
-- | The multiplication generator re-written with explicit sharing: the
-- doubled operand is let_-bound, so each halving step shares its input.
mul' :: (ExpLet repr, Exp repr) => Int -> repr Int -> repr Int
mul' n x
  | n == 0    = constant 0
  | n == 1    = x
  | even n    = let_ x (\shared -> mul' (n `div` 2) (add shared shared))
  | otherwise = add x (mul' (n - 1) x)
-- Is there another way to write mul'?
-- mul' n x | n `mod` 2 == 0 = let_ (add x x) (\y -> mul' (n `div` 2) y)
-- "let v0 = i1 + i1 in let v1 = v0 + v0 in v1"
-- | Show mul' 4 with its explicit sharing rendered as let-bindings.
sh_mul4'' = run_expS (mul' 4 (variable "i1"))
-- "let v0 = i1 in let v1 = v0 + v0 in v1 + v1"
-- * //
-- * Benchmarking. Compare with bench12 and bench13 from ExpF.hs
-- | Benchmark of the sharing-aware generator; compare with bench12 and
-- bench13 from ExpF.hs — the speed-up is exponential.
bench_mul' n = do_bench $
               run_expN (mul' n (variable "i"))

bench12 = bench_mul' (2^12)
bench13 = bench_mul' (2^13)
-- The exponential speed-up is apparent
{-
*ExpLet> bench12
13
(0.02 secs, 3171764 bytes)
*ExpLet> bench13
14
(0.01 secs, 534548 bytes)
-}
-- The mul (2^30) is the expression tree with 2^30 leaves!
{-
*ExpLet> bench_mul' (2^20)
21
(0.01 secs, 1585752 bytes)
*ExpLet> bench_mul' (2^30)
31
(0.01 secs, 1055932 bytes)
-}
-- * Some sharing is left to discover:
{-
*ExpLet> run_expS (mul' 15 (variable "i"))
"i + let v0 = i in v0 + v0 +
let v1 = v0 + v0 in v1 + v1 + let v2 = v1 + v1 in v2 + v2"
-}
-- It is found and eliminated
{-
*ExpLet> run_expN (mul' 15 (variable "i"))
(6,DAG BiMap[
(0,NVar "i"),
(1,NAdd 0 0),
(2,NAdd 1 1),
(3,NAdd 2 2),
(4,NAdd 2 3),
(5,NAdd 1 4),
(6,NAdd 0 5)])
-}
-- rather quickly:
{-
*ExpLet> bench_mul' (2^30-1)
59
(0.01 secs, 1078232 bytes)
-}
-- | Demonstrate explicit sharing under every interpreter of this module.
main = do
  print tree_mul4
  print dag_mul4
  print val_mul4
  print val_mul4'
  print sh_mul4'
  print sh_mul4''
  print bench13
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
module ExpSYMFinal where
import Control.Monad
-- typeclass representing final encoding of the Exp dsl
-- 'repr' is the semantic domain
-- | Final (tagless) encoding of the expression DSL; 'repr' is the
-- semantic domain in which expressions are interpreted.
class ExpSYM repr where
  lit :: Int -> repr
  neg :: repr -> repr
  add :: repr -> repr -> repr
-- interpret Exp AST in the Int semantic domain
-- | Interpret expressions in the Int domain: an expression simply
-- denotes its value.
instance ExpSYM Int where
  lit = id
  neg = negate
  add = (+)

-- | Run the Int interpreter (the representation already IS the answer).
eval :: Int -> Int
eval = id
-- interpret Exp AST in the String semantic domain
-- | Interpret expressions in the String domain: pretty-printing.
instance ExpSYM String where
  lit n = show n
  neg e = "(-" ++ e ++ ")"
  add e1 e2 = "(" ++ e1 ++ " + " ++ e2 ++ ")"

-- | Run the String interpreter.
view :: String -> String
view = id

-- | Sample term 8 + (-(1 + 2)), polymorphic in the interpreter.
tf1 :: forall repr. ExpSYM repr => repr
tf1 = add (lit 8) (neg (add (lit 1) (lit 2)))
-- add multiplication operation to the Exp dsl
-- | Language extension: multiplication, added as a separate class so
-- existing terms and interpreters are untouched.
class MulSYM repr where
  mul :: repr -> repr -> repr

instance MulSYM Int where
  mul e1 e2 = e1 * e2

instance MulSYM String where
  mul e1 e2 = "(" ++ e1 ++ " * " ++ e2 ++ ")"

-- | Sample term using both the base language and the extension.
tfm1 :: forall repr. (MulSYM repr, ExpSYM repr) => repr
tfm1 = add (lit 7) (neg (mul (lit 1) (lit 2)))
-- serialization problem
-- | A generic wire format for serialization (an s-expression-like tree).
data Tree = Leaf String
          | Node String [Tree]
          deriving (Eq, Read, Show)

-- | Serializer: interpret expressions as wire-format trees.
instance ExpSYM Tree where
  lit n = Node "Lit" [Leaf $ show n]
  neg e = Node "Neg" [e]
  add e1 e2 = Node "Add" [e1,e2]

-- | Run the Tree interpreter.
toTree :: Tree -> Tree
toTree = id

tf1_tree :: Tree
tf1_tree = toTree tf1 -- with the type sig the call to toTree is optional
-- | Errors are reported as plain strings in this module.
type ErrMsg = String

-- | A total replacement for 'read': succeeds only when the whole input
-- parses unambiguously, otherwise reports the offending string.
safeRead :: Read a => String -> Either ErrMsg a
safeRead s
  | [(v, "")] <- reads s = Right v
  | otherwise            = Left $ "Read error " ++ s
-- deserializer, closed recursion style
-- | Deserializer in closed-recursion style: rebuild a tagless-final
-- expression from the Tree wire format, reporting malformed input.
fromTree :: (ExpSYM repr) => Tree -> Either ErrMsg repr
fromTree (Node "Lit" [Leaf n]) = liftM lit $ safeRead n
fromTree (Node "Neg" [e]) = liftM neg $ fromTree e
fromTree (Node "Add" [e1, e2]) = liftM2 add (fromTree e1) (fromTree e2)
fromTree e = Left $ "Invalid tree: " ++ show e
-- | Deserialize tf1_tree and evaluate it.  The deserialized value is
-- used monomorphically: a second interpretation is impossible here.
tf1_eval :: IO ()
tf1_eval = case fromTree tf1_tree of
  Left e -> putStrLn $ "Error: " ++ e
  Right x -> do
    print $ eval x
    -- below doesn't work! x has been pinned to Int by the eval above
    -- print $ view x
-- duplicating interpreter, creates monomorphic copies
-- | Duplicating interpreter: interprets an expression simultaneously in
-- two domains, producing monomorphic copies that can be consumed separately.
instance (ExpSYM repr, ExpSYM repr') => ExpSYM (repr,repr') where
  lit x = (lit x, lit x)
  neg (e1,e2) = (neg e1, neg e2)
  add (e11,e12) (e21, e22) = (add e11 e21, add e12 e22)

-- | Run the duplicating interpreter (identity at the pair type).
duplicate :: (ExpSYM repr, ExpSYM repr') => (repr,repr') -> (repr,repr')
duplicate = id
-- error reporting for deserializer
-- | Error reporting for the deserializer: print the error, or hand the
-- successfully parsed payload to the consumer.  (The consumer is unused
-- on the error path, hence the wildcard.)
check_consume _ (Left e)  = putStrLn $ "Error: " ++ e
check_consume f (Right x) = f x

-- | Interpret one copy of x with ev and print it, returning the other,
-- still-interpretable copy.  Places no restrictions on the type of x2.
dup_consume ev x = print (ev x1) >> return x2
  where (x1,x2) = duplicate x

-- | Consume one expression three times (eval, view, toTree) by
-- duplicating it |interpreters| - 1 times — hence the name.
-- TODO: make them use a common copy?
thrice x = dup_consume eval x >>= dup_consume view >>= print . toTree

tf1_int3 = check_consume thrice . fromTree $ tf1_tree
-- deserializer, open recursion style. replace all self-recursive calls to
-- applications of the explicit reference to the *-version of itself (this will
-- be filled in later using Y-combinator)
-- this is needed for the deserializer to be extensible
-- | Deserializer in open-recursion style: every self-recursive call goes
-- through the explicit 'self' argument, to be tied later with a fixpoint.
-- This is what makes the deserializer extensible.
fromTree' :: (ExpSYM repr)
          => (Tree -> Either ErrMsg repr)
          -> (Tree -> Either ErrMsg repr)
fromTree' self (Node "Lit" [Leaf n]) = liftM lit $ safeRead n
fromTree' self (Node "Neg" [e]) = liftM neg $ self e
fromTree' self (Node "Add" [e1, e2]) = liftM2 add (self e1) (self e2)
fromTree' self e = Left $ "Invalid tree: " ++ show e
-- now tie the knot explicitly using the Y-combinator
-- if f takes a first argument which is the *-version of itself then fill that
-- argument's place
-- | The usual fixpoint combinator, used to close the open recursion.
fix f = f (fix f)

-- | The closed deserializer: fromTree' with the knot tied.
fromTree'' = fix fromTree'

tf1E_int3 = check_consume thrice . fromTree'' $ tf1_tree

-- Deserializing an ill-formed tree reports an error, as expected.
tfxE_int3 = check_consume thrice . fromTree'' $
            Node "Lit" [Leaf "1", Leaf "2"]
-- let's extend the deserializer now to include multiplication
-- 1] extend wire format for multiplication
-- 1] extend the wire format with multiplication
instance MulSYM Tree where
  mul e1 e2 = Node "Mul" [e1,e2]

-- 2] extend the duplicator with multiplication
instance (MulSYM repr, MulSYM repr') => MulSYM (repr, repr') where
  mul (e11, e12) (e21, e22) = (mul e11 e21, mul e12 e22)

-- 3] extend the deserializer: handle "Mul" nodes, delegate everything
-- else to the open-recursive base deserializer.
fromTreeExt self (Node "Mul" [e1,e2]) = liftM2 mul (self e1) (self e2)
fromTreeExt self e = fromTree' self e

-- | Close the extended deserializer with the fixpoint.
fromTreeExt' = fix fromTreeExt

-- The same 'check_consume'/'thrice' machinery works with the extension.
tf1'_int3 = check_consume thrice . fromTreeExt' $ tf1_tree
tfm1_int3 = check_consume thrice . fromTreeExt' $ toTree tfm1
-- make the context on which the operation depends. in this case it depends on
-- the sign
-- | The context the transformation depends on: the sign surrounding the
-- current sub-expression.
data Ctx = Pos | Neg

-- | Interpreter transformer: interpret expressions as functions from the
-- sign context, pushing negation down to the literals.
instance ExpSYM repr => ExpSYM (Ctx -> repr) where
  -- Literals absorb the context: negate them when under a Neg.
  lit n Pos = lit n
  lit n Neg = neg (lit n)
  -- Addition is a homomorphism wrt push_neg: it propagates the context.
  add e1 e2 ctx = add (e1 ctx) (e2 ctx)
  -- Negation flips the context instead of producing a Neg node.
  neg e Pos = e Neg
  neg e Neg = e Pos

-- | Run the negation-pushing transformer by supplying the base context.
push_neg :: forall t. (Ctx -> t) -> t
push_neg e = e Pos
-- extend the transformer to include multiplication
-- | Extend the transformer to multiplication.  Multiplication is NOT a
-- homomorphism wrt negation, so the context is matched explicitly: a
-- surrounding negation is pushed into exactly one factor.
instance MulSYM repr => MulSYM (Ctx -> repr) where
  mul e1 e2 Pos = mul (e1 Pos) (e2 Pos)
  mul e1 e2 Neg = mul (e1 Pos) (e2 Neg)
-- | Entry point: run the deserialize-and-evaluate demonstration.
main :: IO ()
main = tf1_eval
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment