haskell: TTFI
-- The illustration of the Boehm-Berarducci encoding | |
-- This file is the baseline: it uses an ordinary algebraic | |
-- data type and the operations of construction, deconstruction, | |
-- and transformation. | |
-- We use the running example of the Exp data type from | |
-- tagless-final lectures | |
module BB_ADT where | |
-- Sample data type of expressions | |
data Exp = Lit Int | |
| Neg Exp | |
| Add Exp Exp | |
-- Constructing a sample expression | |
ti1 = Add (Lit 8) (Neg (Add (Lit 1) (Lit 2))) | |
-- A sample consumer | |
-- It is structurally recursive | |
view:: Exp -> String | |
view (Lit n) = show n | |
view (Neg e) = "(-" ++ view e ++ ")" | |
view (Add e1 e2) = "(" ++ view e1 ++ " + " ++ view e2 ++ ")" | |
ti1_view = view ti1 | |
-- "(8 + (-(1 + 2)))" | |
-- Transformer | |
-- * Pushing the negation down | |
-- Previously, expressions were constructed according to this grammar: | |
-- * General grammar of expressions | |
-- * e ::= int | neg e | add e e | |
-- * | |
-- * Restricted grammar: | |
-- * e ::= factor | add e e | |
-- * factor ::= int | neg int | |
-- Now, only integer literals can be negated, and only once. | |
-- This function is NOT structurally recursive | |
push_neg :: Exp -> Exp | |
push_neg e@Lit{} = e | |
push_neg e@(Neg (Lit _)) = e | |
push_neg (Neg (Neg e)) = push_neg e | |
push_neg (Neg (Add e1 e2)) = Add (push_neg (Neg e1)) (push_neg (Neg e2)) | |
push_neg (Add e1 e2) = Add (push_neg e1) (push_neg e2) | |
-- A few sample transformations and their results | |
ti1_norm = push_neg ti1 | |
ti1_norm_view = view ti1_norm | |
-- "(8 + ((-1) + (-2)))" | |
-- Add an extra negation | |
ti1n_norm_view = view (push_neg (Neg ti1)) | |
-- "((-8) + (1 + 2))" |
{-# LANGUAGE GADTs #-} | |
{-# LANGUAGE RankNTypes #-} | |
{-# LANGUAGE FlexibleInstances #-} | |
{-# LANGUAGE InstanceSigs #-} | |
{-# LANGUAGE KindSignatures #-} | |
{-# LANGUAGE TypeFamilies #-} | |
{-# LANGUAGE FlexibleContexts #-} | |
{-# LANGUAGE NoMonomorphismRestriction #-} | |
-- {-# LANGUAGE ImpredicativeTypes #-} | |
{-# LANGUAGE StandaloneDeriving #-} | |
{-# LANGUAGE TypeOperators #-} | |
-- {-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE NoImplicitPrelude #-} | |
-- {-# LANGUAGE OverlappingInstances #-} | |
module BB_ExpSYM where | |
import Control.Monad.IO.Class | |
import Control.Monad.Identity | |
import Control.Monad.Free | |
import Control.Monad.State | |
import Control.Monad.Writer | |
import Prelude ((.), ($), ($!), Bool(..), (&&), String, (++), Show(..)) | |
import Control.Applicative | |
import qualified Prelude as P | |
-------------------------------------------------------------------------------- | |
-- let's start with final semantics parameterizing on evaluation strategy | |
class LamSym repr where | |
lam :: (repr a -> repr b) -> repr (a -> b) | |
app :: repr (a -> b) -> repr a -> repr b | |
-- A convenient abbreviation | |
let_ :: LamSym repr => repr a -> (repr a -> repr b) -> repr b | |
let_ x y = (lam y) `app` x | |
app3 :: LamSym repr | |
=> repr (a -> b -> c) | |
-> repr a -> repr b -> repr c | |
app3 x = app . (app x) | |
lam3 :: LamSym repr | |
=> (repr a -> repr b -> repr c) | |
-> repr (a -> b -> c) | |
lam3 x = lam (lam . x) | |
data a :-> b | |
infixr 5 :-> | |
newtype BoolBB = BoolBB { | |
unBoolBB :: forall a. (a -> a -> a) | |
} | |
class BoolSym repr where | |
bool :: Bool -> repr BoolBB | |
not :: repr BoolBB -> repr BoolBB | |
true :: repr BoolBB | |
false :: repr BoolBB | |
and :: repr BoolBB -> repr BoolBB -> repr BoolBB | |
type family Sem (m :: * -> *) a :: * | |
type instance Sem m BoolBB = Bool | |
type instance Sem m (a -> b) = m (Sem m a) -> m (Sem m b) | |
-- 'S' isn't really a monad, but it is up to Sem interpretation. so we can | |
-- define a Monad instance which simply applies 'unS' and 'S' as needed. hmm | |
-- it's not as straightforward because we need some way to convert a type 'a' | |
-- into 'Sem m a', whatever that may mean. | |
newtype S l m a = S { unS :: m (Sem m a) } | |
newtype BoolSem a = BoolSem { unBoolSem :: Bool } | |
instance BoolSym BoolSem where | |
bool = BoolSem | |
true = bool True | |
false = bool False | |
not = BoolSem . P.not . unBoolSem | |
and x y = BoolSem $ (unBoolSem x) && (unBoolSem y) | |
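-- A hypothetical check of the pure semantics (not in the original file):
-- interpreting a small boolean term with the BoolSem instance.
tb_pure :: Bool
tb_pure = unBoolSem ((not true `and` false) :: BoolSem BoolBB)
-- False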
-- let's use BoolSem semantics for (S l (State String)) | |
withBoolSem :: (Monad m) => BoolSem BoolBB -> m (Sem m BoolBB) | |
withBoolSem = return . unBoolSem | |
-- the reason we need to give the definition below in terms of 'S' is that 'S' | |
-- is what ties 'repr BoolBB' to 'Bool'. if we had another way of connecting the | |
-- two, then we could possibly define BoolSym in terms of (Monad repr). one | |
-- way of connecting the two would be an instance constraint; not sure if | |
-- that works, however | |
instance BoolSym (S l (State String)) where | |
-- treating 'bool' as compile-time 'constant', so no logging | |
bool = S . withBoolSem . bool | |
true = S $ do | |
modify (++ "True" ++ "; ") | |
withBoolSem true | |
false = S $ do | |
modify (++ "False" ++ "; ") | |
withBoolSem false | |
not x = S $ do | |
arg <- unS x | |
modify (++ "(not " ++ show arg ++ "); ") | |
withBoolSem . not . BoolSem $ arg | |
and x y = S $ do | |
a <- unS x | |
b <- unS y | |
modify (++ "(and " ++ show a ++ " " ++ show b ++ "); ") | |
withBoolSem $ (BoolSem a) `and` (BoolSem b) | |
data Name | |
instance LamSym (S Name (State String)) where | |
-- lam :: (S a -> S b) -> S (a -> b) | |
-- lam f = S . return $ (unS . f . S) | |
lam f = S $ do | |
return $ \x -> unS(f(S x)) | |
-- app :: S (a -> b) -> S a -> S b | |
-- app x y = S $ unS x >>= ($ (unS y)) | |
app x y = S $ do | |
a <- unS x | |
a (unS y) | |
runName :: S Name m a -> m (Sem m a) | |
runName = unS | |
viewName x = (runState . runName) x "Trace :: " | |
data Value | |
-- We reuse most of EDSL (S Name) except for lam | |
vn :: S Value m x -> S Name m x | |
vn = S . unS | |
nv :: S Name m x -> S Value m x | |
nv = S . unS | |
instance LamSym (S Value (State String)) where | |
-- lam :: S a -> S b -> S (a -> b) | |
-- lam f = S . return $ (\x -> x >>= unS . f . S . return) | |
-- S (a -> b) = S{ m(Sem m (a -> b)) } | |
-- Sem m (a -> b) = m(Sem m a) -> m(Sem m b) | |
-- S (a -> b) = S{ m( m(Sem m a) -> m(Sem m b) ) } | |
-- S (B -> (B -> (B -> B))) | |
-- S{ m( m(Sem m B) -> m(Sem m (B -> (B -> B))) ) } | |
-- S{ m( m(Sem m B) -> m( m(Sem m B) -> m(Sem m (B -> B)) ) ) } | |
-- S{ m( m(B) -> m( m(B) -> m( m(B) -> m(B) ) ) ) } | |
lam f = S $ do | |
return $ \x -> do | |
y <- x | |
unS(f(S(return y))) | |
-- app :: S (a -> b) -> S a -> S b | |
app x y = nv $ app (vn x) (vn y) | |
runValue :: S Value m a -> m (Sem m a) | |
runValue = unS | |
viewValue x = (runState . runValue) x "Trace :: " | |
-- so this is just plain wrong. it works, but evaluation hasn't been | |
-- parameterized | |
-- t = (and2 (not2 true2) (not2 true2)) | |
-- | |
-- this doesn't work because we're passing in arguments twice | |
-- explicitly. basically this needs a CSE pass, which can be done in final style | |
-- by building a sharing DAG from the bottom up during interpretation | |
t0 = lam (lam . and) | |
`app` (lam not `app` true) | |
`app` (lam not `app` true) | |
t1 = lam (\x -> let_ (not x) | |
$ \y -> y `and` y) `app` true | |
t2 = lam (\_ -> lam (\x -> let_ (not x) | |
$ \y -> y `and` y)) | |
`app` false | |
`app` true |
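-- A hypothetical usage sketch (not in the original): run the same term under
-- the Name and Value interpretations and inspect the accumulated trace.
t1_name = viewName t1
t1_value = viewValue t1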
{-# LANGUAGE Rank2Types #-} | |
-- The illustration of the Boehm-Berarducci encoding | |
-- We re-write an Algebraic Data Type example in BB_ADT.hs | |
-- using Boehm-Berarducci encoding to represent the | |
-- data type with lambda-terms. | |
-- Boehm-Berarducci encoding represents an inductive data type | |
-- as a non-recursive but higher-rank data type | |
module BB_LAM where | |
import BB_ADT (Exp(..)) | |
-- We define the Boehm-Berarducci encoding of the data type | |
-- Exp from BB_ADT.hs in two steps. | |
-- First we clarify the signature of the Exp algebra: the | |
-- types of the algebra constructors. We represent the signature | |
-- as a Haskell record. It is NOT recursive. | |
data ExpDict a = ExpDict { | |
dlit :: Int -> a | |
, dneg :: a -> a | |
, dadd :: a -> a -> a | |
} | |
-- We can define (a version of) Boehm-Berarducci encoding as | |
type ExpBB1 = forall a. (ExpDict a -> a) | |
-- To use only applications and abstractions, we curry the record | |
-- argument ExpDict a. | |
-- The wrapper newtype is needed for the first-class polymorphism | |
-- (actually we can go some way without newtype wrapper but eventually | |
-- GHC becomes confused. After all, the source language of GHC is not | |
-- system F.) | |
newtype ExpBB | |
= ExpBB { | |
unExpBB :: forall a. | |
(Int -> a) | |
-> (a -> a) | |
-> (a -> a -> a) | |
-> a | |
} | |
-- A data type declaration defines type, constructors, deconstructors | |
-- and the induction principle (fold). We already have a new type ExpBB, | |
-- which represents fold, as will become clear soon. We need constructors: | |
lit :: Int -> ExpBB | |
lit x = ExpBB $ \dlit dneg dadd -> dlit x | |
neg :: ExpBB -> ExpBB | |
neg e = ExpBB $ \dlit dneg dadd -> dneg (unExpBB e dlit dneg dadd) | |
add :: ExpBB -> ExpBB -> ExpBB | |
add e1 e2 = ExpBB $ \dlit dneg dadd -> | |
dadd (unExpBB e1 dlit dneg dadd) (unExpBB e2 dlit dneg dadd) | |
-- A sample expression: our first running example | |
tbb1 = add (lit 8) (neg (add (lit 1) (lit 2))) | |
-- A sample consumer | |
-- viewBB: can be implemented simply via the induction principle | |
-- It is not recursive! | |
viewBB:: ExpBB -> String | |
viewBB e = unExpBB e dlit dneg dadd | |
where | |
dlit n = show n | |
dneg e = "(-" ++ e ++ ")" | |
dadd e1 e2 = "(" ++ e1 ++ " + " ++ e2 ++ ")" | |
tbb1v = viewBB tbb1 | |
-- "(8 + (-(1 + 2)))" | |
-- How to define a deconstructor? | |
-- To give an intuition, we define the following functions roll and unroll. | |
-- The names are meant to evoke the terms witnessing the isomorphism between an | |
-- iso-recursive type and its one-step unrolling. | |
-- The deconstructor will use a form of these functions. | |
{- The following shows that we can replace type-level induction | |
(in BB_ADT.Exp) with a term-level induction. | |
The following definitions of roll and unroll are actually present | |
in Boehm-Berarducci's paper -- but in a very indirect way. | |
The crucial part is Defn 7.1: sort of self-interpreter: v cons nil ~= v | |
Informally, it shows that roll . unroll should be equivalent to the | |
identity. The equivalence relation (~=) is strictly speaking not | |
the equality; in modern terms, (~=) is contextual equivalence. | |
-} | |
-- First we re-write the signature of the Exp algebra, ExpDic, | |
-- in the form of a strictly-positive functor | |
data ExpF a = FLit Int | |
| FNeg a | |
| FAdd a a | |
instance Functor ExpF where | |
fmap f (FLit n) = FLit n | |
fmap f (FNeg e) = FNeg (f e) | |
fmap f (FAdd e1 e2) = FAdd (f e1) (f e2) | |
roll :: ExpF ExpBB -> ExpBB | |
roll (FLit n) = lit n | |
roll (FNeg e) = neg e | |
roll (FAdd e1 e2) = add e1 e2 | |
-- The occurrence of roll below is the source of major inefficiency | |
-- as we do repeated pattern-matching. | |
unroll :: ExpBB -> ExpF ExpBB | |
unroll e = unExpBB e dlit dneg dadd | |
where | |
dlit :: Int -> ExpF ExpBB | |
dlit n = FLit n | |
dneg :: ExpF ExpBB -> ExpF ExpBB | |
dneg e = FNeg (roll e) | |
dadd :: ExpF ExpBB -> ExpF ExpBB -> ExpF ExpBB | |
dadd e1 e2 = FAdd (roll e1) (roll e2) | |
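-- A hypothetical sanity check (not in the original): roll . unroll acts as
-- the identity, at least up to the view function.
tbb1_roundtrip = viewBB (roll (unroll tbb1))
-- "(8 + (-(1 + 2)))", the same as tbb1v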
-- Compare with the signature for ExpBB! | |
-- Informally, ExpBB unfolds all the way whereas ExpD unfolds only one step | |
newtype ExpD = | |
ExpD { | |
unED :: forall w. | |
(Int -> w) | |
-> (ExpBB -> w) | |
-> (ExpBB -> ExpBB -> w) | |
-> w | |
} | |
decon :: ExpBB -> ExpD | |
decon e = unExpBB e dlit dneg dadd | |
where | |
dlit n = ExpD $ \onlit onneg onadd -> onlit n | |
dneg e = ExpD $ \onlit onneg onadd -> onneg (unED e lit neg add) | |
dadd e1 e2 = ExpD $ \onlit onneg onadd -> | |
onadd (unED e1 lit neg add) (unED e2 lit neg add) | |
-- We redo viewBB, using pattern-matching this time, and recursion | |
viewBBd:: ExpBB -> String | |
viewBBd e = unED (decon e) dlit dneg dadd | |
where | |
dlit n = show n | |
dneg e = "(-" ++ viewBBd e ++ ")" | |
dadd e1 e2 = "(" ++ viewBBd e1 ++ " + " ++ viewBBd e2 ++ ")" | |
tbb1vd = viewBBd tbb1 | |
-- "(8 + (-(1 + 2)))" | |
-- So, we can do any pattern-matching! | |
-- And do non-structurally--recursive processing on ExpBB | |
{- | |
push_neg :: Exp -> Exp | |
push_neg e@Lit{} = e | |
push_neg e@(Neg (Lit _)) = e | |
push_neg (Neg (Neg e)) = push_neg e | |
push_neg (Neg (Add e1 e2)) = Add (push_neg (Neg e1)) (push_neg (Neg e2)) | |
push_neg (Add e1 e2) = Add (push_neg e1) (push_neg e2) | |
-} | |
push_negBB :: ExpBB -> ExpBB | |
push_negBB e = unED (decon e) dlit dneg dadd | |
where | |
dlit _ = e | |
dneg e2 = unED (decon e2) dlit2 dneg2 dadd2 | |
where | |
dlit2 n = e | |
dneg2 e = push_negBB e | |
dadd2 e1 e2 = add (push_negBB (neg e1)) (push_negBB (neg e2)) | |
dadd e1 e2 = add (push_negBB e1) (push_negBB e2) | |
tbb1_norm = push_negBB tbb1 | |
tbb1_norm_viewBB = viewBBd tbb1_norm | |
-- "(8 + ((-1) + (-2)))" | |
-- Add an extra negation | |
tbb1n_norm_viewBB = viewBBd (push_negBB (neg tbb1)) | |
-- "((-8) + (1 + 2))" | |
-- ------------------------------------------------------------------------ | |
-- A bit of algebra | |
-- The types (ExpF a -> a) and ExpDict a are isomorphic. | |
-- Hence, the operations of an Exp algebra with the carrier U | |
-- could be specified either in the form of a value of the type ExpDict U, | |
-- or in the form of a function ExpF U -> U. | |
sigma_dict :: (ExpF a -> a) -> ExpDict a | |
sigma_dict sigma = ExpDict { | |
dlit = \n -> sigma (FLit n) | |
, dneg = \e -> sigma (FNeg e) | |
, dadd = \e1 e2 -> sigma (FAdd e1 e2) | |
} | |
dict_sigma :: ExpDict a -> (ExpF a -> a) | |
dict_sigma dict (FLit n) = dlit dict n | |
dict_sigma dict (FNeg e) = dneg dict e | |
dict_sigma dict (FAdd e1 e2) = dadd dict e1 e2 | |
-- Recall ExpBB1 as an uncurried form of the Boehm-Berarducci encoding. | |
-- It follows then that the encoding can be written in the equivalent form | |
newtype ExpC = ExpC { | |
unExpC :: forall a. | |
(ExpF a -> a) | |
-> a | |
} | |
-- ExpC is indeed fully equivalent to ExpBB, and inter-convertible | |
h :: ExpDict a -> ExpBB -> a | |
h dict e = unExpBB e (dlit dict) (dneg dict) (dadd dict) | |
exp_BB_C :: ExpBB -> ExpC | |
exp_BB_C e = ExpC $ \f -> h (sigma_dict f) e | |
exp_C_BB :: ExpC -> ExpBB | |
exp_C_BB e = ExpBB $ \dlit dneg dadd -> | |
unExpC e (dict_sigma (ExpDict dlit dneg dadd)) | |
-- Let us write the constructors explicitly: | |
sigma_expC :: ExpF ExpC -> ExpC | |
sigma_expC (FLit n) = ExpC $ \f -> f (FLit n) | |
sigma_expC (FNeg e) = ExpC $ \f -> f (FNeg (unExpC e f)) | |
sigma_expC (FAdd e1 e2) = ExpC $ \f -> f (FAdd (unExpC e1 f) (unExpC e2 f)) | |
-- Sample term | |
t1c = exp_BB_C tbb1 | |
-- The advantage of ExpC is that functions roll and unroll take | |
-- a particularly elegant form | |
rollC :: ExpF ExpC -> ExpC | |
rollC = sigma_expC | |
unrollC :: ExpC -> ExpF ExpC | |
unrollC e = unExpC e (fmap sigma_expC) | |
-- (ExpC, sigma_ExpC) is the initial algebra of the functor ExpF. | |
-- (the same holds for ExpBB since it is isomorphic to ExpC) | |
-- Indeed, consider an arbitrary ExpF algebra with the carrier U | |
-- and the operations sigma :: ExpF U -> U. | |
-- We show that there exists a unique homomorphism | |
-- (hc sigma) :: ExpC -> U such that the following holds | |
-- hc sigma . sigma_ExpC = sigma . fmap (hc sigma) | |
-- Consider x = FLit n :: ExpF ExpC. | |
-- Then (sigma . fmap (hc sigma)) x = sigma (FLit n) | |
-- Thus we obtain | |
-- hc sigma (ExpC $ \f -> f (FLit n)) = sigma (FLit n) | |
-- from which we get | |
-- hc sigma e = unExpC e sigma | |
-- Consider x = FNeg e :: ExpF ExpC. | |
-- Then (sigma . fmap (hc sigma)) x = sigma (FNeg (hc sigma e)) | |
-- Thus we obtain | |
-- hc sigma (ExpC $ \f -> f (FNeg (unExpC e f))) = sigma (FNeg (hc sigma e)) | |
-- Again, the only way to satisfy the equality for all sigma and e is to set | |
-- hc sigma e = unExpC e sigma | |
-- The case of x = FAdd e1 e2 :: ExpF ExpC is similar. | |
-- Thus we get the unique morphism from (ExpC, sigma_ExpC) to | |
-- any other ExpF algebra. | |
hc :: (ExpF a -> a) -> ExpC -> a | |
hc sigma e = unExpC e sigma | |
-- Here is the illustration of the morphism. | |
-- Define an ExpF algebra, | |
-- with Strings as the carrier and the following operations | |
sigma_view :: ExpF String -> String | |
sigma_view (FLit n) = show n | |
sigma_view (FNeg e) = "(-" ++ e ++ ")" | |
sigma_view (FAdd e1 e2) = "(" ++ e1 ++ " + " ++ e2 ++ ")" | |
-- We immediately get the function to view the values of ExpC type: | |
viewC :: ExpC -> String | |
viewC = hc sigma_view | |
t1c_view = viewC t1c | |
-- "(8 + (-(1 + 2)))" | |
-- (ExpBB, roll) is also an initial algebra: there is a unique | |
-- homomorphism from it to any other Exp algebra. Here is the law this morphism satisfies: | |
-- h dict . roll = (dict_sigma dict) . fmap (h dict) |
How to take a TAIL of a functional stream | |
Or how to obtain the first few elements of a functional (success/failure | |
continuation) stream without first converting it to the regular | |
Cons/Nil stream. The latter conversion is unreliable: it diverges for | |
a monadic functional stream over a strict monad. Our technique works | |
even in the latter case. | |
$Id: car-fstream.lhs,v 1.3 2012/04/04 03:40:11 oleg Exp oleg $ | |
> {-# LANGUAGE Rank2Types #-} | |
> module Streams where | |
> import Control.Monad.Identity | |
The most common way to represent (a potentially infinite) stream is as | |
a data structure of the following _recursive_ type: | |
> data Stream a = Nil | Cons a (() -> Stream a) | |
The thunking is explicit so the definition applies to strict | |
languages too. We can also represent a stream pure functionally. | |
The following is _not_ a recursive type, but a higher-ranked one. | |
> newtype FStream a = SFK (forall ans. SK a ans -> FK ans -> ans) | |
> type FK ans = () -> ans -- failure continuation | |
> type SK a ans = a -> FK ans -> ans -- success continuation | |
> unSFK (SFK a) = a | |
If we ignore the newtype tagging (needed to properly deal with the | |
higher-ranked types in Haskell), an FStream is a function of two | |
continuations, success and failure continuations. The latter is | |
invoked if the stream has no elements. The stream invokes the success | |
continuation if the stream has at least one element. The continuation | |
receives that element and another failure continuation, to ask for | |
more elements. As an example of such a functional stream, see Ralf | |
Hinze's ``Deriving Backtracking Monad Transformers'' (ICFP'00). | |
Obviously there is a correspondence between the two | |
representations. For example: | |
> fstream_to_stream fstream = unSFK fstream sk fk | |
> where fk () = Nil | |
> sk a fk' = Cons a fk' | |
> | |
> stream_to_fstream Nil = SFK( \sk fk -> fk () ) | |
> stream_to_fstream (Cons a rest) = | |
> SFK(\sk fk -> | |
> sk a (\ () -> unSFK (stream_to_fstream (rest ())) sk fk)) | |
A test data stream | |
> s1 = Cons 'a' (\ () -> Cons 'b' (\ () -> Cons 'c' (\ () -> Nil))) | |
> f1 = stream_to_fstream s1 | |
> s1' = fstream_to_stream f1 | |
> | |
> test1 = unSFK f1 (\a k -> a:(k())) (\() -> []) | |
*Streams> test1 | |
"abc" | |
Mitch Wand and D.Vaillancourt (see ICFP'04) have described the | |
correspondence in more formal terms. | |
There is an obvious difference between the two representations | |
however. It is easy to find the first two elements of a Stream: | |
> stream_take:: Int -> Stream a -> [a] | |
> stream_take 0 _ = [] | |
> stream_take n (Cons a r) = a : (stream_take (n-1) $ r ()) | |
It seems all but impossible to find the first two elements of an | |
FStream without first converting it all to Stream. That conversion | |
becomes problematic if we have a monadic functional stream: | |
> newtype MFStream m a = MSFK (forall ans. MSK m a ans -> MFK m ans -> m ans) | |
> type MFK m ans = m ans -- failure continuation | |
> type MSK m a ans = a -> MFK m ans -> m ans -- success continuation | |
> unMSFK (MSFK a) = a | |
> | |
> | |
> mfstream_to_stream:: (Monad m) => MFStream m a -> m (Stream a) | |
> mfstream_to_stream mfstream = unMSFK mfstream sk fk | |
> where fk = return Nil | |
> sk a fk' = fk' >>= (\rest -> return (Cons a (\ () -> rest))) | |
If m is a strict monad and the fstream is infinite, the conversion to Stream | |
diverges. Indeed, | |
> nats = nats' 0 where | |
> nats' n = MSFK (\sk fk -> sk n (unMSFK (nats' (n+1)) sk fk)) | |
> | |
> test2 = runIdentity (mfstream_to_stream nats >>= (return . stream_take 5)) | |
*Streams> test2 | |
[0,1,2,3,4] | |
but for a strict monad like IO: | |
> test3 = mfstream_to_stream nats >>= (print . stream_take 5) | |
it diverges without printing anything. Indeed, before we can print | |
even the first element from nats, we must convert it entirely to a | |
stream. | |
It turns out we can split an FStream as if it were just a data | |
structure. The following function returns an FStream of at most one item. | |
The item is a pair of the head of the original stream and of the | |
_rest stream_ | |
> split_fstream :: FStream a -> FStream (a, FStream a) | |
> split_fstream fstream = unSFK fstream ssk caseB | |
> where caseB () = SFK(\a b -> b ()) | |
> ssk v fk = SFK(\a b -> | |
> a (v, (SFK (\s' f' -> | |
> unSFK (fk ()) | |
> (\ (v'',x) _ -> s' v'' | |
> (\ () -> unSFK x s' f')) | |
> f'))) | |
> (\ () -> b ())) | |
> | |
> f11 = split_fstream f1 | |
> e1 = unSFK f11 (\ (a,_) _ -> a) undefined | |
> f1r = unSFK f11 (\ (_,r) _ -> r) undefined | |
> f1rs = split_fstream f1r | |
> e2 = unSFK f1rs (\ (a,_) _ -> a) undefined | |
Indeed, we can obtain the first two elements from the finite stream f1 | |
*Streams> e1 | |
'a' | |
*Streams> e2 | |
'b' | |
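The same splitting trick gives a general take function for functional
streams (a hypothetical helper, not part of the original text):
> fstream_take :: Int -> FStream a -> [a]
> fstream_take 0 _ = []
> fstream_take n fs = unSFK (split_fstream fs)
>                           (\ (v,rest) _ -> v : fstream_take (n-1) rest)
>                           (\ () -> [])
For example, fstream_take 2 f1 evaluates to "ab".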
Monadic case (quoted almost directly from the paper with Chung-chieh Shan, | |
Amr A. Sabry, Daniel P. Friedman) | |
> split_mfstream :: Monad m => MFStream m a -> m (MFStream m (a, MFStream m a)) | |
> split_mfstream m = unMSFK m ssk caseB | |
> where caseB = return $ MSFK(\a b -> b) | |
> ssk v fk = return $ MSFK(\a b -> | |
> a (v, (MSFK (\s' f' -> | |
> fk >>= (\r -> unMSFK r | |
> (\ (v'',x) _ -> s' v'' (unMSFK x s' f')) | |
> f')))) | |
> b) | |
> mfstream_take:: Monad m => Int -> MFStream m a -> m [a] | |
> mfstream_take 0 _ = return [] | |
> mfstream_take n m = do | |
> m' <- split_mfstream m | |
> (v,rs) <- unMSFK m' (\ (v,rs) _ -> return (v,rs) ) | |
> undefined | |
> rl <- mfstream_take (n-1) rs | |
> return (v:rl) | |
> test2' = runIdentity (mfstream_take 5 nats) | |
*Streams> test2' | |
[0,1,2,3,4] | |
> test3' = mfstream_take 5 nats >>= print | |
But now also | |
*Streams> test3' | |
[0,1,2,3,4] | |
{-# LANGUAGE TypeFamilies, EmptyDataDecls, TypeOperators #-} | |
{-# LANGUAGE FlexibleInstances #-} | |
-- Embedding a higher-order domain-specific language (simply-typed | |
-- lambda-calculus with constants) with a selectable evaluation order: | |
-- Call-by-value, call-by-name, call-by-need in the same Final Tagless framework | |
module CB where | |
import Data.IORef | |
import Control.Monad.IO.Class | |
-- Our EDSL is typed. EDSL types are built from the following two | |
-- type constructors: | |
data IntT | |
data a :-> b | |
infixr 5 :-> | |
-- We could have used Haskell's type Int and the arrow -> constructor. | |
-- We would like to emphasize however that EDSL types need not be identical | |
-- to the host language types. To give a type system to the EDSL, we merely | |
-- need `labels' -- which is what IntT and :-> are | |
-- The (higher-order abstract) syntax of our DSL | |
class EDSL exp where | |
-- if we were using (->) instead of (:->) this would be | |
-- (f a -> f b) -> f (a -> b) | |
lam :: (exp a -> exp b) -> exp (a :-> b) | |
-- can we make all of this work even if we use (->) instead of (:->) and | |
-- ($) instead of (app)? | |
app :: exp (a :-> b) -> exp a -> exp b | |
int :: Int -> exp IntT -- Integer literal | |
add :: exp IntT -> exp IntT -> exp IntT | |
sub :: exp IntT -> exp IntT -> exp IntT | |
-- A convenient abbreviation | |
let_ :: EDSL exp => exp a -> (exp a -> exp b) -> exp b | |
let_ x y = (lam y) `app` x | |
-- A sample EDSL term | |
t :: EDSL exp => exp IntT | |
t = lam (\x -> let_ (x `add` x) | |
$ \y -> y `add` y) `app` int 10 | |
-- typed BB style booleans? | |
true :: EDSL exp => exp (a :-> a :-> a) | |
true = lam (\x -> lam (\y -> x)) | |
false :: EDSL exp => exp (a :-> a :-> a) | |
false = lam (\x -> lam (\y -> y)) | |
-- not2 :: EDSL exp => exp ((a :-> a :-> a) :-> (b :-> b :-> b)) | |
-- not2 = lam (\x -> x `app` true) `app` _foo | |
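-- A hypothetical helper (not in the original): with Church-style booleans,
-- a conditional is just the application of the boolean to the two branches.
if_ :: EDSL exp => exp (a :-> a :-> a) -> exp a -> exp a -> exp a
if_ c th el = c `app` th `app` el
t_if :: EDSL exp => exp IntT
t_if = if_ true (int 1) (int 2)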
-- Interpretation of EDSL types as host language types | |
-- The type interpretation function Sem is parameterized by 'm', | |
-- which is assumed to be a Monad. | |
-- | |
-- HMM: why do we need 'm' to be a monad? right now, 'm' is simply a type | |
-- constructor taking one parameter | |
-- - if we wanted to use an associated type (family?) then what would we | |
-- associate this with? hmm with 'S l m' perhaps and turn it into a class | |
-- (instead of newtype)? | |
type family Sem (m :: * -> *) a :: * | |
type instance Sem m IntT = Int | |
-- if we wanted to map (->) to (->) (instead of (:->) this is where we'd do it | |
type instance Sem m (a :-> b) = m (Sem m a) -> m (Sem m b) | |
-- Interpretation of EDSL expressions as values of the host language (Haskell) | |
-- An EDSL expression of the type a is interpreted as a Haskell value | |
-- of the type S l m a, where m is a Monad (the parameter of the interpretation) | |
-- and l is the label for the evaluation order (one of Name, Value, or Lazy). | |
-- (S l m) is not quite a monad -- only up to the Sem interpretation | |
-- | |
-- HMM: can we constrain 'l' somehow? and what would be the benefit of doing | |
-- that? that we disallow certain other type tags? | |
newtype S l m a = S { unS :: m (Sem m a) } | |
-- Call-by-name | |
data Name | |
instance MonadIO m => EDSL (S Name m) where | |
int = S . return | |
add x y = S $ do a <- unS x | |
b <- unS y | |
liftIO $ putStrLn "Adding" | |
return (a + b) | |
sub x y = S $ do a <- unS x | |
b <- unS y | |
liftIO $ putStrLn "Subtracting" | |
return (a - b) | |
-- (S . return) :: (m (Sem m a) -> m (Sem m b)) -> S Name m (a :-> b) | |
-- (unS . f . S) :: m (Sem m a) -> m (Sem m b) | |
-- S :: m (Sem m a) -> S Name m a | |
-- f :: S Name m a -> S Name m b | |
-- unS :: S Name m b -> m (Sem m b) | |
lam f = S . return $ (unS . f . S) | |
-- hmm (app) actually carries out (>>=), so probably can't map it to ($) | |
app x y = S $ unS x >>= ($ (unS y)) | |
-- Tests | |
runName :: S Name m a -> m (Sem m a) | |
runName = unS | |
-- Evaluating: | |
-- t = (lam $ \x -> let_ (x `add` x) | |
-- $ \y -> y `add` y) `app` int 10 | |
-- The addition (x `add` x) is performed twice because y is bound | |
-- to a computation, and y is evaluated twice | |
t0SN :: IO () | |
t0SN = runName t >>= print | |
{- | |
Adding | |
Adding | |
Adding | |
40 | |
-} | |
-- A more elaborate example | |
t1 :: EDSL exp => exp IntT | |
t1 = lam (\x -> let_ (x `add` x) | |
$ \y -> lam $ \z -> | |
z `add` (z `add` (y `add` y))) `app` (int 10 `sub` int 5) | |
`app` (int 20 `sub` int 10) | |
t1SN :: IO () | |
t1SN = runName t1 >>= print | |
{- | |
*CB> t1SN | |
Subtracting | |
Subtracting | |
Subtracting | |
Subtracting | |
Adding | |
Subtracting | |
Subtracting | |
Adding | |
Adding | |
Adding | |
Adding | |
40 | |
-} | |
-- A better example | |
t2 :: EDSL exp => exp IntT | |
t2 = lam (\_ -> lam $ \x -> let_ (x `add` x) | |
$ \y -> y `add` y) | |
`app` (int 100 `sub` int 10) | |
`app` (int 5 `add` int 5) | |
-- The result of the subtraction was not needed, and so it was not performed | |
-- OTOH, (int 5 `add` int 5) was computed four times | |
t2SN :: IO () | |
t2SN = runName t2 >>= print | |
{- | |
*CB> t2SN | |
Adding | |
Adding | |
Adding | |
Adding | |
Adding | |
Adding | |
Adding | |
40 | |
-} | |
-- Call-by-value | |
data Value | |
-- We reuse most of EDSL (S Name) except for lam | |
vn :: S Value m x -> S Name m x | |
vn = S . unS | |
nv :: S Name m x -> S Value m x | |
nv = S . unS | |
instance MonadIO m => EDSL (S Value m) where | |
int = nv . int | |
add x y = nv $ add (vn x) (vn y) | |
sub x y = nv $ sub (vn x) (vn y) | |
-- the good news is that the magic doesn't happen in (app), it's really | |
-- (lam) which differentiates the various CB schemes | |
app x y = nv $ app (vn x) (vn y) | |
-- This is the only difference between CBN and CBV: | |
-- lam first evaluates its argument, no matter what | |
-- This is the definition of CBV after all | |
lam f = S . return $ (\x -> x >>= unS . f . S . return) | |
runValue :: S Value m a -> m (Sem m a) | |
runValue = unS | |
-- We now evaluate the previously written tests t, t1, t2 | |
-- under the new interpretation | |
t0SV :: IO () | |
t0SV = runValue t >>= print | |
{- | |
*CB> t0SV | |
Adding | |
Adding | |
40 | |
-} | |
t1SV :: IO () | |
t1SV = runValue t1 >>= print | |
{- | |
*CB> t1SV | |
Subtracting | |
Adding | |
Subtracting | |
Adding | |
Adding | |
Adding | |
40 | |
-} | |
-- Although the result of the subtraction was not needed, it was still performed | |
-- OTOH, (int 5 `add` int 5) was computed only once | |
t2SV :: IO () | |
t2SV = runValue t2 >>= print | |
{- | |
*CB> t2SV | |
Subtracting | |
Adding | |
Adding | |
Adding | |
40 | |
-} | |
-- Call-by-need | |
share :: MonadIO m => m a -> m (m a) | |
share m = do | |
r <- liftIO $ newIORef (False,m) | |
let ac = do | |
(f,m) <- liftIO $ readIORef r | |
if f then m | |
else do -- forget effect (from `app`?) + record it | |
v <- m | |
liftIO $ writeIORef r (True,return v) | |
return v | |
return ac | |
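-- A minimal check of 'share' (hypothetical, not in the original): the effect
-- of the shared action runs only once; later runs reuse the cached value.
share_demo :: IO ()
share_demo = do
  ac <- share (putStrLn "computing" >> return (42 :: Int))
  _ <- ac            -- prints "computing" and caches 42
  v <- ac            -- reuses the cached value, prints nothing
  print v            -- 42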
data Lazy | |
-- We reuse most of EDSL (S Name) except for lam | |
ln :: S Lazy m x -> S Name m x | |
ln = S . unS | |
nl :: S Name m x -> S Lazy m x | |
nl = S . unS | |
instance MonadIO m => EDSL (S Lazy m) where | |
int = nl . int | |
add x y = nl $ add (ln x) (ln y) | |
sub x y = nl $ sub (ln x) (ln y) | |
app x y = nl $ app (ln x) (ln y) | |
-- This is the only difference between CBN and CBNeed | |
-- lam shares its argument, no matter what | |
-- This is the definition of CBNeed after all | |
lam f = S . return $ (\x -> share x >>= unS . f . S) | |
runLazy :: S Lazy m a -> m (Sem m a) | |
runLazy = unS | |
-- We now evaluate the previously written tests t, t1, t2 | |
-- under the new interpretation | |
-- Here, Lazy is just as efficient as CBV | |
t0SL :: IO () | |
t0SL = runLazy t >>= print | |
{- | |
*CB> t0SL | |
Adding | |
Adding | |
40 | |
-} | |
-- Ditto | |
t1SL :: IO () | |
t1SL = runLazy t1 >>= print | |
{- | |
*CB> t1SL | |
Subtracting | |
Subtracting | |
Adding | |
Adding | |
Adding | |
Adding | |
40 | |
-} | |
-- Now, Lazy is better than both CBN and CBV: subtraction was not needed, | |
-- and it was not performed. | |
-- All other expressions were needed, and evaluated once. | |
t2SL :: IO () | |
t2SL = runLazy t2 >>= print | |
{- | |
*CB> t2SL | |
Adding | |
Adding | |
Adding | |
40 | |
-} |
-- Establishing a bijection between the values of the type a and integers, with | |
-- the operations to retrieve the value given its key, | |
-- to find the key for the existing value, and to extend the | |
-- bijection with a new association. | |
-- The type 'a' of values should at least permit equality comparison; | |
-- In the present implementation, we require 'a' to be a member | |
-- of Ord. | |
-- There are many ways to implement bi-maps, for example, using hash tables, | |
-- or maps. | |
-- Our implementation uses Data.Map and Data.IntMap to record | |
-- both parts of the association. | |
module CSE_BiMap ( | |
BiMap, empty, | |
lookup_key, | |
lookup_val, | |
insert, | |
size, | |
) | |
where | |
import qualified Data.Map as M | |
import qualified Data.IntMap as IM | |
data BiMap a = BiMap (M.Map a Int) (IM.IntMap a) | |
-- Find a key for a value | |
lookup_key :: Ord a => a -> BiMap a -> Maybe Int | |
lookup_key v (BiMap m _) = M.lookup v m | |
-- Find a value for a key | |
lookup_val :: Int -> BiMap a -> a | |
lookup_val k (BiMap _ m) = m IM.! k | |
-- Insert the value and return the corresponding key | |
-- and the new map | |
-- Alas, the Map interface does not have an operation to insert and find the index | |
-- at the same time (although such an operation is easily possible) | |
insert :: Ord a => a -> BiMap a -> (Int, BiMap a) | |
insert v (BiMap m im) = (k, BiMap m' im') | |
where m' = M.insert v k m | |
im' = IM.insert k v im | |
k = IM.size im | |
empty :: BiMap a | |
empty = BiMap (M.empty) (IM.empty) | |
instance Show a => Show (BiMap a) where | |
show (BiMap _ m) = "BiMap" ++ show (IM.toList m) | |
size :: BiMap a -> Int | |
size (BiMap _ m) = IM.size m |
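-- A hypothetical usage sketch (not part of the original module):
-- insert two values, then look them up in both directions.
demo_bimap :: (Maybe Int, String)
demo_bimap = (lookup_key "x" m2, lookup_val 1 m2)
  where
    (_, m1) = insert "x" (empty :: BiMap String)
    (_, m2) = insert "y" m1
-- (Just 0, "y")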
{-# LANGUAGE NoMonomorphismRestriction #-} | |
-- * Implicit sharing in the tagless-final style, or: | |
-- * seemingly pure hash-consing | |
-- The imperative details of hash-consing are hidden better in a | |
-- final-tagless style of the DSL embedding, described next. | |
module CSE_ExpF (Exp(..), | |
ExpI(..), | |
R(..), | |
N(..),NodeId, DAG(..), | |
run_expN, | |
do_bench, | |
mul, | |
) where | |
import Control.Monad.State | |
import CSE_BiMap | |
import qualified CSE_ExpI as ExpI (Exp(..), sklansky) | |
-- * // | |
-- * Tagless-final EDSL embedding | |
-- In the tagless-final approach, embedded DSL | |
-- expressions are built with `constructor functions' such as |constant|, | |
-- |variable|, |add| rather than the data constructors |Constant|, | |
-- |Variable|, |Add|. The constructor functions yield a representation | |
-- for the DSL expression being built. The representation could be a | |
-- string (for pretty-printing), an integer (for evaluator), etc. Since | |
-- the same DSL expression may be concretely represented in several ways, | |
-- the constructor functions are polymorphic, parameterized by the | |
-- representation |repr|. In other words, the constructor functions are | |
-- the members of the type class | |
class Exp repr where | |
constant :: Int -> repr Int | |
variable :: String -> repr Int | |
add :: repr Int -> repr Int -> repr Int | |
-- The data type Exp from ExpI.hs becomes a type class | |
-- Our expressions here are of only one type, Int. We could have dropped | |
-- Int and made 'repr' a type variable of kind *. | |
-- We keep the 'repr Int' notation nevertheless, for consistency | |
-- with the tagless final paper, and to allow for extensions (e.g., | |
-- the addition of booleans). | |
-- Sample expressions from ExpI.hs now look as follows | |
exp_a = add (constant 10) (variable "i1") | |
exp_b = add exp_a (variable "i2") | |
-- * like those in ExpI.hs modulo the case of the identifiers: | |
-- * everything is in lower case. | |
-- The multiplication example: | |
-- the only two differences are the case of the identifiers | |
-- and the type signature | |
mul :: Exp repr => Int -> repr Int -> repr Int | |
mul 0 _ = constant 0 | |
mul 1 x = x | |
mul n x | n `mod` 2 == 0 = mul (n `div` 2) (add x x) | |
mul n x = add x (mul (n-1) x) | |
exp_mul4 = mul 4 (variable "i1") | |
exp_mul8 = mul 8 (variable "i1") | |
-- * Interpreters for our DSL: instances of Exp | |
-- The first interpreter interprets a tagless-final expression | |
-- as ExpI.Exp data type, converting the DSL expressions here into | |
-- the so-called `initial form' of ExpI.hs | |
-- This is one way to print the tagless-final expressions, since | |
-- we have derived a Show instance for ExpI.Exp | |
newtype ExpI t = ExpI (ExpI.Exp) | |
-- * Question: why do we need the wrapper? | |
instance Exp ExpI where | |
constant = ExpI . ExpI.Constant | |
variable = ExpI . ExpI.Variable | |
add (ExpI x) (ExpI y) = ExpI (ExpI.Add x y) | |
test_shb = case exp_b of ExpI e -> e | |
-- Add (Add (Constant 10) (Variable "i1")) (Variable "i2") | |
test_sh4 = case exp_mul4 of ExpI e -> e | |
{- | |
Add (Add (Variable "i1") (Variable "i1")) | |
(Add (Variable "i1") (Variable "i1")) | |
-} | |
-- The conversion to ExpI takes the form of an instance | |
-- of the class Exp that provides the interpretation for the expression | |
-- primitives, as the values for the domain ExpI. | |
-- * Another interpreter: the evaluator | |
-- That interpretation of tagless-final expressions is not | |
-- the only one possible. We can write an evaluator, interpreting | |
-- each expression as an element of the domain R | |
type REnv = [(String,Int)] | |
newtype R t = R{unR :: REnv -> t} -- A reader Monad, actually | |
-- that is, an integer in an environment that gives values for | |
-- free `variables' that may occur in the expression | |
instance Exp R where | |
constant x = R (\_ -> x) | |
variable x = R (\env -> maybe (error $ "no var: " ++ x) id $ | |
lookup x env) | |
add e1 e2 = R (\env -> unR e1 env + unR e2 env) | |
test_val4 = unR exp_mul4 [("i1",5)] -- 20 | |
-- * Evaluating sample expressions is good for debugging | |
-- * Write once (exp_mul4), interpret many times | |
-- We are using exactly the same exp_mul4 as in | |
-- test_sh4. We are evaluating it differently. The gist of | |
-- the final tagless approach is to write an expression once | |
-- and evaluate it many times. | |
-- * // | |
-- ------------------------------------------------------------------------ | |
-- * Detecting implicit sharing (common subexpression elimination) | |
-- * Goal: detect structurally equal subexpressions and share them, | |
-- * converting an expression tree into a DAG | |
-- * Idea: rather than convert an ASTree to ASDag, build the | |
-- * ASDag to begin with. | |
-- * Representing a DAG | |
-- a collection of Nodes identified by NodeIds, | |
type NodeId = Int | |
-- We stress: Node is NOT a recursive data structure, so the comparison | |
-- of Node values takes constant time! | |
data Node = NConst Int | |
| NVar String | |
| NAdd !NodeId !NodeId | |
deriving (Eq,Ord,Show) | |
-- we could use several bimaps for different operations (for | |
-- addition, subtraction, etc). | |
newtype DAG = DAG (BiMap Node) deriving Show | |
-- * BiMap -- (partial) bijection a <-> Int | |
-- The mapping between |Node|s and |NodeId|s is realized | |
-- through a BiMap interface | |
-- BiMap a establishes a bijection between the values of the type |a| | |
-- and integers, with the operations to retrieve the value given its key, | |
-- to find the key for the existing value, and to extend the bijection | |
-- with a new association. | |
-- * // | |
-- * Bottom-up DAG construction | |
-- As we construct a node for a subexpression, we check if the DAG already | |
-- has an equal node. If so, we return its |NodeId|; otherwise, we add the node | |
-- to the DAG. | |
-- The computation is stateful (we are using the State monad) | |
-- Hash-consing proper: insert Node into the DAG if it isn't | |
-- there already, and return its hash code. | |
hashcons :: Node -> State DAG NodeId | |
hashcons e = do | |
DAG m <- get | |
case lookup_key e m of | |
Nothing -> let (k,m') = insert e m | |
in put (DAG m') >> return k | |
Just k -> return k | |
-- * Bottom-up DAG construction: tagless-final helps | |
-- The bottom-up DAG construction maps well to computing a representation | |
-- for a tagless-final expression, which is also evaluated bottom-up. The | |
-- DAG construction can therefore be written as a tagless-final | |
-- interpreter, an instance of the type class Exp. The interpreter maps | |
-- a tagless-final expression to the concrete representation | |
-- * Interpreting Exp as a Node in the current DAG | |
newtype N t = N{unN :: State DAG NodeId} | |
instance Exp N where | |
constant x = N(hashcons $ NConst x) | |
variable x = N(hashcons $ NVar x) | |
add e1 e2 = N(do | |
h1 <- unN e1 | |
h2 <- unN e2 | |
hashcons $ NAdd h1 h2) | |
-- * The state is hidden behind the tagless-final veneer | |
-- run the DAG-construction interpreter and the node, | |
-- as a reference within a DAG. | |
run_expN :: N t -> (NodeId, DAG) | |
run_expN (N m) = runState m (DAG empty) | |
-- We re-interpret exp_mul4 differently this time. | |
-- The DAG-representation makes the sharing patent | |
-- A DAG is printed as the list of |(NodeId,Node)| associations. The | |
-- sharing of the left and right summands below is patent | |
test_sm4 = run_expN exp_mul4 | |
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)]) | |
test_sm8 = run_expN exp_mul8 | |
-- (3,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1),(3,NAdd 2 2)]) | |
-- * We have constructed netlists, topologically sorted | |
-- a netlist is a low-level representation of a circuit listing the gates and | |
-- their connections, used in circuit manufacturing. Since our BiMap allocated | |
-- monotonically increasing NodeIds, the resulting netlist comes out | |
-- topologically sorted. Therefore, we can straightforwardly generate machine | |
-- code after the standard register allocation. | |
-- We retain all the information about exp_mul4. In addition, all | |
-- sharing is fully explicit. As we can see, the evaluation process finds | |
-- common subexpressions automatically. | |
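-- A hypothetical consumer of the resulting DAG (not in the original):
-- evaluate a node in an environment for the variables. This naive version
-- re-walks shared nodes; a serious one would memoise results per NodeId.
eval_dag :: [(String,Int)] -> DAG -> NodeId -> Int
eval_dag env (DAG m) = go
  where
    go k = case lookup_val k m of
             NConst n   -> n
             NVar v     -> maybe (error $ "no var: " ++ v) id (lookup v env)
             NAdd k1 k2 -> go k1 + go k2
test_eval_sm4 = let (root, dag) = test_sm4 in eval_dag [("i1",5)] dag root
-- 20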
-- * // | |
-- ------------------------------------------------------------------------ | |
-- * Superficially `effectless' common sub-expression elimination | |
-- sklansky example by Matthew Naylor, with further credit to | |
-- Chalmers folk, Mary Sheeran and Emil Axelsson. | |
-- It is said to be similar to scanl1, but contains more parallelism | |
-- The function sklansky is defined in ExpI.hs | |
-- * To remind what sklansky produces: | |
test_sklansky_o n = ExpI.sklansky addition xs | |
where | |
addition x y = "(" ++ x ++ "+" ++ y ++ ")" | |
xs = Prelude.map (("v"++) . show) [1..n] | |
-- (v1+v2) is used three times | |
{- | |
test_sklansky_o 4 | |
["v1","(v1+v2)","((v1+v2)+v3)","((v1+v2)+(v3+v4))"] | |
-} | |
-- (v1+v2) is used seven times | |
{- | |
test_sklansky_o 8 | |
["v1","(v1+v2)", | |
"((v1+v2)+v3)", | |
"((v1+v2)+(v3+v4))", | |
"(((v1+v2)+(v3+v4))+v5)", | |
"(((v1+v2)+(v3+v4))+(v5+v6))", | |
"(((v1+v2)+(v3+v4))+((v5+v6)+v7))", | |
"(((v1+v2)+(v3+v4))+((v5+v6)+(v7+v8)))"] | |
-} | |
-- * sklansky challenge: sharing across expressions, not within an expression | |
-- * We re-write in the tagless-final style | |
-- * scratch that: we use sklansky as it was | |
-- :t ExpI.sklansky | |
-- ExpI.sklansky :: (a -> a -> a) -> [a] -> [a] | |
-- Actually, there is no re-write | |
-- We use Matthew Naylor's code, in pure Haskell, as it was | |
-- * // | |
-- We run it differently though | |
test_sklansky n = runState sk (DAG empty) | |
where | |
sk = sequence (map unN (ExpI.sklansky add xs)) | |
xs = map (variable . show) [1..n] | |
-- Implicit sharing works: hash code 2 (for v1+v2) is used three times | |
{- | |
*ExpF> test_sklansky 4 | |
([0,2,4,7], | |
DAG BiMap[(0,NVar "1"),(1,NVar "2"), | |
(2,NAdd 0 1),(3,NVar "3"), | |
(4,NAdd 2 3),(5,NVar "4"), | |
(6,NAdd 3 5),(7,NAdd 2 6)]) | |
-} | |
-- We indeed obtained the set of nodes all pointing within the same DAG | |
-- The stateful nature was well-hidden till the very end (running) | |
-- We see the deep sharing: hash code 2, which is v1+v2, is used twice. | |
-- Then code 7, which is (v1+v2)+(v3+v4), is used 4 times | |
-- after being computed | |
{- | |
*ExpF> test_sklansky 8 | |
([0,2,4,7,9,12,15,19], | |
DAG BiMap[(0,NVar "1"),(1,NVar "2"), | |
(2,NAdd 0 1),(3,NVar "3"), | |
(4,NAdd 2 3),(5,NVar "4"), | |
(6,NAdd 3 5),(7,NAdd 2 6), -- just as it was for test_sklansky 4 | |
(8,NVar "5"),(9,NAdd 7 8), | |
(10,NVar "6"),(11,NAdd 8 10), | |
(12,NAdd 7 11),(13,NVar "7"), | |
(14,NAdd 11 13),(15,NAdd 7 14), | |
(16,NVar "8"),(17,NAdd 13 16), | |
(18,NAdd 11 17),(19,NAdd 7 18)]) | |
-} | |
-- * // | |
-- ------------------------------------------------------------------------ | |
-- * Performance is still a problem | |
-- We have demonstrated the sharing detection technique that represents | |
-- a DSL program as a DAG, eliminating multiple occurrences of common | |
-- subexpressions. Alas, to find all these common subexpressions we have | |
-- to examine the entire expression _tree_, which may take a long time for large | |
-- programs (large circuits). | |
-- See ExpLet.hs for the exponential speed-up. | |
-- To force the evaluation | |
do_bench :: (a,DAG) -> Int | |
do_bench (_,DAG d) = size d | |
bench_mul n = do_bench $ | |
run_expN (mul n (variable "i")) | |
-- It takes effort to find the common subexpressions, just because | |
-- we have to traverse the whole tree. | |
bench12 = bench_mul (2^12) | |
bench13 = bench_mul (2^13) | |
{- | |
-- Interpreted code | |
*ExpF> bench_mul (2^11) | |
12 | |
(0.06 secs, 4688952 bytes) | |
*ExpF> bench_mul (2^12) | |
13 | |
(0.09 secs, 5344760 bytes) | |
*ExpF> bench_mul (2^13) | |
14 | |
(0.20 secs, 9791092 bytes) | |
-- compiled with -O2 | |
Prelude ExpF> bench_mul (2^20) | |
21 | |
(0.44 secs, 80752592 bytes) | |
Prelude ExpF> bench_mul (2^21) | |
22 | |
(0.87 secs, 160434744 bytes) | |
-} | |
{- | |
*ExpF> test_sklansky 128 | |
([0,2,4,7,9,12,15,19,21, ... 560,567,575], ...) | |
(0.37 secs, 13650184 bytes) | |
-} | |
-- Not so bad... The implicit sharing detection is quite fast | |
bench_skl n = do_bench $ test_sklansky n | |
{- | |
-- Interpreted code | |
*ExpF> bench_skl 128 | |
576 | |
(0.33 secs, 10854396 bytes) | |
*ExpF> bench_skl 256 | |
1280 | |
(1.79 secs, 38952968 bytes) | |
-- compiled with -O2 | |
Prelude ExpF> bench_skl 128 | |
576 | |
(0.02 secs, 2163436 bytes) | |
Prelude ExpF> bench_skl 256 | |
1280 | |
(0.08 secs, 4333380 bytes) | |
Prelude ExpF> bench_skl 512 | |
2816 | |
(0.37 secs, 13908376 bytes) | |
-} | |
main = do | |
print test_shb | |
print test_sh4 | |
print test_val4 | |
print test_sm4 | |
print test_sm8 | |
print $ test_sklansky_o 4 | |
print $ test_sklansky 4 |
-- Implicit sharing (common sub-expression elimination) in | |
-- an embedded DSL for arithmetic expressions | |
-- The DSL is embedded as a data type | |
-- | |
-- This file is based on the message by Tom Hawkins | |
-- posted on Haskell-Cafe on Feb 8, 2008 | |
-- http://www.haskell.org/pipermail/haskell-cafe/2008-February/039339.html | |
-- We fill in the missing details so as to run the example and exhibit | |
-- the problem. | |
module CSE_ExpI (Exp(..), sklansky, bench_mul, bench_skl, main) where | |
import CSE_BiMap | |
-- Tom Hawkins: | |
-- My DSLs invariably define a datatype to capture expressions; something | |
-- like this: | |
-- * // | |
-- * A sub-DSL of arithmetic expressions, embedded as a datatype | |
data Exp | |
= Add Exp Exp -- one can add Sub, Neg, etc. | |
| Variable String | |
| Constant Int | |
deriving (Eq, Ord, Show) | |
-- Sample expressions | |
exp_a = Add (Constant 10) (Variable "i1") | |
exp_b = Add exp_a (Variable "i2") | |
-- We can indeed generate assembly or Verilog from Exp after | |
-- a topological sorting | |
-- But first we have to detect identical sub-expressions and | |
-- share them, converting the AST to a DAG | |
-- * Common sub-expressions are abundant and easy to create | |
-- * Two running examples, abstracted from real life | |
-- * Example 1: multiplication by a known integer constant | |
-- Our DSL does not have the multiplication | |
-- operation (many 8-bit CPUs don't have it). We can generate | |
-- code for constant multiplication by repeated addition. | |
mul :: Int -> Exp -> Exp | |
mul 0 _ = Constant 0 | |
mul 1 x = x | |
mul n x | n `mod` 2 == 0 = mul (n `div` 2) (Add x x) | |
mul n x = Add x (mul (n-1) x) | |
-- Examples of duplication | |
exp_mul4 = mul 4 (Variable "i1") | |
{- | |
Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1")) | |
-} | |
exp_mul8 = mul 8 (Variable "i1") | |
-- Twice as much duplication | |
{- | |
Add | |
(Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1"))) | |
(Add (Add (Variable "i1") (Variable "i1")) (Add (Variable "i1") (Variable "i1"))) | |
-} | |
-- * Question: why do we see all this duplication? | |
-- * Wasn't the multiplication algorithm supposed to be optimal? | |
-- When we apply something like exp_mul8 to an integer 5, do | |
-- we keep repeating 5+5 four times in a row? | |
-- * // | |
-- The other running example: | |
-- * sklansky by Matthew Naylor, with further credit to | |
-- * Chalmers folk, Mary Sheeran and Emil Axelsson. | |
-- * sklansky add [e1,e2, ... en] | |
-- * ===> [e1, e1+e2, ... e1+e2+...en] | |
-- It produces a running sum, with more parallelism and smaller latency. | |
-- It is somewhat in the spirit of the optimal multiplication mul above | |
-- The original code, from Matthew Naylor's message: | |
-- http://www.haskell.org/pipermail/haskell-cafe/2008-February/039671.html | |
sklansky :: (a -> a -> a) -> [a] -> [a] | |
sklansky f [] = [] | |
sklansky f [x] = [x] | |
sklansky f xs = left' ++ [ f (last left') r | r <- right' ] | |
where | |
(left, right) = splitAt (length xs `div` 2) xs | |
left' = sklansky f left | |
right' = sklansky f right | |
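-- A hypothetical sanity check on plain integers (not in the original):
-- sklansky computes the same running sums as scanl1 (+), only with a
-- different association of the additions.
test_sklansky_int :: [Int]
test_sklansky_int = sklansky (+) [1,2,3,4]
-- [1,3,6,10]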
test_sklansky_i n = sklansky Add xs | |
where | |
xs = Prelude.map (Variable . show) [1..n] | |
-- * // | |
-- * test_sklansky_i 4 | |
-- (v1+v2) is repeated three times | |
{- | |
[Variable "1", | |
Add (Variable "1") (Variable "2"), | |
Add (Add (Variable "1") (Variable "2")) (Variable "3"), | |
Add (Add (Variable "1") (Variable "2")) (Add (Variable "3") (Variable "4"))] | |
-} | |
-- * Common sub-expressions appear across independent expressions | |
{- | |
Tom Hawkins wrote (Haskell-Cafe on Feb 8, 2008) | |
The problem comes when I want to generate efficient code from an | |
Exp (ie. to C or some other target language). The method I use | |
involves converting the tree of subexpressions into an acyclic graphic | |
to eliminate common subexpressions. The nodes are then topologically | |
ordered and assigned an instruction, or statement for each node.... | |
The process of converting an expression tree to a graph uses either Eq | |
or Ord (either derived or a custom instance) to search and build a set | |
of unique nodes to be ordered for execution. In this case "a", then | |
"b", then "c". The problem is expressions often have shared, | |
equivalent subnodes, which dramatically grows the size of the tree. | |
As these trees grow in size, the equality comparison in graph | |
construction quickly becomes the bottleneck for DSL compilation. | |
What's worse, the phase transition from tractable to intractable is | |
very sharp. In one of my DSL programs, I made a seemingly small | |
change, and compilation time went from milliseconds to | |
not-in-a-million-years. | |
-} | |
-- Tom Hawkins was about to give up on Haskell | |
-- His message was entitled: ``I love purity, but it's killing me'' | |
-- * | |
-- Tom Hawkins's message did not show the algorithm for converting | |
-- a tree of sub-expressions into a DAG. | |
-- The message does say that the algorithm uses Eq or Ord instances | |
-- of Exp | |
-- Probably Tom Hawkins adds all sub-expressions of an expression | |
-- into a DAG, ensuring that duplicates are not added | |
newtype DAG = DAG (BiMap Exp) deriving Show | |
-- Enter all sub-expressions of an expression into a DAG, | |
-- removing the duplicates | |
exp_to_dag :: Exp -> DAG -> DAG | |
exp_to_dag e d@(DAG m) | Just _ <- lookup_key e m = d | |
exp_to_dag e@(Add e1 e2) d = | |
let d1 = exp_to_dag e1 d | |
d2 = exp_to_dag e2 d1 | |
DAG m2 = d2 | |
(k,m') = insert e m2 | |
in DAG m' | |
-- No sub-expressions | |
exp_to_dag e d = d | |
-- There are only two expressions to compute | |
dag_4 = exp_to_dag exp_mul4 (DAG empty) | |
{- | |
DAG BiMap[(0,Add (Variable "i1") (Variable "i1")), | |
(1,Add (Add (Variable "i1") (Variable "i1")) | |
(Add (Variable "i1") (Variable "i1")))] | |
-} | |
dag_8 = exp_to_dag exp_mul8 (DAG empty) | |
bench_mul n = | |
case exp_to_dag (mul n (Variable "i")) (DAG empty) of DAG d -> size d | |
bench20 = bench_mul (2^20) | |
bench21 = bench_mul (2^21) | |
-- The size of the DAG increased by 1 node; yet it took twice as | |
-- much time... | |
{- | |
-- Interpreted code | |
*ExpI> bench20 | |
20 | |
(4.51 secs, 1585728 bytes) | |
*ExpI> bench21 | |
21 | |
(8.76 secs, 530728 bytes) | |
-- compiled with -O2 | |
Prelude ExpI> bench_mul (2^20) | |
20 | |
(0.06 secs, 1053460 bytes) | |
Prelude ExpI> bench_mul (2^21) | |
21 | |
(0.11 secs, 1062568 bytes) | |
Prelude ExpI> bench_mul (2^22) | |
22 | |
(0.22 secs, 1600568 bytes) | |
Prelude ExpI> bench_mul (2^23) | |
23 | |
(0.43 secs, 1068432 bytes) | |
-} | |
test_skl_dag = foldl (flip exp_to_dag) (DAG empty) (test_sklansky_i 4) | |
bench_skl n = | |
case foldl (flip exp_to_dag) (DAG empty) (test_sklansky_i n) of | |
DAG d -> size d | |
{- | |
-- Interpreted code | |
*ExpI> bench_skl 128 | |
448 | |
(0.32 secs, 1637380 bytes) | |
*ExpI> bench_skl 256 | |
1024 | |
(1.58 secs, 2234148 bytes) | |
-- compiled with -O2 | |
Prelude ExpI> bench_skl 128 | |
448 | |
(0.03 secs, 1053144 bytes) | |
Prelude ExpI> bench_skl 256 | |
1024 | |
(0.12 secs, 2297932 bytes) | |
Prelude ExpI> bench_skl 512 | |
2304 | |
(0.56 secs, 3419484 bytes) | |
-} | |
main = do | |
print exp_b | |
print exp_mul4 | |
print exp_mul8 | |
print $ test_sklansky_i 4 | |
print dag_8 | |
print $ bench_skl 128 |
{-# LANGUAGE NoMonomorphismRestriction #-} | |
-- * Adding explicit sharing to our DSL | |
module ExpLet where | |
import CSE_ExpF | |
-- * Why do we even need explicit sharing | |
-- * Help the human reader (cf. Principia Mathematica) | |
-- * Can't we just use Haskell's let? | |
-- Haskell's let is certainly explicit and so clearly | |
-- helps the reader. Does it help CSE eliminators? | |
-- Does it help tagless interpreters? | |
-- The multiplication-by-4 example written explicitly | |
-- * exp_mul4 = mul 4 (variable "i1") | |
exp_mul4 = | |
let x = variable "i1" in | |
let y = add x x in | |
add y y | |
-- * Question: Does Haskell guarantee sharing? | |
-- * Question: can we rely on memoization? | |
tree_mul4 = case exp_mul4 of ExpI t -> t | |
{- | |
Add (Add (Variable "i1") (Variable "i1")) | |
(Add (Variable "i1") (Variable "i1")) | |
-} | |
dag_mul4 = run_expN exp_mul4 | |
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)]) | |
-- * // | |
-- * Haskell's let does work, sometimes! | |
-- The question about Haskell let is non-trivial because | |
-- it sometimes works. | |
-- To demonstrate how Haskell let helps some interpreters | |
-- but not the others, we define two tagless-final | |
-- interpreters. | |
-- One computes the size of an expression (the number of the | |
-- constructors) | |
-- * Haskell's let does help some DSL interpreters | |
-- * The size interpreter | |
newtype Size t = Size Int | |
instance Exp Size where | |
constant _ = Size 1 | |
variable _ = Size 1 | |
add (Size x) (Size y) = Size (x+y+1) | |
Size size_mul4 = exp_mul4 -- 7 | |
Size size_large = mul (2^30) (variable "i1") | |
-- 2147483647 | |
-- Nearly instantaneous: only 0.01 secs | |
-- * The print interpreter | |
-- Another interpreter will print the expression | |
newtype Print t = Print (IO ()) | |
instance Exp Print where | |
constant = Print . putStr . show | |
variable = Print . putStr | |
add (Print x) (Print y) = Print (x >> putStr " + " >> y) | |
-- This interpreter will take a long time to print | |
-- (mul (2^30) (variable "i1")), _even_ if we redirect | |
-- the output to /dev/null | |
Print print_mul4 = exp_mul4 | |
-- i1 + i1 + i1 + i1 | |
-- We see the duplication in the output, thus the duplication | |
-- of the effort to print. | |
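-- A tiny runner for the Print interpreter; run_print is a helper added | |
-- here for illustration (not part of the original lecture code) | |
run_print :: Print t -> IO () | |
run_print (Print io) = io >> putStrLn "" | |
-- e.g. run_print exp_mul4 prints "i1 + i1 + i1 + i1" | |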
-- * Print pm30 = mul (2^30) (variable "i1") | |
-- Here, running will take a long time, even if we print to /dev/null | |
-- So, here Haskell's let did not help! | |
-- We thus need a sharing form in the DSL itself | |
-- * // | |
-- * Adding a new form let_ to our DSL | |
-- We extend the DSL language with a new expression form, let_, | |
-- to indicate the explicit sharing | |
-- Tagless-final interpreters are easily extensible | |
class ExpLet repr where | |
let_ :: repr a -> (repr a -> repr b) -> repr b | |
-- Re-written exp_mul4 using the explicit sharing | |
exp_mul4' = | |
let_ (variable "i1") (\x -> | |
let_ (add x x) (\y-> | |
add y y)) | |
-- * Extending the existing interpreters with let_ | |
-- As one might expect, let_ is basically the reverse application | |
instance ExpLet R where | |
let_ x f = f x | |
-- As expected, exp_mul4 with and without explicit sharing evaluate | |
-- to the same results. After all, sharing is an optimization, | |
-- it should not affect the results of DSL programs | |
val_mul4 = unR exp_mul4 [("i1",5)] -- 20 | |
val_mul4' = unR exp_mul4' [("i1",5)] -- 20 | |
-- * How to see sharing | |
-- We would like to `see' the sharing. | |
-- We therefore define a show-like function: an interpreter | |
-- of tagless-final expressions as strings. | |
-- Actually, we need a bit more than strings: to show sharing | |
-- (as let-expressions) we need a way to generate `pointers', | |
-- or local variable names. | |
type LetVarCount = Int -- counter for generating let-var-names | |
newtype S t = S{unS :: LetVarCount -> String} | |
instance Exp S where | |
constant = S . const . show | |
variable = S . const | |
add e1 e2 = S(\c -> unS e1 c ++ " + " ++ unS e2 c) | |
instance ExpLet S where | |
let_ e f = S(\c -> let vname = "v" ++ show c | |
in unwords ["let",vname,"=",unS e c, | |
"in",unS (f (S (const vname))) (succ c)]) | |
run_expS :: S t -> String | |
run_expS (S m) = m 0 | |
sh_mul4 = run_expS exp_mul4 | |
-- "i1 + i1 + i1 + i1" | |
sh_mul4' = run_expS exp_mul4' | |
-- "let v0 = i1 in let v1 = v0 + v0 in v1 + v1" | |
-- * // | |
-- * Payoff: extending the N interpreter to handle explicit sharing | |
-- Sharing a computation means performing the computation once and sharing | |
-- its result. The code below says exactly that. | |
instance ExpLet N where | |
let_ e f = N(do | |
x <- unN e | |
unN $ f (N (return x))) | |
-- Now, we can evaluate exp_mul4' as a DAG | |
-- The result is the same as dag_mul4. | |
dag_mul4' = run_expN exp_mul4' | |
-- (2,DAG BiMap[(0,NVar "i1"),(1,NAdd 0 0),(2,NAdd 1 1)]) | |
-- * Benchmarking | |
-- To really see the difference, we need bigger multiplications | |
-- We don't want to write the expressions like |exp_mul4'| by hand, | |
-- we want to generate them. | |
-- We rewrite the |mul| generator to use the explicit sharing. | |
-- The difference from |mul| is on the last-but-one line. | |
mul' :: (ExpLet repr, Exp repr) => Int -> repr Int -> repr Int | |
mul' 0 _ = constant 0 | |
mul' 1 x = x | |
mul' n x | n `mod` 2 == 0 = let_ x (\x' -> mul' (n `div` 2) (add x' x')) | |
mul' n x = add x (mul' (n-1) x) | |
-- Is there another way to write mul'? | |
-- mul' n x | n `mod` 2 == 0 = let_ (add x x) (\y -> mul' (n `div` 2) y) | |
-- "let v0 = i1 + i1 in let v1 = v0 + v0 in v1" | |
sh_mul4'' = run_expS (mul' 4 (variable "i1")) | |
-- "let v0 = i1 in let v1 = v0 + v0 in v1 + v1" | |
-- * // | |
-- * Benchmarking. Compare with bench12 and bench13 from ExpF.hs | |
bench_mul' n = do_bench $ | |
run_expN (mul' n (variable "i")) | |
bench12 = bench_mul' (2^12) | |
bench13 = bench_mul' (2^13) | |
-- The exponential speed-up is apparent | |
{- | |
*ExpLet> bench12 | |
13 | |
(0.02 secs, 3171764 bytes) | |
*ExpLet> bench13 | |
14 | |
(0.01 secs, 534548 bytes) | |
-} | |
-- Recall that mul (2^30), without sharing, denotes an expression tree with 2^30 leaves! | |
{- | |
*ExpLet> bench_mul' (2^20) | |
21 | |
(0.01 secs, 1585752 bytes) | |
*ExpLet> bench_mul' (2^30) | |
31 | |
(0.01 secs, 1055932 bytes) | |
-} | |
-- * Some sharing is left to discover: | |
{- | |
*ExpLet> run_expS (mul' 15 (variable "i")) | |
"i + let v0 = i in v0 + v0 + | |
let v1 = v0 + v0 in v1 + v1 + let v2 = v1 + v1 in v2 + v2" | |
-} | |
-- It is found and eliminated | |
{- | |
*ExpLet> run_expN (mul' 15 (variable "i")) | |
(6,DAG BiMap[ | |
(0,NVar "i"), | |
(1,NAdd 0 0), | |
(2,NAdd 1 1), | |
(3,NAdd 2 2), | |
(4,NAdd 2 3), | |
(5,NAdd 1 4), | |
(6,NAdd 0 5)]) | |
-} | |
-- rather quickly: | |
{- | |
*ExpLet> bench_mul' (2^30-1) | |
59 | |
(0.01 secs, 1078232 bytes) | |
-} | |
main = do | |
print tree_mul4 | |
print dag_mul4 | |
print val_mul4 | |
print val_mul4' | |
print sh_mul4' | |
print sh_mul4'' | |
print bench13 |
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} | |
{-# LANGUAGE RankNTypes #-} | |
module ExpSYMFinal where | |
import Control.Monad | |
-- typeclass representing final encoding of the Exp dsl | |
-- 'repr' is the semantic domain | |
class ExpSYM repr where | |
lit :: Int -> repr | |
neg :: repr -> repr | |
add :: repr -> repr -> repr | |
-- interpret Exp AST in the Int semantic domain | |
instance ExpSYM Int where | |
lit n = n | |
neg e = - e | |
add e1 e2 = e1 + e2 | |
-- run the Int interpreter | |
eval :: Int -> Int | |
eval = id | |
-- interpret Exp AST in the String semantic domain | |
instance ExpSYM String where | |
lit n = show n | |
neg e = "(-" ++ e ++ ")" | |
add e1 e2 = "(" ++ e1 ++ " + " ++ e2 ++ ")" | |
-- run the String interpreter | |
view :: String -> String | |
view = id | |
tf1 :: forall repr. ExpSYM repr => repr | |
tf1 = add (lit 8) (neg (add (lit 1) (lit 2))) | |
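-- A small usage sketch (tf1_val and tf1_str are names added here for | |
-- illustration): the same polymorphic term interpreted in two domains | |
tf1_val :: Int | |
tf1_val = eval tf1 -- 5 | |
tf1_str :: String | |
tf1_str = view tf1 -- "(8 + (-(1 + 2)))" | |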
-- add multiplication operation to the Exp dsl | |
class MulSYM repr where | |
mul :: repr -> repr -> repr | |
-- mul for Int domain | |
instance MulSYM Int where | |
mul e1 e2 = e1 * e2 | |
-- mul for String domain | |
instance MulSYM String where | |
mul e1 e2 = "(" ++ e1 ++ " * " ++ e2 ++ ")" | |
tfm1 :: forall repr. (MulSYM repr, ExpSYM repr) => repr | |
tfm1 = add (lit 7) (neg (mul (lit 1) (lit 2))) | |
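-- Usage sketch for the extended language (tfm1_val and tfm1_str are | |
-- hypothetical names): the existing interpreters extend seamlessly | |
tfm1_val :: Int | |
tfm1_val = eval tfm1 -- 5 | |
tfm1_str :: String | |
tfm1_str = view tfm1 -- "(7 + (-(1 * 2)))" | |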
-- serialization problem | |
data Tree = Leaf String | |
| Node String [Tree] | |
deriving (Eq, Read, Show) | |
instance ExpSYM Tree where | |
lit n = Node "Lit" [Leaf $ show n] | |
neg e = Node "Neg" [e] | |
add e1 e2 = Node "Add" [e1,e2] | |
-- run the Tree interpreter | |
toTree :: Tree -> Tree | |
toTree = id | |
tf1_tree :: Tree | |
tf1_tree = toTree tf1 -- with the type sig the call to toTree is optional | |
type ErrMsg = String | |
safeRead :: Read a => String -> Either ErrMsg a | |
safeRead s = case reads s of | |
[(x, "")] -> Right x | |
_ -> Left $ "Read error " ++ s | |
-- deserializer, closed recursion style | |
fromTree :: (ExpSYM repr) => Tree -> Either ErrMsg repr | |
fromTree (Node "Lit" [Leaf n]) = liftM lit $ safeRead n | |
fromTree (Node "Neg" [e]) = liftM neg $ fromTree e | |
fromTree (Node "Add" [e1, e2]) = liftM2 add (fromTree e1) (fromTree e2) | |
fromTree e = Left $ "Invalid tree: " ++ show e | |
tf1_eval :: IO () | |
tf1_eval = case fromTree tf1_tree of | |
Left e -> putStrLn $ "Error: " ++ e | |
Right x -> do | |
print $ eval x | |
-- the line below doesn't work: x is lambda-bound and hence monomorphic, | |
-- so once 'eval x' fixes its type to Int it cannot also be used as a String | |
-- print $ view x | |
-- duplicating interpreter, creates monomorphic copies | |
instance (ExpSYM repr, ExpSYM repr') => ExpSYM (repr,repr') where | |
lit x = (lit x, lit x) | |
neg (e1,e2) = (neg e1, neg e2) | |
add (e11,e12) (e21, e22) = (add e11 e21, add e12 e22) | |
-- run the duplicating interpreter | |
duplicate :: (ExpSYM repr, ExpSYM repr') => (repr,repr') -> (repr,repr') | |
duplicate = id | |
-- error reporting for deserializer | |
check_consume f (Left e) = putStrLn $ "Error: " ++ e | |
check_consume f (Right x) = f x | |
-- dup_consume makes no restrictions on what the type of x2 is | |
dup_consume ev x = print (ev x1) >> return x2 | |
where (x1,x2) = duplicate x | |
-- this works because each |dup_consume| peels off one monomorphic copy and | |
-- returns the rest; with two |dup_consume|s we get 2 + 1 copies of x (three | |
-- in this case, hence the name) and use each copy at exactly one type. | |
-- TODO: make them use a common copy? | |
thrice x = dup_consume eval x >>= dup_consume view >>= print . toTree | |
tf1_int3 = check_consume thrice . fromTree $ tf1_tree | |
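-- A type sketch (thrice_sig is a hypothetical name): GHC infers that | |
-- |thrice| consumes a nested pair of monomorphic copies, one component | |
-- per interpretation performed above | |
thrice_sig :: (Int, (String, Tree)) -> IO () | |
thrice_sig = thrice | |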
-- deserializer, open recursion style. replace all self-recursive calls with | |
-- applications of an explicit reference to the *-version of itself (this will | |
-- be filled in later using Y-combinator) | |
-- this is needed for the deserializer to be extensible | |
fromTree' :: (ExpSYM repr) | |
=> (Tree -> Either ErrMsg repr) | |
-> (Tree -> Either ErrMsg repr) | |
fromTree' self (Node "Lit" [Leaf n]) = liftM lit $ safeRead n | |
fromTree' self (Node "Neg" [e]) = liftM neg $ self e | |
fromTree' self (Node "Add" [e1, e2]) = liftM2 add (self e1) (self e2) | |
fromTree' self e = Left $ "Invalid tree: " ++ show e | |
-- now tie the knot explicitly using the Y-combinator | |
-- if f takes a first argument which is the *-version of itself then fill that | |
-- argument's place | |
fix f = f (fix f) | |
-- now fromTree'' = fromTree' fromTree'' (fromTree' applied to the *-version of itself) | |
fromTree'' = fix fromTree' | |
tf1E_int3 = check_consume thrice . fromTree'' $ tf1_tree | |
-- this results in error as expected | |
tfxE_int3 = check_consume thrice . fromTree'' $ | |
Node "Lit" [Leaf "1", Leaf "2"] | |
-- let's extend the deserializer now to include multiplication | |
-- 1] extend wire format for multiplication | |
instance MulSYM Tree where | |
mul e1 e2 = Node "Mul" [e1,e2] | |
-- 2] extend duplicator for multiplication | |
instance (MulSYM repr, MulSYM repr') => MulSYM (repr, repr') where | |
mul (e11, e12) (e21, e22) = (mul e11 e21, mul e12 e22) | |
-- 3] extend the deserializer | |
fromTreeExt self (Node "Mul" [e1,e2]) = liftM2 mul (self e1) (self e2) | |
fromTreeExt self e = fromTree' self e | |
fromTreeExt' = fix fromTreeExt | |
-- use the same 'check_consume' for error reporting, and the previously defined | |
-- computation (i.e., 'thrice'), with the new, extended deserializer | |
tf1'_int3 = check_consume thrice . fromTreeExt' $ tf1_tree | |
tfm1_int3 = check_consume thrice . fromTreeExt' $ toTree tfm1 | |
-- define the context on which the operations depend; in this case it is | |
-- the sign | |
data Ctx = Pos | Neg | |
-- interpreter transformer | |
instance ExpSYM repr => ExpSYM (Ctx -> repr) where | |
-- define how operations in current semantics (ExpSYM) interact with | |
-- transformation | |
lit n Pos = lit n | |
lit n Neg = neg (lit n) | |
add e1 e2 ctx = add (e1 ctx) (e2 ctx) | |
-- the transformation itself: it threads contextual information (the sign) | |
-- through terms that previously had none. Negation merely flips the | |
-- context instead of building a Neg node; add above is a homomorphism | |
-- wrt push_neg, simply propagating the context. | |
neg e Pos = e Neg | |
neg e Neg = e Pos | |
-- the interpreter for pushing negation down; it needs to supply the base | |
-- context | |
push_neg :: forall t. (Ctx -> t) -> t | |
push_neg e = e Pos | |
-- extend the transformer to include multiplication | |
instance MulSYM repr => MulSYM (Ctx -> repr) where | |
-- multiplication is not a homomorphism with respect to negation, so we | |
-- pattern match on the context and push the negation into one factor only | |
mul e1 e2 Pos = mul (e1 Pos) (e2 Pos) | |
mul e1 e2 Neg = mul (e1 Pos) (e2 Neg) | |
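-- A usage sketch (tfm1_pushneg_view is a name added for illustration): | |
-- interpret tfm1 through the Ctx transformer, then view the result | |
tfm1_pushneg_view :: String | |
tfm1_pushneg_view = view (push_neg tfm1) | |
-- expected: "(7 + (1 * (-2)))" | |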
main :: IO () | |
main = tf1_eval |