t0yv0 · September 28, 2011 19:05
diff --git a/CompilerTransforms.fs b/CompilerTransforms.fs
 (*

 We start designing a representation for some functional language.
 Keeping it simple, we use strings to represent identifiers (to be
 changed later by something more fancy), define a few primitives,
 constants, and finally expressions.

 *)

 type Id = string

 type Constant =
    | Bool of bool
    | Double of double
    | Int of int
    | String of string

 type Primitive =
    | Add
    | Divide
    | Multiply
    | Remove

 type Expression =
    | Apply of E * E
    | Call of Primitive * list<E>
    | Constant of Constant
    | If of E * E * E
    | Lambda of Id * E
    | Let of Id * E * E
    | Var of Id

 and private E = Expression

 (* 

 The compiler will have to do a quite a few transformations on this
 data structure.  Let us start with something simple, like getting the
 set of free identifiers in an expression.  Trying to keep it as simple
 as possible, we just write a recursive function again:

 *)

 let rec getFreeIdentifiers (expr: Expression) =
    let (!) = getFreeIdentifiers
    let (!!) = List.map (!)
    match expr with
    | Apply (f, x) ->
        !f + !x
    | Constant _ ->
        Set.empty
    | Call (_, xs) ->
        Set.unionMany !!xs
    | If (c, t, e) ->
        !c + !t + !e
    | Let (id, value, body) ->
        Set.union !value (Set.remove id !body)
    | Lambda (id, body) ->
        Set.remove id !body
    | Var id ->
        Set.singleton id

 (* 

 Let us now consider a transformation of type `E -> E`.  For example,
 note that in our untyped language `Apply (Lambda (x, y), z)` is
 semantically equivalent to `Let (x, z, y)`.

 As an aside, this kind of reasoning is quite dangerous! You better be
 sure you are not fooling yourself by imagining a semantic equivalence
 where there is none.  Using proof assistants such as Coq is a great
 help.

 We can normalize our representation by writing a transformation that
 finds all reducible expressions and converts them to `Let`
 expressions:

 *)

 let rec removeRedexes (expr: Expression) =
    let (!) = removeRedexes
    let (!!) = List.map (!)
    match expr with
    | Apply (f, x) ->
        match !f, !x with
        | Lambda (id, body), value -> Let (id, x, body)
        | f, x -> Apply (f, x)
    | Constant _ ->
        expr
    | Call (p, xs) ->
        Call (p, !!xs)
    | If (c, t, e) ->
        If (!c, !t, !e)
    | Let (id, value, body) ->
        Let (id, !value, !body)
    | Lambda (id, body) ->
        Lambda (id, !body)
    | Var _ ->
        expr

 (*

 The above code is clear, but there is a maintainability problem.  When
 the `Expression` definition is changing over time, when it has 30 or
 so cases, or when we write a lot of transformations, pattern-matching
 becomes very, very tedious.  Note also that in both examples only a
 few cases were interesting, that is, relevant to the algorithm, and
 the rest were processed in a generic fashion.

 The practical solution is to extract the common pattern into a single
 function that maps over the expression's immediate children.  It
 greatly reduces the boilerplate code.

 *)

 let transform (!) expr =
    let (!!) = List.map (!)
    match expr with
    | Apply (f, x) -> Apply (!f, !x)
    | Call (p, xs) -> Call (p, !!xs)
    | Constant _ -> expr
    | If (c, t, e) -> If (!c, !t, !e)
    | Lambda (v, b) -> Lambda (v, !b)
    | Let (v, x, y) -> Let (v, !x, !y)
    | Var _ -> expr

 (*

 The `transform` function has to be touched every time `Expression`
 definition changes, and is proportional to `Expression` in length.
 But then it is very straightforward, and other transformations become
 much shorter, for example:

 *)

 let rec removeRedexesShort (expr: Expression) =
    let (!) = removeRedexesShort
    match expr with
    | Apply (f, x) ->
        match !f, !x with
        | Lambda (id, body), value -> Let (id, x, body)
        | f, x -> Apply (f, x)
    | _ ->
        transform (!) expr

 (*

 A truly lazy programmer can define a `fold` over immediate
 sub-expressions in terms of `transform`, so that `fold` does not have
 to be touched when `Expression` changes:

 *)

 let fold (f: 'S -> E -> 'S) (z: 'S) (expr: E) : 'S =
    let r = ref z
    let g expr =
        r := f !r expr
        expr
    transform g expr
    |> ignore
    !r

 (*

 The simple `fold` helps to collect free identifiers in
 accumulator-fashion, again ignoring boring cases and focusing only on
 what is relevant:

 *)

 let rec getFreeIdentifiersShort (expr: E) =
    let rec free (acc: Set<Id>) (expr: E) =
        let (!) = getFreeIdentifiersShort
        match expr with
        | Let (id, value, body) ->
            let set = free acc body
            free (Set.remove id set) value
        | Lambda (id, body) ->
            Set.remove id (free acc body)
        | Var id ->
            Set.add id acc
        | _ ->
            fold free acc expr
    free Set.empty expr
	(*

	We start designing a representation for some functional language.
	Keeping it simple, we use strings to represent identifiers (to be
	changed later by something more fancy), define a few primitives,
	constants, and finally expressions.

	*)

	type Id = string

	type Constant =
	\| Bool of bool
	\| Double of double
	\| Int of int
	\| String of string

	type Primitive =
	\| Add
	\| Divide
	\| Multiply
	\| Remove

	type Expression =
	\| Apply of E * E
	\| Call of Primitive * list<E>
	\| Constant of Constant
	\| If of E * E * E
	\| Lambda of Id * E
	\| Let of Id * E * E
	\| Var of Id

	and private E = Expression

	(*

	The compiler will have to do a quite a few transformations on this
	data structure. Let us start with something simple, like getting the
	set of free identifiers in an expression. Trying to keep it as simple
	as possible, we just write a recursive function again:

	*)

	let rec getFreeIdentifiers (expr: Expression) =
	let (!) = getFreeIdentifiers
	let (!!) = List.map (!)
	match expr with
	\| Apply (f, x) ->
	!f + !x
	\| Constant _ ->
	Set.empty
	\| Call (_, xs) ->
	Set.unionMany !!xs
	\| If (c, t, e) ->
	!c + !t + !e
	\| Let (id, value, body) ->
	Set.union !value (Set.remove id !body)
	\| Lambda (id, body) ->
	Set.remove id !body
	\| Var id ->
	Set.singleton id

	(*

	Let us now consider a transformation of type `E -> E`. For example,
	note that in our untyped language `Apply (Lambda (x, y), z)` is
	semantically equivalent to `Let (x, z, y)`.

	As an aside, this kind of reasoning is quite dangerous! You better be
	sure you are not fooling yourself by imagining a semantic equivalence
	where there is none. Using proof assistants such as Coq is a great
	help.

	We can normalize our representation by writing a transformation that
	finds all reducible expressions and converts them to `Let`
	expressions:

	*)

	let rec removeRedexes (expr: Expression) =
	let (!) = removeRedexes
	let (!!) = List.map (!)
	match expr with
	\| Apply (f, x) ->
	match !f, !x with
	\| Lambda (id, body), value -> Let (id, x, body)
	\| f, x -> Apply (f, x)
	\| Constant _ ->
	expr
	\| Call (p, xs) ->
	Call (p, !!xs)
	\| If (c, t, e) ->
	If (!c, !t, !e)
	\| Let (id, value, body) ->
	Let (id, !value, !body)
	\| Lambda (id, body) ->
	Lambda (id, !body)
	\| Var _ ->
	expr

	(*

	The above code is clear, but there is a maintainability problem. When
	the `Expression` definition is changing over time, when it has 30 or
	so cases, or when we write a lot of transformations, pattern-matching
	becomes very, very tedious. Note also that in both examples only a
	few cases were interesting, that is, relevant to the algorithm, and
	the rest were processed in a generic fashion.

	The practical solution is to extract the common pattern into a single
	function that maps over the expression's immediate children. It
	greatly reduces the boilerplate code.

	*)

	let transform (!) expr =
	let (!!) = List.map (!)
	match expr with
	\| Apply (f, x) -> Apply (!f, !x)
	\| Call (p, xs) -> Call (p, !!xs)
	\| Constant _ -> expr
	\| If (c, t, e) -> If (!c, !t, !e)
	\| Lambda (v, b) -> Lambda (v, !b)
	\| Let (v, x, y) -> Let (v, !x, !y)
	\| Var _ -> expr

	(*

	The `transform` function has to be touched every time `Expression`
	definition changes, and is proportional to `Expression` in length.
	But then it is very straightforward, and other transformations become
	much shorter, for example:

	*)

	let rec removeRedexesShort (expr: Expression) =
	let (!) = removeRedexesShort
	match expr with
	\| Apply (f, x) ->
	match !f, !x with
	\| Lambda (id, body), value -> Let (id, x, body)
	\| f, x -> Apply (f, x)
	\| _ ->
	transform (!) expr

	(*

	A truly lazy programmer can define a `fold` over immediate
	sub-expressions in terms of `transform`, so that `fold` does not have
	to be touched when `Expression` changes:

	*)

	let fold (f: 'S -> E -> 'S) (z: 'S) (expr: E) : 'S =
	let r = ref z
	let g expr =
	r := f !r expr
	expr
	transform g expr
	\|> ignore
	!r

	(*

	The simple `fold` helps to collect free identifiers in
	accumulator-fashion, again ignoring boring cases and focusing only on
	what is relevant:

	*)

	let rec getFreeIdentifiersShort (expr: E) =
	let rec free (acc: Set<Id>) (expr: E) =
	let (!) = getFreeIdentifiersShort
	match expr with
	\| Let (id, value, body) ->
	let set = free acc body
	free (Set.remove id set) value
	\| Lambda (id, body) ->
	Set.remove id (free acc body)
	\| Var id ->
	Set.add id acc
	\| _ ->
	fold free acc expr
	free Set.empty expr