Last active
June 11, 2021 13:03
-
-
Save kvnyijia/09eff985c8bb9e92befe1671db63c859 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-------------------------------------------------------------------------------- | |
-- | |
---- Dependently Typed Programming: Part 3 | |
-- | |
---- An Intrinsically Typed Compiler | |
-- | |
---- Josh Ko (Institute of Information Science, Academia Sinica) | |
-- | |
-------------------------------------------------------------------------------- | |
variable A B C : Set | |
_∘_ : (B → C) → (A → B) → (A → C) | |
(g ∘ f) x = g (f x) | |
data _×_ (A B : Set) : Set where | |
_,_ : A → B → A × B | |
data Unit : Set where | |
tt : Unit | |
open import Agda.Builtin.Nat renaming (Nat to ℕ) using (zero; suc; _+_) | |
variable k m n : ℕ | |
data List (A : Set) : Set where | |
[] : List A | |
_∷_ : A → List A → List A | |
infixr 8 _∷_ | |
open import Agda.Builtin.Equality using (_≡_; refl) | |
cong : (f : A → B) {x y : A} → x ≡ y → f x ≡ f y | |
cong f refl = refl | |
begin_ : {x y : A} → x ≡ y → x ≡ y | |
begin eq = eq | |
_≡⟨_⟩_ : (x {y z} : A) → x ≡ y → y ≡ z → x ≡ z | |
x ≡⟨ refl ⟩ y≡z = y≡z | |
_∎ : (x : A) → x ≡ x | |
x ∎ = refl | |
infix 1 begin_ | |
infixr 2 _≡⟨_⟩_ | |
infix 3 _∎ | |
-------------------------------------------------------------------------------- | |
-- | |
---- Extrinsic types | |
-- | |
-- Write a (simply typed) program, and then state and prove its properties | |
-- separately. | |
-- | |
-- The typical example is list append and how it interacts with length: | |
_++_ : List A → List A → List A | |
[] ++ ys = ys | |
(x ∷ xs) ++ ys = x ∷ (xs ++ ys) | |
length : List A → ℕ | |
length [] = 0 | |
length (x ∷ xs) = 1 + length xs | |
++-length : (xs ys : List A) → length (xs ++ ys) ≡ length xs + length ys | |
++-length [] ys = refl | |
++-length (x ∷ xs) ys = | |
begin | |
length ((x ∷ xs) ++ ys) | |
≡⟨ refl ⟩ | |
length (x ∷ (xs ++ ys)) | |
≡⟨ refl ⟩ | |
1 + length (xs ++ ys) | |
≡⟨ cong (1 +_) (++-length xs ys) ⟩ | |
1 + length xs + length ys | |
≡⟨ refl ⟩ | |
length (x ∷ xs) + length ys | |
∎ | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Intrinsic types | |
-- | |
-- We could have convinced ourselves of the truth of ++-length just by going | |
-- through the definition of _++_ and tracing the length of the lists appearing | |
-- in the definition. | |
-- | |
-- In fact, the dependent type system can do the tracing for us: just work with | |
-- vectors instead of lists. | |
data Vec (A : Set) : ℕ → Set where | |
[] : Vec A 0 | |
_∷_ : A → Vec A n → Vec A (1 + n) | |
_++ⱽ_ : Vec A m → Vec A n → Vec A (m + n) | |
[] ++ⱽ ys = ys | |
(x ∷ xs) ++ⱽ ys = x ∷ (xs ++ⱽ ys) | |
-- What we will see next pushes the idea of intrinsically typed programming | |
-- to an extreme and formalises some of the underlying principles. | |
-- | |
-- * Conor McBride [2011]. Ornamental algebras, algebraic ornaments. | |
-- https://personal.cis.strath.ac.uk/conor.mcbride/pub/OAAO/LitOrn.pdf. | |
-- | |
-------------------------------------------------------------------------------- | |
head : Vec A (1 + n) → A | |
head (x ∷ xs) = x | |
-------------------------------------------------------------------------------- | |
-- | |
---- Hutton’s razor | |
-- | |
-- We start with a simple language of additive expressions and its evaluation. | |
data Expr : Set where | |
lit : ℕ → Expr | |
_∔_ : Expr → Expr → Expr | |
infixl 8 _∔_ | |
eval : Expr → ℕ | |
eval (lit x) = x | |
eval (l ∔ r) = eval l + eval r | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Eliminating (non-trivial) recursion | |
-- | |
-- Computing eval is non-trivial (e.g., for an elementary school kid). | |
testExpr : Expr | |
testExpr = ((lit 0 ∔ lit 1) ∔ lit 2) ∔ (lit 3 ∔ lit 4) | |
-- Let us transform the evaluation of an expression into a series of simple | |
-- instructions (that can be followed by a (clever) elementary school kid). | |
-- | |
-- One series of instructions that evaluates testExpr on a stack machine: | |
-- | |
-- push 0 ▷ push 1 ▷ push 2 ▷ add ▷ add ▷ push 3 ▷ push 4 ▷ add ▷ add | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Modelling the stack machine (and its instructions) | |
-- | |
-- There are three kinds of instruction for manipulating a stack: | |
module M where | |
data Prog : Set where | |
-- we can push a number onto a stack | |
push : ℕ → Prog | |
-- or replace the top two numbers of a stack with their sum, | |
add : Prog | |
-- and we also need a sequential composition to form a series of instructions. | |
_▷_ : Prog → Prog → Prog | |
-- Formally, representing stacks as lists, | |
Stack : Set | |
Stack = List ℕ | |
-- we can define the semantics of the programs ... | |
⟦_⟧ : Prog → Stack → Stack | |
⟦ p ⟧ xs = {!!} | |
-- ... or can we? | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Modelling the stack machine (second attempt) | |
-- | |
-- The above ⟦_⟧ function is not total since there is no guarantee that ⟦ add ⟧ | |
-- only operates on lists with at least two elements. | |
-- | |
-- Instead of coping with instructions that can fail, we can work with only | |
-- instructions that operate on stacks with enough elements and thus are | |
-- guaranteed to succeed. | |
-- | |
-- We put two indices into Prog stating the stack heights before and after | |
-- a program is executed (a lightweight form of pre- and post-conditions): | |
data Prog : ℕ → ℕ → Set where | |
push : ℕ → Prog n (1 + n) | |
add : Prog (2 + n) (1 + n) | |
_▷_ : Prog k m → Prog m n → Prog k n | |
infixr 5 _▷_ | |
-- Redefining stacks as vectors, | |
Stack : ℕ → Set | |
Stack = Vec ℕ | |
-- we can now redefine ⟦_⟧ to operate on stacks of specific heights only: | |
⟦_⟧ : Prog m n → Stack m → Stack n | |
⟦ p ⟧ xs = {!!} | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Compiling an expression to its evaluation on the stack machine | |
-- | |
-- The compiler turns an expression into a program that increases the stack | |
-- height by one: | |
compile : Expr → Prog n (1 + n) | |
compile e = {!!} | |
-- More specifically, if we run a program compiled from an expression e on an | |
-- empty stack, we will end up with eval e at the top of the stack after the | |
-- program is executed. | |
soundness : (e : Expr) → head (⟦ compile e ⟧ []) ≡ eval e | |
soundness e = {!!} | |
-- This is a kind of soundness property because we are saying that a syntactic | |
-- transformation has the intended semantic effect. | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Switching to intrinsic types | |
-- | |
-- The key observation here is that the soundness argument can be carried out | |
-- in essentially the same way as ++-length, because we can explain what a | |
-- series of compiled instructions does by going through the definition of | |
-- compile. | |
-- | |
-- Here the property we trace is more involved than length: it’s the semantics | |
-- of the instructions. We therefore index Prog with its semantics: | |
apply : (ℕ → ℕ → ℕ) → Stack (2 + n) → Stack (1 + n) | |
apply f (x ∷ y ∷ xs) = f x y ∷ xs | |
data Prog⁺ : (m n : ℕ) → (Stack m → Stack n) → Set where | |
push : (x : ℕ) → Prog⁺ n (1 + n) (x ∷_) | |
add : Prog⁺ (2 + n) (1 + n) (apply _+_) | |
_▷_ : ∀ {f g} → Prog⁺ k m f → Prog⁺ m n g → Prog⁺ k n (g ∘ f) | |
-- Now we can state the semantics of a compiled program in the type of compile: | |
compile⁺ : (e : Expr) → Prog⁺ n (1 + n) (eval e ∷_) | |
compile⁺ e = {!!} | |
-- There is no need to write a soundness proof for compile anymore! | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Exercises | |
-- | |
-- 0. How does the type-checking of compile⁺ go through? | |
-- | |
-- 1. Extend the Expr language (and all the subsequent definitions) with | |
-- multiplication (or some other operator of your choice). | |
-- | |
-- 2. Generalising Exercise 1 a bit, we could allow the user to supply whatever | |
-- binary operator they want to use by extending Expr with the constructor | |
-- | |
-- binOp : (ℕ → ℕ → ℕ) → Expr → Expr → Expr | |
-- | |
-- One could argue that the resulting stack machine instruction is no longer | |
-- sensible, though. Why? | |
-- | |
-- 3. With Prog and Prog⁺, we haven’t actually eliminated non-trivial recursion | |
-- — the _▷_ operator still leads to a binary tree structure. Two possible | |
-- solutions: | |
-- | |
-- (i) Define a function | |
-- | |
-- normalise : Prog⁺ m n f → Prog⁺ m n f | |
-- | |
-- that rotates a program tree to a right-leaning tree (which is the | |
-- same as a list). Note that the type of normalise says that it | |
-- doesn’t change the semantics of the program. | |
-- | |
-- (ii) Redefine Prog to have a list-like structure, so that ⟦_⟧ becomes | |
-- tail-recursive. Why is tail recursion desirable in this case? | |
-- | |
-- 4. Can you extend Expr with a conditional operator | |
-- | |
-- ifz : Expr → Expr → Expr → Expr | |
-- | |
-- with the semantics | |
-- | |
-- eval (ifz c t e) = if eval c == 0 then eval t else eval e | |
-- | |
-- and make the intrinsically typed compiler work? What about the | |
-- extrinsically typed compiler and its soundness proof? | |
-- | |
-- 5. Can you generalise binOp in Exercise 2 to | |
-- | |
-- op : (n : ℕ) → (ℕ ^ n → ℕ) → Expr ^ n → Expr | |
-- | |
-- where | |
-- | |
-- _^_ : Set → ℕ → Set | |
-- A ^ zero = Unit | |
-- A ^ suc n = A × (A ^ n) | |
-- | |
-- and make the intrinsically typed compiler work? What about the | |
-- extrinsically typed compiler and its soundness proof? | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- The recomputation lemma | |
-- | |
-- We obtained Prog⁺ by encoding the computation of ⟦_⟧ into Prog as a new | |
-- index. The meaning of the new index can be stated formally by the following | |
-- lemma, which says that the index of a Prog⁺ element can be recomputed by ⟦_⟧ | |
-- from the underlying Prog element: | |
forget : ∀ {f} → Prog⁺ m n f → Prog m n | |
forget (push x) = push x | |
forget add = add | |
forget (p ▷ q) = forget p ▷ forget q | |
recomputation : ∀ {f} (p : Prog⁺ m n f) xs → ⟦ forget p ⟧ xs ≡ f xs | |
recomputation p xs = {!!} | |
-- Now if we define | |
compile' : Expr → Prog n (1 + n) | |
compile' e = forget (compile⁺ e) | |
-- the soundness of compile' will become a direct corollary of recomputation: | |
soundness' : (e : Expr) (xs : Stack n) → ⟦ compile' e ⟧ xs ≡ eval e ∷ xs | |
soundness' e xs = {!!} | |
-- | |
-------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------- | |
-- | |
---- Algebraic ornamentation | |
-- | |
-- In general, whenever a new index is added to datatype by encoding the | |
-- computation of a fold (a process called ‘algebraic ornamentation’), there | |
-- is a recomputation lemma explaining the meaning of the new index. | |
-- | |
-- We don’t have time to introduce the necessary machinery (datatype-generic | |
-- programming) for formulating algebraic ornamentation and the recomputation | |
-- lemma fully generically (quantifying over a range of datatypes), but we can | |
-- get a taste of the construction by specialising it to lists. | |
foldr : (A → B → B) → B → List A → B | |
foldr f e [] = e | |
foldr f e (x ∷ xs) = f x (foldr f e xs) | |
data AlgList (A {B} : Set) (f : A → B → B) (e : B) : B → Set where | |
[] : AlgList A f e e | |
_∷_ : (x : A) {y : B} → AlgList A f e y → AlgList A f e (f x y) | |
-- Exercises: | |
-- | |
-- (i) Show that Vec is a special case of AlgList. | |
-- | |
-- (ii) State and prove the recomputation lemma for AlgList. | |
-- | |
-- (iii) Use the recomputation lemma for AlgList to prove ++-length. | |
-- | |
-- (iv) Fold fusion relates two folds, which can be encoded into two instances | |
-- of AlgList; how are the two instances of AlgList related? | |
-- | |
-------------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment