Created
March 24, 2011 21:20
-
-
Save kanak/885923 to your computer and use it in GitHub Desktop.
Discrete Mathematics Using a Computer Chapter 12: AVL Tree Notes and Solutions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{- Discrete Mathematics Using a Computer | |
Chapter 12: AVL Trees | |
AVL Trees: Self-balancing binary trees. | |
-} | |
module AVL where | |
data SearchTree k d = Leaf | |
| Node k d (SearchTree k d) (SearchTree k d) | |
deriving (Show) | |
-- A search tree is parametrized by two types: | |
-- k : the type of a key. k needs to derive Ord | |
-- d : the type of value. can be any type. | |
instance (Eq k, Eq d) => Eq (SearchTree k d) where | |
Leaf == Leaf = True | |
Leaf == _ = False | |
_ == Leaf = False | |
(Node xkey xval xleft xright) == (Node ykey yval yleft yright) = | |
(xkey == ykey) && (xval == yval) && (xleft == yleft) && (xright == yright) | |
subtree :: (Eq k, Eq d) => SearchTree k d -> SearchTree k d -> Bool | |
subtree Leaf _ = True | |
subtree _ Leaf = False | |
subtree x y@(Node _yk _yv yl yr) = x == y || x `subtree` yl || x `subtree` yr | |
properSubtree :: (Eq k, Eq d) => SearchTree k d -> SearchTree k d -> Bool | |
properSubtree _ Leaf = False | |
properSubtree x (Node _yk _yv yl yr) = x `subtree` yl || x `subtree` yr | |
-- book's member returns Bool | |
-- has a different dataElems function that returns the element | |
member' :: (Eq k) => k -> SearchTree k d -> Maybe d | |
member' _ Leaf = Nothing | |
member' k (Node yk yv yl yr) | |
| k == yk = Just yv | |
| otherwise = case member' k yl of | |
Nothing -> member' k yr | |
x -> x | |
-- I'm calling the above member' because it doesn't take advantage of searchtree properties | |
-- check if the whole Tree has a certain property | |
allTree :: (k -> d -> Bool) -> SearchTree k d -> Bool | |
allTree p Leaf = True | |
allTree p (Node yk yv yl yr) = p yk yv && allTree p yl && allTree p yr | |
ordered :: (Ord k) => SearchTree k d -> Bool | |
ordered Leaf = True | |
ordered (Node k _v l r) = allTree (keyLess k) l && allTree (keyMore k) r && ordered l && ordered r | |
where keyLess x y _ = y < x | |
keyMore x y _ = y > x | |
-- this is very wasteful. A node is visited d times where d is its depth | |
-- If the tree is balanced and has n nodes, the n/2 leaf nodes are at depth log n | |
-- So this algorithm is (n/2) * log n + (n/4) * (log n - 1) + ... | |
-- OMEGA n log n. Should be able to do this in linear time just by converting it to list | |
-- via inorder traversal and checking sortedness. | |
-------------------------------------------------------------------------------- | |
{- THEOREM 81: Unique keys part 1 | |
Forall S, ordered(S) and k in S => length (member k S) == 1. | |
Proof: Induction on the structure of the tree. | |
Base Case: Leaf | |
Leaf nodes have no keys. So k in S is False. So vacuously True | |
Inductive Case: | |
Each node can hold a single element. | |
If the tree is ordered, then every node in the left subtree has keys strictly less than | |
the key of the node. | |
Similarly, every node in the right subtree has keys strictly greater than key of the node. | |
So, if the node's key matches the input key, the input key cannot be found anywhere else. | |
Thus, if the key exists in the tree, exactly one node has that key. So length (member...) is 1 | |
-} | |
{- THEOREM 82: Unique keys part 2 | |
Forall s, ordered (S) and k not in S => member' k S = Nothing | |
Proof is by induction on the structure of the tree. | |
Base case: | |
A Leaf is ordered but contains no keys. | |
So, (ordered(S) and k not in S) is (True and True) = True | |
In this case, we have returned Nothing. | |
Inductive Hypothesis: | |
member' k S' where S' is a subtree returns Nothing if they don't have the keys. | |
Inductive case: | |
Suppose k is not in the node S. This means that it's not in either this node or in any of the subtrees. | |
Since the node's key is not equal to k, we would check if it is in the left subtree. | |
Left subtree will return Nothing, so we return value of right subtree. | |
Not in right subtree either, so we return Nothing. | |
-} | |
-------------------------------------------------------------------------------- | |
-- Retrieving Items faster | |
member :: Ord k => k -> SearchTree k d -> Maybe d | |
member _ Leaf = Nothing | |
member k (Node k' v l r) | |
| k == k' = Just v | |
| k < k' = member k l | |
| otherwise = member k r | |
{- THEOREM 83: Correctness of member | |
forall S, ordered(S) and k in S => member k S = Just d where d is the value of the | |
node containing the key. | |
Proof: Induction on structure of the tree. | |
Base case: Leaf | |
Vacuously true since k in S is False, and False implies anything is True. | |
Inductive Hypothesis: if it is in either the left or right subtree, the member on the | |
subtree will return correct answer. | |
Inductive case: Node k' d' l r | |
There are three cases: Either this k' == k, k' > k, k' < k | |
Case: k' == k | |
Since the tree is ordered, no other node has the same key. | |
Thus, d' == d. So we are correct in returning Just d' | |
Case k' < k | |
The node key was smaller than the input key. | |
Since the tree is ordered, everything in the left subtree is smaller than k'. | |
So, everything in left subtree is smaller than k' | |
So, can't find k' in left subtree. | |
So, running member on the right subtree guaranteed to give the answer by induction hypothesis. | |
Case k' > k | |
Same as k' < k case, except looking in left subtree guaranteed to work. | |
-} | |
-------------------------------------------------------------------------------- | |
-- Height and other stuff | |
height :: SearchTree k d -> Integer | |
height Leaf = 0 | |
height (Node _k _v l r) = 1 + max (height l) (height r) | |
{- THEOREM: forall s, height(s) >= 0. | |
Proof is by induction on the structure of s. | |
Base case: Leaf | |
height Leaf = 0, 0 >= 0 | |
Inductive case | |
Inductive hypothesis: height on subtree returns non-negative number | |
height (Node _k _v l r) = 1 + max (height l) (height r) | |
both height l and height r >= 0 | |
so max of the two >= 0 | |
so adding 1 to that, still >= 0 | |
-} | |
retrievalSteps :: SearchTree k d -> Integer | |
retrievalSteps Leaf = 1 | |
retrievalSteps (Node _k _v l r) = maximum [2, retrievalSteps l + 3, retrievalSteps r + 4] | |
-- For leaf, value is 1 because we pattern match immediately. | |
-- For node, if equal keys, we did 2 work: 1. match failure for leaf, 2. checking equality | |
-- less, we did above + checking less than, so 3 work | |
-- more, we did above + "otherwise", so 4 work | |
{- THEOREM: forall s, retrievalSteps s <= 4 * (height(s) + 1) | |
Proof: Induction on structure of tree. | |
Base case: Leaf | |
1 <= 4 * (0 + 1) i.e. 1 <= 4 is True. | |
Inductive case: Node | |
Induction Hypothesis: retrievalSteps subtree <= 4 * (height(subtree) + 1) | |
maximum [2, retrievalSteps l + 3, retrievalSteps r + 4] | |
By induction hypothesis: | |
<= maximum [2, 4 * (height(l) + 1) + 3, 4 * (height(r) + 1) + 4] | |
= maximum [2, 4 * height(l) + 7, 4 * height(r) + 8] | |
Since height is non-negative, the 2nd and 3rd terms are guaranteed to be more than 2. | |
= maximum [4 * height(l) + 7, 4 * height(r) + 8] | |
= 4 * maximum [height(l) + 7/4, height(r) + 2] | |
For simplicity, replace 7/4 with an overestimate: 2 | |
= 4 * maximum [height(l) + 2, height(r) + 2] | |
= 4 * (maximum [height(l) + 1, height(r) + 1] + 1) | |
= 4 * (height(s) + 1) | |
QED. | |
This is important because when the trees are balanced, height(s) = log n. | |
So we can perform retrieval in logarithmic time. | |
-} | |
-- ============================================================================= | |
-- 12.6 Balanced Trees | |
balanced :: SearchTree k d -> Bool | |
balanced Leaf = True | |
balanced (Node _k _v l r) = balanced l && balanced r && 1 >= (abs $ heightLeft - heightRight) | |
where heightLeft = height l | |
heightRight = height r | |
-- Again, this is as inefficient as ordered. | |
-- Going from the leaf up would probably be much faster | |
-- Or, while doing balanced subtree, produce the height as well. ("Fusion" as Bird-Wadler call it) | |
{- We want an insertion operator with the following properties: | |
1. If the tree is ordered, inserting a new item shouldn't make it unordered. | |
i.e. forall s,k,d. ordered(s) -> ordered((k,d) `insert` s) | |
2. If the tree is balanced, inserting a new item shouldn't make it unbalanced. | |
i.e. forall s,k,d. balanced(s) -> balanced((k,d) `insert` s) | |
3. Shouldn't delete any item from the old tree. | |
i.e. forall s,k,d,x. x in S -> x in ((k,d) `insert` s) | |
4. The new item should be inserted. | |
i.e. forall s,k,d. k in ((k,d) `insert` s) | |
5. No other keys are inserted. | |
i.e. forall s,k,d, x. ((x not in s) and (x != k)) -> (x not in ((k,d) `insert` s) | |
-} | |
-- The way we'll do this is to insert a node in the appropriate location in the tree | |
-- and "rebalance" the tree by moving some subtrees around to maintain balance. | |
-- We'll need height extensively for the rebalancing operations | |
-- So we'll augment tree to store height as well. | |
data AVLTree k d= AVLLeaf | |
| AVLNode Integer k d (AVLTree k d) (AVLTree k d) | |
deriving (Eq, Show) | |
-------------------------------------------------------------------------------- | |
-- Case 1: Easy Right | |
{- Scenario: | |
Z (ht n + 3) | |
/ \ | |
(ht n + 2) x zR (ht n) | |
/ \ | |
xL xR | |
(ht n+1) (ht n) | |
Assuming that it's "outside left heavy" i.e. left of left is the one that is | |
problematic. | |
Then, we want to rotate it so that it looks like: | |
x | |
/ \ | |
xL z | |
/ \ | |
xR zR | |
Does this rotation satisfy the properties we talked about? | |
THEOREM: after easyRight, the tree is ordered. | |
From the original tree, we see that: | |
xL < x < xR < Z < zR | |
(where A < B, A and B are sets means everything in A is smaller than everything in B) | |
In the rotated tree, | |
xL is at the left of x. so that part is balanced | |
z is the right subtree of x. so x < z is retained. | |
xR is left subtree of z, so xr < z is retained. | |
zR is right subtree of z, so z < zr is also retained. | |
So, the tree still has the same order as before. | |
THEOREM: after easyRight, the tree is balanced. | |
xR had height n, zR had height n | |
=> this part balanced | |
=> height of z is n + 1 | |
xL had height n + 2, z has height n + 1 | |
=> balanced | |
=> height of x is n + 3 | |
THEOREM: No keys lost | |
If a key was in the original tree, it was in one of: x, xL, z, xR, zR | |
Each of those is still present in the new tree. | |
So can't have lost keys. | |
THEOREM: No keys added | |
Nothing other than x, xL, z, xR, zR is in the new tree. | |
So, no new keys have snuck in. | |
-} | |
easyRight :: AVLTree k d -> AVLTree k d | |
easyRight (AVLNode _hz zk zv (AVLNode _hx xk xv xl xr) zr) = AVLNode hx' xk xv xl newRight | |
where newRight = AVLNode hz' zk zv xr zr | |
hx' = parentHeight xl newRight | |
hz' = parentHeight xr zr | |
avlHeight :: AVLTree k d -> Integer | |
avlHeight AVLLeaf = 0 | |
avlHeight (AVLNode h _k _v _l _r) = h | |
parentHeight :: AVLTree k d -> AVLTree k d -> Integer | |
parentHeight left right = 1 + max (avlHeight left) (avlHeight right) | |
{- easyLeft is to be used in the outside right heavy case | |
i.e. right subtree of the right subtree has the largest height | |
Z x | |
/ \ / \ | |
zL x => Z xR | |
/ \ / \ | |
xL xR zL xL | |
-} | |
easyLeft :: AVLTree k d -> AVLTree k d | |
easyLeft (AVLNode _zh zk zv zL (AVLNode _xh xk xv xL xR)) = AVLNode xh' xk xv newLeft xR | |
where newLeft = AVLNode zh' zk zv zL xL | |
xh' = parentHeight newLeft xR | |
zh' = parentHeight zL xL | |
-------------------------------------------------------------------------------- | |
-- Rebalancing the hard cases | |
{- Hard cases are when the tallest part of the tree is on the inside. | |
Inside Right heavy case: | |
z (n + 3) z x | |
/ \ Easy / \ Easy / \ | |
(n) zL y (n + 2) => zL x => z y | |
/ \ Right / \ Left / \ / \ | |
(n+1) x yR (n) on y xL y on z zL xL xR yR | |
/ \ / \ | |
xL xR xR yR | |
n-1 n | |
(Still Unbalanced: 1) | |
* any one of xL and xR could be the larger one. So, one of the heights is n | |
1. height y : 1 + max (n, n) = n + 1 | |
height x = 1 + max (n - 1, n + 1) = n + 2 NOT BALNCED! | |
height z = 1 + max (n, n + 2) = n + 3 NOT BALANCED! | |
2. In the final tree, heights are: | |
height y: 1 + max (ht xr, ht yr) = 1 + max (n, n) = n + 1 | |
height z: 1 + max (ht zl, ht xl) = 1 + max (n, n -1) = 1 + n | |
height x: 1 + max (ht z, ht y) = 1 + max (n +1, n + 1) = n + 2 | |
Balanced! | |
Also, no new keys were added and no keys were thrown away. | |
so all properties still hold. | |
-} | |
hardRight :: AVLTree k d -> AVLTree k d | |
hardRight z@(AVLNode _zh zk zv x@(AVLNode _xh _xk _xv xL xR) zR) = | |
if avlHeight xL < avlHeight xR | |
then easyRight $ AVLNode zh' zk zv newLeft zR | |
else easyRight z | |
where zh' = parentHeight newLeft zR | |
newLeft = easyLeft x | |
hardLeft :: AVLTree k d -> AVLTree k d | |
hardLeft z@(AVLNode _zh zk zv zL y@(AVLNode _yh _yk _yv yL yR)) = | |
if avlHeight yR < avlHeight yL | |
then easyLeft $ AVLNode zh' zk zv zL newRight | |
else easyLeft z | |
where zh' = parentHeight zL newRight | |
newRight = easyRight y | |
{- Proving that these rotations conserve keys and maintain balance is | |
easy because we know that easyLeft and Right work. | |
-} | |
insert :: Ord k => k -> d -> AVLTree k d -> AVLTree k d | |
insert k d AVLLeaf = AVLNode 1 k d AVLLeaf AVLLeaf | |
insert k d (AVLNode xh xk xv xL xR) = | |
if k < xk -- Add to left node, balance if necessary | |
then if avlHeight newLeft > avlHeight xR + 1 | |
then hardRight addedLeft | |
else addedLeft | |
else if k > xk -- Add to balance if necessary | |
then if avlHeight newRight > avlHeight xL + 1 | |
then hardLeft addedRight | |
else addedRight | |
else (AVLNode xh xk d xL xR) -- Update existing Value | |
where addedLeft = AVLNode heightA xk xv newLeft xR | |
newLeft = insert k d xL | |
heightA = parentHeight newLeft xR | |
addedRight = AVLNode heightB xk xv xL newRight | |
newRight = insert k d xR | |
heightB = parentHeight xL newRight | |
-- Let's make tree with numbers 1 .. 16 inserted in order. | |
-- If the insert works as promised, we should have a nice balanced tree. | |
-- If not, we get a chain. | |
makeTree :: Integer -> AVLTree Integer Integer | |
makeTree n = foldr avlInsert AVLLeaf [1..n] | |
where avlInsert new tree = insert new new tree | |
tinyTree, smallTree, normalTree, largeTree, hugeTree :: AVLTree Integer Integer | |
tinyTree = makeTree 16 | |
smallTree = makeTree 256 | |
normalTree = makeTree 1024 | |
largeTree = makeTree 10000 | |
hugeTree = makeTree 25000 | |
{- Let's see the heights of these trees: | |
*AVL> avlHeight tinyTree | |
5 | |
*AVL> avlHeight smallTree | |
9 | |
*AVL> avlHeight normalTree | |
11 | |
*AVL> avlHeight largeTree | |
14 | |
*AVL> avlHeight hugeTree | |
15 | |
-- JOY TO THE WORLD! | |
-} | |
avlMember :: Ord k => k -> AVLTree k d -> Maybe d | |
avlMember _ AVLLeaf = Nothing | |
avlMember k (AVLNode _h k' v l r) | |
| k == k' = Just v | |
| k < k' = avlMember k l | |
| otherwise = avlMember k r | |
{- Tiny correctness check | |
*AVL> avlMember 0 hugeTree | |
Nothing | |
*AVL> avlMember 100 hugeTree | |
Just 100 | |
*AVL> avlMember 24999 hugeTree | |
Just 24999 | |
*AVL> avlMember 25000 hugeTree | |
Just 25000 | |
-- Hallelujah! | |
-} | |
-------------------------------------------------------------------------------- | |
{- Deleting keys while maintaining order | |
Easy Case: Want to remove a node whose left child is a leaf i.e. | |
Z | |
\ | |
X <- want to delete | |
\ | |
Xr | |
In this case, just replace X with Xr | |
Harder case: Want to remove a node whose left child is a tree | |
X | |
/ \ | |
Xl Xr | |
Strategy: Find the maximum of the Xl Subtree and replace X with it | |
Correct because that elt will be larger than every other in Xl, but still smaller than Xr | |
To do this, we need to go along the right child of Xl until we find one whose right child is a leaf | |
Then, it's parent's right child becomes the left child of the max | |
Move the max to the top. | |
(Sounds confusing but the code will clarify the idea): | |
-} | |
-- we'll return a 3-tuple containing key, value of max and the new tree with the max removed | |
shrink :: AVLTree k d -> (k, d, AVLTree k d) | |
shrink (AVLNode _h k d l AVLLeaf) = (k, d, l) | |
shrink (AVLNode _h k d l r) = (ans_key, ans_val, ans_shrunken) | |
where (ans_key, ans_val, shr) = shrink r | |
rot_shr = if avlHeight l > avlHeight shr + 1 then hardRight shr else shr -- <- balance the shrunken if necessary | |
ans_shrunken = AVLNode (parentHeight l rot_shr) k d l rot_shr -- <- attach shrunken to this node. | |
{- Proofs of correctness | |
1. Shrink preserves Balance | |
Let nodex be an AVL Node. | |
If balanced nodex and (k, d, shr) = shrink nodex then shr is balanced. | |
Proof: By structural induction on the right child of nodex | |
Base Case: Right child is a leaf | |
Then, shr = left child of nodex. Since nodex is balanced, left child is balanced as well. | |
Inductive case: Right child is a AVLNode (call it "r") | |
Inductive hypothesis: shrink r is balanced. | |
Since nodex is balanced before, height of l = 1 +/- height of r | |
When we do shrink, we remove a node. So, the height of l could be 2 more than r, 1 more than r or equal to r | |
Only the first scenario is troublesome. | |
We know that if the left subtree is heavier than the right, we can do a hardRight rotation to balance the tree. | |
So, the left and right subtrees of nodex are now balanced (by correctness of hardRight) | |
so, balancedness is not lost when we shrink | |
2. Shrink preserves order | |
i.e. ordered(nodex) and (k, d, shr) = shrink nodex => ordered(shr) and (forall k' in shr, k > shr) | |
Proof by induction. | |
Base case: Right node is a leaf | |
Then, (k, d, shr) = (k, d, l) where l is the left child | |
Since nodex was ordered, k > k' for all k' in its left child, and the left child must have been ordered as well. | |
So, both conditions hold | |
Inductive case: Right node is a AVLNode, call it "r" | |
Inductive hypothesis: if (k, d, shr) is returned by the node below, we know that everything below the node itself is balanced | |
and that k is larger than every key below. | |
Since the "k" came from the right child of this node, it is larger than this node, and everything in the left subtree. | |
Since the tree was originally ordered, everything in the shrunken subtree is larger than this node (and everything in the left). | |
So, order is still preserved. | |
3. Shrink doesn't add any keys. | |
i.e. If k not in nodex before means k not in the shr returned by shrink nodex. | |
Base case: If not in nodex, then not in nodex's left child as well. So, returning left child doesn't break correctness. | |
Inductive case: | |
The left child of the new node is the same as the previous left. So can't have added a new key. | |
The right child is the right child returned from the level below. This doesn't add key by induction hypothesis. | |
The node's key is the same as before, so still correct. | |
4. Shrink doesn't remove any keys. | |
i.e. if (k,d, shr) was returned by shrink nodex, then all the {k} U keys(shr) = keys(nodex) | |
Base case: | |
Right child is a leaf (no keys), return (k, d, left) where k is the root's key. | |
So, haven't lost any keys. | |
Inductive case: | |
Hypothesis: The tuple received from the level below has all the keys from that subtree. | |
Again, no keys lost from the left side because we join it unchanged | |
We make a new tree with this node at the root and the shr from below as the right child. | |
At this state, we have accounted for all keys except the one in the tuple that was passed. | |
That key is passed in the tuple by this node as well. | |
So, haven't lost any keys. | |
-} | |
deleteRoot :: AVLTree k d -> AVLTree k d | |
deleteRoot (AVLNode _h _k _v AVLLeaf r) = r | |
deleteRoot (AVLNode _h _k _v l@(AVLNode _hl _kl _vl _ll rl) r) = rotNew | |
where rotNew = if avlHeight r > avlHeight shr + 1 then hardLeft new else new | |
new = AVLNode (parentHeight shr rl) xk xv shr r -- Book has a typo here: it tries to add rl instead of r | |
(xk, xv, shr) = shrink l | |
-- now we can write delete | |
delete :: (Ord k) => k -> AVLTree k d -> AVLTree k d | |
delete _key AVLLeaf = AVLLeaf | |
delete key n@(AVLNode _h k d l r) | |
| key == k = deleteRoot n | |
| key < k = if avlHeight r > avlHeight newL + 1 | |
then hardLeft removedLeft | |
else removedLeft | |
| otherwise = if avlHeight l > avlHeight newR + 1 | |
then hardRight removedRight | |
else removedRight | |
where newL = key `delete` l | |
newR = key `delete` r | |
removedLeft = AVLNode (parentHeight newL r) k d newL r | |
removedRight = AVLNode (parentHeight l newR) k d l newR | |
{- *AVL> avlMember 1 (makeTree 10) | |
Just 1 | |
*AVL> avlMember 1 (1 `delete` (makeTree 10)) | |
Nothing | |
-} | |
-- ============================================================================= | |
-- Conclusion | |
-- Proofs of many theorems were left out, many proofs were "less than formal" | |
-- Problem: would require thousands of logic formulas to complete such proofs | |
-- have to use theorem provers such as ACL2. | |
-- ACL2 can not only verify proofs, but also construct many inductive proofs | |
-- "Our hope is that you can use proof assistants" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment