Created
April 13, 2018 23:42
-
-
Save cosmez/b49da568674c54191e89b1da760939b2 to your computer and use it in GitHub Desktop.
Huffman Encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket/base | |
(require racket/list) | |
(require racket/match) | |
;; LEAFS HERE | |
;; a leaf is a simple tagged list with the symbol 'leaf | |
;; as its first element, the second is the symbol and the | |
;; third is the weight | |
;; symbol? natural-number/c -> leaf? | |
(define (make-leaf symbol weight) | |
(list 'leaf symbol weight)) | |
;; is the list a leaf? | |
;; list? -> boolean? | |
(define (leaf? element) | |
(eq? (first element) 'leaf)) | |
;; leaf? -> symbol? | |
(define symbol-leaf second) | |
;; leaf? -> natural-number/c | |
(define weight-leaf third) | |
(module+ test | |
(require rackunit) | |
(define lf '(leaf B 3)) | |
(check-equal? (leaf? lf) #t) | |
(check-equal? (symbol-leaf lf) 'B) | |
(check-equal? (weight-leaf lf) 3)) | |
;; BRANCHES HERE | |
;; a code tree is made out of 4 parts | |
;; the left and the right branches | |
;; the symbols from both branches | |
;; and the weight from both branches | |
;; (oc/c leaf? tree?) (oc/c leaf? tree?) -> tree? | |
(define (make-code-tree left right) | |
(list left | |
right | |
(append (symbols left) | |
(symbols right)) | |
(+ (weight left) | |
(weight right)))) | |
;; Gets the left or right element from a tree | |
;; the result can be either a leaf or tree | |
;; tree? -> (oc/c leaf? tree?) | |
(define left-branch first) | |
(define right-branch second) | |
;; returns the symbols from a tree or a leaf | |
;; tree? -> symbol? | |
(define (symbols tree) | |
(if (leaf? tree) | |
(list (symbol-leaf tree)) | |
(third tree))) | |
;; returns the weight of a tree or a single leaf | |
;; tree? -> natural-number/c | |
(define (weight tree) | |
(if (leaf? tree) | |
(weight-leaf tree) | |
(fourth tree))) | |
(module+ test | |
(define cleaf '(leaf C 1)) | |
(define dleaf '(leaf D 2)) | |
(define code-tree (make-code-tree dleaf cleaf)) | |
(check-equal? (symbols code-tree) '(D C)) | |
(check-equal? (weight code-tree) 3) | |
(check-equal? (left-branch code-tree) '(leaf D 2)) | |
(check-equal? (right-branch code-tree) '(leaf C 1)) | |
(check-equal? (make-code-tree dleaf cleaf) '((leaf D 2) (leaf C 1) (D C) 3))) | |
;;sort the words | |
;;recursively make the tree by joining the smallest ones first | |
;; (oc/c (listof leaf?) tree?) -> tree? | |
(define (recur-tree els) | |
(cond | |
[(equal? (length els) 2) (make-code-tree (second els) (first els))] | |
[else | |
;;first code tree the first 2 elements | |
(define code-tree (make-code-tree (second els) (first els))) | |
(recur-tree (cons code-tree (cddr els)))])) | |
;; build a tree from a unordered list of leafs | |
;; by first sorting the leafs by weight | |
;; (listof leaf?) -> tree? | |
(define (generate-huffman-tree leafs) | |
(define sorted-leafs (sort leafs (λ (1st 2nd) (< (weight-leaf 1st) (weight-leaf 2nd))))) | |
(recur-tree sorted-leafs)) | |
(module+ test | |
(define unsorted-tree | |
'((leaf C 1) (leaf B 3) (leaf E 2) (leaf H 1) (leaf D 1) (leaf F 1) (leaf A 8) (leaf G 1))) | |
(define built-tree (generate-huffman-tree unsorted-tree)) | |
(check-equal? (generate-huffman-tree unsorted-tree) '((leaf A 8) | |
((leaf B 3) | |
((leaf E 2) | |
((leaf G 1) | |
((leaf F 1) | |
((leaf D 1) | |
((leaf H 1) (leaf C 1) (H C) 2) | |
(D H C) 3) (F D H C) 4) | |
(G F D H C) 5) | |
(E G F D H C) 7) (B E G F D H C) 10) | |
(A B E G F D H C) 18)) | |
(check-equal? (weight built-tree) 18) | |
(check-equal? (symbols built-tree) '(A B E G F D H C))) | |
;;decoding | |
;;moves into a branch according to the bit | |
;; tree? bit? -> (or/c leaf? tree?) | |
(define (move-branch tree bit) | |
(when (leaf? tree) (error "Cant get inside a leaf")) | |
(match bit | |
[0 (left-branch tree)] | |
[1 (right-branch tree)])) | |
;; moves within a tree using the specified bits | |
;; it returns the branch and the remaining bits to decode | |
;; tree? (listof bit?) -> list? | |
(define (move-tree-and-remainder tree bits) | |
(define bit (first bits)) | |
(define remaining (rest bits)) | |
(define branch (move-branch tree bit)) | |
(if (leaf? branch) | |
(list (move-branch tree bit) remaining) | |
(if (null? remaining) | |
(error "bad encoding bits, last couldnt not find element") | |
(move-tree-and-remainder branch remaining)))) | |
(define (consume-tree tree bits output) | |
(match (move-tree-and-remainder tree bits) | |
[(list leaf '()) | |
(reverse (cons leaf output))] | |
[(list leaf remaining) | |
(consume-tree tree remaining (cons leaf output))])) | |
;; decodes the message for the tree with the specified bits | |
;; tree? (listof bit?) -> (listof leaf?) | |
(define (decode-huffman-tree tree bits) | |
(map second (consume-tree tree bits '()))) | |
;; finds the bit representation for a given symbol in the tree | |
;; symbol? tree? -> (listof bit?) | |
(define (find-symbol-bits symbol tree previous-bits) | |
(match tree | |
[(list 'leaf leaf-symbol leaf-weight) | |
(if (eq? symbol leaf-symbol) | |
previous-bits | |
(error "Couldnt find symbol in tree" symbol tree))] | |
[(list left-branch right-branch symbols weight) | |
;; if the left branch has the symbol, return the the bits | |
(if (and (leaf? left-branch) (equal? (symbol-leaf left-branch) symbol)) | |
(append previous-bits '(0)) | |
(find-symbol-bits symbol right-branch (append previous-bits '(1))))])) | |
(define (encode-message message tree output) | |
(cond | |
[(empty? message) output] | |
[else | |
(define message-symbol (first message)) | |
(define remaining (rest message)) | |
(define bits (find-symbol-bits message-symbol tree '())) | |
(encode-message remaining tree (append output bits))])) | |
;; encodes a message with a huffman tree | |
;; returning a list of bits containing the message | |
;; (listof symbol?) tree? -> (listof bit?) | |
(define (encode-huffman-message message tree) | |
(encode-message message tree '())) | |
(module+ test | |
(define tree '((leaf A 8) | |
((leaf B 3) | |
((leaf E 2) | |
((leaf G 1) | |
((leaf F 1) | |
((leaf D 1) | |
((leaf H 1) (leaf C 1) (H C) 2) | |
(D H C) 3) (F D H C) 4) | |
(G F D H C) 5) | |
(E G F D H C) 7) (B E G F D H C) 10) | |
(A B E G F D H C) 18)) | |
(define bits '(0 1 1 0 0 1 0 1 0 1 1 1 0)) | |
(define message '(A C B D A A D B C C C)) | |
(check-equal? (move-branch tree 0) '(leaf A 8)) | |
(check-exn exn:fail? (λ () (move-branch '(leaf A 8) 1))) | |
(check-equal? (move-branch tree 1) '((leaf B 3) | |
((leaf E 2) | |
((leaf G 1) | |
((leaf F 1) | |
((leaf D 1) | |
((leaf H 1) (leaf C 1) (H C) 2) | |
(D H C) 3) (F D H C) 4) | |
(G F D H C) 5) | |
(E G F D H C) 7) (B E G F D H C) 10)) | |
(check-equal? (decode-huffman-tree tree bits) | |
'(A E A B B G)) | |
(check-equal? (find-symbol-bits 'E tree '()) '(1 1 0)) | |
(check-equal? (find-symbol-bits 'A tree '()) '(0)) | |
(check-exn exn:fail? (λ () (find-symbol-bits 'T tree '()))) | |
(define small-tree '((leaf A 4) (leaf B 2) (A B) 6)) | |
(check-equal? | |
(encode-huffman-message '(A A B) small-tree) '(0 0 1)) | |
(check-equal? (decode-huffman-tree small-tree (encode-huffman-message '(A A B) small-tree)) | |
'(A A B))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment