Skip to content

Instantly share code, notes, and snippets.

@cosmez
Created April 13, 2018 23:42
Show Gist options
  • Save cosmez/b49da568674c54191e89b1da760939b2 to your computer and use it in GitHub Desktop.
Save cosmez/b49da568674c54191e89b1da760939b2 to your computer and use it in GitHub Desktop.
Huffman Encoding
#lang racket/base
(require racket/list)
(require racket/match)
;; LEAFS HERE
;; a leaf is a simple tagged list with the symbol 'leaf
;; as its first element, the second is the symbol and the
;; third is the weight
;; symbol? natural-number/c -> leaf?
(define (make-leaf symbol weight)
(list 'leaf symbol weight))
;; is the list a leaf?
;; list? -> boolean?
(define (leaf? element)
(eq? (first element) 'leaf))
;; leaf? -> symbol?
(define symbol-leaf second)
;; leaf? -> natural-number/c
(define weight-leaf third)
(module+ test
(require rackunit)
(define lf '(leaf B 3))
(check-equal? (leaf? lf) #t)
(check-equal? (symbol-leaf lf) 'B)
(check-equal? (weight-leaf lf) 3))
;; BRANCHES HERE
;; a code tree is made out of 4 parts
;; the left and the right branches
;; the symbols from both branches
;; and the weight from both branches
;; (oc/c leaf? tree?) (oc/c leaf? tree?) -> tree?
(define (make-code-tree left right)
(list left
right
(append (symbols left)
(symbols right))
(+ (weight left)
(weight right))))
;; Gets the left or right element from a tree
;; the result can be either a leaf or tree
;; tree? -> (oc/c leaf? tree?)
(define left-branch first)
(define right-branch second)
;; returns the symbols from a tree or a leaf
;; tree? -> symbol?
(define (symbols tree)
(if (leaf? tree)
(list (symbol-leaf tree))
(third tree)))
;; returns the weight of a tree or a single leaf
;; tree? -> natural-number/c
(define (weight tree)
(if (leaf? tree)
(weight-leaf tree)
(fourth tree)))
(module+ test
(define cleaf '(leaf C 1))
(define dleaf '(leaf D 2))
(define code-tree (make-code-tree dleaf cleaf))
(check-equal? (symbols code-tree) '(D C))
(check-equal? (weight code-tree) 3)
(check-equal? (left-branch code-tree) '(leaf D 2))
(check-equal? (right-branch code-tree) '(leaf C 1))
(check-equal? (make-code-tree dleaf cleaf) '((leaf D 2) (leaf C 1) (D C) 3)))
;;sort the words
;;recursively make the tree by joining the smallest ones first
;; (oc/c (listof leaf?) tree?) -> tree?
(define (recur-tree els)
(cond
[(equal? (length els) 2) (make-code-tree (second els) (first els))]
[else
;;first code tree the first 2 elements
(define code-tree (make-code-tree (second els) (first els)))
(recur-tree (cons code-tree (cddr els)))]))
;; build a tree from a unordered list of leafs
;; by first sorting the leafs by weight
;; (listof leaf?) -> tree?
(define (generate-huffman-tree leafs)
(define sorted-leafs (sort leafs (λ (1st 2nd) (< (weight-leaf 1st) (weight-leaf 2nd)))))
(recur-tree sorted-leafs))
(module+ test
(define unsorted-tree
'((leaf C 1) (leaf B 3) (leaf E 2) (leaf H 1) (leaf D 1) (leaf F 1) (leaf A 8) (leaf G 1)))
(define built-tree (generate-huffman-tree unsorted-tree))
(check-equal? (generate-huffman-tree unsorted-tree) '((leaf A 8)
((leaf B 3)
((leaf E 2)
((leaf G 1)
((leaf F 1)
((leaf D 1)
((leaf H 1) (leaf C 1) (H C) 2)
(D H C) 3) (F D H C) 4)
(G F D H C) 5)
(E G F D H C) 7) (B E G F D H C) 10)
(A B E G F D H C) 18))
(check-equal? (weight built-tree) 18)
(check-equal? (symbols built-tree) '(A B E G F D H C)))
;;decoding
;;moves into a branch according to the bit
;; tree? bit? -> (or/c leaf? tree?)
(define (move-branch tree bit)
(when (leaf? tree) (error "Cant get inside a leaf"))
(match bit
[0 (left-branch tree)]
[1 (right-branch tree)]))
;; moves within a tree using the specified bits
;; it returns the branch and the remaining bits to decode
;; tree? (listof bit?) -> list?
(define (move-tree-and-remainder tree bits)
(define bit (first bits))
(define remaining (rest bits))
(define branch (move-branch tree bit))
(if (leaf? branch)
(list (move-branch tree bit) remaining)
(if (null? remaining)
(error "bad encoding bits, last couldnt not find element")
(move-tree-and-remainder branch remaining))))
(define (consume-tree tree bits output)
(match (move-tree-and-remainder tree bits)
[(list leaf '())
(reverse (cons leaf output))]
[(list leaf remaining)
(consume-tree tree remaining (cons leaf output))]))
;; decodes the message for the tree with the specified bits
;; tree? (listof bit?) -> (listof leaf?)
(define (decode-huffman-tree tree bits)
(map second (consume-tree tree bits '())))
;; finds the bit representation for a given symbol in the tree
;; symbol? tree? -> (listof bit?)
(define (find-symbol-bits symbol tree previous-bits)
(match tree
[(list 'leaf leaf-symbol leaf-weight)
(if (eq? symbol leaf-symbol)
previous-bits
(error "Couldnt find symbol in tree" symbol tree))]
[(list left-branch right-branch symbols weight)
;; if the left branch has the symbol, return the the bits
(if (and (leaf? left-branch) (equal? (symbol-leaf left-branch) symbol))
(append previous-bits '(0))
(find-symbol-bits symbol right-branch (append previous-bits '(1))))]))
(define (encode-message message tree output)
(cond
[(empty? message) output]
[else
(define message-symbol (first message))
(define remaining (rest message))
(define bits (find-symbol-bits message-symbol tree '()))
(encode-message remaining tree (append output bits))]))
;; encodes a message with a huffman tree
;; returning a list of bits containing the message
;; (listof symbol?) tree? -> (listof bit?)
(define (encode-huffman-message message tree)
(encode-message message tree '()))
(module+ test
(define tree '((leaf A 8)
((leaf B 3)
((leaf E 2)
((leaf G 1)
((leaf F 1)
((leaf D 1)
((leaf H 1) (leaf C 1) (H C) 2)
(D H C) 3) (F D H C) 4)
(G F D H C) 5)
(E G F D H C) 7) (B E G F D H C) 10)
(A B E G F D H C) 18))
(define bits '(0 1 1 0 0 1 0 1 0 1 1 1 0))
(define message '(A C B D A A D B C C C))
(check-equal? (move-branch tree 0) '(leaf A 8))
(check-exn exn:fail? (λ () (move-branch '(leaf A 8) 1)))
(check-equal? (move-branch tree 1) '((leaf B 3)
((leaf E 2)
((leaf G 1)
((leaf F 1)
((leaf D 1)
((leaf H 1) (leaf C 1) (H C) 2)
(D H C) 3) (F D H C) 4)
(G F D H C) 5)
(E G F D H C) 7) (B E G F D H C) 10))
(check-equal? (decode-huffman-tree tree bits)
'(A E A B B G))
(check-equal? (find-symbol-bits 'E tree '()) '(1 1 0))
(check-equal? (find-symbol-bits 'A tree '()) '(0))
(check-exn exn:fail? (λ () (find-symbol-bits 'T tree '())))
(define small-tree '((leaf A 4) (leaf B 2) (A B) 6))
(check-equal?
(encode-huffman-message '(A A B) small-tree) '(0 0 1))
(check-equal? (decode-huffman-tree small-tree (encode-huffman-message '(A A B) small-tree))
'(A A B)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment