Skip to content

Instantly share code, notes, and snippets.

@bedekelly
Created October 23, 2016 14:34
Show Gist options
  • Select an option

  • Save bedekelly/22afaebf8ef39d1f4e7396c0566644d4 to your computer and use it in GitHub Desktop.

Select an option

Save bedekelly/22afaebf8ef39d1f4e7396c0566644d4 to your computer and use it in GitHub Desktop.
Implementation of Huffman Coding in Haskell. N.B. this sends the frequency table, not the tree itself.
module Huffman where
import Data.List
import Data.Char
-- Extract the value from a Maybe type.
fromJust :: Maybe t -> t
fromJust (Just x) = x
fromJust Nothing = error "Can't retrive value from Nothing!"
-- Generate all characters in the ASCII charset.
chars = takeWhile isAscii $ map chr [0..]
-- Initialize a mapping of {char: frequency}
initialFrequency char = (char, 0)
initialFrequencies = map initialFrequency chars
-- Increment the value associated with a key in an associative list.
incrementByKey' key left ((k, v):right)
| k == key = left ++ ((k, v+1):right)
| otherwise = incrementByKey' key (left++[(k,v)]) right
incrementByKey key xs = incrementByKey' key [] xs
-- Generate an associative list of (char, freq) for each char in some text.
frequencies' acc [] = acc
frequencies' acc (x:xs) = frequencies' (incrementByKey x acc) xs
frequencies [] = initialFrequencies
frequencies xs = frequencies' initialFrequencies xs
-- Generate the above associative list, sorted ascendingly by frequency.
sortedFrequencies = sortOn snd . frequencies
-- Define a simple Bit type:
data Bit = Zero | One deriving (Eq, Ord)
instance Show Bit where
show Zero = "0"
show One = "1"
showList [] = id
showList (x:xs) = \out -> (show x) ++ showList xs out
-- Define a simple Tree type to work with.
data Tree = Leaf Char Int
| Branch Tree Tree
deriving Show
-- Define some helper functions to work with trees.
left (Branch l r) = l
right (Branch l r) = r
weight (Leaf _ w) = w
weight (Branch l r) = weight l + weight r
-- A helper function to take a character from a bitsequence.
takeChar (Leaf c _) bitsLeft = (c, bitsLeft)
takeChar (Branch l r) (b:bits) =
if b == One then takeChar r bits
else takeChar l bits
-- Given a tree and a bitsequence, output the characters.
decompress' tree [] chars = chars
decompress' tree bits chars = decompress' tree bitsLeft (char:chars)
where (char, bitsLeft) = (takeChar tree bits)
decompress tree bits = reverse (decompress' tree bits [])
-- Take single lowest element from two queues.
takeLowest (x:xs) (y:ys) =
if weight x < weight y then (x, xs, (y:ys))
else (y, (x:xs), ys)
takeLowest (x:xs) [] = (x, xs, [])
takeLowest [] (y:ys) = (y, [], ys)
-- Take two lowest elements from two queues.
takeTwoLowest xs ys = (lowest, sndLowest, xs'', ys'')
where (sndLowest, xs'', ys'') = takeLowest xs' ys'
(lowest, xs', ys') = takeLowest xs ys
-- Using the "two queues" algorithm to make a tree.
makeTree' [] (y:[]) = y
makeTree' xs ys =
makeTree' xs' (ys' ++ [Branch x y])
where (x, y, xs', ys') = takeTwoLowest xs ys
-- Create a tree, either from frequencies or from a string.
makeLeaves = map $ uncurry Leaf
makeTreeFromFrequencies xs = makeTree' (makeLeaves xs) []
makeTree xs = makeTree' (makeLeaves $ sortedFrequencies xs) []
-- Create an encoding table by doing a depth-first search on a tree.
makeTable' (Leaf c _) s = [(c, s)]
makeTable' (Branch l r) s = leftChars ++ rightChars
where leftChars = makeTable' l (s++[Zero])
rightChars = makeTable' r (s++[One])
makeTable tree = makeTable' tree []
-- Helper function to encode a single character.
encodeChar ch encodingTable = fromJust (lookup ch encodingTable)
-- Encode a string by building a frequency table, then a Huffman tree, and
-- finally an encoding table. Only requires the string to encode as an arg.
encode' str encodingTable = concat [encodeChar ch encodingTable | ch<-str]
encode str = (encode' str encodingTable, freqs)
where encodingTable = makeTable encodingTree
encodingTree = makeTreeFromFrequencies freqs
freqs = sortedFrequencies str
-- Decode a bit-sequence using the given frequency table.
decode' bits frequencies = decompress encodingTree bits
where encodingTree = makeTreeFromFrequencies frequencies
decode (bits, frequencies) = decode' bits frequencies
-- Check that for some `t`, encoding it and decoding it again has no effect.
test t = (decode $ encode t) == t
{-
Helper functions for demonstrating this module's usefulness.
bitsSaved: the number of bits this saves over no (ASCII) encoding.
testUseful: check that this number of bits < 0
testEncodable: check that some text can be encoded such that testUseful is True
Assumptions:
* It takes 4*8=32 bits to represent an entry in the frequency table
* Unencoded characters use 7 bits (i.e. ASCII)
-}
bitsSaved text bits freqs = length text * 7 - length bits - (32 * length freqs)
testUseful text bits freqs = bitsSaved text bits freqs > 0
testEncodable text = testUseful text bits freqs
where (bits, freqs) = encode text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment