Created
October 23, 2016 14:34
-
-
Save bedekelly/22afaebf8ef39d1f4e7396c0566644d4 to your computer and use it in GitHub Desktop.
Implementation of Huffman Coding in Haskell. N.B. this sends the frequency table, not the tree itself.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module Huffman where | |
| import Data.List | |
| import Data.Char | |
| -- Extract the value from a Maybe type. | |
| fromJust :: Maybe t -> t | |
| fromJust (Just x) = x | |
| fromJust Nothing = error "Can't retrive value from Nothing!" | |
| -- Generate all characters in the ASCII charset. | |
| chars = takeWhile isAscii $ map chr [0..] | |
| -- Initialize a mapping of {char: frequency} | |
| initialFrequency char = (char, 0) | |
| initialFrequencies = map initialFrequency chars | |
| -- Increment the value associated with a key in an associative list. | |
| incrementByKey' key left ((k, v):right) | |
| | k == key = left ++ ((k, v+1):right) | |
| | otherwise = incrementByKey' key (left++[(k,v)]) right | |
| incrementByKey key xs = incrementByKey' key [] xs | |
| -- Generate an associative list of (char, freq) for each char in some text. | |
| frequencies' acc [] = acc | |
| frequencies' acc (x:xs) = frequencies' (incrementByKey x acc) xs | |
| frequencies [] = initialFrequencies | |
| frequencies xs = frequencies' initialFrequencies xs | |
| -- Generate the above associative list, sorted ascendingly by frequency. | |
| sortedFrequencies = sortOn snd . frequencies | |
| -- Define a simple Bit type: | |
| data Bit = Zero | One deriving (Eq, Ord) | |
| instance Show Bit where | |
| show Zero = "0" | |
| show One = "1" | |
| showList [] = id | |
| showList (x:xs) = \out -> (show x) ++ showList xs out | |
| -- Define a simple Tree type to work with. | |
| data Tree = Leaf Char Int | |
| | Branch Tree Tree | |
| deriving Show | |
| -- Define some helper functions to work with trees. | |
| left (Branch l r) = l | |
| right (Branch l r) = r | |
| weight (Leaf _ w) = w | |
| weight (Branch l r) = weight l + weight r | |
| -- A helper function to take a character from a bitsequence. | |
| takeChar (Leaf c _) bitsLeft = (c, bitsLeft) | |
| takeChar (Branch l r) (b:bits) = | |
| if b == One then takeChar r bits | |
| else takeChar l bits | |
| -- Given a tree and a bitsequence, output the characters. | |
| decompress' tree [] chars = chars | |
| decompress' tree bits chars = decompress' tree bitsLeft (char:chars) | |
| where (char, bitsLeft) = (takeChar tree bits) | |
| decompress tree bits = reverse (decompress' tree bits []) | |
| -- Take single lowest element from two queues. | |
| takeLowest (x:xs) (y:ys) = | |
| if weight x < weight y then (x, xs, (y:ys)) | |
| else (y, (x:xs), ys) | |
| takeLowest (x:xs) [] = (x, xs, []) | |
| takeLowest [] (y:ys) = (y, [], ys) | |
| -- Take two lowest elements from two queues. | |
| takeTwoLowest xs ys = (lowest, sndLowest, xs'', ys'') | |
| where (sndLowest, xs'', ys'') = takeLowest xs' ys' | |
| (lowest, xs', ys') = takeLowest xs ys | |
| -- Using the "two queues" algorithm to make a tree. | |
| makeTree' [] (y:[]) = y | |
| makeTree' xs ys = | |
| makeTree' xs' (ys' ++ [Branch x y]) | |
| where (x, y, xs', ys') = takeTwoLowest xs ys | |
| -- Create a tree, either from frequencies or from a string. | |
| makeLeaves = map $ uncurry Leaf | |
| makeTreeFromFrequencies xs = makeTree' (makeLeaves xs) [] | |
| makeTree xs = makeTree' (makeLeaves $ sortedFrequencies xs) [] | |
| -- Create an encoding table by doing a depth-first search on a tree. | |
| makeTable' (Leaf c _) s = [(c, s)] | |
| makeTable' (Branch l r) s = leftChars ++ rightChars | |
| where leftChars = makeTable' l (s++[Zero]) | |
| rightChars = makeTable' r (s++[One]) | |
| makeTable tree = makeTable' tree [] | |
| -- Helper function to encode a single character. | |
| encodeChar ch encodingTable = fromJust (lookup ch encodingTable) | |
| -- Encode a string by building a frequency table, then a Huffman tree, and | |
| -- finally an encoding table. Only requires the string to encode as an arg. | |
| encode' str encodingTable = concat [encodeChar ch encodingTable | ch<-str] | |
| encode str = (encode' str encodingTable, freqs) | |
| where encodingTable = makeTable encodingTree | |
| encodingTree = makeTreeFromFrequencies freqs | |
| freqs = sortedFrequencies str | |
| -- Decode a bit-sequence using the given frequency table. | |
| decode' bits frequencies = decompress encodingTree bits | |
| where encodingTree = makeTreeFromFrequencies frequencies | |
| decode (bits, frequencies) = decode' bits frequencies | |
| -- Check that for some `t`, encoding it and decoding it again has no effect. | |
| test t = (decode $ encode t) == t | |
| {- | |
| Helper functions for demonstrating this module's usefulness. | |
| bitsSaved: the number of bits this saves over no (ASCII) encoding. | |
| testUseful: check that this number of bits < 0 | |
| testEncodable: check that some text can be encoded such that testUseful is True | |
| Assumptions: | |
| * It takes 4*8=32 bits to represent an entry in the frequency table | |
| * Unencoded characters use 7 bits (i.e. ASCII) | |
| -} | |
| bitsSaved text bits freqs = length text * 7 - length bits - (32 * length freqs) | |
| testUseful text bits freqs = bitsSaved text bits freqs > 0 | |
| testEncodable text = testUseful text bits freqs | |
| where (bits, freqs) = encode text |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment