Skip to content

Instantly share code, notes, and snippets.

@hrj
Created October 10, 2015 20:03
Show Gist options
  • Save hrj/66ed4c16eb5028963666 to your computer and use it in GitHub Desktop.
Save hrj/66ed4c16eb5028963666 to your computer and use it in GitHub Desktop.
huffman estimation
// Huffman estimation given symbol counts
val symbolCountsIn = Array(57728,32858,49395,15703,26381,4506,5797,1129,958,392,164,278,61,206,30,193,19,172,9,150,4,91,4,91,1,71,4,44,2,29,1,28,1,32,0,15,0,8,0,12,0,11,0,15,0,6,0,4,0,0,0,2,0,1,0,2)
case class Symbol(count:Int, idx: Int)
sealed trait TreeNode {
def value:Int
}
case class SymbolNode(sym: Symbol) extends TreeNode {
def value = sym.count
}
case class JointNode(left:TreeNode, right:TreeNode) extends TreeNode {
def value = left.value + right.value
}
val symbols = symbolCountsIn.filter(_ > 0).zipWithIndex.map(ci => Symbol(ci._1, ci._2)).sortWith((a:Symbol,b:Symbol) => a.count < b.count)
var nodes:Seq[TreeNode] = symbols.map(s => SymbolNode(s)).toSeq
while (nodes.length > 1) {
val first = nodes(0)
val second = nodes(1)
nodes = nodes.drop(2)
nodes :+= JointNode(first, second)
nodes = nodes.sortWith(_.value < _.value)
}
var count = 0
def huffman(node: TreeNode, prefix: String) {
node match {
case SymbolNode(sym) =>
println(sym + " -> " + prefix)
count += (sym.count * prefix.length);
case JointNode(left, right) =>
huffman(left, prefix + "0")
huffman(right, prefix + "1")
}
}
huffman(nodes.head, "")
println("bit count: " + count)
println("byte count: " + (count/8))
println("Symbol count: " + symbolCountsIn.sum)
println("bits per symbol: " + (count.toDouble / symbolCountsIn.sum))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment