Skip to content

Instantly share code, notes, and snippets.

@lazyval
Created November 28, 2011 03:46
Show Gist options
  • Save lazyval/1399008 to your computer and use it in GitHub Desktop.
Save lazyval/1399008 to your computer and use it in GitHub Desktop.
Proof-of-concept of Huffman coding algorithm in Scala
package com.github.omnomnom
import scala.collection.mutable._
// 28 nov 2011
// Golikov Konstantine
// [email protected]
abstract class Node extends Ordered[Node] {
val w: Double
def compare(that: Node) = that.w.compare(w)
};
case class InnerNode(val w: Double, val left: Node, val right: Node) extends Node
case class Leaf(val l: Char, val w: Double) extends Node
object Huffman {
//Right now coding table is hardcoded
//could be changed latter
private val freq = Map(
' ' -> 0.174,
'о' -> 0.090,
'е' -> 0.072,
'ё' -> 0.072,
'а' -> 0.062,
'и' -> 0.062,
'т' -> 0.053,
'н' -> 0.053,
'с' -> 0.045,
'р' -> 0.040,
'в' -> 0.038,
'л' -> 0.035,
'к' -> 0.028,
'м' -> 0.026,
'д' -> 0.025,
'п' -> 0.023,
'у' -> 0.012,
'я' -> 0.018,
'ы' -> 0.016,
'з' -> 0.016,
'ь' -> 0.014,
'ъ' -> 0.014,
'б' -> 0.014,
'г' -> 0.013,
'ч' -> 0.012,
'й' -> 0.010,
'х' -> 0.009,
'ж' -> 0.007,
'ю' -> 0.006,
'ш' -> 0.006,
'ц' -> 0.004,
'щ' -> 0.003,
'э' -> 0.003,
'ф' -> 0.002
)
//Will hold all the input values in sorted order
private val Q = new PriorityQueue[Node]
//Rebuild flat Queue to tree according to node weighs
Q ++= freq.map {
case (l, w) => Leaf(l, w)
}
while (Q.length > 1) {
val n1 = Q.dequeue
val n2 = Q.dequeue
Q.enqueue(InnerNode(n1.w + n2.w, n1, n2))
}
//this method takes tree and builds codetable
private def huffman(node: Node, code: String = "") {
node match {
case InnerNode(_, left, right) => huffman(left, code + 0); huffman(right, code + 1)
case Leaf(l, _) => encodeMap += (l -> code)
}
}
private lazy val encodeMap = new HashMap[Char, String]
private lazy val decodeMap = encodeMap.toList.map(_.swap).toMap
huffman(Q.head)
val maxCodeLength = encodeMap.toList.map(_._2.length).max
private def dec(s: String): List[Char] = {
val lookup = (1 to maxCodeLength).find(i => decodeMap.get(s.take(i)).isDefined)
lookup match {
case Some(id) => decodeMap.get(s.take(id)).get :: dec(s.drop(id))
case None => Nil
}
}
def decode(s: String) = {
require(s.distinct.diff("01").length == 0, "Decoder can only accept binary string that contains 0 or 1")
dec(s).mkString
}
def encode(s: String) = {
s.map(encodeMap.getOrElse(_, "--------")).flatten.mkString
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment