Created
November 28, 2011 03:46
-
-
Save lazyval/1399008 to your computer and use it in GitHub Desktop.
Proof-of-concept of Huffman coding algorithm in Scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.github.omnomnom | |
import scala.collection.mutable._ | |
// 28 nov 2011 | |
// Golikov Konstantine | |
// [email protected] | |
abstract class Node extends Ordered[Node] { | |
val w: Double | |
def compare(that: Node) = that.w.compare(w) | |
}; | |
case class InnerNode(val w: Double, val left: Node, val right: Node) extends Node | |
case class Leaf(val l: Char, val w: Double) extends Node | |
object Huffman { | |
//Right now coding table is hardcoded | |
//could be changed latter | |
private val freq = Map( | |
' ' -> 0.174, | |
'о' -> 0.090, | |
'е' -> 0.072, | |
'ё' -> 0.072, | |
'а' -> 0.062, | |
'и' -> 0.062, | |
'т' -> 0.053, | |
'н' -> 0.053, | |
'с' -> 0.045, | |
'р' -> 0.040, | |
'в' -> 0.038, | |
'л' -> 0.035, | |
'к' -> 0.028, | |
'м' -> 0.026, | |
'д' -> 0.025, | |
'п' -> 0.023, | |
'у' -> 0.012, | |
'я' -> 0.018, | |
'ы' -> 0.016, | |
'з' -> 0.016, | |
'ь' -> 0.014, | |
'ъ' -> 0.014, | |
'б' -> 0.014, | |
'г' -> 0.013, | |
'ч' -> 0.012, | |
'й' -> 0.010, | |
'х' -> 0.009, | |
'ж' -> 0.007, | |
'ю' -> 0.006, | |
'ш' -> 0.006, | |
'ц' -> 0.004, | |
'щ' -> 0.003, | |
'э' -> 0.003, | |
'ф' -> 0.002 | |
) | |
//Will hold all the input values in sorted order | |
private val Q = new PriorityQueue[Node] | |
//Rebuild flat Queue to tree according to node weighs | |
Q ++= freq.map { | |
case (l, w) => Leaf(l, w) | |
} | |
while (Q.length > 1) { | |
val n1 = Q.dequeue | |
val n2 = Q.dequeue | |
Q.enqueue(InnerNode(n1.w + n2.w, n1, n2)) | |
} | |
//this method takes tree and builds codetable | |
private def huffman(node: Node, code: String = "") { | |
node match { | |
case InnerNode(_, left, right) => huffman(left, code + 0); huffman(right, code + 1) | |
case Leaf(l, _) => encodeMap += (l -> code) | |
} | |
} | |
private lazy val encodeMap = new HashMap[Char, String] | |
private lazy val decodeMap = encodeMap.toList.map(_.swap).toMap | |
huffman(Q.head) | |
val maxCodeLength = encodeMap.toList.map(_._2.length).max | |
private def dec(s: String): List[Char] = { | |
val lookup = (1 to maxCodeLength).find(i => decodeMap.get(s.take(i)).isDefined) | |
lookup match { | |
case Some(id) => decodeMap.get(s.take(id)).get :: dec(s.drop(id)) | |
case None => Nil | |
} | |
} | |
def decode(s: String) = { | |
require(s.distinct.diff("01").length == 0, "Decoder can only accept binary string that contains 0 or 1") | |
dec(s).mkString | |
} | |
def encode(s: String) = { | |
s.map(encodeMap.getOrElse(_, "--------")).flatten.mkString | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment