Last active
June 16, 2018 04:40
-
-
Save v3c70r/2250cefc1b2804c9e1fe940f5d680115 to your computer and use it in GitHub Desktop.
Java Huffman tree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.Map; | |
import java.util.Scanner; | |
public final class Huffman { | |
static class HuffmanRecord { | |
char ch = '\uffff'; | |
int occurrence = 0; | |
int firstOccurrenceIdx = 2147483647; | |
}; | |
static class OccNFirstOcc | |
{ | |
OccNFirstOcc(int o, int f) | |
{ | |
occ = o; | |
firstOcc = f; | |
} | |
int occ = 0; | |
int firstOcc = 2147483647; | |
}; | |
static TreeNode<HuffmanRecord> root; | |
static class HuffmanEncoder { | |
// ! Add a char to encoder | |
Map<Character, OccNFirstOcc> nodeMap = new HashMap<Character, OccNFirstOcc>(); | |
String text = new String(); | |
HashMap<Character, String> lookupMap; | |
public void addChar(char ch) { | |
text += ch; | |
if (!nodeMap.containsKey(ch)) { | |
nodeMap.put(ch, new OccNFirstOcc(1, text.length() - 1)); | |
} else { | |
nodeMap.get(ch).occ++; | |
} | |
} | |
// ! | |
public void constructTree() { | |
ArrayList<TreeNode<HuffmanRecord>> recs = new ArrayList<TreeNode<HuffmanRecord>>(); | |
for (Map.Entry<Character, OccNFirstOcc> entry : nodeMap.entrySet()) { | |
HuffmanRecord rec = new HuffmanRecord(); | |
rec.ch = entry.getKey(); | |
rec.occurrence = entry.getValue().occ; | |
rec.firstOccurrenceIdx = entry.getValue().firstOcc; | |
recs.add(new TreeNode<HuffmanRecord>(rec)); | |
} | |
// sort with lambda | |
recs.sort( | |
(p1, p2) -> | |
p2.data.occurrence - p1.data.occurrence == 0 | |
? p1.data.firstOccurrenceIdx - p2.data.firstOccurrenceIdx | |
: p2.data.occurrence - p1.data.occurrence); | |
while (recs.size() > 1) { | |
// pop out 2 smallest elements | |
// lesser on the left node, the other on the right node | |
TreeNode<HuffmanRecord> leftNode = recs.get(recs.size() - 1); | |
recs.remove(leftNode); | |
TreeNode<HuffmanRecord> rightNode = recs.remove(recs.size() - 1); | |
recs.remove(rightNode); | |
// compose a new node with those two; | |
HuffmanRecord newRecord = new HuffmanRecord(); | |
newRecord.occurrence = leftNode.data.occurrence + rightNode.data.occurrence; | |
TreeNode<HuffmanRecord> newNode = new TreeNode<HuffmanRecord>(newRecord); | |
newNode.lChild = leftNode; | |
leftNode.parent = newNode; | |
newNode.rChild = rightNode; | |
rightNode.parent = newNode; | |
recs.add(newNode); | |
// sort again after insertion | |
recs.sort( | |
(p1, p2) -> | |
p2.data.occurrence - p1.data.occurrence == 0 | |
? p1.data.firstOccurrenceIdx - p2.data.firstOccurrenceIdx | |
: p2.data.occurrence - p1.data.occurrence); | |
} | |
// the only one left in the array is root | |
assert recs.size() == 1; | |
root = recs.get(0); | |
// construct a hashmap for faster bit array lookup | |
lookupMap = constructHashmap(root, new HashMap<Character, String>(), ""); | |
} | |
private HashMap<Character, String> constructHashmap( | |
TreeNode<HuffmanRecord> node, | |
HashMap<Character, String> currentMap, | |
String currentBitArray) { | |
if (node.lChild != null) { | |
assert node.data.ch == '\uffff'; | |
constructHashmap(node.lChild, currentMap, currentBitArray + '0'); | |
} | |
if (node.rChild != null) { | |
assert node.data.ch == '\uffff'; | |
constructHashmap(node.rChild, currentMap, currentBitArray + '1'); | |
} | |
// only return at leaves | |
if (node.rChild == null && node.lChild == null) currentMap.put(node.data.ch, currentBitArray); | |
return currentMap; | |
} | |
public String encode() { | |
String res = new String(); | |
for (char ch : text.toCharArray()) | |
{ | |
res += lookupMap.get(ch); | |
} | |
return res; | |
} | |
}; | |
// main | |
public static void main(String[] args) { | |
HuffmanEncoder enc = new HuffmanEncoder(); | |
// Load from file | |
if (args.length > 0) { | |
FileInputStream is = null; | |
try { | |
is = new FileInputStream(args[0]); | |
int ch; | |
while ((ch = is.read()) != -1) { | |
enc.addChar((char) ch); | |
} | |
is.close(); | |
enc.constructTree(); | |
System.out.print(enc.encode()); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} finally {; | |
} | |
} else { | |
// read from stdin | |
Scanner scanner = new Scanner(System.in); | |
String input = scanner.next(); | |
for (char ch : input.toCharArray()) | |
enc.addChar(ch); | |
scanner.close(); | |
enc.constructTree(); | |
System.out.print(enc.encode()); | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class TreeNode<T> { | |
public T data; | |
public TreeNode<T> parent; | |
public TreeNode<T> rChild; | |
public TreeNode<T> lChild; | |
public TreeNode(T d) { | |
data = d; | |
rChild = null; | |
lChild = null; | |
parent = null; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment