Skip to content

Instantly share code, notes, and snippets.

@awendland
Last active August 29, 2015 14:24
Show Gist options
  • Save awendland/bbe975995b2e092045e9 to your computer and use it in GitHub Desktop.
Save awendland/bbe975995b2e092045e9 to your computer and use it in GitHub Desktop.
A huffman code table generator from a given input string
import java.math.BigInteger;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* Alex Wendland © 2015
*/
public class HuffmanCodesGenerator {
static class WeightedEntry {
Character val;
int freq;
public WeightedEntry(Character val, int freq) {
this.val = val;
this.freq = freq;
}
@Override
public String toString() {
return "[" + (val == null ? "null" : val.toString()) + "]: " + freq;
}
@Override
public int hashCode() {
int result = val != null ? val.hashCode() : 0;
result = 31 * result + freq;
return result;
}
}
static class WeightedNode extends WeightedEntry {
WeightedEntry left;
WeightedEntry right;
public WeightedNode(WeightedEntry smallest, WeightedEntry smaller) {
super(null, smallest.freq + smaller.freq);
left = smaller;
right = smallest;
}
}
static int weightedSort(WeightedEntry a, WeightedEntry b) {
return Integer.compare(a.freq, b.freq);
}
static WeightedEntry[] mergeSmallest(WeightedEntry[] wl) {
WeightedEntry smallest = wl[0];
WeightedEntry smaller = wl[1];
WeightedEntry[] arr = Arrays.copyOfRange(wl, 1, wl.length);
arr[0] = new WeightedNode(smallest, smaller);
Arrays.sort(arr, HuffmanCodesGenerator::weightedSort);
if (arr.length == 1)
return arr;
else
return mergeSmallest(arr);
}
static void generateHuffmanString(
WeightedEntry entry, int[] code, Map<Character, String> codes) {
if (entry instanceof WeightedNode) {
WeightedNode node = (WeightedNode) entry;
if (node.left != null) {
code[code.length - 1] = 1;
generateHuffmanString(node.left, Arrays.copyOf(code, code.length + 1), codes);
}
if (node.right != null) {
code[code.length - 1] = 0;
generateHuffmanString(node.right, Arrays.copyOf(code, code.length + 1), codes);
}
} else {
codes.put(entry.val, Arrays.stream(code).mapToObj(String::valueOf).collect(Collectors.joining("")));
}
}
public static void main(String[] args) {
String inputString = "Hello, my name is Alex Wendland";
Map<Character, Integer> freq = new HashMap<>();
inputString.chars().forEach(ci -> {
char c = (char) ci;
freq.put(c, freq.getOrDefault(c, 0) + 1);
});
List<WeightedEntry> freqSort = freq.entrySet().stream()
.map(e -> new WeightedEntry(e.getKey(), e.getValue()))
.sorted(HuffmanCodesGenerator::weightedSort)
.collect(Collectors.toList());
WeightedNode tree = (WeightedNode) mergeSmallest(freqSort.toArray(new WeightedEntry[0]))[0];
Map<Character, String> huffmanCodes = new HashMap<>();
generateHuffmanString(tree, new int[1], huffmanCodes);
String encoded = inputString.chars()
.mapToObj(i -> (char) i)
.map(huffmanCodes::get)
.collect(Collectors.joining(""));
System.out.println(huffmanCodes);
double prop = (double) (encoded.length() / 8) / (double) inputString.length();
System.out.println(inputString.length() + " vs " + encoded.length() / 8 + " = " + prop + ": " + encoded);
String padded = "1" + encoded;
BigInteger b = new BigInteger(padded, 2);
System.out.println(b.toString(16));
String unBinaried = b.toString(2).substring(1);
System.out.println(unBinaried);
Map<String, Character> huffmanReverseLookup = huffmanCodes.entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
String decoded = "";
String bucket = "";
for (int index = 0; index < unBinaried.length(); index++) {
bucket += unBinaried.charAt(index);
if (huffmanReverseLookup.containsKey(bucket)) {
decoded += huffmanReverseLookup.get(bucket);
bucket = "";
}
}
System.out.println(decoded);
}
}
**Table:** `{ =1110, a=10110, A=101000, d=10000, e=0110, H=101010, i=010100, l=1100, ,=010110, m=10010, n=0010, o=010000, s=010010, W=000100, x=000110, y=00000}`
**18 bytes** vs **31 bytes** = _0.5806451612903226_
**Bin:** _101010011011001100010000010110111010010000001110001010110100100110111001010001001011101010001100011000011011100001000110001010000110010110001010000_
**Hex:** _d4d9882dd20715a4dca25d4630dc231432c50_
**Input and output:** _Hello, my name is Alex Wendland_
@awendland
Copy link
Author

Formatted Output:

Table: { =1110, a=10110, A=101000, d=10000, e=0110, H=101010, i=010100, l=1100, ,=010110, m=10010, n=0010, o=010000, s=010010, W=000100, x=000110, y=00000}

18 bytes vs 31 bytes = 0.5806451612903226
Bin: 101010011011001100010000010110111010010000001110001010110100100110111001010001001011101010001100011000011011100001000110001010000110010110001010000
Hex: d4d9882dd20715a4dca25d4630dc231432c50

Input and output: Hello, my name is Alex Wendland

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment