Last active
August 29, 2015 14:24
-
-
Save awendland/bbe975995b2e092045e9 to your computer and use it in GitHub Desktop.
A huffman code table generator from a given input string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.math.BigInteger; | |
import java.util.Arrays; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.stream.Collectors; | |
/** | |
* Alex Wendland © 2015 | |
*/ | |
public class HuffmanCodesGenerator { | |
static class WeightedEntry { | |
Character val; | |
int freq; | |
public WeightedEntry(Character val, int freq) { | |
this.val = val; | |
this.freq = freq; | |
} | |
@Override | |
public String toString() { | |
return "[" + (val == null ? "null" : val.toString()) + "]: " + freq; | |
} | |
@Override | |
public int hashCode() { | |
int result = val != null ? val.hashCode() : 0; | |
result = 31 * result + freq; | |
return result; | |
} | |
} | |
static class WeightedNode extends WeightedEntry { | |
WeightedEntry left; | |
WeightedEntry right; | |
public WeightedNode(WeightedEntry smallest, WeightedEntry smaller) { | |
super(null, smallest.freq + smaller.freq); | |
left = smaller; | |
right = smallest; | |
} | |
} | |
static int weightedSort(WeightedEntry a, WeightedEntry b) { | |
return Integer.compare(a.freq, b.freq); | |
} | |
static WeightedEntry[] mergeSmallest(WeightedEntry[] wl) { | |
WeightedEntry smallest = wl[0]; | |
WeightedEntry smaller = wl[1]; | |
WeightedEntry[] arr = Arrays.copyOfRange(wl, 1, wl.length); | |
arr[0] = new WeightedNode(smallest, smaller); | |
Arrays.sort(arr, HuffmanCodesGenerator::weightedSort); | |
if (arr.length == 1) | |
return arr; | |
else | |
return mergeSmallest(arr); | |
} | |
static void generateHuffmanString( | |
WeightedEntry entry, int[] code, Map<Character, String> codes) { | |
if (entry instanceof WeightedNode) { | |
WeightedNode node = (WeightedNode) entry; | |
if (node.left != null) { | |
code[code.length - 1] = 1; | |
generateHuffmanString(node.left, Arrays.copyOf(code, code.length + 1), codes); | |
} | |
if (node.right != null) { | |
code[code.length - 1] = 0; | |
generateHuffmanString(node.right, Arrays.copyOf(code, code.length + 1), codes); | |
} | |
} else { | |
codes.put(entry.val, Arrays.stream(code).mapToObj(String::valueOf).collect(Collectors.joining(""))); | |
} | |
} | |
public static void main(String[] args) { | |
String inputString = "Hello, my name is Alex Wendland"; | |
Map<Character, Integer> freq = new HashMap<>(); | |
inputString.chars().forEach(ci -> { | |
char c = (char) ci; | |
freq.put(c, freq.getOrDefault(c, 0) + 1); | |
}); | |
List<WeightedEntry> freqSort = freq.entrySet().stream() | |
.map(e -> new WeightedEntry(e.getKey(), e.getValue())) | |
.sorted(HuffmanCodesGenerator::weightedSort) | |
.collect(Collectors.toList()); | |
WeightedNode tree = (WeightedNode) mergeSmallest(freqSort.toArray(new WeightedEntry[0]))[0]; | |
Map<Character, String> huffmanCodes = new HashMap<>(); | |
generateHuffmanString(tree, new int[1], huffmanCodes); | |
String encoded = inputString.chars() | |
.mapToObj(i -> (char) i) | |
.map(huffmanCodes::get) | |
.collect(Collectors.joining("")); | |
System.out.println(huffmanCodes); | |
double prop = (double) (encoded.length() / 8) / (double) inputString.length(); | |
System.out.println(inputString.length() + " vs " + encoded.length() / 8 + " = " + prop + ": " + encoded); | |
String padded = "1" + encoded; | |
BigInteger b = new BigInteger(padded, 2); | |
System.out.println(b.toString(16)); | |
String unBinaried = b.toString(2).substring(1); | |
System.out.println(unBinaried); | |
Map<String, Character> huffmanReverseLookup = huffmanCodes.entrySet().stream() | |
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); | |
String decoded = ""; | |
String bucket = ""; | |
for (int index = 0; index < unBinaried.length(); index++) { | |
bucket += unBinaried.charAt(index); | |
if (huffmanReverseLookup.containsKey(bucket)) { | |
decoded += huffmanReverseLookup.get(bucket); | |
bucket = ""; | |
} | |
} | |
System.out.println(decoded); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
**Table:** `{ =1110, a=10110, A=101000, d=10000, e=0110, H=101010, i=010100, l=1100, ,=010110, m=10010, n=0010, o=010000, s=010010, W=000100, x=000110, y=00000}` | |
**18 bytes** vs **31 bytes** = _0.5806451612903226_ | |
**Bin:** _101010011011001100010000010110111010010000001110001010110100100110111001010001001011101010001100011000011011100001000110001010000110010110001010000_ | |
**Hex:** _d4d9882dd20715a4dca25d4630dc231432c50_ | |
**Input and output:** _Hello, my name is Alex Wendland_ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Formatted Output:
Table:
{ =1110, a=10110, A=101000, d=10000, e=0110, H=101010, i=010100, l=1100, ,=010110, m=10010, n=0010, o=010000, s=010010, W=000100, x=000110, y=00000}
18 bytes vs 31 bytes = 0.5806451612903226
Bin: 101010011011001100010000010110111010010000001110001010110100100110111001010001001011101010001100011000011011100001000110001010000110010110001010000
Hex: d4d9882dd20715a4dca25d4630dc231432c50
Input and output: Hello, my name is Alex Wendland