Skip to content

Instantly share code, notes, and snippets.

@tropicbliss
Last active April 7, 2025 16:47
Show Gist options
  • Save tropicbliss/2e8dfd7415f4d9554000bf47e19e4566 to your computer and use it in GitHub Desktop.
Save tropicbliss/2e8dfd7415f4d9554000bf47e19e4566 to your computer and use it in GitHub Desktop.
// Time Complexity Analysis
// parseText method
// Creating frequency map: O(n) where n is the length of the input text
// Creating letters list: O(k) where k is the number of unique characters
// Sorting letters: O(k log k)
// Overall: O(n + k log k)
// buildTree method
// While loop runs k-1 times (combining nodes until only one remains)
// Each iteration:
// Removing first two elements: O(1) with LinkedList implementation
// Creating a new TreeNode: O(1)
// Adding node and sorting: O(k log k)
// Overall: O(k² log k) due to repeated sorting inside the loop
// traverseTree method
// Recursively visits each node in the tree once
// Time complexity: O(k) where k is the number of unique characters
// encodeAsBits method
// Iterates through the input text once: O(n)
// Each character lookup in encoding map is O(1)
// Overall: O(n)
// bitsToBytes method
// Processing each bit in the bitString: O(b) where b is the length of the bitString
// Since b is proportional to n, this is effectively O(n)
// huffman method (main algorithm)
// parseText: O(n + k log k)
// buildTree: O(k² log k)
// traverseTree: O(k)
// Creating encodingMap: O(k)
// encodeAsBits: O(n)
// bitsToBytes: O(n)
// Overall: O(n + k² log k)
// decode method
// Converting bytes to bits: O(b) where b is the number of bytes
// Creating the decoding map: O(k)
// Decoding the bitstring: O(b) in the worst case
// Overall: O(b + k), which is O(n + k) since b is proportional to n
// Space Complexity
// Frequency map: O(k)
// Letters list: O(k)
// Tree nodes: O(k)
// Encoding/decoding maps: O(k)
// Bit strings: O(n)
// Byte array: O(n)
// Overall: O(n + k)
import java.util.*;
public class HuffmanCoding {
static class Letter {
char letter;
int freq;
Map<Character, String> bitstring;
public Letter(char letter, int freq) {
this.letter = letter;
this.freq = freq;
this.bitstring = new HashMap<>();
}
@Override
public String toString() {
return letter + ":" + freq;
}
}
static class TreeNode {
int freq;
Object left; // Either Letter or TreeNode
Object right; // Either Letter or TreeNode
public TreeNode(int freq, Object left, Object right) {
this.freq = freq;
this.left = left;
this.right = right;
}
}
/**
* Parses the input text to count character frequencies.
* Time Complexity: O(n), where n is the length of the text.
* - Iterating through the text: O(n)
* - HashMap operations for frequency counting: O(1) average
* - Sorting the letters: O(k log k), where k is the number of unique characters (typically k << n)
*/
private static List<Letter> parseText(String text) {
Map<Character, Integer> chars = new HashMap<>();
for (char c : text.toCharArray()) {
chars.put(c, chars.getOrDefault(c, 0) + 1);
}
List<Letter> letters = new ArrayList<>();
for (Map.Entry<Character, Integer> entry : chars.entrySet()) {
letters.add(new Letter(entry.getKey(), entry.getValue()));
}
letters.sort(Comparator.comparingInt(l -> l.freq));
return letters;
}
/**
* Builds the Huffman tree from the list of letters.
* Time Complexity: O(k log k), where k is the number of unique characters.
* - While loop runs at most k-1 times for k unique characters
* - Each iteration:
* - Removing from front of list: O(1) (assuming ArrayList.removeFirst() is amortized O(1))
* - Adding to list: O(1)
* - Sorting the list: O(k log k)
* Overall, we have O(k) iterations with O(k log k) work in each, so O(k^2 log k)
* However, using a priority queue could optimize this to O(k log k)
*/
private static Object buildTree(List<Letter> letters) {
List<Object> response = new ArrayList<>(letters);
while (response.size() > 1) {
Object left = response.removeFirst();
Object right = response.removeFirst();
int leftFreq = (left instanceof Letter) ? ((Letter) left).freq : ((TreeNode) left).freq;
int rightFreq = (right instanceof Letter) ? ((Letter) right).freq : ((TreeNode) right).freq;
int totalFreq = leftFreq + rightFreq;
TreeNode node = new TreeNode(totalFreq, left, right);
response.add(node);
response.sort((a, b) -> {
int freqA = (a instanceof Letter) ? ((Letter) a).freq : ((TreeNode) a).freq;
int freqB = (b instanceof Letter) ? ((Letter) b).freq : ((TreeNode) b).freq;
return Integer.compare(freqA, freqB);
});
}
return response.getFirst();
}
/**
* Traverses the Huffman tree to generate bit encodings for each character.
* Time Complexity: O(k), where k is the number of unique characters.
* - Each node in the tree is visited exactly once
* - The tree has 2k-1 nodes for k leaves (unique characters)
* - Each node operation is O(1)
* - String concatenation (bitstring + "0"/"1") is O(1) since strings are small
*/
private static List<Letter> traverseTree(Object root, String bitstring) {
if (root instanceof Letter letter) {
letter.bitstring.put(letter.letter, bitstring);
return List.of(letter);
}
TreeNode treeNode = (TreeNode) root;
List<Letter> letters = new ArrayList<>();
letters.addAll(traverseTree(treeNode.left, bitstring + "0"));
letters.addAll(traverseTree(treeNode.right, bitstring + "1"));
return letters;
}
/**
* Encodes the original text into a bit string using the encoding map.
* Time Complexity: O(n), where n is the length of the text.
* - Iterating through each character: O(n)
* - HashMap lookup for each character: O(1)
* - StringBuilder append: O(1) amortized
*/
private static String encodeAsBits(String text, Map<Character, String> encodingMap) {
StringBuilder bitString = new StringBuilder();
for (char c : text.toCharArray()) {
bitString.append(encodingMap.get(c));
}
return bitString.toString();
}
private static class ByteResult {
byte[] data;
int padding;
public ByteResult(byte[] data, int padding) {
this.data = data;
this.padding = padding;
}
}
/**
* Converts a bit string to bytes.
* Time Complexity: O(b), where b is the length of the bit string.
* - String operations: O(b)
* - Iterating through the padded string in chunks of 8: O(b/8) = O(b)
* - Parsing and byte conversion: O(1) per chunk
*/
private static ByteResult bitsToBytes(String bitString) {
// Pad to multiple of 8
int padding = (bitString.length() % 8 != 0) ? 8 - (bitString.length() % 8) : 0;
String paddedBits = bitString + "0".repeat(padding);
// Convert to bytes
byte[] byteArray = new byte[paddedBits.length() / 8];
for (int i = 0; i < paddedBits.length(); i += 8) {
String byteStr = paddedBits.substring(i, i + 8);
byteArray[i / 8] = (byte) Integer.parseInt(byteStr, 2);
}
return new ByteResult(byteArray, padding);
}
public static class HuffmanResult {
Map<Character, String> encodingMap;
String bitString;
byte[] byteData;
int padding;
public HuffmanResult(Map<Character, String> encodingMap, String bitString, byte[] byteData, int padding) {
this.encodingMap = encodingMap;
this.bitString = bitString;
this.byteData = byteData;
this.padding = padding;
}
}
/**
* Main method to perform Huffman coding on a text string.
* Time Complexity: O(n + k log k), where n is the length of the text and k is the number of unique characters.
* - parseText: O(n)
* - buildTree: O(k^2 log k) or optimized O(k log k) with priority queue
* - traverseTree: O(k)
* - encodeAsBits: O(n)
* - bitsToBytes: O(n) in worst case as bit string length is proportional to n
*
* Since typically k << n (number of unique characters is much smaller than text length),
* the overall complexity is dominated by O(n) operations.
*/
public static HuffmanResult huffman(String text) {
if (text.isEmpty()) {
return new HuffmanResult(new HashMap<>(), "", new byte[0], 0);
}
List<Letter> lettersList = parseText(text);
// Handle the special case of only one unique character
if (lettersList.size() == 1) {
Letter letter = lettersList.getFirst();
Map<Character, String> encodingMap = new HashMap<>();
encodingMap.put(letter.letter, "0");
String bitString = "0".repeat(text.length());
ByteResult byteResult = bitsToBytes(bitString);
return new HuffmanResult(encodingMap, bitString, byteResult.data, byteResult.padding);
}
Object root = buildTree(lettersList);
List<Letter> letters = traverseTree(root, "");
Map<Character, String> encodingMap = new HashMap<>();
for (Letter letter : letters) {
encodingMap.put(letter.letter, letter.bitstring.get(letter.letter));
}
String bitString = encodeAsBits(text, encodingMap);
ByteResult byteResult = bitsToBytes(bitString);
return new HuffmanResult(encodingMap, bitString, byteResult.data, byteResult.padding);
}
/**
* Decodes Huffman-encoded byte data back to the original text.
* Time Complexity: O(n + b), where n is the length of the original text and b is the number of bits.
* - Converting bytes to bits: O(b)
* - Inverting the encoding map: O(k) where k is the number of unique characters
* - Decoding the bit string: O(b) in worst case, but typically closer to O(n) where n is the length of the output text
*/
public static String decode(byte[] byteData, Map<Character, String> encodingMap, int padding) {
// Convert bytes back to bits
StringBuilder bitString = new StringBuilder();
for (byte b : byteData) {
StringBuilder bits = new StringBuilder(Integer.toBinaryString(b & 0xFF));
// Ensure each byte is represented as 8 bits
while (bits.length() < 8) {
bits.insert(0, "0");
}
bitString.append(bits);
}
// Remove padding
if (padding > 0) {
bitString.delete(bitString.length() - padding, bitString.length());
}
// Invert the encoding map for decoding
StringBuilder decodedText = getDecodedText(encodingMap, bitString);
return decodedText.toString();
}
/**
* Helper method to decode the bit string using the encoding map.
* Time Complexity: O(b), where b is the length of the bit string.
* - Inverting the encoding map: O(k) where k is the number of unique characters
* - Iterating through each bit: O(b)
* - HashMap lookup for each potential code: O(1)
* - StringBuilder operations: O(1) amortized
*
* While the loop iterates through each bit (O(b)), we typically find matches after a small
* number of bits (average Huffman code length), so the actual work is closer to O(n) where
* n is the length of the original text.
*/
private static StringBuilder getDecodedText(Map<Character, String> encodingMap, StringBuilder bitString) {
Map<String, Character> decodingMap = new HashMap<>();
for (Map.Entry<Character, String> entry : encodingMap.entrySet()) {
decodingMap.put(entry.getValue(), entry.getKey());
}
// Decode the bit string
StringBuilder decodedText = new StringBuilder();
StringBuilder currentBits = new StringBuilder();
for (int i = 0; i < bitString.length(); i++) {
currentBits.append(bitString.charAt(i));
if (decodingMap.containsKey(currentBits.toString())) {
decodedText.append(decodingMap.get(currentBits.toString()));
currentBits.setLength(0);
}
}
return decodedText;
}
public static void main(String[] args) {
String sampleText = "hello world";
HuffmanResult result = huffman(sampleText);
System.out.println("Original text: " + sampleText);
System.out.println("Encoding map: " + result.encodingMap);
System.out.println("Encoded bit string: " + result.bitString);
System.out.println("Encoded as bytes: " + Arrays.toString(result.byteData));
System.out.println("Padding bits: " + result.padding);
// Calculate compression ratio
int originalSize = sampleText.length() * 8; // Assuming 8 bits per character (ASCII)
int compressedSize = result.bitString.length();
double compressionRatio = (double) (originalSize - compressedSize) / originalSize * 100;
System.out.println("Original size (bits): " + originalSize);
System.out.println("Compressed size (bits): " + compressedSize);
System.out.printf("Compression ratio: %.2f%%\n", compressionRatio);
// Decode and verify
String decodedText = decode(result.byteData, result.encodingMap, result.padding);
System.out.println("Decoded text: " + decodedText);
System.out.println("Decoding successful: " + decodedText.equals(sampleText));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment