tropicbliss · April 7, 2025 16:47
diff --git a/huffman.java b/huffman.java
 // Time Complexity Analysis
 // parseText method

 // Creating frequency map: O(n) where n is the length of the input text
 // Creating letters list: O(k) where k is the number of unique characters
 // Sorting letters: O(k log k)
 // Overall: O(n + k log k)

 // buildTree method

 // While loop runs k-1 times (combining nodes until only one remains)
 // Each iteration:

 // Removing first two elements: O(1) with LinkedList implementation
 // Creating a new TreeNode: O(1)
 // Adding node and sorting: O(k log k)


 // Overall: O(k² log k) due to repeated sorting inside the loop

 // traverseTree method

 // Recursively visits each node in the tree once
 // Time complexity: O(k) where k is the number of unique characters

 // encodeAsBits method

 // Iterates through the input text once: O(n)
 // Each character lookup in encoding map is O(1)
 // Overall: O(n)

 // bitsToBytes method

 // Processing each bit in the bitString: O(b) where b is the length of the bitString
 // Since b is proportional to n, this is effectively O(n)

 // huffman method (main algorithm)

 // parseText: O(n + k log k)
 // buildTree: O(k² log k)
 // traverseTree: O(k)
 // Creating encodingMap: O(k)
 // encodeAsBits: O(n)
 // bitsToBytes: O(n)
 // Overall: O(n + k² log k)

 // decode method

 // Converting bytes to bits: O(b) where b is the number of bytes
 // Creating the decoding map: O(k)
 // Decoding the bitstring: O(b) in the worst case
 // Overall: O(b + k), which is O(n + k) since b is proportional to n

 // Space Complexity

 // Frequency map: O(k)
 // Letters list: O(k)
 // Tree nodes: O(k)
 // Encoding/decoding maps: O(k)
 // Bit strings: O(n)
 // Byte array: O(n)
 // Overall: O(n + k)

 import java.util.*;

 public class HuffmanCoding {

    static class Letter {
        char letter;
        int freq;
        Map<Character, String> bitstring;

        public Letter(char letter, int freq) {
            this.letter = letter;
            this.freq = freq;
            this.bitstring = new HashMap<>();
        }

        @Override
        public String toString() {
            return letter + ":" + freq;
        }
    }

    static class TreeNode {
        int freq;
        Object left;  // Either Letter or TreeNode
        Object right; // Either Letter or TreeNode

        public TreeNode(int freq, Object left, Object right) {
            this.freq = freq;
            this.left = left;
            this.right = right;
        }
    }

    /**
     * Parses the input text to count character frequencies.
     * Time Complexity: O(n), where n is the length of the text.
     * - Iterating through the text: O(n)
     * - HashMap operations for frequency counting: O(1) average
     * - Sorting the letters: O(k log k), where k is the number of unique characters (typically k << n)
     */
    private static List<Letter> parseText(String text) {
        Map<Character, Integer> chars = new HashMap<>();
        for (char c : text.toCharArray()) {
            chars.put(c, chars.getOrDefault(c, 0) + 1);
        }

        List<Letter> letters = new ArrayList<>();
        for (Map.Entry<Character, Integer> entry : chars.entrySet()) {
            letters.add(new Letter(entry.getKey(), entry.getValue()));
        }

        letters.sort(Comparator.comparingInt(l -> l.freq));
        return letters;
    }

    /**
     * Builds the Huffman tree from the list of letters.
     * Time Complexity: O(k log k), where k is the number of unique characters.
     * - While loop runs at most k-1 times for k unique characters
     * - Each iteration:
     *   - Removing from front of list: O(1) (assuming ArrayList.removeFirst() is amortized O(1))
     *   - Adding to list: O(1)
     *   - Sorting the list: O(k log k)
     * Overall, we have O(k) iterations with O(k log k) work in each, so O(k^2 log k)
     * However, using a priority queue could optimize this to O(k log k)
     */
    private static Object buildTree(List<Letter> letters) {
        List<Object> response = new ArrayList<>(letters);

        while (response.size() > 1) {
            Object left = response.removeFirst();
            Object right = response.removeFirst();

            int leftFreq = (left instanceof Letter) ? ((Letter) left).freq : ((TreeNode) left).freq;
            int rightFreq = (right instanceof Letter) ? ((Letter) right).freq : ((TreeNode) right).freq;

            int totalFreq = leftFreq + rightFreq;
            TreeNode node = new TreeNode(totalFreq, left, right);

            response.add(node);
            response.sort((a, b) -> {
                int freqA = (a instanceof Letter) ? ((Letter) a).freq : ((TreeNode) a).freq;
                int freqB = (b instanceof Letter) ? ((Letter) b).freq : ((TreeNode) b).freq;
                return Integer.compare(freqA, freqB);
            });
        }

        return response.getFirst();
    }

    /**
     * Traverses the Huffman tree to generate bit encodings for each character.
     * Time Complexity: O(k), where k is the number of unique characters.
     * - Each node in the tree is visited exactly once
     * - The tree has 2k-1 nodes for k leaves (unique characters)
     * - Each node operation is O(1)
     * - String concatenation (bitstring + "0"/"1") is O(1) since strings are small
     */
    private static List<Letter> traverseTree(Object root, String bitstring) {
        if (root instanceof Letter letter) {
            letter.bitstring.put(letter.letter, bitstring);
            return List.of(letter);
        }

        TreeNode treeNode = (TreeNode) root;
        List<Letter> letters = new ArrayList<>();

        letters.addAll(traverseTree(treeNode.left, bitstring + "0"));
        letters.addAll(traverseTree(treeNode.right, bitstring + "1"));

        return letters;
    }

    /**
     * Encodes the original text into a bit string using the encoding map.
     * Time Complexity: O(n), where n is the length of the text.
     * - Iterating through each character: O(n)
     * - HashMap lookup for each character: O(1)
     * - StringBuilder append: O(1) amortized
     */
    private static String encodeAsBits(String text, Map<Character, String> encodingMap) {
        StringBuilder bitString = new StringBuilder();
        for (char c : text.toCharArray()) {
            bitString.append(encodingMap.get(c));
        }
        return bitString.toString();
    }

    private static class ByteResult {
        byte[] data;
        int padding;

        public ByteResult(byte[] data, int padding) {
            this.data = data;
            this.padding = padding;
        }
    }

    /**
     * Converts a bit string to bytes.
     * Time Complexity: O(b), where b is the length of the bit string.
     * - String operations: O(b)
     * - Iterating through the padded string in chunks of 8: O(b/8) = O(b)
     * - Parsing and byte conversion: O(1) per chunk
     */
    private static ByteResult bitsToBytes(String bitString) {
        // Pad to multiple of 8
        int padding = (bitString.length() % 8 != 0) ? 8 - (bitString.length() % 8) : 0;
        String paddedBits = bitString + "0".repeat(padding);

        // Convert to bytes
        byte[] byteArray = new byte[paddedBits.length() / 8];
        for (int i = 0; i < paddedBits.length(); i += 8) {
            String byteStr = paddedBits.substring(i, i + 8);
            byteArray[i / 8] = (byte) Integer.parseInt(byteStr, 2);
        }

        return new ByteResult(byteArray, padding);
    }

    public static class HuffmanResult {
        Map<Character, String> encodingMap;
        String bitString;
        byte[] byteData;
        int padding;

        public HuffmanResult(Map<Character, String> encodingMap, String bitString, byte[] byteData, int padding) {
            this.encodingMap = encodingMap;
            this.bitString = bitString;
            this.byteData = byteData;
            this.padding = padding;
        }
    }

    /**
     * Main method to perform Huffman coding on a text string.
     * Time Complexity: O(n + k log k), where n is the length of the text and k is the number of unique characters.
     * - parseText: O(n)
     * - buildTree: O(k^2 log k) or optimized O(k log k) with priority queue
     * - traverseTree: O(k)
     * - encodeAsBits: O(n)
     * - bitsToBytes: O(n) in worst case as bit string length is proportional to n
     * 
     * Since typically k << n (number of unique characters is much smaller than text length),
     * the overall complexity is dominated by O(n) operations.
     */
    public static HuffmanResult huffman(String text) {
        if (text.isEmpty()) {
            return new HuffmanResult(new HashMap<>(), "", new byte[0], 0);
        }

        List<Letter> lettersList = parseText(text);

        // Handle the special case of only one unique character
        if (lettersList.size() == 1) {
            Letter letter = lettersList.getFirst();
            Map<Character, String> encodingMap = new HashMap<>();
            encodingMap.put(letter.letter, "0");

            String bitString = "0".repeat(text.length());
            ByteResult byteResult = bitsToBytes(bitString);

            return new HuffmanResult(encodingMap, bitString, byteResult.data, byteResult.padding);
        }

        Object root = buildTree(lettersList);

        List<Letter> letters = traverseTree(root, "");
        Map<Character, String> encodingMap = new HashMap<>();
        for (Letter letter : letters) {
            encodingMap.put(letter.letter, letter.bitstring.get(letter.letter));
        }

        String bitString = encodeAsBits(text, encodingMap);
        ByteResult byteResult = bitsToBytes(bitString);

        return new HuffmanResult(encodingMap, bitString, byteResult.data, byteResult.padding);
    }

    /**
     * Decodes Huffman-encoded byte data back to the original text.
     * Time Complexity: O(n + b), where n is the length of the original text and b is the number of bits.
     * - Converting bytes to bits: O(b)
     * - Inverting the encoding map: O(k) where k is the number of unique characters
     * - Decoding the bit string: O(b) in worst case, but typically closer to O(n) where n is the length of the output text
     */
    public static String decode(byte[] byteData, Map<Character, String> encodingMap, int padding) {
        // Convert bytes back to bits
        StringBuilder bitString = new StringBuilder();
        for (byte b : byteData) {
            StringBuilder bits = new StringBuilder(Integer.toBinaryString(b & 0xFF));
            // Ensure each byte is represented as 8 bits
            while (bits.length() < 8) {
                bits.insert(0, "0");
            }
            bitString.append(bits);
        }

        // Remove padding
        if (padding > 0) {
            bitString.delete(bitString.length() - padding, bitString.length());
        }

        // Invert the encoding map for decoding
        StringBuilder decodedText = getDecodedText(encodingMap, bitString);

        return decodedText.toString();
    }

    /**
     * Helper method to decode the bit string using the encoding map.
     * Time Complexity: O(b), where b is the length of the bit string.
     * - Inverting the encoding map: O(k) where k is the number of unique characters
     * - Iterating through each bit: O(b)
     * - HashMap lookup for each potential code: O(1)
     * - StringBuilder operations: O(1) amortized
     * 
     * While the loop iterates through each bit (O(b)), we typically find matches after a small
     * number of bits (average Huffman code length), so the actual work is closer to O(n) where
     * n is the length of the original text.
     */
    private static StringBuilder getDecodedText(Map<Character, String> encodingMap, StringBuilder bitString) {
        Map<String, Character> decodingMap = new HashMap<>();
        for (Map.Entry<Character, String> entry : encodingMap.entrySet()) {
            decodingMap.put(entry.getValue(), entry.getKey());
        }

        // Decode the bit string
        StringBuilder decodedText = new StringBuilder();
        StringBuilder currentBits = new StringBuilder();

        for (int i = 0; i < bitString.length(); i++) {
            currentBits.append(bitString.charAt(i));
            if (decodingMap.containsKey(currentBits.toString())) {
                decodedText.append(decodingMap.get(currentBits.toString()));
                currentBits.setLength(0);
            }
        }
        return decodedText;
    }

    public static void main(String[] args) {
        String sampleText = "hello world";
        HuffmanResult result = huffman(sampleText);

        System.out.println("Original text: " + sampleText);
        System.out.println("Encoding map: " + result.encodingMap);
        System.out.println("Encoded bit string: " + result.bitString);
        System.out.println("Encoded as bytes: " + Arrays.toString(result.byteData));
        System.out.println("Padding bits: " + result.padding);

        // Calculate compression ratio
        int originalSize = sampleText.length() * 8;  // Assuming 8 bits per character (ASCII)
        int compressedSize = result.bitString.length();
        double compressionRatio = (double) (originalSize - compressedSize) / originalSize * 100;

        System.out.println("Original size (bits): " + originalSize);
        System.out.println("Compressed size (bits): " + compressedSize);
        System.out.printf("Compression ratio: %.2f%%\n", compressionRatio);

        // Decode and verify
        String decodedText = decode(result.byteData, result.encodingMap, result.padding);
        System.out.println("Decoded text: " + decodedText);
        System.out.println("Decoding successful: " + decodedText.equals(sampleText));
    }
 }
	// Time Complexity Analysis
	// parseText method

	// Creating frequency map: O(n) where n is the length of the input text
	// Creating letters list: O(k) where k is the number of unique characters
	// Sorting letters: O(k log k)
	// Overall: O(n + k log k)

	// buildTree method

	// While loop runs k-1 times (combining nodes until only one remains)
	// Each iteration:

	// Removing first two elements: O(1) with LinkedList implementation
	// Creating a new TreeNode: O(1)
	// Adding node and sorting: O(k log k)


	// Overall: O(k² log k) due to repeated sorting inside the loop

	// traverseTree method

	// Recursively visits each node in the tree once
	// Time complexity: O(k) where k is the number of unique characters

	// encodeAsBits method

	// Iterates through the input text once: O(n)
	// Each character lookup in encoding map is O(1)
	// Overall: O(n)

	// bitsToBytes method

	// Processing each bit in the bitString: O(b) where b is the length of the bitString
	// Since b is proportional to n, this is effectively O(n)

	// huffman method (main algorithm)

	// parseText: O(n + k log k)
	// buildTree: O(k² log k)
	// traverseTree: O(k)
	// Creating encodingMap: O(k)
	// encodeAsBits: O(n)
	// bitsToBytes: O(n)
	// Overall: O(n + k² log k)

	// decode method

	// Converting bytes to bits: O(b) where b is the number of bytes
	// Creating the decoding map: O(k)
	// Decoding the bitstring: O(b) in the worst case
	// Overall: O(b + k), which is O(n + k) since b is proportional to n

	// Space Complexity

	// Frequency map: O(k)
	// Letters list: O(k)
	// Tree nodes: O(k)
	// Encoding/decoding maps: O(k)
	// Bit strings: O(n)
	// Byte array: O(n)
	// Overall: O(n + k)

	import java.util.*;

	public class HuffmanCoding {

	static class Letter {
	char letter;
	int freq;
	Map<Character, String> bitstring;

	public Letter(char letter, int freq) {
	this.letter = letter;
	this.freq = freq;
	this.bitstring = new HashMap<>();
	}

	@Override
	public String toString() {
	return letter + ":" + freq;
	}
	}

	static class TreeNode {
	int freq;
	Object left; // Either Letter or TreeNode
	Object right; // Either Letter or TreeNode

	public TreeNode(int freq, Object left, Object right) {
	this.freq = freq;
	this.left = left;
	this.right = right;
	}
	}

	/**
	* Parses the input text to count character frequencies.
	* Time Complexity: O(n), where n is the length of the text.
	* - Iterating through the text: O(n)
	* - HashMap operations for frequency counting: O(1) average
	* - Sorting the letters: O(k log k), where k is the number of unique characters (typically k << n)
	*/
	private static List<Letter> parseText(String text) {
	Map<Character, Integer> chars = new HashMap<>();
	for (char c : text.toCharArray()) {
	chars.put(c, chars.getOrDefault(c, 0) + 1);
	}

	List<Letter> letters = new ArrayList<>();
	for (Map.Entry<Character, Integer> entry : chars.entrySet()) {
	letters.add(new Letter(entry.getKey(), entry.getValue()));
	}

	letters.sort(Comparator.comparingInt(l -> l.freq));
	return letters;
	}

	/**
	* Builds the Huffman tree from the list of letters.
	* Time Complexity: O(k log k), where k is the number of unique characters.
	* - While loop runs at most k-1 times for k unique characters
	* - Each iteration:
	* - Removing from front of list: O(1) (assuming ArrayList.removeFirst() is amortized O(1))
	* - Adding to list: O(1)
	* - Sorting the list: O(k log k)
	* Overall, we have O(k) iterations with O(k log k) work in each, so O(k^2 log k)
	* However, using a priority queue could optimize this to O(k log k)
	*/
	private static Object buildTree(List<Letter> letters) {
	List<Object> response = new ArrayList<>(letters);

	while (response.size() > 1) {
	Object left = response.removeFirst();
	Object right = response.removeFirst();

	int leftFreq = (left instanceof Letter) ? ((Letter) left).freq : ((TreeNode) left).freq;
	int rightFreq = (right instanceof Letter) ? ((Letter) right).freq : ((TreeNode) right).freq;

	int totalFreq = leftFreq + rightFreq;
	TreeNode node = new TreeNode(totalFreq, left, right);

	response.add(node);
	response.sort((a, b) -> {
	int freqA = (a instanceof Letter) ? ((Letter) a).freq : ((TreeNode) a).freq;
	int freqB = (b instanceof Letter) ? ((Letter) b).freq : ((TreeNode) b).freq;
	return Integer.compare(freqA, freqB);
	});
	}

	return response.getFirst();
	}

	/**
	* Traverses the Huffman tree to generate bit encodings for each character.
	* Time Complexity: O(k), where k is the number of unique characters.
	* - Each node in the tree is visited exactly once
	* - The tree has 2k-1 nodes for k leaves (unique characters)
	* - Each node operation is O(1)
	* - String concatenation (bitstring + "0"/"1") is O(1) since strings are small
	*/
	private static List<Letter> traverseTree(Object root, String bitstring) {
	if (root instanceof Letter letter) {
	letter.bitstring.put(letter.letter, bitstring);
	return List.of(letter);
	}

	TreeNode treeNode = (TreeNode) root;
	List<Letter> letters = new ArrayList<>();

	letters.addAll(traverseTree(treeNode.left, bitstring + "0"));
	letters.addAll(traverseTree(treeNode.right, bitstring + "1"));

	return letters;
	}

	/**
	* Encodes the original text into a bit string using the encoding map.
	* Time Complexity: O(n), where n is the length of the text.
	* - Iterating through each character: O(n)
	* - HashMap lookup for each character: O(1)
	* - StringBuilder append: O(1) amortized
	*/
	private static String encodeAsBits(String text, Map<Character, String> encodingMap) {
	StringBuilder bitString = new StringBuilder();
	for (char c : text.toCharArray()) {
	bitString.append(encodingMap.get(c));
	}
	return bitString.toString();
	}

	private static class ByteResult {
	byte[] data;
	int padding;

	public ByteResult(byte[] data, int padding) {
	this.data = data;
	this.padding = padding;
	}
	}

	/**
	* Converts a bit string to bytes.
	* Time Complexity: O(b), where b is the length of the bit string.
	* - String operations: O(b)
	* - Iterating through the padded string in chunks of 8: O(b/8) = O(b)
	* - Parsing and byte conversion: O(1) per chunk
	*/
	private static ByteResult bitsToBytes(String bitString) {
	// Pad to multiple of 8
	int padding = (bitString.length() % 8 != 0) ? 8 - (bitString.length() % 8) : 0;
	String paddedBits = bitString + "0".repeat(padding);

	// Convert to bytes
	byte[] byteArray = new byte[paddedBits.length() / 8];
	for (int i = 0; i < paddedBits.length(); i += 8) {
	String byteStr = paddedBits.substring(i, i + 8);
	byteArray[i / 8] = (byte) Integer.parseInt(byteStr, 2);
	}

	return new ByteResult(byteArray, padding);
	}

	public static class HuffmanResult {
	Map<Character, String> encodingMap;
	String bitString;
	byte[] byteData;
	int padding;

	public HuffmanResult(Map<Character, String> encodingMap, String bitString, byte[] byteData, int padding) {
	this.encodingMap = encodingMap;
	this.bitString = bitString;
	this.byteData = byteData;
	this.padding = padding;
	}
	}

	/**
	* Main method to perform Huffman coding on a text string.
	* Time Complexity: O(n + k log k), where n is the length of the text and k is the number of unique characters.
	* - parseText: O(n)
	* - buildTree: O(k^2 log k) or optimized O(k log k) with priority queue
	* - traverseTree: O(k)
	* - encodeAsBits: O(n)
	* - bitsToBytes: O(n) in worst case as bit string length is proportional to n
	*
	* Since typically k << n (number of unique characters is much smaller than text length),
	* the overall complexity is dominated by O(n) operations.
	*/
	public static HuffmanResult huffman(String text) {
	if (text.isEmpty()) {
	return new HuffmanResult(new HashMap<>(), "", new byte[0], 0);
	}

	List<Letter> lettersList = parseText(text);

	// Handle the special case of only one unique character
	if (lettersList.size() == 1) {
	Letter letter = lettersList.getFirst();
	Map<Character, String> encodingMap = new HashMap<>();
	encodingMap.put(letter.letter, "0");

	String bitString = "0".repeat(text.length());
	ByteResult byteResult = bitsToBytes(bitString);

	return new HuffmanResult(encodingMap, bitString, byteResult.data, byteResult.padding);
	}

	Object root = buildTree(lettersList);

	List<Letter> letters = traverseTree(root, "");
	Map<Character, String> encodingMap = new HashMap<>();
	for (Letter letter : letters) {
	encodingMap.put(letter.letter, letter.bitstring.get(letter.letter));
	}

	String bitString = encodeAsBits(text, encodingMap);
	ByteResult byteResult = bitsToBytes(bitString);

	return new HuffmanResult(encodingMap, bitString, byteResult.data, byteResult.padding);
	}

	/**
	* Decodes Huffman-encoded byte data back to the original text.
	* Time Complexity: O(n + b), where n is the length of the original text and b is the number of bits.
	* - Converting bytes to bits: O(b)
	* - Inverting the encoding map: O(k) where k is the number of unique characters
	* - Decoding the bit string: O(b) in worst case, but typically closer to O(n) where n is the length of the output text
	*/
	public static String decode(byte[] byteData, Map<Character, String> encodingMap, int padding) {
	// Convert bytes back to bits
	StringBuilder bitString = new StringBuilder();
	for (byte b : byteData) {
	StringBuilder bits = new StringBuilder(Integer.toBinaryString(b & 0xFF));
	// Ensure each byte is represented as 8 bits
	while (bits.length() < 8) {
	bits.insert(0, "0");
	}
	bitString.append(bits);
	}

	// Remove padding
	if (padding > 0) {
	bitString.delete(bitString.length() - padding, bitString.length());
	}

	// Invert the encoding map for decoding
	StringBuilder decodedText = getDecodedText(encodingMap, bitString);

	return decodedText.toString();
	}

	/**
	* Helper method to decode the bit string using the encoding map.
	* Time Complexity: O(b), where b is the length of the bit string.
	* - Inverting the encoding map: O(k) where k is the number of unique characters
	* - Iterating through each bit: O(b)
	* - HashMap lookup for each potential code: O(1)
	* - StringBuilder operations: O(1) amortized
	*
	* While the loop iterates through each bit (O(b)), we typically find matches after a small
	* number of bits (average Huffman code length), so the actual work is closer to O(n) where
	* n is the length of the original text.
	*/
	private static StringBuilder getDecodedText(Map<Character, String> encodingMap, StringBuilder bitString) {
	Map<String, Character> decodingMap = new HashMap<>();
	for (Map.Entry<Character, String> entry : encodingMap.entrySet()) {
	decodingMap.put(entry.getValue(), entry.getKey());
	}

	// Decode the bit string
	StringBuilder decodedText = new StringBuilder();
	StringBuilder currentBits = new StringBuilder();

	for (int i = 0; i < bitString.length(); i++) {
	currentBits.append(bitString.charAt(i));
	if (decodingMap.containsKey(currentBits.toString())) {
	decodedText.append(decodingMap.get(currentBits.toString()));
	currentBits.setLength(0);
	}
	}
	return decodedText;
	}

	public static void main(String[] args) {
	String sampleText = "hello world";
	HuffmanResult result = huffman(sampleText);

	System.out.println("Original text: " + sampleText);
	System.out.println("Encoding map: " + result.encodingMap);
	System.out.println("Encoded bit string: " + result.bitString);
	System.out.println("Encoded as bytes: " + Arrays.toString(result.byteData));
	System.out.println("Padding bits: " + result.padding);

	// Calculate compression ratio
	int originalSize = sampleText.length() * 8; // Assuming 8 bits per character (ASCII)
	int compressedSize = result.bitString.length();
	double compressionRatio = (double) (originalSize - compressedSize) / originalSize * 100;

	System.out.println("Original size (bits): " + originalSize);
	System.out.println("Compressed size (bits): " + compressedSize);
	System.out.printf("Compression ratio: %.2f%%\n", compressionRatio);

	// Decode and verify
	String decodedText = decode(result.byteData, result.encodingMap, result.padding);
	System.out.println("Decoded text: " + decodedText);
	System.out.println("Decoding successful: " + decodedText.equals(sampleText));
	}
	}