danielrothfus · February 11, 2013 05:29
diff --git a/derhuff.c b/derhuff.c
 /** \file derhuff.c
 \author Daniel Rothfus
 \version 1.0
 \brief A really simple Huffman implementation.  
 
 Derhuff is a simple utility to compress files using the Huffman coding scheme.
 To compress a file, it analyzes the frequencies of all the bytes in the file 
 and builds a tree using these frequencies, which it writes to the output. With 
 this tree, the program then converts each byte of the input to the path to get
 to the corresponding leaf in the tree. To decode, it just reads the tree from 
 the file and uses that to translate each path back to its corresponding byte. 
 This scheme works best on files with uneven distributions of character 
 frequencies, such as plain text.  To see how to use this program, pass the -h 
 or --help flag to the executable. This program is released under the 
 zlib/libpng license included below: 
 
 Copyright (c) 2012 Daniel Rothfus
 
 This software is provided 'as-is', without any express or implied warranty. In 
 no event will the authors be held liable for any damages arising from the use 
 of this software.
 
 Permission is granted to anyone to use this software for any purpose, including 
 commercial applications, and to alter it and redistribute it freely, subject to
 the following restrictions:
 
 1. The origin of this software must not be misrepresented; you must not claim 
    that you wrote the original software. If you use this software in a product,
    an acknowledgment in the product documentation would be appreciated but is 
    not required.
 
 2. Altered source versions must be plainly marked as such, and must not be 
    misrepresented as being the original software.

 3. This notice may not be removed or altered from any source distribution. */

 // Includes  -------------------------------------------------------------------
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <getopt.h>
 #include <stdint.h>

 // Types -----------------------------------------------------------------------
 /** \brief This type is used for representing offsets within a file and the size
 of a file itself. */
 typedef uint64_t file_size;

 /** \brief This type is used for direct lookup from a character to their huffman
 equivalent. */
 typedef struct{
    uint8_t code[8];
    uint8_t length;
 } translate_buffer;

 /// \brief This is the basic data type that makes up a tree. */
 typedef struct node{
    struct node *on, *off;
    int byte;
    file_size occurrences;
 } node;

 // Prototypes  -----------------------------------------------------------------
 /** \brief Print the help for this program.
 \param programName The name of the current program as it was called from the 
 command line. */
 void PrintHelp(const char *programName);

 /** \brief Swap a 64 bit value to/from network byte order if it is necessary.
 \param value The value to swap the endianness of. 
 \return This returns the network byte order version of the host value or the
 host version of the network byte order value. */
 uint64_t ToFromNetwork64(uint64_t value);

 /** \brief This organizes a tree structure inside of a buffer of nodes.
 \param bufStart The first node in the buffer containing the letter frequencies.
 These nodes will reference each other such that a tree is created in the 
 buffer.
 \paraom totalOcurrences The total number of units found of all types in the
 input.
 \return This returns a pointer to the root of the tree. */
 node *OrganizeTree(node *bufStart, file_size totalOcurrences);

 /** \brief Write the encoding tree to the file.
 \param output The file to write the tree to.
 \return This returns an exit status for the program - 0 on success. */
 char WriteTree(FILE *output, node *treeRoot);

 /** \brief Read a tree used for decoding from a file.
 \param output The file to read the tree from.
 \param nodeBuf A buffer of at least 511 nodes to write the tree to. 
 \param treeRoot A pointer to a pointer to assign to the root of the tree.
 \return This returns an exit status for the program - 0 on success. */
 char ReadTree(FILE *input, node *nodeBuf, node **treeRoot);

 /** \brief Compress a file and write the compressed file to the output file.
 \param input The file to compress.
 \param output The file to write the compressed data to.
 \return This returns an exit code for the application. */
 char CompressFile(FILE *input, FILE *output);

 /** \brief Decompress a given file to another file.
 \param input The file to decompress.
 \param output The file to put the decompressed data into.
 \return This returns an exit code for the application. */
 char DecompressFile(FILE *input, FILE *output);

 /** \brief Read all the frequencies of all the characters from the file from the
 current point to the end.
 \param file The file to read all the letter frequencies from.
 \param counts A pointer to a buffer of at least 256 file size indicators, which
 will be used to indicate the number of each byte. */
 file_size ReadFrequencies(FILE *file, node *counts);

 /** \brief Initialize all the nodes in a buffer to the correct states.
 \param allNodes A pointer to the buffer of at least 511 nodes. */
 void InitNodes(node *allNodes);

 /** \brief This is a debug utility that prints out a tree to the standard output
 in dot format, using the graph name test.
 \param rootNode The root node of the tree to print. */
 void PrintTree(node *rootNode);

 /** \brief A debug utility that prints the translate buffers to standard output.
 \param buffs A point to a buffer of at least 256 translate_buffers. */
 void PrintTranslateBuffers(translate_buffer *buffers);

 /** \brief Build the translate buffer for going directly from characters to
 huffman equivalents. 
 \param rootNode The root node of the tree to convert.
 \param buffers The translate buffers to fill with equivalents.  There must be
 size for at least 256 translate_buffers in the buffer. */
 void BuildTranslateBuffers(node *rootNode, translate_buffer *buffers);

 // Constants -------------------------------------------------------------------
 /// \brief An array to tell getopt_long how to handle long program arguments
 static const struct option progOptsLong[] = {
    { "compress", no_argument, NULL, 'c' },
    { "decompress", no_argument, NULL, 'd' },
    { "output", required_argument, NULL, 'o' },
    { "help", no_argument, NULL, 'h' },
    { NULL, no_argument, NULL, 0 }
 };

 /// \brief A string to tell getopt_long how to handle short command line input
 static const char *progOptsStr  = "cdo:h";

 /// \brief A simple version number to allow for changes to the format later
 static const uint8_t versionByte = 1;

 // Implementations -------------------------------------------------------------
 /** \brief The entry point of the application.
 \param argc The number of arguments.
 \param argv The arguments to the application. */
 int main(int argc, char * argv[]){
    int option, programStatus = EXIT_SUCCESS;
    char compress = 0, decompress = 0;
    FILE *output = stdout, *input = NULL;
    
    // Parse through option arguments
    while((option = getopt_long(argc, argv, progOptsStr, progOptsLong, NULL)) != -1){
        switch(option){
            case 'c':
                compress = 1;
                break;
            case 'd':
                decompress = 1;
                break;
            case 'o':
                output = fopen(optarg, "wb");
                if(!output){
                    perror("Could not open output file");
                    programStatus = EXIT_FAILURE;
                    goto cleanup;
                }
                break;
            case '?':
                fprintf(stderr, "Ambiguous option given!\n");
                return EXIT_FAILURE;
            case 'h':
                PrintHelp(argv[0]);
                return EXIT_SUCCESS;
            case ':':
                fprintf(stderr, "Missing option argument!\n");
                return EXIT_FAILURE;
            default:
                fprintf(stderr, "Please pardon my insanity...");
                return EXIT_FAILURE;
        }
    }
    
    // Get action
    if(compress * decompress == 1 || compress + decompress == 0){
        fprintf(stderr, "Need either compress flag or decompress flag!\n");
        programStatus = EXIT_FAILURE;
        goto cleanup;
    }
    
    // Get input file
    argc -= optind;
    argv += optind;
    if(argc == 1){
        input = fopen(argv[0], "rb");
        if(!input){
            perror("Could not open input file");
            programStatus = EXIT_FAILURE;
            goto cleanup;
        }
    }
    else if(argc > 1){
        fprintf(stderr, "This program can only accept one input file!\n");
        programStatus = EXIT_FAILURE;
        goto cleanup;
    }
    else{
        fprintf(stderr, "The input file is required!\n");
        programStatus = EXIT_FAILURE;
        goto cleanup;
    }
    
    // Do the actual compression or decompression
    if(compress){
        programStatus = CompressFile(input, output);
    }
    else{
        programStatus = DecompressFile(input, output);
    }
    
 cleanup:
    if(input && input != stdin){
        fclose(input);
    }
    if(output && output != stdout){
        fclose(output);
    }
    return programStatus;
 }

 void PrintHelp(const char *programName){
    printf("Usage:\n\
    %s [-c|-d] [-o OUTPUT] INPUT\n\
 Options:\n\
    -c compress   : Compress the input file to the output file. (Default)\n\
    -d decompress : Decompress the compressed input file to the output file.\n\
    -o output     : Specify the output file.\n\
    -h help       : Show this help message again.\n", programName);
 }

 #ifdef __BIG_ENDIAN__

 uint64_t ToFromNetwork64(uint64_t value){
    return value;
 }

 #elif defined(__LITTLE_ENDIAN__)

 uint64_t ToFromNetwork64(uint64_t value){
    uint64_t toRet;
    
    // Swap all bytes opposite each other
    for(uint8_t c = 0; c < 8; c++){
        ((uint8_t *)&toRet)[c] = ((uint8_t *)&value)[7 - c];
    }
    
    return toRet;
 }

 #else
 #error Could not detect endianness of environment!
 #endif

 node *OrganizeTree(node *bufStart, file_size totalOcurrences){
    node *rootNode = bufStart, *lowest1, *lowest2;
    unsigned short combinations = 0;
    
    // Combine two lowest nodes
    while(rootNode->occurrences < totalOcurrences){
        lowest1 = lowest2 = NULL;
        for(unsigned short c = 0; c < 256 + combinations; c++){
            if(bufStart[c].occurrences > 0){
                
                // Make sure we assign lowest1
                if(!lowest1){
                    lowest1 = &bufStart[c];
                }
                
                // Make sure we assign lowest2
                else if(!lowest2){
                    lowest2 = &bufStart[c];
                }
                
                // Check to see if it is lower than either of our others
                else{
                    if(bufStart[c].occurrences < lowest1->occurrences){
                        if(lowest1->occurrences < lowest2->occurrences){
                            lowest2 = lowest1;
                        }
                        lowest1 = &bufStart[c];
                    }
                    else if(bufStart[c].occurrences < lowest2->occurrences){
                        if(lowest2->occurrences < lowest1->occurrences){
                            lowest1 = lowest2;
                        }
                        lowest2 = &bufStart[c];
                    }
                }
            }
        }
        
        // Make sure that both have values
        if(lowest2 == NULL || lowest1 == NULL){
            if(lowest2 == NULL){
                lowest2 = &bufStart[(lowest1->byte + 1) % 256];
            }
            if(lowest1 == NULL){
                lowest1 = &bufStart[(lowest2->byte + 1) % 256];
            }
        }
        
        // Now combine two nodes
        rootNode = &bufStart[256 + combinations];
        rootNode->on = lowest1;
        rootNode->off = lowest2;
        rootNode->occurrences = lowest1->occurrences + lowest2->occurrences;
        lowest1->occurrences = lowest2->occurrences = 0;
        combinations++;
    }
    
    return rootNode;
 }

 char WriteTree(FILE *output, node *treeRoot){
    
    // Check if we do have a tree to write to output
    if(treeRoot){
        char status;
        uint8_t bytesToWrite[2];
        uint8_t numBytesToWrite;
        
        // Sort out what we have to write, accounting for the special case null
        if(treeRoot->byte < 0){
            bytesToWrite[0] = 0;
            numBytesToWrite = 1;
        }
        else if(treeRoot->byte == 0){
            bytesToWrite[0] = 1;
            bytesToWrite[1] = 1;
            numBytesToWrite = 2;
        }
        else{
            bytesToWrite[0] = treeRoot->byte;
            numBytesToWrite = 1;
        }
        
        // Write current node value
        if(fwrite(bytesToWrite, numBytesToWrite, 1, output) != 1){
            fprintf(stderr, "Could not write tree to output!\n");
            return EXIT_FAILURE;
        }
        
        // Write left and right trees
        status = WriteTree(output, treeRoot->on);
        if(status != EXIT_SUCCESS){
            return status;
        }
        return WriteTree(output, treeRoot->off);
    }
    else{
        return EXIT_SUCCESS;
    }
 }

 static char _ReadSubtree(FILE *input, node *nodeBuf, node **assignTo, 
                        unsigned char *internalsUsed){
    char status;
    uint8_t read;
    int charRead;
    
    // Read in current character value
    if(fread(&read, 1, 1, input) != 1){
        goto failure;
    }
    charRead = (read == 0) ? -1 : read;
    
    // Check if this is the special case null byte
    if(read == 1){
        if(fread(&read, 1, 1, input) != 1){
            goto failure;
        }
        if(read == 1){
            charRead = 0;
        }
        else{
            if(fseek(input, -1L, SEEK_CUR) != 0){
                goto failure;
            }
            charRead = 1;
        }
    }
    
    // Assign assignTo to the correct node in the tree
    if(charRead == -1){
        *assignTo = &nodeBuf[256 + (*internalsUsed)++];
        status = _ReadSubtree(input, nodeBuf, &(*assignTo)->on, internalsUsed);
        if(status != EXIT_SUCCESS){
            return status;
        }
        return _ReadSubtree(input, nodeBuf, &(*assignTo)->off, internalsUsed);
    }
    else{
        *assignTo = &nodeBuf[charRead];
        return EXIT_SUCCESS;
    }
    
 failure:
    fprintf(stderr, "Could not read tree from input!\n");
    return EXIT_FAILURE;
 }

 char ReadTree(FILE *input, node *nodeBuf, node **treeRoot){
    unsigned char internalsUsed = 0;
    return _ReadSubtree(input, nodeBuf, treeRoot, &internalsUsed);
 }

 char CompressFile(FILE *inFile, FILE *outFile){
    file_size fileSize, netFileSize, bytesRead;
    translate_buffer transBuf[256];
    node allNodes[511], *rootNode;
    char status, codeByteInd, codeByteBitsLeft;
    uint8_t toConv, outByte, outByteLeft, chunkSize, codeByte;
    
    // Initialize all nodes to have the right values
    InitNodes(allNodes);
    
    // Get all letter counts and rewind
    fileSize = ReadFrequencies(inFile, allNodes);
    if(fseek(inFile, 0L, SEEK_SET) != 0){
        perror("Could not rewind input file");
        return EXIT_FAILURE;
    };
    
    // Write out version byte
    if(fwrite(&versionByte, 1, 1, outFile) != 1){
        fprintf(stderr, "Could version byte to output!\n");
        return EXIT_FAILURE;
    }
    
    // Write out file size
    netFileSize = ToFromNetwork64(fileSize);
    if(fwrite(&netFileSize, sizeof(file_size), 1, outFile) != 1){
        fprintf(stderr, "Could write file size to output!\n");
        return EXIT_FAILURE;
    }
    
    // Create and write out encoding tree
    rootNode = OrganizeTree(allNodes, fileSize);
    status = WriteTree(outFile, rootNode);
    if(status != EXIT_SUCCESS){
        return status;
    }
    
    // Build translate buffer objects
    BuildTranslateBuffers(rootNode, transBuf);
    
    // Use translate buffers to translate
    bytesRead = 0;
    outByteLeft = 8;
    while(fread(&toConv, 1, 1, inFile)){
        
        // For every byte in the code
        codeByteInd = 0; 
        while((codeByteBitsLeft = transBuf[toConv].length - codeByteInd * 8) > 0){
            
            // Make sure we only write max 8 bits
            codeByteBitsLeft = codeByteBitsLeft > 8 ? 8 : codeByteBitsLeft;
            
            // For every bit in the current byte to write
            codeByte = transBuf[toConv].code[codeByteInd];
            while(codeByteBitsLeft > 0){
                
                // Select the maximum amount we can write
                chunkSize = outByteLeft < codeByteBitsLeft ? outByteLeft : codeByteBitsLeft;
                outByte = (outByte & (255 >> outByteLeft)) | (codeByte << (8 - outByteLeft));
                codeByte = codeByte >> chunkSize;
                codeByteBitsLeft -= chunkSize;
                outByteLeft -= chunkSize;
                
                // Check if we need to flush output
                if(outByteLeft == 0){
                    if(fwrite(&outByte, 1, 1, outFile) != 1){
                        goto outFailure;
                    }
                    outByteLeft = 8;
                }
            }
            codeByteInd++;
        }
        bytesRead++;
    }
    
    // Write that last chunk out
    if(outByteLeft < 8){
        if(fwrite(&outByte, 1, 1, outFile) != 1){
            goto outFailure;
        }
    }
    
    // Do a sanity check
    if(bytesRead != fileSize){
        fprintf(stderr, "Input file size error!\n");
    }
    
    return EXIT_SUCCESS;
    
 outFailure:
    fprintf(stderr, "Could not write to output file!\n");
    return EXIT_FAILURE;
 }

 char DecompressFile(FILE *inFile, FILE *outFile){
    node allNodes[511], *rootNode, *curNode;
    file_size fileSize, bytesWritten;
    char status, curBit;
    uint8_t curByte, toWrite, tmpVersionByte;
    
    // Initialize all nodes to have the right values
    InitNodes(allNodes);
    
    // Read in check byte and version byte and check them
    if(fread(&tmpVersionByte, 1, 1, inFile) != 1){
        fprintf(stderr, "Could not read version byte!\n");
        return EXIT_FAILURE;
    }
    if(tmpVersionByte != versionByte){
        fprintf(stderr, "Cannot handle version of derhuff file!\n");
        return EXIT_FAILURE;
    }
    
    // Read in number of bytes in file
    if(fread(&fileSize, sizeof(fileSize), 1, inFile) != 1){
        fprintf(stderr, "Could not read file size from input!\n");
        return EXIT_FAILURE;
    }
    fileSize = ToFromNetwork64(fileSize);
    
    // Read in the tree used to encode the file
    status = ReadTree(inFile, allNodes, &rootNode);
    if(status != EXIT_SUCCESS){
        return status;
    }
    
    // Convert file using tree
    curNode = rootNode;
    curBit = 8;
    bytesWritten = 0;
    while(bytesWritten < fileSize){
        
        // Try to refill mini-buffer
        if(curBit == 8){
            if(fread(&curByte, 1, 1, inFile) != 1){
                fprintf(stderr, "Input file not of expected size!\n");
                return EXIT_FAILURE;
            }
            curBit = 0;
        }
        
        // Take next decision
        if(curByte & (1 << curBit)){
            curNode = curNode->on;
        }
        else{
            curNode = curNode->off;
        }
        
        // Check for an invalid current node
        if(!curNode){
            fprintf(stderr, "Corrupt input file!\n");
            return EXIT_FAILURE;
        }
        
        // See if we reached an end byte
        if(curNode->byte >= 0){
            toWrite = curNode->byte;
            if(fwrite(&toWrite, 1, 1, outFile) != 1){
                fprintf(stderr, "Could not write to output file!\n");
                return EXIT_FAILURE;
            }
            bytesWritten++;
            curNode = rootNode;
        }
        
        curBit++;  
    }
    
    return EXIT_SUCCESS;
 }

 file_size ReadFrequencies(FILE *file, node *counts){
    file_size fileSize = 0;
    uint8_t tmpByte;
    
    // Scan through input file counting occurrences and file size
    while(1){
        if(fread(&tmpByte, 1, 1, file) != 1){
            break;
        }
        fileSize++;
        counts[tmpByte].occurrences++;
    }
    
    return fileSize;
 }

 void InitNodes(node *allNodes){
    
    // Clear everything except the ones representing characters
    memset(allNodes, 0, sizeof(node) * 511);
    for(unsigned short c = 0; c < 256; c++){
        allNodes[c].byte = c;
    }
    for(unsigned short c = 256; c < 511; c++){
        allNodes[c].byte = 255 - c;
    }
 }

 static void _PrintSubtree(node *rootNode){
    if(rootNode->on){
        _PrintSubtree(rootNode->on);
        printf("\t%d -- %d;\n", rootNode->byte ,rootNode->on->byte);
    }
    if(rootNode->off){
        _PrintSubtree(rootNode->off);
        printf("\t%d -- %d;\n", rootNode->byte ,rootNode->off->byte);
    }
 }

 void PrintTree(node *rootNode){
    printf("graph test{\n");
    _PrintSubtree(rootNode);
    printf("}\n");
 }

 static void _PrintCode(translate_buffer *buffer){
    for(unsigned short c = 0; c < 256 && c < buffer->length; c++){
        printf("%c", (buffer->code[c / 8] & (1 << (c % 8))) ? '1' : '0');
    }
 }

 void PrintTranslateBuffers(translate_buffer *buffers){
    for(unsigned short c = 0; c < 256; c++){
        printf("%c: ", c);
        _PrintCode(&buffers[c]);
        printf("\n");
    }
 }

 static void _BuildPlaceEntry(node *curNode, translate_buffer *modify, 
                             translate_buffer *insertInto){
    uint8_t charIndex;
    uint8_t bitIndex;
    
    // Check for internal node or leaf
    if(curNode->byte < 0){
        
        charIndex = modify->length / 8;
        bitIndex = modify->length % 8;
        modify->length++;
        
        // Encode a 1 to the position
        if(curNode->on){
            modify->code[charIndex] |= (1 << bitIndex);
            _BuildPlaceEntry(curNode->on, modify, insertInto);
        }
        
        // Encode a 0 to the position
        if(curNode->off){
            modify->code[charIndex] &= ~(1 << bitIndex);
            _BuildPlaceEntry(curNode->off, modify, insertInto);
        }
        
        modify->length--;
    }
    else{
        memcpy(&insertInto[curNode->byte], modify, sizeof(translate_buffer));
    }
 }

 void BuildTranslateBuffers(node *rootNode, translate_buffer *buffers){
    translate_buffer modify;
    
    memset(&modify, 0, sizeof(translate_buffer));
    _BuildPlaceEntry(rootNode, &modify, buffers);
 }
	/** \file derhuff.c
	\author Daniel Rothfus
	\version 1.0
	\brief A really simple Huffman implementation.

	Derhuff is a simple utility to compress files using the Huffman coding scheme.
	To compress a file, it analyzes the frequencies of all the bytes in the file
	and builds a tree using these frequencies, which it writes to the output. With
	this tree, the program then converts each byte of the input to the path to get
	to the corresponding leaf in the tree. To decode, it just reads the tree from
	the file and uses that to translate each path back to its corresponding byte.
	This scheme works best on files with uneven distributions of character
	frequencies, such as plain text. To see how to use this program, pass the -h
	or --help flag to the executable. This program is released under the
	zlib/libpng license included below:

	Copyright (c) 2012 Daniel Rothfus

	This software is provided 'as-is', without any express or implied warranty. In
	no event will the authors be held liable for any damages arising from the use
	of this software.

	Permission is granted to anyone to use this software for any purpose, including
	commercial applications, and to alter it and redistribute it freely, subject to
	the following restrictions:

	1. The origin of this software must not be misrepresented; you must not claim
	that you wrote the original software. If you use this software in a product,
	an acknowledgment in the product documentation would be appreciated but is
	not required.

	2. Altered source versions must be plainly marked as such, and must not be
	misrepresented as being the original software.

	3. This notice may not be removed or altered from any source distribution. */

	// Includes -------------------------------------------------------------------
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <getopt.h>
	#include <stdint.h>

	// Types -----------------------------------------------------------------------
	/** \brief This type is used for representing offsets within a file and the size
	of a file itself. */
	typedef uint64_t file_size;

	/** \brief This type is used for direct lookup from a character to their huffman
	equivalent. */
	typedef struct{
	uint8_t code[8];
	uint8_t length;
	} translate_buffer;

	/// \brief This is the basic data type that makes up a tree. */
	typedef struct node{
	struct node on, off;
	int byte;
	file_size occurrences;
	} node;

	// Prototypes -----------------------------------------------------------------
	/** \brief Print the help for this program.
	\param programName The name of the current program as it was called from the
	command line. */
	void PrintHelp(const char *programName);

	/** \brief Swap a 64 bit value to/from network byte order if it is necessary.
	\param value The value to swap the endianness of.
	\return This returns the network byte order version of the host value or the
	host version of the network byte order value. */
	uint64_t ToFromNetwork64(uint64_t value);

	/** \brief This organizes a tree structure inside of a buffer of nodes.
	\param bufStart The first node in the buffer containing the letter frequencies.
	These nodes will reference each other such that a tree is created in the
	buffer.
	\paraom totalOcurrences The total number of units found of all types in the
	input.
	\return This returns a pointer to the root of the tree. */
	node OrganizeTree(node bufStart, file_size totalOcurrences);

	/** \brief Write the encoding tree to the file.
	\param output The file to write the tree to.
	\return This returns an exit status for the program - 0 on success. */
	char WriteTree(FILE output, node treeRoot);

	/** \brief Read a tree used for decoding from a file.
	\param output The file to read the tree from.
	\param nodeBuf A buffer of at least 511 nodes to write the tree to.
	\param treeRoot A pointer to a pointer to assign to the root of the tree.
	\return This returns an exit status for the program - 0 on success. */
	char ReadTree(FILE input, node nodeBuf, node **treeRoot);

	/** \brief Compress a file and write the compressed file to the output file.
	\param input The file to compress.
	\param output The file to write the compressed data to.
	\return This returns an exit code for the application. */
	char CompressFile(FILE input, FILE output);

	/** \brief Decompress a given file to another file.
	\param input The file to decompress.
	\param output The file to put the decompressed data into.
	\return This returns an exit code for the application. */
	char DecompressFile(FILE input, FILE output);

	/** \brief Read all the frequencies of all the characters from the file from the
	current point to the end.
	\param file The file to read all the letter frequencies from.
	\param counts A pointer to a buffer of at least 256 file size indicators, which
	will be used to indicate the number of each byte. */
	file_size ReadFrequencies(FILE file, node counts);

	/** \brief Initialize all the nodes in a buffer to the correct states.
	\param allNodes A pointer to the buffer of at least 511 nodes. */
	void InitNodes(node *allNodes);

	/** \brief This is a debug utility that prints out a tree to the standard output
	in dot format, using the graph name test.
	\param rootNode The root node of the tree to print. */
	void PrintTree(node *rootNode);

	/** \brief A debug utility that prints the translate buffers to standard output.
	\param buffs A point to a buffer of at least 256 translate_buffers. */
	void PrintTranslateBuffers(translate_buffer *buffers);

	/** \brief Build the translate buffer for going directly from characters to
	huffman equivalents.
	\param rootNode The root node of the tree to convert.
	\param buffers The translate buffers to fill with equivalents. There must be
	size for at least 256 translate_buffers in the buffer. */
	void BuildTranslateBuffers(node rootNode, translate_buffer buffers);

	// Constants -------------------------------------------------------------------
	/// \brief An array to tell getopt_long how to handle long program arguments
	static const struct option progOptsLong[] = {
	{ "compress", no_argument, NULL, 'c' },
	{ "decompress", no_argument, NULL, 'd' },
	{ "output", required_argument, NULL, 'o' },
	{ "help", no_argument, NULL, 'h' },
	{ NULL, no_argument, NULL, 0 }
	};

	/// \brief A string to tell getopt_long how to handle short command line input
	static const char *progOptsStr = "cdo:h";

	/// \brief A simple version number to allow for changes to the format later
	static const uint8_t versionByte = 1;

	// Implementations -------------------------------------------------------------
	/** \brief The entry point of the application.
	\param argc The number of arguments.
	\param argv The arguments to the application. */
	int main(int argc, char * argv[]){
	int option, programStatus = EXIT_SUCCESS;
	char compress = 0, decompress = 0;
	FILE output = stdout, input = NULL;

	// Parse through option arguments
	while((option = getopt_long(argc, argv, progOptsStr, progOptsLong, NULL)) != -1){
	switch(option){
	case 'c':
	compress = 1;
	break;
	case 'd':
	decompress = 1;
	break;
	case 'o':
	output = fopen(optarg, "wb");
	if(!output){
	perror("Could not open output file");
	programStatus = EXIT_FAILURE;
	goto cleanup;
	}
	break;
	case '?':
	fprintf(stderr, "Ambiguous option given!\n");
	return EXIT_FAILURE;
	case 'h':
	PrintHelp(argv[0]);
	return EXIT_SUCCESS;
	case ':':
	fprintf(stderr, "Missing option argument!\n");
	return EXIT_FAILURE;
	default:
	fprintf(stderr, "Please pardon my insanity...");
	return EXIT_FAILURE;
	}
	}

	// Get action
	if(compress * decompress == 1 \|\| compress + decompress == 0){
	fprintf(stderr, "Need either compress flag or decompress flag!\n");
	programStatus = EXIT_FAILURE;
	goto cleanup;
	}

	// Get input file
	argc -= optind;
	argv += optind;
	if(argc == 1){
	input = fopen(argv[0], "rb");
	if(!input){
	perror("Could not open input file");
	programStatus = EXIT_FAILURE;
	goto cleanup;
	}
	}
	else if(argc > 1){
	fprintf(stderr, "This program can only accept one input file!\n");
	programStatus = EXIT_FAILURE;
	goto cleanup;
	}
	else{
	fprintf(stderr, "The input file is required!\n");
	programStatus = EXIT_FAILURE;
	goto cleanup;
	}

	// Do the actual compression or decompression
	if(compress){
	programStatus = CompressFile(input, output);
	}
	else{
	programStatus = DecompressFile(input, output);
	}

	cleanup:
	if(input && input != stdin){
	fclose(input);
	}
	if(output && output != stdout){
	fclose(output);
	}
	return programStatus;
	}

	void PrintHelp(const char *programName){
	printf("Usage:\n\
	%s [-c\|-d] [-o OUTPUT] INPUT\n\
	Options:\n\
	-c compress : Compress the input file to the output file. (Default)\n\
	-d decompress : Decompress the compressed input file to the output file.\n\
	-o output : Specify the output file.\n\
	-h help : Show this help message again.\n", programName);
	}

	#ifdef __BIG_ENDIAN__

	uint64_t ToFromNetwork64(uint64_t value){
	return value;
	}

	#elif defined(__LITTLE_ENDIAN__)

	uint64_t ToFromNetwork64(uint64_t value){
	uint64_t toRet;

	// Swap all bytes opposite each other
	for(uint8_t c = 0; c < 8; c++){
	((uint8_t )&toRet)[c] = ((uint8_t )&value)[7 - c];
	}

	return toRet;
	}

	#else
	#error Could not detect endianness of environment!
	#endif

	node OrganizeTree(node bufStart, file_size totalOcurrences){
	node rootNode = bufStart, lowest1, *lowest2;
	unsigned short combinations = 0;

	// Combine two lowest nodes
	while(rootNode->occurrences < totalOcurrences){
	lowest1 = lowest2 = NULL;
	for(unsigned short c = 0; c < 256 + combinations; c++){
	if(bufStart[c].occurrences > 0){

	// Make sure we assign lowest1
	if(!lowest1){
	lowest1 = &bufStart[c];
	}

	// Make sure we assign lowest2
	else if(!lowest2){
	lowest2 = &bufStart[c];
	}

	// Check to see if it is lower than either of our others
	else{
	if(bufStart[c].occurrences < lowest1->occurrences){
	if(lowest1->occurrences < lowest2->occurrences){
	lowest2 = lowest1;
	}
	lowest1 = &bufStart[c];
	}
	else if(bufStart[c].occurrences < lowest2->occurrences){
	if(lowest2->occurrences < lowest1->occurrences){
	lowest1 = lowest2;
	}
	lowest2 = &bufStart[c];
	}
	}
	}
	}

	// Make sure that both have values
	if(lowest2 == NULL \|\| lowest1 == NULL){
	if(lowest2 == NULL){
	lowest2 = &bufStart[(lowest1->byte + 1) % 256];
	}
	if(lowest1 == NULL){
	lowest1 = &bufStart[(lowest2->byte + 1) % 256];
	}
	}

	// Now combine two nodes
	rootNode = &bufStart[256 + combinations];
	rootNode->on = lowest1;
	rootNode->off = lowest2;
	rootNode->occurrences = lowest1->occurrences + lowest2->occurrences;
	lowest1->occurrences = lowest2->occurrences = 0;
	combinations++;
	}

	return rootNode;
	}

	char WriteTree(FILE output, node treeRoot){

	// Check if we do have a tree to write to output
	if(treeRoot){
	char status;
	uint8_t bytesToWrite[2];
	uint8_t numBytesToWrite;

	// Sort out what we have to write, accounting for the special case null
	if(treeRoot->byte < 0){
	bytesToWrite[0] = 0;
	numBytesToWrite = 1;
	}
	else if(treeRoot->byte == 0){
	bytesToWrite[0] = 1;
	bytesToWrite[1] = 1;
	numBytesToWrite = 2;
	}
	else{
	bytesToWrite[0] = treeRoot->byte;
	numBytesToWrite = 1;
	}

	// Write current node value
	if(fwrite(bytesToWrite, numBytesToWrite, 1, output) != 1){
	fprintf(stderr, "Could not write tree to output!\n");
	return EXIT_FAILURE;
	}

	// Write left and right trees
	status = WriteTree(output, treeRoot->on);
	if(status != EXIT_SUCCESS){
	return status;
	}
	return WriteTree(output, treeRoot->off);
	}
	else{
	return EXIT_SUCCESS;
	}
	}

	static char _ReadSubtree(FILE input, node nodeBuf, node **assignTo,
	unsigned char *internalsUsed){
	char status;
	uint8_t read;
	int charRead;

	// Read in current character value
	if(fread(&read, 1, 1, input) != 1){
	goto failure;
	}
	charRead = (read == 0) ? -1 : read;

	// Check if this is the special case null byte
	if(read == 1){
	if(fread(&read, 1, 1, input) != 1){
	goto failure;
	}
	if(read == 1){
	charRead = 0;
	}
	else{
	if(fseek(input, -1L, SEEK_CUR) != 0){
	goto failure;
	}
	charRead = 1;
	}
	}

	// Assign assignTo to the correct node in the tree
	if(charRead == -1){
	assignTo = &nodeBuf[256 + (internalsUsed)++];
	status = _ReadSubtree(input, nodeBuf, &(*assignTo)->on, internalsUsed);
	if(status != EXIT_SUCCESS){
	return status;
	}
	return _ReadSubtree(input, nodeBuf, &(*assignTo)->off, internalsUsed);
	}
	else{
	*assignTo = &nodeBuf[charRead];
	return EXIT_SUCCESS;
	}

	failure:
	fprintf(stderr, "Could not read tree from input!\n");
	return EXIT_FAILURE;
	}

	char ReadTree(FILE input, node nodeBuf, node **treeRoot){
	unsigned char internalsUsed = 0;
	return _ReadSubtree(input, nodeBuf, treeRoot, &internalsUsed);
	}

	char CompressFile(FILE inFile, FILE outFile){
	file_size fileSize, netFileSize, bytesRead;
	translate_buffer transBuf[256];
	node allNodes[511], *rootNode;
	char status, codeByteInd, codeByteBitsLeft;
	uint8_t toConv, outByte, outByteLeft, chunkSize, codeByte;

	// Initialize all nodes to have the right values
	InitNodes(allNodes);

	// Get all letter counts and rewind
	fileSize = ReadFrequencies(inFile, allNodes);
	if(fseek(inFile, 0L, SEEK_SET) != 0){
	perror("Could not rewind input file");
	return EXIT_FAILURE;
	};

	// Write out version byte
	if(fwrite(&versionByte, 1, 1, outFile) != 1){
	fprintf(stderr, "Could version byte to output!\n");
	return EXIT_FAILURE;
	}

	// Write out file size
	netFileSize = ToFromNetwork64(fileSize);
	if(fwrite(&netFileSize, sizeof(file_size), 1, outFile) != 1){
	fprintf(stderr, "Could write file size to output!\n");
	return EXIT_FAILURE;
	}

	// Create and write out encoding tree
	rootNode = OrganizeTree(allNodes, fileSize);
	status = WriteTree(outFile, rootNode);
	if(status != EXIT_SUCCESS){
	return status;
	}

	// Build translate buffer objects
	BuildTranslateBuffers(rootNode, transBuf);

	// Use translate buffers to translate
	bytesRead = 0;
	outByteLeft = 8;
	while(fread(&toConv, 1, 1, inFile)){

	// For every byte in the code
	codeByteInd = 0;
	while((codeByteBitsLeft = transBuf[toConv].length - codeByteInd * 8) > 0){

	// Make sure we only write max 8 bits
	codeByteBitsLeft = codeByteBitsLeft > 8 ? 8 : codeByteBitsLeft;

	// For every bit in the current byte to write
	codeByte = transBuf[toConv].code[codeByteInd];
	while(codeByteBitsLeft > 0){

	// Select the maximum amount we can write
	chunkSize = outByteLeft < codeByteBitsLeft ? outByteLeft : codeByteBitsLeft;
	outByte = (outByte & (255 >> outByteLeft)) \| (codeByte << (8 - outByteLeft));
	codeByte = codeByte >> chunkSize;
	codeByteBitsLeft -= chunkSize;
	outByteLeft -= chunkSize;

	// Check if we need to flush output
	if(outByteLeft == 0){
	if(fwrite(&outByte, 1, 1, outFile) != 1){
	goto outFailure;
	}
	outByteLeft = 8;
	}
	}
	codeByteInd++;
	}
	bytesRead++;
	}

	// Write that last chunk out
	if(outByteLeft < 8){
	if(fwrite(&outByte, 1, 1, outFile) != 1){
	goto outFailure;
	}
	}

	// Do a sanity check
	if(bytesRead != fileSize){
	fprintf(stderr, "Input file size error!\n");
	}

	return EXIT_SUCCESS;

	outFailure:
	fprintf(stderr, "Could not write to output file!\n");
	return EXIT_FAILURE;
	}

	char DecompressFile(FILE inFile, FILE outFile){
	node allNodes[511], rootNode, curNode;
	file_size fileSize, bytesWritten;
	char status, curBit;
	uint8_t curByte, toWrite, tmpVersionByte;

	// Initialize all nodes to have the right values
	InitNodes(allNodes);

	// Read in check byte and version byte and check them
	if(fread(&tmpVersionByte, 1, 1, inFile) != 1){
	fprintf(stderr, "Could not read version byte!\n");
	return EXIT_FAILURE;
	}
	if(tmpVersionByte != versionByte){
	fprintf(stderr, "Cannot handle version of derhuff file!\n");
	return EXIT_FAILURE;
	}

	// Read in number of bytes in file
	if(fread(&fileSize, sizeof(fileSize), 1, inFile) != 1){
	fprintf(stderr, "Could not read file size from input!\n");
	return EXIT_FAILURE;
	}
	fileSize = ToFromNetwork64(fileSize);

	// Read in the tree used to encode the file
	status = ReadTree(inFile, allNodes, &rootNode);
	if(status != EXIT_SUCCESS){
	return status;
	}

	// Convert file using tree
	curNode = rootNode;
	curBit = 8;
	bytesWritten = 0;
	while(bytesWritten < fileSize){

	// Try to refill mini-buffer
	if(curBit == 8){
	if(fread(&curByte, 1, 1, inFile) != 1){
	fprintf(stderr, "Input file not of expected size!\n");
	return EXIT_FAILURE;
	}
	curBit = 0;
	}

	// Take next decision
	if(curByte & (1 << curBit)){
	curNode = curNode->on;
	}
	else{
	curNode = curNode->off;
	}

	// Check for an invalid current node
	if(!curNode){
	fprintf(stderr, "Corrupt input file!\n");
	return EXIT_FAILURE;
	}

	// See if we reached an end byte
	if(curNode->byte >= 0){
	toWrite = curNode->byte;
	if(fwrite(&toWrite, 1, 1, outFile) != 1){
	fprintf(stderr, "Could not write to output file!\n");
	return EXIT_FAILURE;
	}
	bytesWritten++;
	curNode = rootNode;
	}

	curBit++;
	}

	return EXIT_SUCCESS;
	}

	file_size ReadFrequencies(FILE file, node counts){
	file_size fileSize = 0;
	uint8_t tmpByte;

	// Scan through input file counting occurrences and file size
	while(1){
	if(fread(&tmpByte, 1, 1, file) != 1){
	break;
	}
	fileSize++;
	counts[tmpByte].occurrences++;
	}

	return fileSize;
	}

	void InitNodes(node *allNodes){

	// Clear everything except the ones representing characters
	memset(allNodes, 0, sizeof(node) * 511);
	for(unsigned short c = 0; c < 256; c++){
	allNodes[c].byte = c;
	}
	for(unsigned short c = 256; c < 511; c++){
	allNodes[c].byte = 255 - c;
	}
	}

	static void _PrintSubtree(node *rootNode){
	if(rootNode->on){
	_PrintSubtree(rootNode->on);
	printf("\t%d -- %d;\n", rootNode->byte ,rootNode->on->byte);
	}
	if(rootNode->off){
	_PrintSubtree(rootNode->off);
	printf("\t%d -- %d;\n", rootNode->byte ,rootNode->off->byte);
	}
	}

	void PrintTree(node *rootNode){
	printf("graph test{\n");
	_PrintSubtree(rootNode);
	printf("}\n");
	}

	static void _PrintCode(translate_buffer *buffer){
	for(unsigned short c = 0; c < 256 && c < buffer->length; c++){
	printf("%c", (buffer->code[c / 8] & (1 << (c % 8))) ? '1' : '0');
	}
	}

	void PrintTranslateBuffers(translate_buffer *buffers){
	for(unsigned short c = 0; c < 256; c++){
	printf("%c: ", c);
	_PrintCode(&buffers[c]);
	printf("\n");
	}
	}

	static void _BuildPlaceEntry(node curNode, translate_buffer modify,
	translate_buffer *insertInto){
	uint8_t charIndex;
	uint8_t bitIndex;

	// Check for internal node or leaf
	if(curNode->byte < 0){

	charIndex = modify->length / 8;
	bitIndex = modify->length % 8;
	modify->length++;

	// Encode a 1 to the position
	if(curNode->on){
	modify->code[charIndex] \|= (1 << bitIndex);
	_BuildPlaceEntry(curNode->on, modify, insertInto);
	}

	// Encode a 0 to the position
	if(curNode->off){
	modify->code[charIndex] &= ~(1 << bitIndex);
	_BuildPlaceEntry(curNode->off, modify, insertInto);
	}

	modify->length--;
	}
	else{
	memcpy(&insertInto[curNode->byte], modify, sizeof(translate_buffer));
	}
	}

	void BuildTranslateBuffers(node rootNode, translate_buffer buffers){
	translate_buffer modify;

	memset(&modify, 0, sizeof(translate_buffer));
	_BuildPlaceEntry(rootNode, &modify, buffers);
	}