amintos · July 6, 2013 20:33
diff --git a/compressor.java b/compressor.java
 /** Fast and simple text compression.
 * 
 * This is a modified variant of LZ77:
 * The algorithm inserts "marker bytes" whose bits state which
 * of the following 8 symbols are actual symbols (0) or backward
 * references (1). Backward references are actually two bytes, the
 * first 6 bits encoding the length and the next 10 bits encoding
 * the negative offset where the repetition occurred.
 * The idea is related to LZO and LZJB compression.
 * 
 * To detect backward references, the position of each trigram is
 * stored to a limited-size hash table, eventually being
 * overwritten by hash collisions.
 * 
 * 
 * 
 * */
 public class Compressor {
  
 	private static final int TABLE_SIZE = 256;
 	private static final int MATCH_BITS = 6;
 	private static final int MATCH_MIN = 3;
 	private static final int MATCH_MAX = 
 			((1 << MATCH_BITS) + (MATCH_MIN - 1));
 	private static final int OFFSET_MASK = 
 			((1 << (16 - MATCH_BITS)) - 1); 
 	
 		
 	
 	
 	public static String compress(String text) {
 		int[] table = new int[TABLE_SIZE];
 		char[] input = text.toCharArray();
 		StringBuffer output = new StringBuffer();
 		
 		int len = input.length;
 		int src = 0;
 		int dst = 0;
 		int cpy = 0;
 		int copymask = 1 << 7;	// next indicator bit to be set
 		int copymap = 0;		// last indicator byte
 		
 		for (int i = 0; i < TABLE_SIZE; ++i) {
 			table[i] = 0xdeadbeef;
 		}
 		
 		while (src < len) {
 			copymask <<= 1;
 			
 			if (copymask == 1 << 8) {
 				// need next indicator byte.
 				copymask = 1;
 				copymap  = dst;
 				output.append((char)0);
 				dst++;
 			}
 			
 			if (src > len - MATCH_MAX) {
 				// better not match past this point
 				output.append(input[src++]);
 				dst++;
 				continue;
 			}
 			
 			// simple hash function
 			int hash = (((int)(input[src]) + 13) ^
 					    ((int)(input[src + 1]) - 13) ^
 					    ((int)(input[src + 2]))) % TABLE_SIZE;
 			
 			// hash-table lookup
 			int offset = (src - table[hash]) & OFFSET_MASK;
 			
 			// store current trigram position to hash table
 			table[hash] = src;
 			cpy = src - offset;
 			
 			// check whether real match found or overwritten table entry
 			if (cpy >= 0 && cpy != src &&
 					input[src] == input[cpy] &&
 					input[src + 1] == input[cpy + 1] &&
 					input[src + 2] == input[cpy + 2]) {
 				
 				// toggle indicator bit in indicator byte.
 				output.setCharAt(copymap, (char) (output.charAt(copymap) | copymask));
 				
 				// do forward matching from there
 				int mlen;
 				for (mlen = MATCH_MIN; mlen < MATCH_MAX; ++mlen) {
 					if (input[src + mlen] != input[cpy + mlen]) break;
 				}
 				
 				// output 2 bytes, 6 bit length + 10 bit offset
 				output.append( (char)(
 						    ((mlen - MATCH_MIN ) << (8 - MATCH_BITS)) |
 						    (offset >> 8) ));
 				output.append( (char)(offset & 0xff) );
 				src += mlen;
 				dst += 2;
 				
 			// no match, just emit uncompressed symbol
 			} else {
 				output.append(input[src++]);
 				dst++;
 			}
 			
 		}
 		return output.toString();
 	}
 	
 	public static String decompress(String source) {
 		char[] input = source.toCharArray();
 		StringBuffer output = new StringBuffer();
 		
 		int src = 0;
 		int dst = 0;
 		int cpy = 0;
 		int copymask = 1 << 7;
 		int copymap = 0;
 		int len = input.length;
 		
 		while (src < len) {
 			copymask <<= 1;
 			
 			if (copymask == 1 << 8) {
 				// indicator byte. just read it.
 				copymask = 1;
 				copymap = input[src++];
 			}
 			
 			// indicator byte indicates a backward reference here
 			if ((copymap & copymask) > 0) {
 				
 				// read and decode length and offset of reference
 				int mlen = (input[src] >> (8 - MATCH_BITS)) + MATCH_MIN;
 				int offset = ((input[src] << 8) | input[src + 1]) & OFFSET_MASK;
 				src += 2;
 				cpy = dst - offset;
 				
 				// copy all bytes from reference
 				if (cpy >= 0) {
 					while (--mlen >= 0) {
 						output.append(output.charAt(cpy));
 						cpy++;
 						dst++;
 					}
 				} else {
 					// stream broken. just emit what we have and cancel.
 					return output.toString();
 					//throw new RuntimeException("Compressed data corrupted");
 				}
 			
 			// no match, just emit uncompressed symbol
 			} else {
 				output.append(input[src++]);
 				dst++;
 			}
 		}
 		
 		return output.toString();
 	}
 }
	/** Fast and simple text compression.
	*
	* This is a modified variant of LZ77:
	* The algorithm inserts "marker bytes" whose bits state which
	* of the following 8 symbols are actual symbols (0) or backward
	* references (1). Backward references are actually two bytes, the
	* first 6 bits encoding the length and the next 10 bits encoding
	* the negative offset where the repetition occurred.
	* The idea is related to LZO and LZJB compression.
	*
	* To detect backward references, the position of each trigram is
	* stored to a limited-size hash table, eventually being
	* overwritten by hash collisions.
	*
	*
	*
	* */
	public class Compressor {

	private static final int TABLE_SIZE = 256;
	private static final int MATCH_BITS = 6;
	private static final int MATCH_MIN = 3;
	private static final int MATCH_MAX =
	((1 << MATCH_BITS) + (MATCH_MIN - 1));
	private static final int OFFSET_MASK =
	((1 << (16 - MATCH_BITS)) - 1);




	public static String compress(String text) {
	int[] table = new int[TABLE_SIZE];
	char[] input = text.toCharArray();
	StringBuffer output = new StringBuffer();

	int len = input.length;
	int src = 0;
	int dst = 0;
	int cpy = 0;
	int copymask = 1 << 7; // next indicator bit to be set
	int copymap = 0; // last indicator byte

	for (int i = 0; i < TABLE_SIZE; ++i) {
	table[i] = 0xdeadbeef;
	}

	while (src < len) {
	copymask <<= 1;

	if (copymask == 1 << 8) {
	// need next indicator byte.
	copymask = 1;
	copymap = dst;
	output.append((char)0);
	dst++;
	}

	if (src > len - MATCH_MAX) {
	// better not match past this point
	output.append(input[src++]);
	dst++;
	continue;
	}

	// simple hash function
	int hash = (((int)(input[src]) + 13) ^
	((int)(input[src + 1]) - 13) ^
	((int)(input[src + 2]))) % TABLE_SIZE;

	// hash-table lookup
	int offset = (src - table[hash]) & OFFSET_MASK;

	// store current trigram position to hash table
	table[hash] = src;
	cpy = src - offset;

	// check whether real match found or overwritten table entry
	if (cpy >= 0 && cpy != src &&
	input[src] == input[cpy] &&
	input[src + 1] == input[cpy + 1] &&
	input[src + 2] == input[cpy + 2]) {

	// toggle indicator bit in indicator byte.
	output.setCharAt(copymap, (char) (output.charAt(copymap) \| copymask));

	// do forward matching from there
	int mlen;
	for (mlen = MATCH_MIN; mlen < MATCH_MAX; ++mlen) {
	if (input[src + mlen] != input[cpy + mlen]) break;
	}

	// output 2 bytes, 6 bit length + 10 bit offset
	output.append( (char)(
	((mlen - MATCH_MIN ) << (8 - MATCH_BITS)) \|
	(offset >> 8) ));
	output.append( (char)(offset & 0xff) );
	src += mlen;
	dst += 2;

	// no match, just emit uncompressed symbol
	} else {
	output.append(input[src++]);
	dst++;
	}

	}
	return output.toString();
	}

	public static String decompress(String source) {
	char[] input = source.toCharArray();
	StringBuffer output = new StringBuffer();

	int src = 0;
	int dst = 0;
	int cpy = 0;
	int copymask = 1 << 7;
	int copymap = 0;
	int len = input.length;

	while (src < len) {
	copymask <<= 1;

	if (copymask == 1 << 8) {
	// indicator byte. just read it.
	copymask = 1;
	copymap = input[src++];
	}

	// indicator byte indicates a backward reference here
	if ((copymap & copymask) > 0) {

	// read and decode length and offset of reference
	int mlen = (input[src] >> (8 - MATCH_BITS)) + MATCH_MIN;
	int offset = ((input[src] << 8) \| input[src + 1]) & OFFSET_MASK;
	src += 2;
	cpy = dst - offset;

	// copy all bytes from reference
	if (cpy >= 0) {
	while (--mlen >= 0) {
	output.append(output.charAt(cpy));
	cpy++;
	dst++;
	}
	} else {
	// stream broken. just emit what we have and cancel.
	return output.toString();
	//throw new RuntimeException("Compressed data corrupted");
	}

	// no match, just emit uncompressed symbol
	} else {
	output.append(input[src++]);
	dst++;
	}
	}

	return output.toString();
	}
	}