Created
February 13, 2012 02:40
-
-
Save tig/1812875 to your computer and use it in GitHub Desktop.
COMPRESS - An LZW file compressor written in 1987
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
compress.c (c) Charles E. Kindel, Jr., 1987 | |
COMPRESS - An ASCII file compress program | |
Author: Charles E. Kindel, Jr. (tigger) | |
Started: June 14, 1987 | |
Version: 2.0 | |
SYNOPSIS | |
compress [-v] [-a] | |
uncompress [-v] [-a] | |
-v Write compress statistics to standard error. | |
-a Codes are written (read) in ASCII hex format. | |
DESCRIPTION | |
Compress reduces the size of the standard input file using adaptive | |
Lempel-Ziv coding. Standard input and output are used exclusively by | |
the program. Compressed files can be restored to their original form | |
using uncompress. | |
Compress uses the modified Lempel-Ziv algorithm popularized in "A | |
Technique for High Performance Data Compression", Terry A. Welch, IEEE | |
Computer, vol. 17, no. 6 (June 1984), pp. 8-19. Common substrings in | |
the file are first replaced by 12-bit codes 128 to 4094. | |
After the code limit is attained, compress automatically discards the | |
current table and starts from scratch. The amount of compression | |
obtained depends on the size of the input, the number of bits per code, | |
and the distribution of common substrings. Typically, text such as | |
source code or English is reduced by 50-60%. Compression is generally | |
much better than that achieved by Huffman coding (as used in pack), | |
or adaptive Huffman coding (compact), and takes less time to compute. | |
A message is generated to standard error if the size of the original | |
file is not reduced. Exit status is normally 0; if the last file is | |
larger after (attempted) compression, the status is 2; if an error | |
occurs, exit status is 1. | |
Code output is normally packed one code to every 1.5 bytes. With the | |
-a option, codes are represented by three HEX bytes (ASCII) separated | |
by spaces. | |
EXAMPLES | |
compress -av <infile.txt >outfile.lpz | |
Compresses infile.txt to outifile.lpz using ASCII code format. | |
Prints out a listing of diagnostics. | |
uncompress <infile.lpz >outfile.txt | |
Uncompresses infile.lpz to outfile.txt using normal code format. | |
MESSAGES | |
On an invalid command line the above synopsis is printed to stderr. | |
infile: not in compressed format (file directed through standard input | |
has not been compressed). | |
infile: not an ASCII file. (Character codes greater than 127 were | |
encountered in the input. File to be compressed already | |
compressed). | |
LIMITATIONS | |
Output is not always smaller than the input. Only one size code is | |
allowed (12 bits). Only ASCII files may be compressed. | |
FACILITY | |
Originally written on an IBM PC/XT using TurboC and Dos 3.1. Adapted | |
to Unix 4.2 bsd. | |
NOTES | |
To get this program to run on an IBM PC you must modify the main so | |
that you can fetch the options from the command line. Also errors will | |
go to standard output with TurboC, so an error file might be helpful. | |
REVISIONS | |
DATE VERSION NAME CHANGE | |
June 14, 1987 1.0 C.E. Kindel Original | |
June 15, 1987 1.1 C.E. Kindel Incorperated getopt | |
June 18, 1987 1.2 C.E. Kindel added find_or_insert, | |
compress, and clear_table. | |
June 22, 1987 1.3 C.E. Kindel Added uncompress. | |
June 23, 1987 1.4 C.E. Kindel Ported to UNIX. | |
June 23, 1987 1.5 C.E. Kindel Debugged -a option, made an | |
attempt to get compressed | |
codes to work. | |
June 23, 1987 2.0 C.E. Kindel Fine tuned code, got verbose | |
to work correctly. Time. | |
Final documentation. | |
*/ | |
/*IMPORTS */ | |
#include <stdio.h> /* printf, fprintf */ | |
#include <ctype.h> /* toupper */ | |
#include <time.h> /* time */ | |
/* LOCAL DECLARATIONS */ | |
#define void | |
static compress (); | |
static uncompress (); | |
static short int find_or_insert (); | |
static clear_table (); | |
static putcode (); | |
static short int getcode (); | |
static void verbose (); | |
/* LOCAL SYMBOLIC DEFINITIONS */ | |
/* flags */ | |
#define COMMAND 0 /* argv index for command */ | |
#define NORMAL 0 /* normal program exit */ | |
#define ERROR 1 /* error program exit */ | |
#define NOTCOMPRESSED 2 /* input bytes < output bytes */ | |
#define TRUE 1 /* true */ | |
#define FALSE 0 /* false */ | |
#define START 128 /* beginiing of the table */ | |
#define ASCIILIMIT 127 /* where ascii chars stop */ | |
#define NOT_FOUND 0xfff /* not found flag */ | |
#define END 0xfff /* end flag */ | |
#define CLEARED 0 /* table cleared flag */ | |
#define MAXTABLE 4095 /* max table entry */ | |
#define T_SIZE 4096 /* table size */ | |
#define CODE_SIZE 3 /* size of compact code in bytes */ | |
/* DATA DEFINITIONS */ | |
static long int elapsed; /* elapsed time */ | |
static short int aflag; /* flag for -a option (ASCII) */ | |
static short int status; /* return status for program | |
0 = normal, 1 = error, 2 = notcompressed */ | |
static int pair; /* has a pair been output yet */ | |
static short int prefix [T_SIZE]; /* prefix table */ | |
static char suffix [T_SIZE]; /* suffix table */ | |
static short int link [T_SIZE]; /* link table */ | |
static short int head [T_SIZE]; /* headers for link table */ | |
static short int current; /* current table entry */ | |
static char next_c; /* next input character */ | |
static short int c_prefix; /* current prefix */ | |
static float in_bytes; /* number of input bytes */ | |
static float out_bytes; /* number of output bytes */ | |
static char pcode [CODE_SIZE]; /* output codes */ | |
static short int c_table_count; /* #times table cleared */ | |
static short int largest_code; /* largest code output since reset */ | |
/* FUNCTIONS */ | |
/*--------------------------------------------------------------------------*/ | |
/* main program */ | |
/* */ | |
/* Gets arguments from command line, calls compress, or uncompress, and */ | |
/* verbose according to command line. */ | |
/*--------------------------------------------------------------------------*/ | |
int main (argc, argv) | |
int argc; | |
char **argv; | |
{ | |
static int comp; /* flag for compress or uncompress */ | |
static int vflag; /* flag for -v option (verbose) */ | |
static char c; /* option letter */ | |
elapsed = time ((long*) 0); /* get the start time */ | |
if (toupper (*argv[COMMAND]) == 'C') /* 1st char of command */ | |
comp = TRUE; /* to compress. */ | |
else | |
comp = FALSE; /* to uncompress */ | |
/* command line options */ | |
while ((c = getopt (argc, argv, "avu")) != EOF) | |
switch (c) | |
{ case 'a': aflag = TRUE; break; | |
case 'v': vflag = TRUE; break; | |
default: /* whoops! bad option. */ | |
fprintf (stderr, "Usage:\n compress [-v] [-a]\n"); | |
fprintf (stderr, " uncompress [-v] [-a]\n"); | |
fprintf (stderr, | |
" -v Write compress statistics to standard error.\n"); | |
fprintf (stderr, | |
" -a Codes are written (read) in ASCII hex format.\n"); | |
exit (ERROR); /* send a 1 to Unix */ | |
break; | |
} | |
if (comp) /* if were to compress */ | |
compress (); | |
else /* else were to uncompress */ | |
uncompress (); | |
if (vflag) /* if -v option then spit it out */ | |
verbose (); | |
return (status); /* 0 = norm, 1 = err, 2 = in < out */ | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* End of main program */ | |
/*==========================================================================*/ | |
/* compress */ | |
/* */ | |
/* Uses Lepel-Ziv compression to compress an ASCII file. Calls putcode */ | |
/* to output codes according to flags. */ | |
/* uses find_or_insert to search in the string table for an entry, if the */ | |
/* entry is not found it is inserted. find_or_insert also calls */ | |
/* clear table. */ | |
/*--------------------------------------------------------------------------*/ | |
static void compress () | |
{ static short int code; /* code to be outputted */ | |
clear_table (); /* clear the tables */ | |
c_table_count = 0; | |
largest_code = START; /* largest code since t. clear */ | |
in_bytes = 0; /* number of input bytes */ | |
out_bytes = 0; /* number of output bytes */ | |
pair = FALSE; /* output a pair of codes? */ | |
if ((next_c = getchar ()) != EOF) /* if not End Of input */ | |
{ in_bytes++; /* increment input bytes */ | |
current = START; /* point to first table entry (128) */ | |
c_prefix = next_c; | |
while ((next_c = getchar ()) != EOF) /* while not at EOF */ | |
{ if (next_c > ASCIILIMIT) /* if its not an ascii char */ | |
{ fprintf (stderr, "infile: not an ASCII file.\n"); | |
exit (ERROR); | |
} | |
in_bytes ++; /* increment input byte count */ | |
switch ((code = find_or_insert ())) /* find or insert entry */ | |
{ case NOT_FOUND: /* entry was inserted */ | |
putcode (c_prefix); | |
c_prefix = next_c; | |
break; | |
case CLEARED: /* table was cleared */ | |
c_table_count++; /* ct. table clears */ | |
current = START; | |
putcode (c_prefix); | |
putcode (CLEARED); | |
c_prefix = next_c; | |
break; | |
default: /* entry was found, keep looking */ | |
c_prefix = code; | |
break; | |
} | |
} | |
putcode (c_prefix); | |
putcode (END); | |
putcode (END); | |
} | |
return; | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* end of compress */ | |
/*==========================================================================*/ | |
/* find_or_insert */ | |
/* */ | |
/* Used by compress to find codes in the string table. If a entry is not */ | |
/* found it is inserted and NOT_FOUND is returned. If there is no more */ | |
/* room in table the table is cleared and CLEARED is returned. If the */ | |
/* entry is found then its location is returned. */ | |
/*--------------------------------------------------------------------------*/ | |
static short int find_or_insert () | |
{ | |
static int x; /* table index */ | |
x = head [c_prefix]; /* first entry with this prefix */ | |
while (x != END) | |
if (suffix [x] == next_c) /* if suffix at entry is same as c */ | |
return (x); /* it exists, so we keep looking */ | |
else | |
x = link [x]; /* else we look at next link */ | |
if (current <= MAXTABLE) /* if table isnt full */ | |
{ prefix [current] = c_prefix; /* install prefix */ | |
suffix [current] = next_c; /* and suffix */ | |
if (head [c_prefix] != END) /* another entry with this prefix? */ | |
{ x = head [c_prefix]; /* yes, look for it */ | |
while (link [x] != END) /* while not the last one */ | |
x = link [x]; /* look at the next */ | |
link [x] = current; /* point the last one to cur entry */ | |
} | |
else /* otherwise point the head to */ | |
head [c_prefix] = current; /* current entry */ | |
largest_code = ++current; /* increment current entry */ | |
return (NOT_FOUND); /* tell em we had to insert new one */ | |
} | |
else | |
{ clear_table (); /* if table is full clear it */ | |
return (CLEARED); | |
} | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* end of find_or_insert */ | |
/*==========================================================================*/ | |
/* uncompress */ | |
/* */ | |
/* Uncompresses a file using Lemel-Ziv coding. Calls getcode to output */ | |
/* codes. If it gets a 0x000 as a code it clears the table. It reads */ | |
/* until it gets an 0xFFF as a code. */ | |
/*--------------------------------------------------------------------------*/ | |
static void uncompress () | |
{ | |
static char string [T_SIZE]; | |
static short int code; | |
static short int next_entry; | |
static char *string_p; | |
pair = FALSE; | |
c_table_count = 0; /* #times table was cleared */ | |
in_bytes = 0; | |
out_bytes = 0; | |
clear_table (); | |
string [MAXTABLE] = '\0'; /* put a null at the end of string */ | |
next_entry = START; /* point to the beg. of the table */ | |
code = getcode (); | |
while (code != END) /* while code is not last one */ | |
{ prefix [next_entry] = code; /* add it to next entry */ | |
current = next_entry; /* point to this entry */ | |
string_p = &string [MAXTABLE]; /* init. string */ | |
while (prefix [current] != END) /* while there are more chars left */ | |
{ current = prefix [current]; /* in string.... */ | |
if ((current + 1) == next_entry) /* if special case */ | |
suffix [current] = suffix [current - 1]; | |
else if ((current + 1) > next_entry) | |
{ fprintf (stderr, | |
"infile: not in compressed format."); | |
exit (ERROR); | |
} | |
string_p--; /* add char. to string */ | |
*string_p = suffix [current]; | |
out_bytes++; | |
} | |
fputs (string_p, stdout); | |
if (next_entry > START) /* if not first code */ | |
suffix [next_entry - 1] = *string_p; | |
next_entry++; | |
code = getcode (); | |
if (code == CLEARED) | |
{ clear_table (); | |
c_table_count++; | |
next_entry = START; | |
code = getcode (); | |
} | |
} | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* end of uncompress */ | |
/*==========================================================================*/ | |
/* clear_table */ | |
/* */ | |
/* clears the string table to 0xFFF's where appropriate. */ | |
/*--------------------------------------------------------------------------*/ | |
static void clear_table () | |
{ | |
static short int i; /* counter */ | |
for (i = 0; i <= MAXTABLE; i++) /* for i = 0 to 4095 */ | |
{ if (i <= ASCIILIMIT) /* if < 127 then we can put an ASCII */ | |
suffix [i] = i; /* char in suffix */ | |
else | |
suffix [i] = END; /* otherwise suffix = fff */ | |
prefix [i] = link [i] = head [i] = END; | |
} | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* end of clear_table */ | |
/*==========================================================================*/ | |
/* putcode */ | |
/* */ | |
/* Outputs to standard out an output code. */ | |
/*--------------------------------------------------------------------------*/ | |
static void putcode (outcode) | |
short int outcode; | |
{ | |
if (aflag == TRUE) | |
{ printf ("%03x ", outcode); | |
out_bytes ++; | |
} | |
else | |
{ if (pair == TRUE) | |
{ pcode[2] |= outcode << 4; | |
pcode[3] = outcode >> 4; | |
printf ("%c%c%c", pcode[1],pcode[2],pcode[3]); | |
out_bytes += CODE_SIZE; | |
pair = FALSE; | |
} | |
else | |
{ pcode[1] = outcode; | |
pcode[2] = outcode >> 8; | |
pair = TRUE; | |
} | |
} | |
return; | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* end of putcode */ | |
/*==========================================================================*/ | |
/* getcode */ | |
/*--------------------------------------------------------------------------*/ | |
static short int getcode () | |
{ static short int incode; | |
if (aflag == TRUE) | |
{ fscanf (stdin, "%03x ", &incode); | |
in_bytes ++; | |
} | |
else | |
{ if (pair == TRUE) /* then were on the 2nd code */ | |
{ incode = ((pcode[2] >> 4) & 0x000F) | (pcode[3] << 4) & END; | |
pair = FALSE; | |
} | |
else /* we need to read in a packed code */ | |
{ fscanf (stdin, "%c%c%c", &pcode[1],&pcode[2],&pcode[3]); | |
incode = ((pcode[2] << 8) | (pcode[1] & 0x00FF)) & END; | |
in_bytes += CODE_SIZE; | |
pair = TRUE; | |
} | |
} | |
return (incode); | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* end of getcode */ | |
/*==========================================================================*/ | |
/* verbose */ | |
/*--------------------------------------------------------------------------*/ | |
static void verbose () | |
{ static float ratio; | |
ratio = ((out_bytes/in_bytes)*100.); | |
elapsed = time ((long*) 0) - elapsed; | |
fprintf (stderr, "\nCompression ratio = %6.2f%%\n", ratio); | |
fprintf (stderr, "Input bytes = %6.0f\n", in_bytes); | |
fprintf (stderr, "Output bytes = %6.0f\n", out_bytes); | |
fprintf (stderr, "Cleared string table %d times.\n", c_table_count); | |
fprintf (stderr, "Elapsed time = %d seconds.\n", elapsed); | |
fprintf (stderr, "\n\n"); | |
} | |
/*--------------------------------------------------------------------------*/ | |
/* end of verbose */ | |
/*--------------------------------------------------------------------------*/ | |
/*==========================================================================*/ | |
/* END OF COMPRESS / UNCOMPRESS (C) KindelCo Software Systems, 1987 */ | |
/*==========================================================================*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment