Last active
June 4, 2017 15:31
-
-
Save vivkin/e69995fbce7081436be6 to your computer and use it in GitHub Desktop.
Small LZ77 compressor. Uses variable length integers from xz for coding length\offset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define LZ_IMPLEMENTATION | |
#include "lz.h" | |
#include <stdio.h> | |
int main(int argc, char **argv) { | |
uint8_t src[1 << 16]; | |
uint8_t dst[1 << 16]; | |
size_t size = fread(src, 1, sizeof(src), stdin); | |
if (argc > 1 && argv[1][0] == '-'&& argv[1][1] == 'd') | |
size = lz_decompress(dst, src, size); | |
else | |
size = lz_compress(dst, src, size); | |
fwrite(dst, 1, size, stdout); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef LZ_H | |
#define LZ_H | |
#include <stddef.h> | |
#include <stdint.h> | |
#if defined(LZ_STATIC) | |
#define LZDEF static | |
#elif defined(__cplusplus) | |
#define LZDEF extern "C" | |
#else | |
#define LZDEF extern | |
#endif | |
LZDEF size_t lz_compress(void *dst, const void *src, size_t n); | |
LZDEF size_t lz_decompress(void *dst, const void *src, size_t n); | |
#endif // LZ_H | |
#ifdef LZ_IMPLEMENTATION | |
enum { LZ_HASH_BITS = 14 }; | |
static inline uint32_t lz_read32(const void *src) { | |
return *(const uint32_t *)src; | |
} | |
static inline void lz_write32(void *dst, uint32_t value) { | |
*(uint32_t *)dst = value; | |
} | |
static inline size_t lz_pack32(void *dst, uint32_t value) { | |
uint8_t *d = (uint8_t *)dst; | |
while (value >= 0x80) { | |
*d++ = value | 0x80; | |
value >>= 7; | |
} | |
*d++ = value; | |
return d - (uint8_t *)dst; | |
} | |
static inline size_t lz_unpack32(const void *src, uint32_t *value) { | |
const uint8_t *s = (const uint8_t *)src; | |
*value = *s & 0x7F; | |
while (*s++ & 0x80) | |
*value |= (uint32_t)(*s & 0x7F) << ((s - (const uint8_t *)src) * 7); | |
return s - (const uint8_t *)src; | |
} | |
static inline void *lz_copy(void *dst, const void *src, size_t n) { | |
uint8_t *d = (uint8_t *)dst; | |
const uint8_t *s = (const uint8_t *)src; | |
for (; n >= 4; n -= 4, d += 4, s += 4) | |
lz_write32(d, lz_read32(s)); | |
while (n--) | |
*d++ = *s++; | |
return d; | |
} | |
static inline uint32_t lz_hash(uint32_t x) { | |
return (x * 2654435761u) >> (32 - LZ_HASH_BITS); | |
} | |
size_t lz_compress(void *dst, const void *src, size_t n) { | |
uint8_t *d = (uint8_t *)dst; | |
const uint8_t *s = (const uint8_t *)src; | |
const uint8_t *s_end = s + n; | |
const uint8_t *anchor = s; | |
uint32_t hash_table[1 << LZ_HASH_BITS] = {0}; | |
uint32_t next_h = lz_hash(lz_read32(++s)); | |
size_t length; | |
while (1) { | |
const uint8_t *ref; | |
const uint8_t *next_ip = s; | |
size_t step = 1 << 5; | |
do { | |
uint32_t h = next_h; | |
s = next_ip; | |
next_ip = s + (++step >> 5); | |
if (next_ip >= s_end - 4) { | |
goto remainder; | |
} | |
next_h = lz_hash(lz_read32(next_ip)); | |
ref = (const uint8_t *)src + hash_table[h]; | |
hash_table[h] = s - (const uint8_t *)src; | |
} while (lz_read32(s) != lz_read32(ref)); | |
// write literals | |
length = s - anchor; | |
if (length) { | |
d += lz_pack32(d, length); | |
d += lz_pack32(d, 0); | |
d = (uint8_t *)lz_copy(d, anchor, length); | |
} | |
// find match length | |
anchor = s; | |
do { | |
s += 4; | |
ref += 4; | |
uint32_t x = lz_read32(s) ^ lz_read32(ref); | |
if (x) { | |
uint32_t n = __builtin_ctz(x) >> 3; | |
s += n; | |
ref += n; | |
break; | |
} | |
} while (s < s_end - 4); | |
// fill table | |
hash_table[lz_hash(lz_read32(s - 3))] = s - (const uint8_t *)src - 3; | |
hash_table[lz_hash(lz_read32(s - 2))] = s - (const uint8_t *)src - 2; | |
hash_table[lz_hash(lz_read32(s - 1))] = s - (const uint8_t *)src - 1; | |
d += lz_pack32(d, s - anchor); | |
d += lz_pack32(d, s - ref); | |
anchor = s; | |
next_h = lz_hash(lz_read32(s)); | |
} | |
remainder: | |
length = s_end - anchor; | |
d += lz_pack32(d, length); | |
d += lz_pack32(d, 0); | |
d = (uint8_t *)lz_copy(d, anchor, length); | |
return d - (uint8_t *)dst; | |
} | |
size_t lz_decompress(void *dst, const void *src, size_t n) { | |
uint8_t *d = (uint8_t *)dst; | |
const uint8_t *s = (const uint8_t *)src; | |
const uint8_t *s_end = s + n; | |
uint32_t length; | |
uint32_t offset; | |
while (s != s_end) { | |
s += lz_unpack32(s, &length); | |
s += lz_unpack32(s, &offset); | |
if (offset) { | |
const uint8_t *ref = d - offset; | |
size_t dist = d - ref; | |
for (; dist < 4; dist = d - ref) { | |
lz_write32(d, lz_read32(ref)); | |
d += dist; | |
length -= dist; | |
} | |
d = (uint8_t *)lz_copy(d, ref, length); | |
} else { | |
d = (uint8_t *)lz_copy(d, s, length); | |
s += length; | |
} | |
} | |
return d - (uint8_t *)dst; | |
} | |
#endif // LZ_IMPLEMENTATION |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CC ?= cc | |
CFLAGS ?= -Wall -Wextra -std=c11 -O3 | |
BINDIR = bin | |
OUTNAME = lz | |
SOURCES = lz.c | |
OBJECTS = $(SOURCES:%.c=$(BINDIR)/%.o) | |
export V := false | |
export CMD_PREFIX := @ | |
ifeq ($(V),true) | |
CMD_PREFIX := | |
endif | |
all: dirs $(BINDIR)/$(OUTNAME) | |
@echo "Build finished" | |
dirs: | |
@echo "Creating directories" | |
@mkdir -p $(BINDIR) $(dir $(OBJECTS)) | |
clean: | |
@echo "Deleting binaries" | |
$(CMD_PREFIX)rm -f $(OBJECTS) $(BINDIR)/$(OUTNAME) | |
test: $(BINDIR)/$(OUTNAME) | |
@echo "Testing" | |
cat $(SOURCES) | md5 | |
cat $(SOURCES) | $(BINDIR)/$(OUTNAME) | $(BINDIR)/$(OUTNAME) -d | md5 | |
$(BINDIR)/%.o: %.c | |
@echo "Compiling: $< -> $@" | |
$(CMD_PREFIX)$(CC) $(CFLAGS) -c $< -o $@ | |
$(BINDIR)/$(OUTNAME): $(OBJECTS) | |
@echo "Linking: $@" | |
$(CMD_PREFIX)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) | |
.PHONY: all dirs clean test |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment