Skip to content

Instantly share code, notes, and snippets.

@giuseppe998e
Created August 9, 2021 13:18
Show Gist options
  • Save giuseppe998e/5d5f500c34f09d88813a20b6fdb9a21a to your computer and use it in GitHub Desktop.
Save giuseppe998e/5d5f500c34f09d88813a20b6fdb9a21a to your computer and use it in GitHub Desktop.
A simple compression test of a book in TXT UTF-8 format
#!/bin/python3
import lzma
import gzip
import deflate #pip install deflate
import brotli #pip install brotlipy
# From https://www.gutenberg.org/files/1342/1342-0.txt
BOOK_NAME = "pride_and_prejudice-jane_austen.txt"
def compress_brotli(txt_bytes: bytes) -> None:
    """Compress *txt_bytes* with Brotli and print the resulting size.

    Uses the encoder's TEXT mode and quality 11, which the original
    author notes is the maximum compression rate.

    Args:
        txt_bytes: Raw bytes of the text to compress.
    """
    compressed = brotli.compress(
        txt_bytes,
        mode=brotli.BrotliEncoderMode.TEXT,
        quality=11,  # 11 is maximum compression rate
    )
    print_datasize("Brotli", compressed)
def compress_lzma(txt_bytes: bytes) -> None:
    """Compress *txt_bytes* with LZMA and print the resulting size.

    The data is written in the ``FORMAT_ALONE`` container with preset 9,
    which the original author notes is the maximum compression rate.

    Args:
        txt_bytes: Raw bytes of the text to compress.
    """
    compressed = lzma.compress(
        txt_bytes,
        format=lzma.FORMAT_ALONE,
        preset=9,  # 9 is maximum compression rate
    )
    print_datasize("LZMA", compressed)
def compress_deflategzip(txt_bytes: bytes) -> None:
    """Compress *txt_bytes* via ``deflate.gzip_compress`` and print the size.

    Level 12 is passed as the second argument; the original author notes
    it is the maximum compression rate for this library.

    Args:
        txt_bytes: Raw bytes of the text to compress.
    """
    # Kept positional: the keyword name of the level argument is not
    # visible from this file.
    compressed = deflate.gzip_compress(txt_bytes, 12)  # 12 is maximum compression rate
    print_datasize("Deflate+GZip", compressed)
def compress_gzip(txt_bytes: bytes) -> None:
    """Compress *txt_bytes* with the standard-library gzip and print the size.

    Args:
        txt_bytes: Raw bytes of the text to compress.
    """
    # 9 is gzip.compress's default level; spelled out so this benchmark
    # states its setting explicitly like the other compress_* functions.
    compressed = gzip.compress(txt_bytes, compresslevel=9)
    print_datasize("GZip", compressed)
def print_datasize(algo: str, bytes_: bytes) -> None:
    """Print the size of *bytes_* in kilobytes, labelled with *algo*.

    The size is ``len(bytes_) / 1000`` (decimal kilobytes), shown with
    three decimal places.

    Args:
        algo: Label describing how the data was produced.
        bytes_: The data whose length is reported.
    """
    size_kb = len(bytes_) / 1000  # 1 KB is taken as 1000 bytes here
    print(f"Size using {algo}: {size_kb:.3f}KB")
### MAIN ###
# Read the whole book into memory as raw bytes; every compressor below
# works on this same buffer.
with open(BOOK_NAME, "rb") as book:
    txt_bytes = book.read()

# Print uncompressed file size
print_datasize("NO COMPRESSION", txt_bytes)

# Test compression algorithms
# NOTE(review): the 1st/2nd/3rd marks look like the author's ranking of
# the results (smallest output first) — nothing here computes a ranking.
compress_brotli(txt_bytes)  # 1st
compress_lzma(txt_bytes)  # 2nd
compress_deflategzip(txt_bytes)  # 3rd
compress_gzip(txt_bytes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment