Created
August 9, 2021 13:18
-
-
Save giuseppe998e/5d5f500c34f09d88813a20b6fdb9a21a to your computer and use it in GitHub Desktop.
A simple compression test of a book in TXT UTF-8 format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python3 | |
import lzma | |
import gzip | |
import deflate #pip install deflate | |
import brotli #pip install brotlipy | |
# From https://www.gutenberg.org/files/1342/1342-0.txt | |
BOOK_NAME = "pride_and_prejudice-jane_austen.txt" | |
def compress_brotli(txt_bytes):
    """Compress ``txt_bytes`` with Brotli and report the resulting size.

    The encoder runs in text mode at quality 11, the maximum setting.
    The compressed data is only measured and printed, not returned.
    """
    quality = 11  # maximum compression
    compressed = brotli.compress(txt_bytes, brotli.BrotliEncoderMode.TEXT, quality)
    print_datasize("Brotli", compressed)
def compress_lzma(txt_bytes):
    """Compress ``txt_bytes`` with LZMA and report the resulting size.

    Uses the ``FORMAT_ALONE`` container with preset 9, the highest
    numbered preset. The compressed data is only measured and printed,
    not returned.
    """
    options = {"format": lzma.FORMAT_ALONE, "preset": 9}
    compressed = lzma.compress(txt_bytes, **options)
    print_datasize("LZMA", compressed)
def compress_deflategzip(txt_bytes):
    """Compress ``txt_bytes`` via the ``deflate`` package's gzip encoder.

    Level 12 is passed as the compression level, the maximum for this
    encoder. The compressed data is only measured and printed, not
    returned.
    """
    level = 12  # maximum compression
    compressed = deflate.gzip_compress(txt_bytes, level)
    print_datasize("Deflate+GZip", compressed)
def compress_gzip(txt_bytes):
    """Compress ``txt_bytes`` with the standard-library gzip module.

    No compression level is passed, so the library default applies.
    The compressed data is only measured and printed, not returned.
    """
    print_datasize("GZip", gzip.compress(txt_bytes))
def print_datasize(algo, bytes_):
    """Print the size of ``bytes_`` in kilobytes, labelled with ``algo``.

    One kilobyte is counted as 1000 bytes, and the value is printed with
    three decimal places.
    """
    size_kb = len(bytes_) / 1000
    print(f"Size using {algo}: {size_kb:.3f}KB")
### MAIN ###
# Read the whole book into memory as raw bytes.
with open(BOOK_NAME, "rb") as book:
    txt_bytes = book.read()

# Baseline: size of the uncompressed file.
print_datasize("NO COMPRESSION", txt_bytes)

# Run each compressor on the same input, in a fixed order. The trailing
# marks are the ranking notes carried over from the original call sites.
for compressor in (
    compress_brotli,       # 1°
    compress_lzma,         # 2°
    compress_deflategzip,  # 3°
    compress_gzip,
):
    compressor(txt_bytes)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment