Last active
November 26, 2024 16:39
-
-
Save oldcai/7230548 to your computer and use it in GitHub Desktop.
zlib vs lz4 vs lzma vs zstd compression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import requests | |
import zlib | |
#!pip install lz4 pylzma zstd | |
import lz4.block | |
import pylzma as lzma | |
import zstd | |
def measure_time_and_compress_decompress(compress_func, decompress_func, data, *args):
    """Time one compression and one decompression of ``data``.

    Args:
        compress_func: Callable invoked as ``compress_func(data, *args)``.
        decompress_func: Callable invoked as
            ``decompress_func(compressed_data, *args)``.
        data: Payload handed to ``compress_func``.
        *args: Extra positional arguments forwarded to BOTH callables.

    Returns:
        A tuple ``(compressed_data, compress_time, decompress_time)`` where
        both times are elapsed seconds as floats. The decompressed result is
        discarded; it is produced only so the call can be timed.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, so short runs are not reported as zero or negative the way
    # they can be with the wall clock returned by time.time().
    compress_start = time.perf_counter()
    compressed_data = compress_func(data, *args)
    compress_time = time.perf_counter() - compress_start

    decompress_start = time.perf_counter()
    decompress_func(compressed_data, *args)
    decompress_time = time.perf_counter() - decompress_start

    return compressed_data, compress_time, decompress_time
def _safe_div(numerator, denominator):
    """Divide, returning ``inf`` (or ``nan`` for 0/0) instead of raising on a zero denominator."""
    if denominator:
        return numerator / denominator
    return float("inf") if numerator else float("nan")


def analyze_compression(name, compressed_data, original_len, elapsed_time, decompress_time):
    """Print size, ratio and throughput figures for one compression run.

    Args:
        name: Label printed as the heading of the report.
        compressed_data: Compressed payload; only its length is used.
        original_len: Length of the uncompressed input.
        elapsed_time: Seconds spent compressing.
        decompress_time: Seconds spent decompressing.

    Returns:
        None. The report is written to standard output.
    """
    compressed_len = len(compressed_data)
    # A zero original length or a zero measured time (a fast codec on a tiny
    # input with a coarse clock) would otherwise raise ZeroDivisionError and
    # abort the whole benchmark; report inf/nan for that figure instead.
    ratio = _safe_div(compressed_len, original_len)
    compress_speed = _safe_div(original_len, elapsed_time) / 1024 ** 2  # Convert to MByte/s
    decompress_speed = _safe_div(original_len, decompress_time) / 1024 ** 2  # Convert to MByte/s
    print(f"{name}:")
    print(f" Compressed len: {compressed_len}, Compression ratio: {ratio:.5f}")
    print(f" Compression time: {elapsed_time:.5f}s, Speed: {compress_speed:.2f} MByte/s")
    print(f" Decompression time: {decompress_time:.5f}s, Speed: {decompress_speed:.2f} MByte/s")
def analysis_ratios(source):
    """Benchmark each codec on ``source`` and print one report per codec.

    The codecs run in a fixed order: Zlib, LZ4, LZMA (whatever module this
    file binds to the name ``lzma``), then Zstd at levels 3 and 19. Each one
    is timed with ``measure_time_and_compress_decompress`` and reported with
    ``analyze_compression`` before the next one starts.

    Args:
        source: Payload to compress; its ``len()`` is printed first and used
            as the original size in every report.
    """
    source_size = len(source)
    print("Source len:", source_size)

    # (report label, compress callable, decompress callable)
    benchmarks = (
        ("Zlib", zlib.compress, zlib.decompress),
        ("LZ4", lz4.block.compress, lz4.block.decompress),
        ("LZMA", lzma.compress, lzma.decompress),
        ("Zstd level 3", lambda x: zstd.compress(x, 3), zstd.decompress),
        ("Zstd level 19", lambda x: zstd.compress(x, 19), zstd.decompress),
    )

    for label, compress, decompress in benchmarks:
        packed, pack_seconds, unpack_seconds = measure_time_and_compress_decompress(
            compress, decompress, source
        )
        analyze_compression(label, packed, source_size, pack_seconds, unpack_seconds)
# Synthetic payload: a single byte value repeated one million times.
print("Testing with a million repeated characters:")
repeated_payload = b"a" * 1_000_000
analysis_ratios(repeated_payload)

# Visual separator between the synthetic run and the live-page run.
separator = "-" * 50
print(separator)
print("Testing in the real world:")
def get_website_content(url, timeout=10):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The response body as bytes, or ``None`` when the request fails. A
        failure is any ``requests.RequestException``, which includes
        connection errors, timeouts and HTTP 4xx/5xx statuses.
    """
    try:
        response = requests.get(
            url,
            headers={"user-agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        # Treat HTTP error statuses as failures so that an error page is not
        # benchmarked as if it were the real site content.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
    return response.content
# Use a live web page as the payload. The benchmark is skipped when the
# fetch returned None (request failed) or an empty body.
real_world_url = "https://www.163.com/"
test_str_in_real_world = get_website_content(real_world_url)
if test_str_in_real_world:
    analysis_ratios(test_str_in_real_world)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Testing with a million repeated characters: | |
Source len: 1000000 | |
Zlib: | |
Compressed len: 992, Compression ratio: 0.00099 | |
Compression time: 0.00490s, Speed: 194.72 MByte/s | |
Decompression time: 0.00278s, Speed: 343.41 MByte/s | |
LZ4: | |
Compressed len: 3936, Compression ratio: 0.00394 | |
Compression time: 0.00011s, Speed: 8565.31 MByte/s | |
Decompression time: 0.00059s, Speed: 1628.00 MByte/s | |
LZMA: | |
Compressed len: 221, Compression ratio: 0.00022 | |
Compression time: 0.10568s, Speed: 9.02 MByte/s | |
Decompression time: 0.00239s, Speed: 398.60 MByte/s | |
Zstd level 1: | |
Compressed len: 51, Compression ratio: 0.00005 | |
Compression time: 0.00116s, Speed: 821.52 MByte/s | |
Decompression time: 0.00007s, Speed: 13245.03 MByte/s | |
Zstd level 3: | |
Compressed len: 50, Compression ratio: 0.00005 | |
Compression time: 0.00111s, Speed: 858.37 MByte/s | |
Decompression time: 0.00027s, Speed: 3530.45 MByte/s | |
Zstd level 19: | |
Compressed len: 49, Compression ratio: 0.00005 | |
Compression time: 0.00621s, Speed: 153.63 MByte/s | |
Decompression time: 0.00013s, Speed: 7421.15 MByte/s | |
Zstd level 22: | |
Compressed len: 49, Compression ratio: 0.00005 | |
Compression time: 0.00151s, Speed: 632.21 MByte/s | |
Decompression time: 0.00013s, Speed: 7220.22 MByte/s | |
-------------------------------------------------- | |
Testing in the real world: | |
Source len: 555360 | |
Zlib: | |
Compressed len: 90642, Compression ratio: 0.16321 | |
Compression time: 0.00817s, Speed: 64.86 MByte/s | |
Decompression time: 0.00117s, Speed: 452.52 MByte/s | |
LZ4: | |
Compressed len: 125840, Compression ratio: 0.22659 | |
Compression time: 0.00072s, Speed: 733.15 MByte/s | |
Decompression time: 0.00019s, Speed: 2735.76 MByte/s | |
LZMA: | |
Compressed len: 67894, Compression ratio: 0.12225 | |
Compression time: 0.18124s, Speed: 2.92 MByte/s | |
Decompression time: 0.00393s, Speed: 134.92 MByte/s | |
Zstd level 1: | |
Compressed len: 95823, Compression ratio: 0.17254 | |
Compression time: 0.00172s, Speed: 308.19 MByte/s | |
Decompression time: 0.00055s, Speed: 958.76 MByte/s | |
Zstd level 3: | |
Compressed len: 88492, Compression ratio: 0.15934 | |
Compression time: 0.00218s, Speed: 243.02 MByte/s | |
Decompression time: 0.00055s, Speed: 960.42 MByte/s | |
Zstd level 19: | |
Compressed len: 73132, Compression ratio: 0.13168 | |
Compression time: 0.17033s, Speed: 3.11 MByte/s | |
Decompression time: 0.00053s, Speed: 996.61 MByte/s | |
Zstd level 22: | |
Compressed len: 73149, Compression ratio: 0.13171 | |
Compression time: 0.21461s, Speed: 2.47 MByte/s | |
Decompression time: 0.00053s, Speed: 990.39 MByte/s |
thanks for this!
163.com now returns a brief error page when the default Python `requests` user-agent is used, but you can fix that like this:
test_str_in_real_world = requests.get("https://www.163.com/",headers={"user-agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/536.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/535.36"}).content
How do I run this on Linux?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks !