Skip to content

Instantly share code, notes, and snippets.

@padenot
Created February 10, 2025 15:51
Show Gist options
  • Save padenot/b8008ccbb700aecf24bbdcb33d8b8a05 to your computer and use it in GitHub Desktop.
Save padenot/b8008ccbb700aecf24bbdcb33d8b8a05 to your computer and use it in GitHub Desktop.
compression bench
#!/bin/bash
#
# Compress every file in FILES with bzip2, xz, zip and lz4 (at each level in
# LEVELS) and with zstd (at each level in ZSTD_LEVELS). Each result is written
# to "$OUTPUT_DIR/<file>.<ext>.<level>".
#
# Must be run from the directory that contains the input files.

# Abort on the first failing command or unset variable so a broken run cannot
# silently leave empty or partial files behind in the output directory.
set -euo pipefail

OUTPUT_DIR="compressed_output"
FILES=("model.enet.intgemm.alphas.bin" "model.jaen.intgemm.alphas.bin")
LEVELS=(1 3 9)
ZSTD_LEVELS=(1 5 19)

# Check all inputs up front: the "> output" redirections below would
# otherwise create empty output files before the compressor notices that its
# input is missing.
for file in "${FILES[@]}"; do
  if [[ ! -r "$file" ]]; then
    printf 'error: input file not found or unreadable: %s\n' "$file" >&2
    exit 1
  fi
done

mkdir -p "$OUTPUT_DIR"

for file in "${FILES[@]}"; do
  for level in "${LEVELS[@]}"; do
    bzip2 -k -z -"$level" -c "$file" > "$OUTPUT_DIR/${file}.bz2.$level"
    xz -k -"$level" -c "$file" > "$OUTPUT_DIR/${file}.xz.$level"

    # zip updates an existing archive in place instead of overwriting it;
    # remove any leftover from a previous run so the result only depends on
    # this run.
    rm -f -- "$OUTPUT_DIR/${file}.zip.$level"
    zip -"$level" "$OUTPUT_DIR/${file}.zip.$level" "$file"

    lz4 -"$level" -c "$file" > "$OUTPUT_DIR/${file}.lz4.$level"
  done

  # zstd uses its own list of levels (ZSTD_LEVELS) rather than LEVELS.
  for level in "${ZSTD_LEVELS[@]}"; do
    zstd -"$level" -c "$file" > "$OUTPUT_DIR/${file}.zst.$level"
  done
done
#!/bin/bash
#
# Benchmark decompression of every archive found under $OUTPUT_DIR (one per
# file/tool/level combination) with hyperfine, and export the timings as JSON
# to $RESULTS_FILE.

# Options are set here rather than on the shebang line so they still apply
# when the script is started as "bash script.sh".
#   -e  stop on the first failing command
#   -u  treat unset variables as errors
#   -x  trace every command as it runs
set -eux

OUTPUT_DIR="compressed_output"
RESULTS_DIR="benchmark_results"
RESULTS_FILE="$RESULTS_DIR/decompression_results.json"
FILES=("model.enet.intgemm.alphas.bin" "model.jaen.intgemm.alphas.bin")
LEVELS=(1 3 9)
ZSTD_LEVELS=(1 5 19)

if ! command -v hyperfine > /dev/null; then
  printf 'error: hyperfine is required but was not found in PATH\n' >&2
  exit 1
fi

mkdir -p "$RESULTS_DIR"

# Each element is one complete shell command line; hyperfine runs it through
# a shell, which is why the "> /dev/null" redirection is part of the string.
# NOTE(review): the exported JSON records these strings verbatim, so anything
# that parses the results may depend on this exact format - keep it stable.
COMMANDS=()
for file in "${FILES[@]}"; do
  for level in "${LEVELS[@]}"; do
    COMMANDS+=("bzip2 -d -c $OUTPUT_DIR/${file}.bz2.$level > /dev/null")
    COMMANDS+=("xz -d -c $OUTPUT_DIR/${file}.xz.$level > /dev/null")
    COMMANDS+=("unzip -p $OUTPUT_DIR/${file}.zip.$level > /dev/null")
    COMMANDS+=("lz4 -d -c $OUTPUT_DIR/${file}.lz4.$level > /dev/null")
  done

  # zstd archives exist for ZSTD_LEVELS, not LEVELS.
  for level in "${ZSTD_LEVELS[@]}"; do
    COMMANDS+=("zstd -d -c $OUTPUT_DIR/${file}.zst.$level > /dev/null")
  done
done

hyperfine --export-json "$RESULTS_FILE" "${COMMANDS[@]}"
import json
import re
import matplotlib.pyplot as plt
import numpy as np
import os
# Scatter-plot mean decompression time against compressed size for every
# benchmarked command, one color per decompression tool.
#
# Inputs (relative to the current directory):
#   compressed_output/                              compressed files
#   benchmark_results/decompression_results.json    hyperfine JSON export
import sys  # only needed for the skipped-entry warning below

compressed_dir = "compressed_output"

# Map "<compression>.<level>" (with a ".jaen" suffix for the jaen model) to
# the size in bytes of the corresponding compressed file.
file_sizes = {}
for file_name in os.listdir(compressed_dir):
    match = re.search(r'model\.(.*?)\.intgemm\.alphas\.bin\.([a-z0-9]+)\.(\d+)', file_name)
    if match:
        model, compression, level = match.groups()
        key = f"{compression}.{level}"
        if model == "jaen":
            key += ".jaen"
        file_sizes[key] = os.path.getsize(os.path.join(compressed_dir, file_name))

with open('benchmark_results/decompression_results.json', 'r') as results_file:
    data = json.load(results_file)

sizes = []
times = []
labels = []
commands = []
for entry in data['results']:
    # The first ".<name>.<digits>" pair in the command line is the
    # "<compression>.<level>" suffix of the benchmarked file.
    match = re.search(r'\.([a-z0-9]+)\.([0-9]+)', entry['command'])
    if not match:
        continue
    key = f"{match.group(1)}.{match.group(2)}"
    if "jaen" in entry['command']:
        key += ".jaen"
    size = file_sizes.get(key, None)
    if not size:
        # Missing (or zero-byte) compressed file: the point cannot be placed
        # on the size axis, so skip it - but say so instead of dropping it
        # silently.
        print(f"warning: no compressed size for {entry['command']!r}; skipping",
              file=sys.stderr)
        continue
    tool = entry['command'].split()[0]
    sizes.append(size)
    times.append(entry['mean'])
    commands.append(tool)
    labels.append(f"{tool}-{match.group(2)}")

# Sort so that each tool gets the same color on every run; iterating a bare
# set of strings is not stable between interpreter invocations.
unique_commands = sorted(set(commands))
colors = plt.cm.tab10(np.linspace(0, 1, len(unique_commands)))
command_color_map = dict(zip(unique_commands, colors))

# Plot data
plt.figure(figsize=(10, 6))
in_legend = set()  # tools that already have a legend entry
for size, mean_time, tool, label in zip(sizes, times, commands, labels):
    # Only the first point of each tool carries a legend label; an empty
    # label keeps the remaining points out of the legend.
    legend_label = "" if tool in in_legend else tool
    in_legend.add(tool)
    plt.scatter(size, mean_time, color=command_color_map[tool], label=legend_label)
    plt.annotate(label, (size, mean_time), textcoords="offset points", xytext=(5,5), ha='right')

plt.xlabel('Compressed Size (bytes)')
plt.ylabel('Decompression Time (s)')
plt.title('Decompression Time vs Compressed Size')
plt.legend(title="Command")
plt.grid(True)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment