padenot · February 10, 2025 15:51
diff --git a/compress.sh b/compress.sh
 #!/bin/bash
 #
 # Compress every model file with bzip2, xz, zip, lz4 and zstd at several
 # compression levels. Each result is written to
 # "$OUTPUT_DIR/<file>.<format>.<level>"; the input files are left in place.

 # Stop at the first failing command. Without this, a compressor that fails
 # (or is not installed) still leaves an empty or truncated file behind its
 # output redirection and the script carries on as if nothing happened.
 set -euo pipefail

 OUTPUT_DIR="compressed_output"
 FILES=("model.enet.intgemm.alphas.bin" "model.jaen.intgemm.alphas.bin")
 # Levels shared by bzip2, xz, zip and lz4; zstd gets its own list.
 LEVELS=(1 3 9)
 ZSTD_LEVELS=(1 5 19)

 mkdir -p "$OUTPUT_DIR"

 for file in "${FILES[@]}"; do
    if [[ ! -f "$file" ]]; then
        printf 'error: input file not found: %s\n' "$file" >&2
        exit 1
    fi

    for level in "${LEVELS[@]}"; do
        bzip2 -k -z -"$level" -c "$file" > "$OUTPUT_DIR/${file}.bz2.$level"
        xz -k -"$level" -c "$file" > "$OUTPUT_DIR/${file}.xz.$level"
        # Unlike the redirections, zip reuses an archive that already exists,
        # so drop any archive left over from an earlier run first.
        rm -f "$OUTPUT_DIR/${file}.zip.$level"
        zip -"$level" "$OUTPUT_DIR/${file}.zip.$level" "$file"
        lz4 -"$level" -c "$file" > "$OUTPUT_DIR/${file}.lz4.$level"
    done

    for level in "${ZSTD_LEVELS[@]}"; do
        zstd -"$level" -c "$file" > "$OUTPUT_DIR/${file}.zst.$level"
    done
 done
diff --git a/decompress.sh b/decompress.sh
 #!/bin/bash
 #
 # Benchmark decompression of every archive found under $OUTPUT_DIR with
 # hyperfine and export the timings as JSON to $RESULTS_FILE.

 # The options are set here rather than on the shebang line so that they also
 # apply when the script is started as `bash decompress.sh`.
 set -ex

 OUTPUT_DIR="compressed_output"
 RESULTS_DIR="benchmark_results"
 RESULTS_FILE="$RESULTS_DIR/decompression_results.json"
 FILES=("model.enet.intgemm.alphas.bin" "model.jaen.intgemm.alphas.bin")
 # Levels shared by bzip2, xz, zip and lz4; zstd gets its own list.
 LEVELS=(1 3 9)
 ZSTD_LEVELS=(1 5 19)

 mkdir -p "$RESULTS_DIR"
 COMMANDS=()

 for file in "${FILES[@]}"; do
    # Each benchmark is handed to hyperfine as one command string (the output
    # redirection is part of that string), so escape the path before
    # embedding it. For the names used here the escaped form is unchanged.
    printf -v archive '%q' "$OUTPUT_DIR/$file"

    for level in "${LEVELS[@]}"; do
        COMMANDS+=("bzip2 -d -c ${archive}.bz2.$level > /dev/null")
        COMMANDS+=("xz -d -c ${archive}.xz.$level > /dev/null")
        COMMANDS+=("unzip -p ${archive}.zip.$level > /dev/null")
        COMMANDS+=("lz4 -d -c ${archive}.lz4.$level > /dev/null")
    done

    for level in "${ZSTD_LEVELS[@]}"; do
        COMMANDS+=("zstd -d -c ${archive}.zst.$level > /dev/null")
    done
 done

 hyperfine --export-json "$RESULTS_FILE" "${COMMANDS[@]}"
diff --git a/plot.py b/plot.py
 import json
 import re
 import matplotlib.pyplot as plt
 import numpy as np
 import os

 # Scatter-plot mean decompression time against compressed size for every
 # benchmarked archive. Sizes come from the files in `compressed_dir`, timings
 # from the JSON results file read below.
 compressed_dir = "compressed_output"

 # Size in bytes of each archive, keyed by "<format>.<level>"; archives of the
 # "jaen" model get a ".jaen" suffix so both models can share one dict.
 file_sizes = {}
 for name in os.listdir(compressed_dir):
    match = re.search(r'model\.(.*?)\.intgemm\.alphas\.bin\.([a-z0-9]+)\.(\d+)', name)
    if match:
        model, compression, level = match.groups()
        key = f"{compression}.{level}"
        if model == "jaen":
            key += ".jaen"
        file_sizes[key] = os.path.getsize(os.path.join(compressed_dir, name))

 with open('benchmark_results/decompression_results.json', 'r') as results_file:
    data = json.load(results_file)

 sizes = []
 times = []
 labels = []
 commands = []

 for entry in data['results']:
    command = entry['command']
    # The first ".<word>.<digits>" pair in the command line is the
    # "<format>.<level>" suffix of the archive path.
    match = re.search(r'\.([a-z0-9]+)\.([0-9]+)', command)
    if match:
        key = f"{match.group(1)}.{match.group(2)}"
        if "jaen" in command:
            key += ".jaen"
        size = file_sizes.get(key, None)
        # Results whose archive is missing or empty are left out of the plot.
        if size:
            tool = command.split()[0]
            sizes.append(size)
            times.append(entry['mean'])
            commands.append(tool)
            labels.append(f"{tool}-{match.group(2)}")

 # Sort the tool names so every run gives the same colour to the same tool;
 # the iteration order of a set of strings can differ from one run to the next.
 unique_commands = sorted(set(commands))
 colors = plt.cm.tab10(np.linspace(0, 1, len(unique_commands)))
 command_color_map = {cmd: colors[i] for i, cmd in enumerate(unique_commands)}

 # Plot data
 plt.figure(figsize=(10, 6))
 labelled = set()  # tools that already have a legend entry
 for size, mean_time, tool, label in zip(sizes, times, commands, labels):
    # Only the first point of each tool carries a legend label, so the legend
    # lists every tool once.
    legend_label = "" if tool in labelled else tool
    labelled.add(tool)
    plt.scatter(size, mean_time, color=command_color_map[tool], label=legend_label)
    plt.annotate(label, (size, mean_time), textcoords="offset points", xytext=(5,5), ha='right')

 plt.xlabel('Compressed Size (bytes)')
 plt.ylabel('Decompression Time (s)')
 plt.title('Decompression Time vs Compressed Size')
 plt.legend(title="Command")
 plt.grid(True)
 plt.show()
	#!/bin/bash
	#
	# Compress every model file with bzip2, xz, zip, lz4 and zstd at several
	# compression levels. Each result is written to
	# "$OUTPUT_DIR/<file>.<format>.<level>"; the input files are left in place.

	# Stop at the first failing command. Without this, a compressor that fails
	# (or is not installed) still leaves an empty or truncated file behind its
	# output redirection and the script carries on as if nothing happened.
	set -euo pipefail

	OUTPUT_DIR="compressed_output"
	FILES=("model.enet.intgemm.alphas.bin" "model.jaen.intgemm.alphas.bin")
	# Levels shared by bzip2, xz, zip and lz4; zstd gets its own list.
	LEVELS=(1 3 9)
	ZSTD_LEVELS=(1 5 19)

	mkdir -p "$OUTPUT_DIR"

	for file in "${FILES[@]}"; do
	    if [[ ! -f "$file" ]]; then
	        printf 'error: input file not found: %s\n' "$file" >&2
	        exit 1
	    fi

	    for level in "${LEVELS[@]}"; do
	        bzip2 -k -z -"$level" -c "$file" > "$OUTPUT_DIR/${file}.bz2.$level"
	        xz -k -"$level" -c "$file" > "$OUTPUT_DIR/${file}.xz.$level"
	        # Unlike the redirections, zip reuses an archive that already exists,
	        # so drop any archive left over from an earlier run first.
	        rm -f "$OUTPUT_DIR/${file}.zip.$level"
	        zip -"$level" "$OUTPUT_DIR/${file}.zip.$level" "$file"
	        lz4 -"$level" -c "$file" > "$OUTPUT_DIR/${file}.lz4.$level"
	    done

	    for level in "${ZSTD_LEVELS[@]}"; do
	        zstd -"$level" -c "$file" > "$OUTPUT_DIR/${file}.zst.$level"
	    done
	done
	#!/bin/bash
	#
	# Benchmark decompression of every archive found under $OUTPUT_DIR with
	# hyperfine and export the timings as JSON to $RESULTS_FILE.

	# The options are set here rather than on the shebang line so that they also
	# apply when the script is started as `bash decompress.sh`.
	set -ex

	OUTPUT_DIR="compressed_output"
	RESULTS_DIR="benchmark_results"
	RESULTS_FILE="$RESULTS_DIR/decompression_results.json"
	FILES=("model.enet.intgemm.alphas.bin" "model.jaen.intgemm.alphas.bin")
	# Levels shared by bzip2, xz, zip and lz4; zstd gets its own list.
	LEVELS=(1 3 9)
	ZSTD_LEVELS=(1 5 19)

	mkdir -p "$RESULTS_DIR"
	COMMANDS=()

	for file in "${FILES[@]}"; do
	    # Each benchmark is handed to hyperfine as one command string (the output
	    # redirection is part of that string), so escape the path before
	    # embedding it. For the names used here the escaped form is unchanged.
	    printf -v archive '%q' "$OUTPUT_DIR/$file"

	    for level in "${LEVELS[@]}"; do
	        COMMANDS+=("bzip2 -d -c ${archive}.bz2.$level > /dev/null")
	        COMMANDS+=("xz -d -c ${archive}.xz.$level > /dev/null")
	        COMMANDS+=("unzip -p ${archive}.zip.$level > /dev/null")
	        COMMANDS+=("lz4 -d -c ${archive}.lz4.$level > /dev/null")
	    done

	    for level in "${ZSTD_LEVELS[@]}"; do
	        COMMANDS+=("zstd -d -c ${archive}.zst.$level > /dev/null")
	    done
	done

	hyperfine --export-json "$RESULTS_FILE" "${COMMANDS[@]}"
	import json
	import re
	import matplotlib.pyplot as plt
	import numpy as np
	import os

	# Scatter-plot mean decompression time against compressed size for every
	# benchmarked archive. Sizes come from the files in `compressed_dir`, timings
	# from the JSON results file read below.
	compressed_dir = "compressed_output"

	# Size in bytes of each archive, keyed by "<format>.<level>"; archives of the
	# "jaen" model get a ".jaen" suffix so both models can share one dict.
	file_sizes = {}
	for name in os.listdir(compressed_dir):
	    match = re.search(r'model\.(.*?)\.intgemm\.alphas\.bin\.([a-z0-9]+)\.(\d+)', name)
	    if match:
	        model, compression, level = match.groups()
	        key = f"{compression}.{level}"
	        if model == "jaen":
	            key += ".jaen"
	        file_sizes[key] = os.path.getsize(os.path.join(compressed_dir, name))

	with open('benchmark_results/decompression_results.json', 'r') as results_file:
	    data = json.load(results_file)

	sizes = []
	times = []
	labels = []
	commands = []

	for entry in data['results']:
	    command = entry['command']
	    # The first ".<word>.<digits>" pair in the command line is the
	    # "<format>.<level>" suffix of the archive path.
	    match = re.search(r'\.([a-z0-9]+)\.([0-9]+)', command)
	    if match:
	        key = f"{match.group(1)}.{match.group(2)}"
	        if "jaen" in command:
	            key += ".jaen"
	        size = file_sizes.get(key, None)
	        # Results whose archive is missing or empty are left out of the plot.
	        if size:
	            tool = command.split()[0]
	            sizes.append(size)
	            times.append(entry['mean'])
	            commands.append(tool)
	            labels.append(f"{tool}-{match.group(2)}")

	# Sort the tool names so every run gives the same colour to the same tool;
	# the iteration order of a set of strings can differ from one run to the next.
	unique_commands = sorted(set(commands))
	colors = plt.cm.tab10(np.linspace(0, 1, len(unique_commands)))
	command_color_map = {cmd: colors[i] for i, cmd in enumerate(unique_commands)}

	# Plot data
	plt.figure(figsize=(10, 6))
	labelled = set()  # tools that already have a legend entry
	for size, mean_time, tool, label in zip(sizes, times, commands, labels):
	    # Only the first point of each tool carries a legend label, so the legend
	    # lists every tool once.
	    legend_label = "" if tool in labelled else tool
	    labelled.add(tool)
	    plt.scatter(size, mean_time, color=command_color_map[tool], label=legend_label)
	    plt.annotate(label, (size, mean_time), textcoords="offset points", xytext=(5,5), ha='right')

	plt.xlabel('Compressed Size (bytes)')
	plt.ylabel('Decompression Time (s)')
	plt.title('Decompression Time vs Compressed Size')
	plt.legend(title="Command")
	plt.grid(True)
	plt.show()