Benchmark program for https://maskray.me/blog/2025-08-31-benchmarking-compression-programs, distributed under the terms of both the MIT license and the Apache License (Version 2.0).
#!/usr/bin/env ruby
# This program downloads and builds several compression utilities, benchmarks their
# compression and decompression performance on a given input file (including memory
# consumption), and finally generates HTML charts.
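#
# Usage sketch (hypothetical input path; see the option parser at the bottom for all flags):
#   ruby bench.rb input.tar            # single-threaded run
#   ruby bench.rb -t 1,2,4 input.tar   # also measure thread scaling
#   ruby bench.rb -i input.tar         # invalidate the per-input cache and rerun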
require 'benchmark'
require 'digest'
require 'etc'
require 'fileutils'
require 'json'
require 'net/http'
require 'open3'
require 'optparse'
require 'tempfile'
require 'uri'

JOBS = Etc.nprocessors

# Defaults: supports_threading: true
COMPRESSORS = {
  'brotli' => {
    url: 'https://github.com/google/brotli/archive/refs/tags/v1.1.0.tar.gz',
    build_dir: 'brotli-1.1.0',
    build_commands: ['cmake -GNinja -S. -Bout -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DBROTLI_DISABLE_TESTS=on -DCMAKE_C_FLAGS="-march=native"', 'ninja -C out install'],
    levels: [1, 3, 5, 7, 9, 11],
    compress: ->exe, lvl, i, o, thr { "#{exe} -c -q #{lvl} '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -c '#{i}' > '#{o}'" },
    supports_threading: false
  },
  'bzip3' => {
    url: 'https://github.com/kspalaiologos/bzip3/releases/download/1.5.3/bzip3-1.5.3.tar.gz',
    build_dir: 'bzip3-1.5.3',
    build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
    levels: [1],
    compress: ->exe, lvl, i, o, thr { "#{exe} -j#{thr} -c '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -j#{thr} -d -c '#{i}' > '#{o}'" },
  },
  'kanzi' => {
    url: 'https://github.com/flanglet/kanzi-cpp/archive/refs/tags/2.4.0.tar.gz',
    build_dir: 'kanzi-cpp-2.4.0',
    build_commands: ['cmake -GNinja -Ssrc -Binstall/bin -DCMAKE_CXX_FLAGS="-march=native"', 'ninja -C install/bin kanzi'],
    levels: 1..9,
    compress: ->exe, lvl, i, o, thr { "#{exe} -c -v 0 -j #{thr} -l #{lvl} -f -i '#{i}' -o '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -j #{thr} -v 0 -f -i '#{i}' -o '#{o}'" },
  },
  'lz4' => {
    url: 'https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz',
    build_dir: 'lz4-1.10.0',
    build_commands: ["make -j #{JOBS} CFLAGS='-O3 -march=native' PREFIX=$PWD/install install"],
    levels: 1..12,
    compress: ->exe, lvl, i, o, thr { "#{exe} -q -T#{thr} -c -#{lvl} '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -q -T#{thr} -c '#{i}' > '#{o}'" },
  },
  'lzham' => {
    url: 'https://github.com/MaskRay/lzham_codec/archive/refs/heads/cmake.zip',
    source_filename: 'lzham-master.zip',
    build_dir: 'lzham_codec-cmake',
    build_commands: ['cmake -GNinja -S. -Bout -DCMAKE_CXX_FLAGS="-march=native"', 'ninja -C out', 'mkdir -p install/bin && rsync -a out/lzhamtest/lzhamtest install/bin/lzham'],
    levels: 1..4,
    compress: ->exe, lvl, i, o, thr { "#{exe} -m#{lvl} -t#{thr} c '#{i}' '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -t#{thr} d '#{i}' '#{o}'" },
  },
  'xz' => {
    url: 'https://tukaani.org/xz/xz-5.8.1.tar.gz',
    build_dir: 'xz-5.8.1',
    build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
    levels: [*1..6, 9],
    compress: ->exe, lvl, i, o, thr { "#{exe} -#{lvl} -T#{thr} -c '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -T#{thr} -c '#{i}' > '#{o}'" },
  },
  'zpaq' => {
    url: 'https://github.com/zpaq/zpaq/archive/refs/heads/master.zip',
    source_filename: 'zpaq-master.zip',
    build_dir: 'zpaq-master',
    build_commands: ["make -j #{JOBS} PREFIX=install CXXFLAGS='-O3 -march=native' install"],
    levels: 1..4,
    extension: '.zpaq',
    compress: ->exe, lvl, i, o, thr { "#{exe} a '#{o}' '#{i}' -m#{lvl} -t#{thr}" },
    # Decompress to tmp_zpaq, then rename to #{o}.
    decompress: ->exe, i, o, thr { "#{exe} x '#{i}' -t#{thr} -to tmp_zpaq" },
    decompress_dir: 'tmp_zpaq',
  },
  'zstd' => {
    url: 'https://github.com/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz',
    build_dir: 'zstd-1.5.7',
    build_commands: ['cmake -GNinja -Sbuild/cmake -Bout -DCMAKE_INSTALL_PREFIX=install -DCMAKE_C_FLAGS="-march=native"', 'ninja -C out install'],
    levels: [*(1..6), 9, 13, 16, 19],
    compress: ->exe, lvl, i, o, thr { "#{exe} -q -T#{thr} -#{lvl} -c '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -q -T#{thr} -c '#{i}' > '#{o}'" },
  },
}
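
# To benchmark another tool, add an entry of the same shape. A hypothetical gzip
# entry might look like this (URL, version, and flags are assumptions, not part of
# the original benchmark set):
#   'gzip' => {
#     url: 'https://ftp.gnu.org/gnu/gzip/gzip-1.13.tar.gz',
#     build_dir: 'gzip-1.13',
#     build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
#     levels: 1..9,
#     compress: ->exe, lvl, i, o, thr { "#{exe} -#{lvl} -c '#{i}' > '#{o}'" },
#     decompress: ->exe, i, o, thr { "#{exe} -d -c '#{i}' > '#{o}'" },
#     supports_threading: false
#   }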

class MemoryMonitor
  def self.measure_command(cmd)
    begin
      if RUBY_PLATFORM.include?('linux')
        return measure_with_time_linux(cmd)
      elsif RUBY_PLATFORM.include?('darwin')
        return measure_with_time_macos(cmd)
      else
        raise "Unsupported platform: #{RUBY_PLATFORM}"
      end
    rescue => e
      # Fallback: run without memory measurement
      { success: system(cmd, out: File::NULL, err: File::NULL), memory_kb: nil }
    end
  end

  private

  def self.measure_with_time_linux(cmd)
    # Use GNU time -v to get max RSS in KB.
    # Note: cmd already contains single-quoted paths; the outer single quotes below
    # rely on those paths being free of spaces and shell metacharacters.
    time_cmd = "/usr/bin/time -v sh -c '#{cmd}' 2>&1"
    output, status = Open3.capture2e(time_cmd)
    if status.success?
      # Parse "Maximum resident set size" from verbose output
      if match = output.match(/Maximum resident set size \(kbytes\):\s*(\d+)/)
        memory_kb = match[1].to_i
        { success: true, memory_kb: memory_kb > 0 ? memory_kb : nil }
      else
        { success: true, memory_kb: nil }
      end
    else
      { success: false, memory_kb: nil }
    end
  end

  def self.measure_with_time_macos(cmd)
    time_cmd = "/usr/bin/time -l #{cmd} 2>&1"
    output, status = Open3.capture2e(time_cmd)
    if status.success?
      # Parse "maximum resident set size" from output (in bytes on macOS)
      if match = output.match(/(\d+)\s+maximum resident set size/)
        memory_bytes = match[1].to_i
        memory_kb = memory_bytes / 1024
        { success: true, memory_kb: memory_kb > 0 ? memory_kb : nil }
      else
        { success: true, memory_kb: nil }
      end
    else
      { success: false, memory_kb: nil }
    end
  end
end
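
# Standalone usage sketch for MemoryMonitor (the command string is hypothetical):
#   result = MemoryMonitor.measure_command("gzip -9 -c input > input.gz")
#   result[:success]    #=> true or false
#   result[:memory_kb]  #=> peak RSS in KB, or nil if it could not be measured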

class CompressorBuilder
  def initialize(work_dir)
    @work_dir = File.expand_path(work_dir)
    FileUtils.mkdir_p(@work_dir)
  end

  def build_all
    COMPRESSORS.each do |name, config|
      bin_dir = File.join(@work_dir, config[:build_dir], 'install/bin')
      if begin !Dir.empty?(bin_dir) rescue false end
        puts "✓ #{name} already built"
      else
        download_and_extract(name, config)
        build_compressor(name, config)
      end
      if Dir.exist?(bin_dir)
        program = File.join(bin_dir, name)
        unless File.exist? program
          binaries = Dir.glob(File.join(bin_dir, '*')).select { |f| File.executable?(f) }
          raise "Compressor not found, available executables: #{binaries.map { |b| File.basename(b) }.join(', ')}"
        end
        puts "  Executable: #{program}"
        config[:program] = program
      end
    end
  end

  private

  def download_and_extract(name, config)
    filename = config.fetch(:source_filename, File.basename(config[:url]))
    filepath = File.join(@work_dir, filename)
    unless File.exist?(filepath)
      puts "Downloading #{name}..."
      download_file(config[:url], filepath)
    end
    extract_path = File.join(@work_dir, config[:build_dir])
    unless Dir.exist?(extract_path)
      puts "Extracting #{filename}..."
      if filename.end_with? 'zip'
        system("unzip", filepath, "-d", @work_dir) or
          raise "Failed to extract #{filepath}"
      else
        system("tar", "-xf", filepath, "-C", @work_dir) or
          raise "Failed to extract #{filepath}"
      end
    end
  end

  def download_file(url, filepath)
    uri = URI(url)
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      request = Net::HTTP::Get.new(uri)
      response = http.request(request)
      case response
      when Net::HTTPRedirection
        # Follow redirects recursively.
        download_file(response['location'], filepath)
        return
      when Net::HTTPSuccess
        File.open(filepath, 'wb') { |f| f.write(response.body) }
      else
        raise "Failed to download #{url}: #{response.code} #{response.message}"
      end
    end
  end

  def build_compressor(name, config)
    build_path = File.join(@work_dir, config[:build_dir])
    unless Dir.exist?(build_path)
      raise "Build directory not found: #{build_path}"
    end
    puts "Building #{name}..."
    Dir.chdir(build_path) do
      config[:build_commands].each do |command|
        puts "Running: #{command}"
        success = system(command)
        unless success
          raise "Build command failed: #{command}"
        end
      end
    end
  end
end

class CompressorBenchmark
  def initialize(input_file, options)
    @input_file = input_file
    @invalidate_cache = options[:invalidate_cache]
    @test_threading = options[:test_threading]
    @thread_counts = options[:thread_counts]
    unless File.exist?(@input_file)
      puts "Error: Input file '#{@input_file}' not found!"
      exit 1
    end
    puts "Input file: #{@input_file}"
    @input_size = File.size(@input_file)
    @input_hash = Digest::SHA256.file(@input_file).hexdigest[0..16]
    @cache_file = "cache_#{File.basename(@input_file)}_#{@input_hash}.json"
    puts "File size: #{@input_size} bytes"
    puts "Cache file: #{@cache_file}"
    puts "Thread counts to test: #{@thread_counts.join(', ')}" if @test_threading
    puts "Memory measurement: enabled (default)"
    if @invalidate_cache && File.exist?(@cache_file)
      File.delete(@cache_file)
      puts "Cache invalidated."
    end
    # Load cached results
    @results = {}
    if File.exist?(@cache_file)
      begin
        cached_data = JSON.parse(File.read(@cache_file))
        @results = cached_data.transform_values do |compressor_data|
          compressor_data.map { |point| point.transform_keys(&:to_sym) }
        end
        puts "Loaded cached results for #{@results.keys.join(', ')}"
      rescue JSON::ParserError
        puts "Warning: Corrupted cache file, starting fresh"
      end
    end
    puts
  end

  def save_cache
    File.open(@cache_file, 'w') do |f|
      f.write(JSON.pretty_generate(@results))
    end
  end
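
  # Cache shape: a JSON object keyed by compressor name, each value an array of the
  # result entries built in run_benchmark below, e.g. (numbers illustrative only):
  #   {"zstd": [{"level": 1, "threads": 1, "compress_speed": 412.3, "ratio": 2.9, ...}]}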

  def run_benchmark
    COMPRESSORS.each do |name, config|
      puts "Testing #{name}..."
      program = config[:program]
      compress_method = config[:compress]
      decompress_method = config[:decompress]
      supports_threading = config.fetch(:supports_threading, true)
      executed = false
      @results[name] ||= []
      thread_counts_to_test = @test_threading && supports_threading ? @thread_counts : [1]
      config[:levels].each do |level|
        thread_counts_to_test.each do |threads|
          # Skip configurations whose results (including memory) are already cached.
          if @results[name].any? { |x|
            x[:level] == level && x[:threads] == threads &&
            x[:compress_memory_mb] && x[:decompress_memory_mb]
          }
            next
          end
          thread_info = threads > 1 ? " (#{threads} threads)" : ""
          print "  Level #{level}#{thread_info}... "
          executed = true
          compressed_size = nil
          compress_time = nil
          decompress_time = nil
          compress_memory_kb = nil
          decompress_memory_kb = nil
          # Test compression. zpaq requires the .zpaq extension name.
          Tempfile.create(config.key?(:extension) ? ['compressed', config[:extension]] : 'compressed') do |compressed_file|
            cmd = compress_method.call(program, level, @input_file, compressed_file.path, threads)
            compress_time = Benchmark.realtime do
              result = MemoryMonitor.measure_command(cmd)
              unless result[:success]
                puts "Compression failed: #{cmd}"
                exit 2
              end
              compress_memory_kb = result[:memory_kb]
            end
            compressed_size = File.size(compressed_file.path)
            # Test decompression
            Tempfile.create('decompressed') do |decompressed_file|
              cmd = decompress_method.call(program, compressed_file.path, decompressed_file.path, threads)
              decompress_time = Benchmark.realtime do
                result = MemoryMonitor.measure_command(cmd)
                unless result[:success]
                  puts "Decompression failed: #{cmd}"
                  exit 2
                end
                decompress_memory_kb = result[:memory_kb]
              end
              if config.key?(:decompress_dir)
                FileUtils.mv File.join(config[:decompress_dir], File.basename(@input_file)), decompressed_file.path
              end
              # Verify decompression
              decompressed_size = File.size(decompressed_file.path)
              if decompressed_size != @input_size
                puts "Decompression verification failed: size mismatch (#{decompressed_size} != #{@input_size})"
                exit 2
              end
            end
          end
          compress_speed = @input_size * 1e-6 / compress_time
          decompress_speed = @input_size * 1e-6 / decompress_time
          compression_ratio = @input_size.to_f / compressed_size
          result_entry = {
            level: level,
            threads: threads,
            compress_speed: compress_speed,
            decompress_speed: decompress_speed,
            ratio: compression_ratio,
            compressed_size: compressed_size,
            supports_threading: supports_threading,
            compress_memory_mb: compress_memory_kb ? compress_memory_kb / 1024.0 : nil,
            decompress_memory_mb: decompress_memory_kb ? decompress_memory_kb / 1024.0 : nil
          }
          # Update or add the result
          existing_index = @results[name].find_index { |x| x[:level] == level && x[:threads] == threads }
          if existing_index
            @results[name][existing_index].merge!(result_entry)
          else
            @results[name] << result_entry
          end
          output = "C: #{compress_time.round(3)}s (#{compress_speed.round(1)} MB/s)"
          output += ", Mem: #{result_entry[:compress_memory_mb].round(1)} MB" if result_entry[:compress_memory_mb]
          output += " | D: #{decompress_time.round(3)}s (#{decompress_speed.round(1)} MB/s)"
          output += ", Mem: #{result_entry[:decompress_memory_mb].round(1)} MB" if result_entry[:decompress_memory_mb]
          output += " | Size: #{compressed_size} bytes"
          puts output
          # Save cache after each test to avoid losing work
          save_cache
        end
      end
      unless executed
        puts "Skipping #{name} (using cached results)..."
      end
      puts
    end
  end

  def generate_html_plot
    # Filter results to only include compressors that are defined in COMPRESSORS
    # and only include entries with the specified thread counts.
    filtered_results = {}
    @results.each do |compressor_name, data|
      if COMPRESSORS.key?(compressor_name)
        filtered_data = data.select { |point| @thread_counts.include?(point[:threads] || 1) }
        filtered_results[compressor_name] = filtered_data unless filtered_data.empty?
      end
    end
    data_json = JSON.generate(filtered_results)
    html_content = <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
        <title>Compression Comparison</title>
        <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
        <style>
          .container {
            max-width: 1600px;
            margin: 0 auto;
            background: white;
            padding: 30px;
            border-radius: 15px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
          }
          .chart-row {
            display: flex;
            gap: 20px;
            margin-bottom: 30px;
          }
          .chart-container {
            flex: 1;
            height: 400px;
            position: relative;
          }
          .stats-table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 20px;
          }
          .stats-table th, .stats-table td {
            padding: 8px 12px;
            text-align: left;
            border-bottom: 1px solid #ddd;
            font-size: 0.9em;
          }
          .stats-table th {
            background-color: #34495e;
            color: white;
            font-weight: bold;
          }
          .stats-table tr:nth-child(even) {
            background-color: #f2f2f2;
          }
          .cache-info {
            background: #d5dbdb;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
            font-size: 0.9em;
          }
        </style>
      </head>
      <body>
        <div class="container">
          <h1>Compression & Decompression Comparison</h1>
          <div class="cache-info">
            <strong>Input file:</strong> #{File.basename(@input_file)} (#{(@input_size / 1024.0 / 1024.0).round(2)} MB)<br>
            <strong>File hash:</strong> #{@input_hash}<br>
            <strong>Cache file:</strong> #{@cache_file}<br>
            <strong>Compressors tested:</strong> #{@results.keys.join(', ')}<br>
            <strong>Thread counts tested:</strong> #{@thread_counts.join(', ')}<br>
            <strong>Memory measurement:</strong> enabled
          </div>
          <div class="chart-row">
            <div class="chart-container">
              <canvas id="compressionChart"></canvas>
            </div>
            <div class="chart-container">
              <canvas id="decompressionChart"></canvas>
            </div>
          </div>
          <div class="chart-container" style="height: 500px;">
            <canvas id="combinedChart"></canvas>
          </div>
          <div class="chart-row">
            <div class="chart-container">
              <canvas id="compressMemoryChart"></canvas>
            </div>
            <div class="chart-container">
              <canvas id="decompressMemoryChart"></canvas>
            </div>
          </div>
          #{@test_threading ? '<div class="chart-row">
            <div class="chart-container">
              <canvas id="threadingChart"></canvas>
            </div>
          </div>' : ''}
          <table class="stats-table">
            <thead>
              <tr>
                <th>Compressor</th>
                <th>Level</th>
                #{@test_threading ? '<th>Threads</th>' : ''}
                <th>Compress Speed (MB/s)</th>
                <th>Decompress Speed (MB/s)</th>
                <th>Compression Ratio</th>
                <th>Compressed Size (KB)</th>
                <th>Compress Memory (MB)</th>
                <th>Decompress Memory (MB)</th>
              </tr>
            </thead>
            <tbody>
              #{@results.flat_map do |compressor, data|
                data.map do |point|
                  size_kb = (point[:compressed_size] / 1024.0).round(1)
                  threads_cell = @test_threading ? "<td>#{point[:threads] || 1}</td>" : ""
                  memory_cells = "<td>#{point[:compress_memory_mb] ? point[:compress_memory_mb].round(1) : 'N/A'}</td><td>#{point[:decompress_memory_mb] ? point[:decompress_memory_mb].round(1) : 'N/A'}</td>"
                  "<tr><td style=\"color: var(--#{compressor}-color); font-weight: bold;\">#{compressor}</td><td>#{point[:level]}</td>#{threads_cell}<td>#{point[:compress_speed].round(2)}</td><td>#{point[:decompress_speed].round(2)}</td><td>#{point[:ratio].round(2)}</td><td>#{size_kb}</td>#{memory_cells}</tr>"
                end
              end.join("\n              ")}
            </tbody>
          </table>
        </div>
        <script>
          const rawData = #{data_json};
          const testingThreading = #{@test_threading};
          // One-at-a-time 32-bit hash; used to derive a stable color per compressor name.
          function MurmurOAAT32(key) {
            let h = 3323198485n;
            for (let i = 0; i < key.length; i++) {
              h ^= BigInt(key.charCodeAt(i));
              h = (h * 0x5bd1e995n) & 0xFFFFFFFFn;
              h ^= h >> 15n;
            }
            return h;
          }
          // Generate dynamic colors and create CSS variables
          const compressorNames = Object.keys(rawData);
          const colors = {};
          compressorNames.forEach(name => {
            const rn = MurmurOAAT32(name);
            const sat = 40n + rn % 40n;
            const hue = (rn / 50n) % 360n;
            const color = `hsl(${hue}, ${sat}%, 50%)`;
            colors[name] = color;
            document.documentElement.style.setProperty(`--${name}-color`, color);
          });
          // Filter data for single-threaded results for basic charts
          const singleThreadData = {};
          Object.keys(rawData).forEach(name => {
            singleThreadData[name] = rawData[name].filter(d => (d.threads || 1) === 1);
          });
          // Helper function to create scatter plot datasets
          function createScatterDatasets(dataSource, xField, yField, additionalFields = {}) {
            return compressorNames.map(name => {
              const dataPoints = dataSource[name]
                .filter(d => additionalFields.filter ? additionalFields.filter(d) : true)
                .map(d => {
                  const point = {
                    name: name,
                    x: typeof xField === 'function' ? xField(d) : d[xField],
                    y: typeof yField === 'function' ? yField(d) : d[yField],
                    level: d.level
                  };
                  // Add any additional fields
                  if (additionalFields.extra) {
                    additionalFields.extra.forEach(field => {
                      point[field] = d[field];
                    });
                  }
                  return point;
                });
              return {
                label: name,
                data: dataPoints,
                backgroundColor: colors[name],
                borderColor: colors[name],
                pointRadius: additionalFields.pointRadius || 5,
                pointHoverRadius: additionalFields.pointHoverRadius || 8,
                showLine: false,
                pointStyle: additionalFields.pointStyle || 'circle'
              };
            });
          }
          // Helper function to create a scatter chart with common options
          function createScatterChart(elementId, datasets, config) {
            const defaultOptions = {
              responsive: true,
              maintainAspectRatio: false,
              plugins: {
                title: {
                  display: true,
                  text: config.title,
                  font: { size: 16 }
                },
                legend: { display: true, position: 'top' },
                tooltip: {
                  callbacks: {
                    label: config.tooltipCallback || ((context) => {
                      const point = context.raw;
                      return [`${point.name} L${point.level}`];
                    })
                  }
                }
              },
              scales: {
                x: { title: { display: true, text: config.xLabel } },
                y: { title: { display: true, text: config.yLabel } }
              }
            };
            // Merge with any custom options
            if (config.customOptions) {
              Object.keys(config.customOptions).forEach(key => {
                if (key === 'scales') {
                  Object.assign(defaultOptions.scales.x, config.customOptions.scales.x || {});
                  Object.assign(defaultOptions.scales.y, config.customOptions.scales.y || {});
                } else {
                  defaultOptions[key] = config.customOptions[key];
                }
              });
            }
            return new Chart(document.getElementById(elementId), {
              type: config.type || 'scatter',
              data: { datasets: datasets },
              options: defaultOptions
            });
          }
          // Common tooltip callback for speed/ratio charts
          const speedRatioTooltip = (context) => {
            const point = context.raw;
            return [
              `${point.name} L${point.level}`,
              `Speed: ${point.x?.toFixed(2) || 'N/A'} MB/s`,
              `Ratio: ${point.y?.toFixed(2) || 'N/A'}x`
            ];
          };
          // Create all charts using the helper functions
          // Compression Speed vs Ratio
          createScatterChart('compressionChart',
            createScatterDatasets(singleThreadData, 'compress_speed', 'ratio'),
            {
              title: 'Compression Speed vs Ratio (1 thread)',
              xLabel: 'Compression Speed (MB/s)',
              yLabel: 'Compression Ratio',
              tooltipCallback: speedRatioTooltip
            }
          );
          // Decompression Speed vs Ratio
          createScatterChart('decompressionChart',
            createScatterDatasets(singleThreadData, 'decompress_speed', 'ratio'),
            {
              title: 'Decompression Speed vs Ratio (1 thread)',
              xLabel: 'Decompression Speed (MB/s)',
              yLabel: 'Compression Ratio',
              tooltipCallback: speedRatioTooltip
            }
          );
          // Combined Speed Comparison
          createScatterChart('combinedChart',
            createScatterDatasets(singleThreadData, 'compress_speed', 'decompress_speed',
              { extra: ['ratio'], pointRadius: 6 }),
            {
              title: 'Compression vs Decompression Speed (1 thread)',
              xLabel: 'Compression Speed (MB/s)',
              yLabel: 'Decompression Speed (MB/s)',
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `C: ${point.x.toFixed(2)} MB/s`,
                  `D: ${point.y.toFixed(2)} MB/s`,
                  `Ratio: ${point.ratio.toFixed(2)}x`
                ];
              }
            }
          );
          // Memory vs Compression Ratio
          createScatterChart('compressMemoryChart',
            createScatterDatasets(singleThreadData, 'compress_memory_mb', 'ratio',
              { filter: d => d.compress_memory_mb }),
            {
              title: 'Compression Memory Usage vs Compression Ratio (1 thread)',
              xLabel: 'Compression Memory Usage (MB)',
              yLabel: 'Compression Ratio',
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `Memory: ${point.x.toFixed(1)} MB`,
                  `Ratio: ${point.y.toFixed(2)}x`
                ];
              }
            }
          );
          createScatterChart('decompressMemoryChart',
            createScatterDatasets(singleThreadData, 'decompress_memory_mb', 'ratio',
              { filter: d => d.decompress_memory_mb }),
            {
              title: 'Decompression Memory Usage vs Compression Ratio (1 thread)',
              xLabel: 'Decompression Memory Usage (MB)',
              yLabel: 'Compression Ratio',
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `Memory: ${point.x.toFixed(1)} MB`,
                  `Ratio: ${point.y.toFixed(2)}x`
                ];
              }
            }
          );
          // Threading performance chart (if multi-threading was tested)
          if (testingThreading) {
            const threadingDatasets = compressorNames.filter(name =>
              rawData[name].some(d => d.supports_threading && d.threads > 1)
            ).map(name => {
              const levels = [...new Set(rawData[name].map(d => d.level))];
              return levels.map(level => {
                const levelData = rawData[name].filter(d => d.level === level);
                return {
                  label: `${name} L${level}`,
                  data: levelData.map(d => ({
                    x: d.threads,
                    y: d.compress_speed,
                    name: name,
                    level: level
                  })),
                  backgroundColor: colors[name],
                  borderColor: colors[name],
                  fill: false,
                  tension: 0.1
                };
              });
            }).flat();
            createScatterChart('threadingChart', threadingDatasets, {
              type: 'line',
              title: 'Threading Scaling - Compression Speed',
              xLabel: 'Thread Count',
              yLabel: 'Compression Speed (MB/s)',
              customOptions: {
                scales: { x: { type: 'linear', min: 1 } }
              },
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `Threads: ${point.x}`,
                  `Speed: ${point.y.toFixed(2)} MB/s`
                ];
              }
            });
          }
        </script>
      </body>
      </html>
    HTML
    filename = 'compression_comparison.html'
    File.open(filename, 'w') { |f| f.write(html_content) }
    puts "Created #{filename}"
  end
end
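
# The generated compression_comparison.html is self-contained except for the
# Chart.js script pulled from the jsDelivr CDN; open it in any browser.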

def show_usage
  puts "Usage: #{$0} [options] <input_file>"
  puts
  puts "Options:"
  puts "  -i        Delete cached results and rerun all benchmarks"
  puts "  -t N,M    Test multi-threading with specified thread counts (e.g., 1,2,4,8)"
  puts "  --help    Show this help message"
  puts
  puts "Examples:"
  puts "  #{$0} test.txt             # Single-threaded benchmark with memory"
  puts "  #{$0} -t 1,2,4,8 test.txt  # Multi-threaded benchmark"
  puts "  #{$0} -i test.txt          # Invalidate cache and rerun"
end

options = {
  build: File.join(__dir__, 'compressor_builds'),
  invalidate_cache: false,
  test_threading: false,
  thread_counts: [1],
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options] <input_file>"
  opts.separator ''
  opts.separator 'Options:'
  opts.on('-b BUILD', 'Specify compressor build directory') do |build|
    options[:build] = File.expand_path(build)
  end
  opts.on('-i', '--invalidate-cache', 'Delete cached results and rerun all benchmarks') do
    options[:invalidate_cache] = true
  end
  opts.on('-t COUNTS', 'Test multi-threading. Specify thread counts to test (e.g., 1,2,4,8)') do |counts|
    begin
      # Integer() raises ArgumentError on malformed input, unlike String#to_i.
      thread_counts = counts.split(',').map { |t| Integer(t) }
      if thread_counts.any? { |t| t < 1 }
        raise OptionParser::InvalidArgument, 'Thread counts must be positive integers'
      end
      options[:thread_counts] = thread_counts
      options[:test_threading] = true
    rescue ArgumentError
      raise OptionParser::InvalidArgument, 'Invalid thread counts format'
    end
  end
  opts.on('-h', '--help', 'Show this help message') do
    puts opts
    puts ''
    puts 'Examples:'
    puts "  #{$0} test.txt           # Single-threaded benchmark with memory"
    puts "  #{$0} -t 1,2,4 test.txt  # Custom thread counts"
    exit 0
  end
end

begin
  parser.parse!
rescue => e
  puts "Error: #{e.message}"
  puts parser
  exit 1
end

if ARGV.length != 1
  puts 'Error: Specify one input file'
  puts parser
  exit 1
end

CompressorBuilder.new(options[:build]).build_all
benchmark = CompressorBenchmark.new(ARGV[0], options)
benchmark.run_benchmark
benchmark.generate_html_plot
Test suite for the script above (it expects the script saved as bench.rb in the same directory):
#!/usr/bin/env ruby
require 'fileutils'
require 'json'
require 'tempfile'
require 'tmpdir'
require 'test/unit'

class BenchmarkTest < Test::Unit::TestCase
  def setup
    @test_dir = Dir.mktmpdir("bench_test")
    @original_dir = Dir.pwd
    Dir.chdir(@test_dir)
    # Create test input file
    @input_file = 'test.txt'
    File.open(@input_file, 'w') do |f|
      f.puts "Hello world"
    end
    @bench_script = File.join(@original_dir, 'bench.rb')
  end

  def teardown
    Dir.chdir(@original_dir)
    FileUtils.rm_rf(@test_dir)
  end

  def test_cache_reuse
    # First run with threads 1,4
    output1 = `ruby #{@bench_script} #{@input_file} -t 1,4 2>&1`
    assert_equal 0, $?.exitstatus
    cache_file = Dir.glob("cache_*.json").first
    assert cache_file, "Should create cache file"
    assert File.exist?('compression_comparison.html')
    points_1 = JSON.parse(File.read(cache_file)).values.map(&:length).sum
    # Rerun with a subset of the thread counts - everything should come from the cache
    output2 = `ruby #{@bench_script} #{@input_file} -t 1 2>&1`
    assert_equal 0, $?.exitstatus
    assert_match(/cached results/, output2)
    assert_not_match(/Level 1/, output2)
    # Second run with threads 1,2,4 - should reuse 1,4 and add 2
    output3 = `ruby #{@bench_script} #{@input_file} -t 1,2,4 2>&1`
    assert_equal 0, $?.exitstatus
    assert_match(/Level 1/, output3)
    points_3 = JSON.parse(File.read(cache_file)).values.map(&:length).sum
    assert points_3 > points_1, "Should have more data points after adding thread count 2"
  end

  def test_cache_invalidation
    # First run
    `ruby #{@bench_script} #{@input_file} 2>&1`
    cache_file = Dir.glob("cache_*.json").first
    assert cache_file, "Should create cache file"
    original_mtime = File.mtime(cache_file)
    # Run with cache invalidation
    output = `ruby #{@bench_script} #{@input_file} -i 2>&1`
    assert_equal 0, $?.exitstatus
    assert File.mtime(cache_file) > original_mtime, "Cache should be regenerated"
  end
end

exit Test::Unit::AutoRunner.run
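
To run the tests, save the suite next to bench.rb (the file name bench_test.rb is an assumption) and execute `ruby bench_test.rb`. Each test shells out to bench.rb from a temporary working directory; since the build directory defaults to compressor_builds next to bench.rb, the first run downloads and builds every compressor, and later runs reuse those builds.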