Skip to content

Instantly share code, notes, and snippets.

@MaskRay
Last active September 17, 2025 17:14
Show Gist options
  • Save MaskRay/74cdaa83c1f44ee105fcebcdff0ba9a7 to your computer and use it in GitHub Desktop.
Save MaskRay/74cdaa83c1f44ee105fcebcdff0ba9a7 to your computer and use it in GitHub Desktop.
Program accompanying https://maskray.me/blog/2025-08-31-benchmarking-compression-programs, distributed under the terms of both the MIT license and the Apache License (Version 2.0).
#!/usr/bin/env ruby
# This program downloads and builds several compression utilities, benchmarks their compression and decompression
# performance on a specific input file including memory consumption, and finally generates HTML charts.
require 'benchmark'
require 'digest'
require 'etc'
require 'fileutils'
require 'json'
require 'net/http'
require 'optparse'
require 'tempfile'
require 'uri'
require 'open3'
# Number of processors reported by Etc; interpolated into the `make -j` build commands below.
JOBS = Etc.nprocessors
# Table of compressors to build and benchmark, keyed by compressor name. Per-entry keys:
#   url:                source archive to download
#   source_filename:    optional local name for the downloaded archive
#   build_dir:          directory name the archive extracts to
#   build_commands:     shell commands that build the compressor
#   levels:             compression levels to benchmark
#   compress:           lambda (exe, level, input, output, threads) returning a shell command string
#   decompress:         lambda (exe, input, output, threads) returning a shell command string
#   extension:          optional suffix for the compressed temporary file
#   decompress_dir:     optional directory the decompress command extracts into
#   supports_threading: optional flag, see the default below
# Defaults: supports_threading: true
COMPRESSORS = {
'brotli' => {
url: 'https://github.com/google/brotli/archive/refs/tags/v1.1.0.tar.gz',
build_dir: 'brotli-1.1.0',
build_commands: ['cmake -GNinja -S. -Bout -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DBROTLI_DISABLE_TESTS=on -DCMAKE_C_FLAGS="-march=native"', 'ninja -C out install'],
levels: [1, 3, 5, 7, 9, 11],
compress: ->exe, lvl, i, o, thr { "#{exe} -c -q #{lvl} '#{i}' > '#{o}'" },
decompress: ->exe, i, o, thr { "#{exe} -d -c '#{i}' > '#{o}'" },
supports_threading: false
},
'bzip3' => {
url: 'https://github.com/kspalaiologos/bzip3/releases/download/1.5.3/bzip3-1.5.3.tar.gz',
build_dir: 'bzip3-1.5.3',
build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
levels: [1],
compress: ->exe, lvl, i, o, thr { "#{exe} -j#{thr} -c '#{i}' > '#{o}'" },
decompress: ->exe, i, o, thr { "#{exe} -j#{thr} -d -c '#{i}' > '#{o}'" },
},
'kanzi' => {
url: 'https://github.com/flanglet/kanzi-cpp/archive/refs/tags/2.4.0.tar.gz',
build_dir: 'kanzi-cpp-2.4.0',
build_commands: ['cmake -GNinja -Ssrc -Binstall/bin -DCMAKE_CXX_FLAGS="-march=native"', 'ninja -C install/bin kanzi'],
levels: 1..9,
compress: ->exe, lvl, i, o, thr { "#{exe} -c -v 0 -j #{thr} -l #{lvl} -f -i '#{i}' -o '#{o}'" },
decompress: ->exe, i, o, thr { "#{exe} -d -j #{thr} -v 0 -f -i '#{i}' -o '#{o}'" },
},
'lz4' => {
url: 'https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz',
build_dir: 'lz4-1.10.0',
build_commands: ["make -j #{JOBS} CFLAGS='-O3 -march=native' PREFIX=$PWD/install install"],
levels: 1..12,
compress: ->exe, lvl, i, o, thr { "#{exe} -q -T#{thr} -c -#{lvl} '#{i}' > '#{o}'" },
decompress: ->exe, i, o, thr { "#{exe} -d -q -T#{thr} -c '#{i}' > '#{o}'" },
},
'lzham' => {
url: 'https://github.com/MaskRay/lzham_codec/archive/refs/heads/cmake.zip',
source_filename: 'lzham-master.zip',
build_dir: 'lzham_codec-cmake',
build_commands: ['cmake -GNinja -S. -Bout -DCMAKE_CXX_FLAGS="-march=native"', 'ninja -C out', 'mkdir -p install/bin && rsync -a out/lzhamtest/lzhamtest install/bin/lzham'],
levels: 1..4,
compress: ->exe, lvl, i, o, thr { "#{exe} -m#{lvl} -t#{thr} c '#{i}' '#{o}'" },
decompress: ->exe, i, o, thr { "#{exe} -t#{thr} d '#{i}' '#{o}'" },
},
'xz' => {
url: 'https://tukaani.org/xz/xz-5.8.1.tar.gz',
build_dir: 'xz-5.8.1',
build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
levels: [*1..6, 9],
compress: ->exe, lvl, i, o, thr { "#{exe} -#{lvl} -T#{thr} -c '#{i}' > '#{o}'" },
decompress: ->exe, i, o, thr { "#{exe} -d -T#{thr} -c '#{i}' > '#{o}'" },
},
'zpaq' => {
url: 'https://github.com/zpaq/zpaq/archive/refs/heads/master.zip',
source_filename: 'zpaq-master.zip',
build_dir: 'zpaq-master',
build_commands: ["make -j #{JOBS} PREFIX=install CXXFLAGS='-O3 -march=native' install"],
levels: 1..4,
extension: '.zpaq',
compress: ->exe, lvl, i, o, thr { "#{exe} a '#{o}' '#{i}' -m#{lvl} -t#{thr}" },
# Decompress to tmp_zpaq, then rename to #{o}.
decompress: ->exe, i, o, thr { "#{exe} x '#{i}' -t#{thr} -to tmp_zpaq" },
decompress_dir: 'tmp_zpaq',
},
'zstd' => {
url: 'https://github.com/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz',
build_dir: 'zstd-1.5.7',
build_commands: ['cmake -GNinja -Sbuild/cmake -Bout -DCMAKE_INSTALL_PREFIX=install -DCMAKE_C_FLAGS="-march=native"', 'ninja -C out install'],
levels: [*(1..6), 9, 13, 16, 19],
compress: ->exe, lvl, i, o, thr { "#{exe} -q -T#{thr} -#{lvl} -c '#{i}' > '#{o}'" },
decompress: ->exe, i, o, thr { "#{exe} -d -q -T#{thr} -c '#{i}' > '#{o}'" },
},
}
# Runs shell command lines under /usr/bin/time so that their peak resident set size can be
# reported alongside the success status.
class MemoryMonitor
  TIME_PROGRAM = '/usr/bin/time'
  # GNU `time -v` prints the peak RSS in kilobytes.
  LINUX_MAX_RSS = /Maximum resident set size \(kbytes\):\s*(\d+)/
  # macOS `time -l` prints the peak RSS in bytes.
  MACOS_MAX_RSS = /(\d+)\s+maximum resident set size/

  # Runs +cmd+ and measures its peak memory when the platform is supported.
  #
  # @param cmd [String] a shell command line; it may contain quoting and redirections
  # @return [Hash] `{ success: <truthy when the command exited with 0>, memory_kb: Integer or nil }`
  def self.measure_command(cmd)
    if RUBY_PLATFORM.include?('linux')
      measure_with_time_linux(cmd)
    elsif RUBY_PLATFORM.include?('darwin')
      measure_with_time_macos(cmd)
    else
      run_unmeasured(cmd)
    end
  rescue StandardError
    # Fallback: the time wrapper could not be run, so run without memory measurement
    run_unmeasured(cmd)
  end

  # Use GNU time -v to get max RSS in KB.
  def self.measure_with_time_linux(cmd)
    measure_with_time('-v', cmd, LINUX_MAX_RSS, 1)
  end

  # Use BSD time -l; the reported byte count is converted to KB.
  def self.measure_with_time_macos(cmd)
    measure_with_time('-l', cmd, MACOS_MAX_RSS, 1024)
  end

  # Runs `time <flag> sh -c <cmd>` and extracts the peak RSS from the merged output.
  #
  # The command line is handed to `sh -c` as a single argv element, so quotes and redirections
  # inside +cmd+ are interpreted exactly once, and only by that inner shell. The statistics
  # printed by time therefore go to the captured stream and never into a file that +cmd+
  # redirects its own output to.
  def self.measure_with_time(flag, cmd, pattern, units_per_kb)
    output, status = Open3.capture2e(TIME_PROGRAM, flag, 'sh', '-c', cmd)
    return { success: false, memory_kb: nil } unless status.success?

    match = output.match(pattern)
    memory_kb = match ? match[1].to_i / units_per_kb : 0
    { success: true, memory_kb: memory_kb.positive? ? memory_kb : nil }
  end

  # Runs +cmd+ through the shell with its output discarded and no memory figure.
  def self.run_unmeasured(cmd)
    { success: system(cmd, out: File::NULL, err: File::NULL), memory_kb: nil }
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers are hidden explicitly.
  private_class_method :measure_with_time, :run_unmeasured
end
# Downloads, extracts and builds every compressor listed in COMPRESSORS inside one work directory.
class CompressorBuilder
  # Upper bound on the number of HTTP redirects followed for a single download.
  MAX_REDIRECTS = 10

  # @param work_dir [String] directory holding archives and build trees; created if missing
  def initialize(work_dir)
    @work_dir = File.expand_path(work_dir)
    FileUtils.mkdir_p(@work_dir)
  end

  # Ensures each compressor is built and stores the executable path in its config as :program.
  #
  # @raise [RuntimeError] when a download, extraction or build step fails, or when the expected
  #   executable is missing afterwards
  def build_all
    COMPRESSORS.each do |name, config|
      bin_dir = File.join(@work_dir, config[:build_dir], 'install/bin')
      if built?(bin_dir)
        puts "✓ #{name} already built"
      else
        download_and_extract(name, config)
        build_compressor(name, config)
      end
      program = locate_program(name, bin_dir)
      puts " Executable: #{program}"
      config[:program] = program
    end
  end

  private

  # A compressor counts as built once its install/bin directory exists and is non-empty.
  def built?(bin_dir)
    Dir.exist?(bin_dir) && !Dir.empty?(bin_dir)
  end

  # Returns the path of the executable named after the compressor, failing early with a clear
  # message instead of leaving :program unset for the benchmark to trip over later.
  def locate_program(name, bin_dir)
    raise "Compressor not found, missing directory: #{bin_dir}" unless Dir.exist?(bin_dir)

    program = File.join(bin_dir, name)
    return program if File.exist?(program)

    binaries = Dir.glob(File.join(bin_dir, '*')).select { |f| File.executable?(f) }
    raise "Compressor not found, available executables: #{binaries.map { |b| File.basename(b) }.join(', ')}"
  end

  # Fetches the source archive unless it is already present, then unpacks it unless the build
  # directory already exists.
  def download_and_extract(name, config)
    filename = config.fetch(:source_filename, File.basename(config[:url]))
    filepath = File.join(@work_dir, filename)
    unless File.exist?(filepath)
      puts "Downloading #{name}..."
      download_file(config[:url], filepath)
    end
    extract_path = File.join(@work_dir, config[:build_dir])
    return if Dir.exist?(extract_path)

    puts "Extracting #{filename}..."
    extractor = if filename.end_with?('zip')
                  ['unzip', filepath, '-d', @work_dir]
                else
                  ['tar', '-xf', filepath, '-C', @work_dir]
                end
    system(*extractor) or raise "Failed to extract #{filepath}"
  end

  # Downloads +url+ to +filepath+, following at most +redirect_limit+ redirects.
  #
  # The body is streamed into "<filepath>.part" and renamed only after a complete response, so
  # an interrupted download is never mistaken for a finished archive on the next run.
  #
  # @raise [RuntimeError] on a non-success response or when the redirect limit is exceeded
  def download_file(url, filepath, redirect_limit = MAX_REDIRECTS)
    uri = URI(url)
    partial = "#{filepath}.part"
    (redirect_limit + 1).times do
      location = fetch(uri, partial)
      unless location
        File.rename(partial, filepath)
        return
      end
      # Location may be relative; resolve it against the URL that produced it.
      uri = URI.join(uri.to_s, location)
    end
    raise "Failed to download #{url}: too many redirects"
  ensure
    FileUtils.rm_f(partial) if partial
  end

  # Performs one GET request. Writes the body to +path+ on success and returns nil; returns the
  # Location header when the server answers with a redirect.
  def fetch(uri, path)
    location = nil
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(Net::HTTP::Get.new(uri)) do |response|
        case response
        when Net::HTTPRedirection
          location = response['location']
          raise "Failed to download #{uri}: redirect without Location" unless location
        when Net::HTTPSuccess
          File.open(path, 'wb') { |f| response.read_body { |chunk| f.write(chunk) } }
        else
          raise "Failed to download #{uri}: #{response.code} #{response.message}"
        end
      end
    end
    location
  end

  # Runs the configured build commands inside the extracted source directory.
  def build_compressor(name, config)
    build_path = File.join(@work_dir, config[:build_dir])
    raise "Build directory not found: #{build_path}" unless Dir.exist?(build_path)

    puts "Building #{name}..."
    Dir.chdir(build_path) do
      config[:build_commands].each do |command|
        puts "Running: #{command}"
        raise "Build command failed: #{command}" unless system(command)
      end
    end
  end
end
class CompressorBenchmark
# Validates the input file, reports the run configuration and loads any cached results.
#
# @param input_file [String] path of the file to compress
# @param options [Hash] reads :invalidate_cache, :test_threading and :thread_counts
def initialize(input_file, options)
  @input_file = input_file
  @invalidate_cache, @test_threading, @thread_counts =
    options.values_at(:invalidate_cache, :test_threading, :thread_counts)

  unless File.exist?(@input_file)
    puts "Error: Input file '#{@input_file}' not found!"
    exit 1
  end

  puts "Input file: #{@input_file}"
  @input_size = File.size(@input_file)
  # The cache file name embeds a hash prefix of the input's contents.
  @input_hash = Digest::SHA256.file(@input_file).hexdigest[0, 17]
  @cache_file = "cache_#{File.basename(@input_file)}_#{@input_hash}.json"
  puts "File size: #{@input_size} bytes"
  puts "Cache file: #{@cache_file}"
  puts "Thread counts to test: #{@thread_counts.join(', ')}" if @test_threading
  puts "Memory measurement: enabled (default)"

  if @invalidate_cache && File.exist?(@cache_file)
    File.unlink(@cache_file)
    puts "Cache invalidated."
  end

  # Load cached results
  @results = {}
  if File.exist?(@cache_file)
    begin
      parsed = JSON.parse(File.read(@cache_file))
      # JSON yields string keys; the rest of the class addresses points by symbol.
      @results = parsed.transform_values do |points|
        points.map { |entry| entry.transform_keys(&:to_sym) }
      end
      puts "Loaded cached results for #{@results.keys.join(', ')}"
    rescue JSON::ParserError
      puts "Warning: Corrupted cache file, starting fresh"
    end
  end
  puts
end
# Writes every result collected so far to the cache file as pretty-printed JSON.
def save_cache
  File.write(@cache_file, JSON.pretty_generate(@results))
end
# Benchmarks every compressor in COMPRESSORS at each of its levels (and at each requested thread
# count when threading tests are enabled and the compressor supports threads). Points already
# in the cache with both memory figures are skipped; the cache is saved after each new point.
# Exits the process with status 2 when a command fails or the round trip does not reproduce
# the input.
def run_benchmark
  COMPRESSORS.each do |name, config|
    puts "Testing #{name}..."
    supports_threading = config.fetch(:supports_threading, true)
    thread_counts_to_test = @test_threading && supports_threading ? @thread_counts : [1]
    @results[name] ||= []
    executed = false
    config[:levels].each do |level|
      thread_counts_to_test.each do |threads|
        next if result_cached?(name, level, threads)

        thread_info = threads > 1 ? " (#{threads} threads)" : ""
        print " Level #{level}#{thread_info}... "
        executed = true
        measurement = measure_point(config, level, threads)
        entry = build_result_entry(level, threads, supports_threading, measurement)
        store_result(name, entry)
        puts describe_result(entry, measurement)
        # Save cache after each test to avoid losing work
        save_cache
      end
    end
    puts "Skipping #{name} (using cached results)..." unless executed
    puts
  end
end

# True when the cache already holds this point including both memory figures; entries lacking
# them are measured again.
private def result_cached?(name, level, threads)
  @results[name].any? do |x|
    x[:level] == level && x[:threads] == threads &&
      x[:compress_memory_mb] && x[:decompress_memory_mb]
  end
end

# Compresses and decompresses the input once and returns a hash with :compress_time,
# :decompress_time (seconds), :compress_memory_kb, :decompress_memory_kb and :compressed_size.
private def measure_point(config, level, threads)
  program = config[:program]
  measurement = {}
  # zpaq requires the .zpaq extension name.
  basename = config.key?(:extension) ? ['compressed', config[:extension]] : 'compressed'
  Tempfile.create(basename) do |compressed_file|
    cmd = config[:compress].call(program, level, @input_file, compressed_file.path, threads)
    measurement[:compress_time], measurement[:compress_memory_kb] = timed_run(cmd, 'Compression')
    measurement[:compressed_size] = File.size(compressed_file.path)
    Tempfile.create('decompressed') do |decompressed_file|
      cmd = config[:decompress].call(program, compressed_file.path, decompressed_file.path, threads)
      measurement[:decompress_time], measurement[:decompress_memory_kb] = timed_run(cmd, 'Decompression')
      if config.key?(:decompress_dir)
        # The tool extracted into a directory; move the file to where verification expects it.
        FileUtils.mv File.join(config[:decompress_dir], File.basename(@input_file)), decompressed_file.path
      end
      verify_roundtrip(decompressed_file.path)
    end
  end
  measurement
end

# Runs +cmd+ under the memory monitor and returns [elapsed_seconds, memory_kb].
# +label+ names the phase in the failure message.
private def timed_run(cmd, label)
  memory_kb = nil
  elapsed = Benchmark.realtime do
    result = MemoryMonitor.measure_command(cmd)
    unless result[:success]
      puts "#{label} failed: #{cmd}"
      exit 2
    end
    memory_kb = result[:memory_kb]
  end
  [elapsed, memory_kb]
end

# Verify decompression: the output must match the input in size and, because a size check
# alone accepts same-length corruption, in content as well.
private def verify_roundtrip(path)
  decompressed_size = File.size(path)
  if decompressed_size != @input_size
    puts "Decompression verification failed: size mismatch (#{decompressed_size} != #{@input_size})"
    exit 2
  end
  return if FileUtils.compare_file(@input_file, path)

  puts "Decompression verification failed: content mismatch"
  exit 2
end

# Converts raw timings and sizes into the entry stored in the cache (speeds in MB/s with
# 1 MB = 10^6 bytes, memory in MB with 1 MB = 1024 KB).
private def build_result_entry(level, threads, supports_threading, measurement)
  compress_kb = measurement[:compress_memory_kb]
  decompress_kb = measurement[:decompress_memory_kb]
  {
    level: level,
    threads: threads,
    compress_speed: @input_size * 1e-6 / measurement[:compress_time],
    decompress_speed: @input_size * 1e-6 / measurement[:decompress_time],
    ratio: @input_size.to_f / measurement[:compressed_size],
    compressed_size: measurement[:compressed_size],
    supports_threading: supports_threading,
    compress_memory_mb: compress_kb ? compress_kb / 1024.0 : nil,
    decompress_memory_mb: decompress_kb ? decompress_kb / 1024.0 : nil
  }
end

# Update or add the result for this level/thread combination.
private def store_result(name, entry)
  existing = @results[name].find { |x| x[:level] == entry[:level] && x[:threads] == entry[:threads] }
  if existing
    existing.merge!(entry)
  else
    @results[name] << entry
  end
end

# Builds the one-line progress summary printed after a point has been measured.
private def describe_result(entry, measurement)
  output = "C: #{measurement[:compress_time].round(3)}s (#{entry[:compress_speed].round(1)} MB/s)"
  output += ", Mem: #{entry[:compress_memory_mb].round(1)} MB" if entry[:compress_memory_mb]
  output += " | D: #{measurement[:decompress_time].round(3)}s (#{entry[:decompress_speed].round(1)} MB/s)"
  output += ", Mem: #{entry[:decompress_memory_mb].round(1)} MB" if entry[:decompress_memory_mb]
  output + " | Size: #{entry[:compressed_size]} bytes"
end
# Writes compression_comparison.html to the current directory: a self-contained page with
# Chart.js scatter plots and a statistics table for the collected results.
#
# Only compressors present in COMPRESSORS and points whose thread count is among the requested
# ones are reported; the charts, the table and the summary all use that same filtered set, so
# every table row has a matching colour variable defined by the page script.
def generate_html_plot
  # Filter results to only include compressors that are defined in COMPRESSORS
  # and only include entries with the specified thread counts
  filtered_results = {}
  @results.each do |compressor_name, data|
    next unless COMPRESSORS.key?(compressor_name)

    filtered_data = data.select { |point| @thread_counts.include?(point[:threads] || 1) }
    filtered_results[compressor_name] = filtered_data unless filtered_data.empty?
  end
  data_json = JSON.generate(filtered_results)

  # Optional canvas for the thread-scaling chart.
  threading_chart_html = if @test_threading
                           ['<div class="chart-row">',
                            '<div class="chart-container">',
                            '<canvas id="threadingChart"></canvas>',
                            '</div>',
                            '</div>'].join("\n")
                         else
                           ''
                         end
  threads_header = @test_threading ? '<th>Threads</th>' : ''

  table_rows = filtered_results.flat_map do |compressor, data|
    data.map do |point|
      size_kb = (point[:compressed_size] / 1024.0).round(1)
      threads_cell = @test_threading ? "<td>#{point[:threads] || 1}</td>" : ""
      memory_cells = "<td>#{point[:compress_memory_mb] ? point[:compress_memory_mb].round(1) : 'N/A'}</td><td>#{point[:decompress_memory_mb] ? point[:decompress_memory_mb].round(1) : 'N/A'}</td>"
      "<tr><td style=\"color: var(--#{compressor}-color); font-weight: bold;\">#{compressor}</td><td>#{point[:level]}</td>#{threads_cell}<td>#{point[:compress_speed].round(2)}</td><td>#{point[:decompress_speed].round(2)}</td><td>#{point[:ratio].round(2)}</td><td>#{size_kb}</td>#{memory_cells}</tr>"
    end
  end.join("\n ")

  html_content = <<~HTML
    <!DOCTYPE html>
    <html>
    <head>
    <title>Compression Comparison</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
    .container {
    max-width: 1600px;
    margin: 0 auto;
    background: white;
    padding: 30px;
    border-radius: 15px;
    box-shadow: 0 10px 30px rgba(0,0,0,0.2);
    }
    .chart-row {
    display: flex;
    gap: 20px;
    margin-bottom: 30px;
    }
    .chart-container {
    flex: 1;
    height: 400px;
    position: relative;
    }
    .stats-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 20px;
    }
    .stats-table th, .stats-table td {
    padding: 8px 12px;
    text-align: left;
    border-bottom: 1px solid #ddd;
    font-size: 0.9em;
    }
    .stats-table th {
    background-color: #34495e;
    color: white;
    font-weight: bold;
    }
    .stats-table tr:nth-child(even) {
    background-color: #f2f2f2;
    }
    .cache-info {
    background: #d5dbdb;
    padding: 15px;
    border-radius: 5px;
    margin-bottom: 20px;
    font-size: 0.9em;
    }
    </style>
    </head>
    <body>
    <div class="container">
    <h1>Compression &amp; Decompression Comparison</h1>
    <div class="cache-info">
    <strong>Input file:</strong> #{File.basename(@input_file)} (#{(@input_size/1024.0/1024.0).round(2)} MB)<br>
    <strong>File hash:</strong> #{@input_hash}<br>
    <strong>Cache file:</strong> #{@cache_file}<br>
    <strong>Compressors tested:</strong> #{filtered_results.keys.join(', ')}<br>
    <strong>Thread counts tested:</strong> #{@thread_counts.join(', ')}<br>
    <strong>Memory measurement:</strong> enabled
    </div>
    <div class="chart-row">
    <div class="chart-container">
    <canvas id="compressionChart"></canvas>
    </div>
    <div class="chart-container">
    <canvas id="decompressionChart"></canvas>
    </div>
    </div>
    <div class="chart-container" style="height: 500px;">
    <canvas id="combinedChart"></canvas>
    </div>
    <div class="chart-row">
    <div class="chart-container">
    <canvas id="compressMemoryChart"></canvas>
    </div>
    <div class="chart-container">
    <canvas id="decompressMemoryChart"></canvas>
    </div>
    </div>
    #{threading_chart_html}
    <table class="stats-table">
    <thead>
    <tr>
    <th>Compressor</th>
    <th>Level</th>
    #{threads_header}
    <th>Compress Speed (MB/s)</th>
    <th>Decompress Speed (MB/s)</th>
    <th>Compression Ratio</th>
    <th>Compressed Size (KB)</th>
    <th>Compress Memory (MB)</th>
    <th>Decompress Memory (MB)</th>
    </tr>
    </thead>
    <tbody>
    #{table_rows}
    </tbody>
    </table>
    </div>
    <script>
    const rawData = #{data_json};
    const testingThreading = #{@test_threading ? 'true' : 'false'};
    function MurmurOAAT32(key) {
    let h = 3323198485n;
    for (let i = 0; i < key.length; i++) {
    h ^= BigInt(key.charCodeAt(i));
    h = (h * 0x5bd1e995n) & 0xFFFFFFFFn;
    h ^= h >> 15n;
    }
    return h;
    }
    // Generate dynamic colors and create CSS variables
    const compressorNames = Object.keys(rawData);
    const colors = {};
    compressorNames.forEach(name => {
    const rn = MurmurOAAT32(name);
    const sat = 40n + rn % 40n;
    const hue = (rn / 50n) % 360n;
    const color = `hsl(${hue}, ${sat}%, 50%)`;
    colors[name] = color;
    document.documentElement.style.setProperty(`--${name}-color`, color);
    });
    // Filter data for single-threaded results for basic charts
    const singleThreadData = {};
    Object.keys(rawData).forEach(name => {
    singleThreadData[name] = rawData[name].filter(d => (d.threads || 1) === 1);
    });
    // Helper function to create scatter plot datasets
    function createScatterDatasets(dataSource, xField, yField, additionalFields = {}) {
    return compressorNames.map(name => {
    const dataPoints = dataSource[name]
    .filter(d => additionalFields.filter ? additionalFields.filter(d) : true)
    .map(d => {
    const point = {
    name: name,
    x: typeof xField === 'function' ? xField(d) : d[xField],
    y: typeof yField === 'function' ? yField(d) : d[yField],
    level: d.level
    };
    // Add any additional fields
    if (additionalFields.extra) {
    additionalFields.extra.forEach(field => {
    point[field] = d[field];
    });
    }
    return point;
    });
    return {
    label: name,
    data: dataPoints,
    backgroundColor: colors[name],
    borderColor: colors[name],
    pointRadius: additionalFields.pointRadius || 5,
    pointHoverRadius: additionalFields.pointHoverRadius || 8,
    showLine: false,
    pointStyle: additionalFields.pointStyle || 'circle'
    };
    });
    }
    // Helper function to create a scatter chart with common options
    function createScatterChart(elementId, datasets, config) {
    const defaultOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
    title: {
    display: true,
    text: config.title,
    font: { size: 16 }
    },
    legend: { display: true, position: 'top' },
    tooltip: {
    callbacks: {
    label: config.tooltipCallback || ((context) => {
    const point = context.raw;
    return [`${point.name} L${point.level}`];
    })
    }
    }
    },
    scales: {
    x: { title: { display: true, text: config.xLabel } },
    y: { title: { display: true, text: config.yLabel } }
    }
    };
    // Merge with any custom options
    if (config.customOptions) {
    Object.keys(config.customOptions).forEach(key => {
    if (key === 'scales') {
    Object.assign(defaultOptions.scales.x, config.customOptions.scales.x || {});
    Object.assign(defaultOptions.scales.y, config.customOptions.scales.y || {});
    } else {
    defaultOptions[key] = config.customOptions[key];
    }
    });
    }
    return new Chart(document.getElementById(elementId), {
    type: config.type || 'scatter',
    data: { datasets: datasets },
    options: defaultOptions
    });
    }
    // Common tooltip callback for speed/ratio charts
    const speedRatioTooltip = (context) => {
    const point = context.raw;
    return [
    `${point.name} L${point.level}`,
    `Speed: ${point.x?.toFixed(2) || 'N/A'} MB/s`,
    `Ratio: ${point.y?.toFixed(2) || 'N/A'}x`
    ];
    };
    // Create all charts using the helper functions
    // Compression Speed vs Ratio
    createScatterChart('compressionChart',
    createScatterDatasets(singleThreadData, 'compress_speed', 'ratio'),
    {
    title: 'Compression Speed vs Ratio (1 thread)',
    xLabel: 'Compression Speed (MB/s)',
    yLabel: 'Compression Ratio',
    tooltipCallback: speedRatioTooltip
    }
    );
    // Decompression Speed vs Ratio
    createScatterChart('decompressionChart',
    createScatterDatasets(singleThreadData, 'decompress_speed', 'ratio'),
    {
    title: 'Decompression Speed vs Ratio (1 thread)',
    xLabel: 'Decompression Speed (MB/s)',
    yLabel: 'Compression Ratio',
    tooltipCallback: speedRatioTooltip
    }
    );
    // Combined Speed Comparison
    createScatterChart('combinedChart',
    createScatterDatasets(singleThreadData, 'compress_speed', 'decompress_speed',
    { extra: ['ratio'], pointRadius: 6 }),
    {
    title: 'Compression vs Decompression Speed (1 thread)',
    xLabel: 'Compression Speed (MB/s)',
    yLabel: 'Decompression Speed (MB/s)',
    tooltipCallback: (context) => {
    const point = context.raw;
    return [
    `${point.name} L${point.level}`,
    `C: ${point.x.toFixed(2)} MB/s`,
    `D: ${point.y.toFixed(2)} MB/s`,
    `Ratio: ${point.ratio.toFixed(2)}x`
    ];
    }
    }
    );
    // Memory vs Compression Ratio
    createScatterChart('compressMemoryChart',
    createScatterDatasets(singleThreadData, 'compress_memory_mb', 'ratio',
    { filter: d => d.compress_memory_mb }),
    {
    title: 'Compression Memory Usage vs Compression Ratio (1 thread)',
    xLabel: 'Compression Memory Usage (MB)',
    yLabel: 'Compression Ratio',
    tooltipCallback: (context) => {
    const point = context.raw;
    return [
    `${point.name} L${point.level}`,
    `Memory: ${point.x.toFixed(1)} MB`,
    `Ratio: ${point.y.toFixed(2)}x`
    ];
    }
    }
    );
    // Only plot points that actually have a decompression memory figure
    createScatterChart('decompressMemoryChart',
    createScatterDatasets(singleThreadData, 'decompress_memory_mb', 'ratio',
    { filter: d => d.decompress_memory_mb }),
    {
    title: 'Decompression Memory Usage vs Compression Ratio (1 thread)',
    xLabel: 'Decompression Memory Usage (MB)',
    yLabel: 'Compression Ratio',
    tooltipCallback: (context) => {
    const point = context.raw;
    return [
    `${point.name} L${point.level}`,
    `Memory: ${point.x.toFixed(1)} MB`,
    `Ratio: ${point.y.toFixed(2)}x`
    ];
    }
    }
    );
    // Threading performance chart (if multi-threading was tested)
    if (testingThreading) {
    const threadingDatasets = compressorNames.filter(name =>
    rawData[name].some(d => d.supports_threading && d.threads > 1)
    ).map(name => {
    const levels = [...new Set(rawData[name].map(d => d.level))];
    return levels.map(level => {
    const levelData = rawData[name].filter(d => d.level === level);
    return {
    label: `${name} L${level}`,
    data: levelData.map(d => ({
    x: d.threads,
    y: d.compress_speed,
    name: name,
    level: level
    })),
    backgroundColor: colors[name],
    borderColor: colors[name],
    fill: false,
    tension: 0.1
    };
    });
    }).flat();
    createScatterChart('threadingChart', threadingDatasets, {
    type: 'line',
    title: 'Threading Scaling - Compression Speed',
    xLabel: 'Thread Count',
    yLabel: 'Compression Speed (MB/s)',
    customOptions: {
    scales: { x: { type: 'linear', min: 1 } }
    },
    tooltipCallback: (context) => {
    const point = context.raw;
    return [
    `${point.name} L${point.level}`,
    `Threads: ${point.x}`,
    `Speed: ${point.y.toFixed(2)} MB/s`
    ];
    }
    });
    }
    </script>
    </body>
    </html>
  HTML
  filename = 'compression_comparison.html'
  File.write(filename, html_content)
  puts "Created #{filename}"
end
end
# Prints a short usage summary with examples to standard output.
def show_usage
  puts <<~USAGE
    Usage: #{$0} [options] <input_file>

    Options:
     -i Delete cached results and rerun all benchmarks
     -t N,M Test multi-threading with specified thread counts (e.g., 1,2,4,8)
     --help Show this help message

    Examples:
     #{$0} test.txt # Single-threaded benchmark with memory
     #{$0} -t 1,2,4,8 test.txt # Multi-threaded benchmark
     #{$0} -i test.txt # Invalidate cache and rerun
  USAGE
end
# Default settings; the command-line switches parsed below override them.
options = {
  build: File.join(__dir__, 'compressor_builds'),
  invalidate_cache: false,
  test_threading: false,
  thread_counts: [1],
}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options] <input_file>"
  opts.separator ''
  opts.separator 'Options:'
  opts.on('-b BUILD', 'Specify compressor build directory') do |build|
    options[:build] = File.expand_path(build)
  end
  opts.on('-i', '--invalidate-cache', 'Delete cached results and rerun all benchmarks') do
    options[:invalidate_cache] = true
  end
  opts.on('-t COUNTS', 'Test multi-threading. Specify thread counts to test (e.g., 1,2,4,8)') do |counts|
    begin
      # Integer() raises on input such as "2x" or an empty field, which String#to_i would
      # silently turn into 2 or 0; the -1 limit keeps trailing empty fields so "1," is rejected.
      thread_counts = counts.split(',', -1).map { |field| Integer(field.strip, 10) }
    rescue ArgumentError
      raise OptionParser::InvalidArgument, 'Invalid thread counts format'
    end
    if thread_counts.empty? || thread_counts.any? { |t| t < 1 }
      raise OptionParser::InvalidArgument, 'Thread counts must be positive integers'
    end
    # Repeated counts would only repeat the same measurement.
    options[:thread_counts] = thread_counts.uniq
    options[:test_threading] = true
  end
  opts.on('-h', '--help', 'Show this help message') do
    puts opts
    puts ''
    puts 'Examples:'
    puts " #{$0} test.txt # Single-threaded benchmark with memory"
    puts " #{$0} -t 1,2,4 test.txt # Custom thread counts"
    exit 0
  end
end
begin
  parser.parse!
rescue => e
  puts "Error: #{e.message}"
  puts parser
  exit 1
end
# parse! removes the recognised switches from ARGV, leaving only positional arguments.
if ARGV.length != 1
  puts 'Error: Specify one input file'
  puts parser
  exit 1
end
# Build (or reuse) the compressors, run the benchmarks, then render the report.
CompressorBuilder.new(options[:build]).build_all
benchmark = CompressorBenchmark.new(ARGV[0], options)
benchmark.run_benchmark
benchmark.generate_html_plot
#!/usr/bin/env ruby
require 'fileutils'
require 'json'
require 'tempfile'
require 'test/unit'
# End-to-end tests: each test runs bench.rb (expected in the directory the tests are started
# from) inside a scratch directory and inspects the cache file and HTML report it produces.
class BenchmarkTest < Test::Unit::TestCase
  def setup
    @test_dir = Dir.mktmpdir("bench_test")
    @original_dir = Dir.pwd
    Dir.chdir(@test_dir)
    # Create test input file
    @input_file = 'test.txt'
    File.write(@input_file, "Hello world\n")
    @bench_script = File.join(@original_dir, 'bench.rb')
  end

  def teardown
    Dir.chdir(@original_dir)
    FileUtils.rm_rf(@test_dir)
  end

  def test_cache_reuse
    # First run with threads 1,4
    run_bench('-t', '1,4')
    assert_equal 0, $?.exitstatus
    cache_file = Dir.glob("cache_*.json").first
    assert cache_file, "Should create cache file"
    assert File.exist?('compression_comparison.html')
    points_1 = JSON.parse(File.read(cache_file)).values.map(&:length).sum
    # Second run with thread count 1 only - everything should come from the cache
    output2 = run_bench('-t', '1')
    assert_equal 0, $?.exitstatus
    assert_match(/cached results/, output2)
    assert_not_match(/Level 1/, output2)
    # Third run with threads 1,2,4 - should reuse 1,4 and add 2
    output3 = run_bench('-t', '1,2,4')
    assert_equal 0, $?.exitstatus
    assert_match(/Level 1/, output3)
    points_3 = JSON.parse(File.read(cache_file)).values.map(&:length).sum
    assert points_3 > points_1, "Should have more data points after adding thread count 2"
  end

  def test_cache_invalidation
    # First run
    run_bench
    assert_equal 0, $?.exitstatus
    cache_file = Dir.glob("cache_*.json").first
    assert cache_file, "Should create cache file"
    original_mtime = File.mtime(cache_file)
    # Run with cache invalidation
    run_bench('-i')
    assert_equal 0, $?.exitstatus
    assert File.mtime(cache_file) > original_mtime, "Cache should be regenerated"
  end

  private

  # Runs the benchmark script on the test input with extra arguments and returns its combined
  # stdout/stderr; $? holds the exit status afterwards. The argument vector is passed without a
  # shell, so paths containing spaces need no quoting.
  def run_bench(*args)
    IO.popen(['ruby', @bench_script, @input_file, *args], err: [:child, :out], &:read)
  end
end
# Run the tests explicitly and pass the runner's result to exit as the process status.
exit Test::Unit::AutoRunner.run
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment