Benchmark program for https://maskray.me/blog/2025-08-31-benchmarking-compression-programs, distributed under the terms of both the MIT license and the Apache License (Version 2.0).
#!/usr/bin/env ruby
# This program downloads and builds several compression utilities, benchmarks their
# compression and decompression performance on a given input file (including memory
# consumption), and finally generates HTML charts.
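#
# Usage sketch (hypothetical input path; see the option parser at the bottom for all flags):
#   ruby bench.rb input.tar            # single-threaded run
#   ruby bench.rb -t 1,2,4 input.tar   # also measure thread scaling
#   ruby bench.rb -i input.tar         # invalidate the per-input cache and rerun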
require 'benchmark'
require 'digest'
require 'etc'
require 'fileutils'
require 'json'
require 'net/http'
require 'open3'
require 'optparse'
require 'tempfile'
require 'uri'

JOBS = Etc.nprocessors

# Defaults: supports_threading: true
COMPRESSORS = {
  'brotli' => {
    url: 'https://github.com/google/brotli/archive/refs/tags/v1.1.0.tar.gz',
    build_dir: 'brotli-1.1.0',
    build_commands: ['cmake -GNinja -S. -Bout -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DBROTLI_DISABLE_TESTS=on -DCMAKE_C_FLAGS="-march=native"', 'ninja -C out install'],
    levels: [1, 3, 5, 7, 9, 11],
    compress: ->exe, lvl, i, o, thr { "#{exe} -c -q #{lvl} '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -c '#{i}' > '#{o}'" },
    supports_threading: false
  },
  'bzip3' => {
    url: 'https://github.com/kspalaiologos/bzip3/releases/download/1.5.3/bzip3-1.5.3.tar.gz',
    build_dir: 'bzip3-1.5.3',
    build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
    levels: [1],
    compress: ->exe, lvl, i, o, thr { "#{exe} -j#{thr} -c '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -j#{thr} -d -c '#{i}' > '#{o}'" },
  },
  'kanzi' => {
    url: 'https://github.com/flanglet/kanzi-cpp/archive/refs/tags/2.4.0.tar.gz',
    build_dir: 'kanzi-cpp-2.4.0',
    build_commands: ['cmake -GNinja -Ssrc -Binstall/bin -DCMAKE_CXX_FLAGS="-march=native"', 'ninja -C install/bin kanzi'],
    levels: 1..9,
    compress: ->exe, lvl, i, o, thr { "#{exe} -c -v 0 -j #{thr} -l #{lvl} -f -i '#{i}' -o '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -j #{thr} -v 0 -f -i '#{i}' -o '#{o}'" },
  },
  'lz4' => {
    url: 'https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz',
    build_dir: 'lz4-1.10.0',
    build_commands: ["make -j #{JOBS} CFLAGS='-O3 -march=native' PREFIX=$PWD/install install"],
    levels: 1..12,
    compress: ->exe, lvl, i, o, thr { "#{exe} -q -T#{thr} -c -#{lvl} '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -q -T#{thr} -c '#{i}' > '#{o}'" },
  },
  'lzham' => {
    url: 'https://github.com/MaskRay/lzham_codec/archive/refs/heads/cmake.zip',
    source_filename: 'lzham-master.zip',
    build_dir: 'lzham_codec-cmake',
    build_commands: ['cmake -GNinja -S. -Bout -DCMAKE_CXX_FLAGS="-march=native"', 'ninja -C out', 'mkdir -p install/bin && rsync -a out/lzhamtest/lzhamtest install/bin/lzham'],
    levels: 1..4,
    compress: ->exe, lvl, i, o, thr { "#{exe} -m#{lvl} -t#{thr} c '#{i}' '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -t#{thr} d '#{i}' '#{o}'" },
  },
  'xz' => {
    url: 'https://tukaani.org/xz/xz-5.8.1.tar.gz',
    build_dir: 'xz-5.8.1',
    build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
    levels: [*1..6, 9],
    compress: ->exe, lvl, i, o, thr { "#{exe} -#{lvl} -T#{thr} -c '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -T#{thr} -c '#{i}' > '#{o}'" },
  },
  'zpaq' => {
    url: 'https://github.com/zpaq/zpaq/archive/refs/heads/master.zip',
    source_filename: 'zpaq-master.zip',
    build_dir: 'zpaq-master',
    build_commands: ["make -j #{JOBS} PREFIX=install CXXFLAGS='-O3 -march=native' install"],
    levels: 1..4,
    extension: '.zpaq',
    compress: ->exe, lvl, i, o, thr { "#{exe} a '#{o}' '#{i}' -m#{lvl} -t#{thr}" },
    # Decompress to tmp_zpaq, then rename to #{o}.
    decompress: ->exe, i, o, thr { "#{exe} x '#{i}' -t#{thr} -to tmp_zpaq" },
    decompress_dir: 'tmp_zpaq',
  },
  'zstd' => {
    url: 'https://github.com/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz',
    build_dir: 'zstd-1.5.7',
    build_commands: ['cmake -GNinja -Sbuild/cmake -Bout -DCMAKE_INSTALL_PREFIX=install -DCMAKE_C_FLAGS="-march=native"', 'ninja -C out install'],
    levels: [*(1..6), 9, 13, 16, 19],
    compress: ->exe, lvl, i, o, thr { "#{exe} -q -T#{thr} -#{lvl} -c '#{i}' > '#{o}'" },
    decompress: ->exe, i, o, thr { "#{exe} -d -q -T#{thr} -c '#{i}' > '#{o}'" },
  },
}
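
# To benchmark another tool, add an entry of the same shape. A hypothetical gzip
# entry might look like this (URL, version, and flags are assumptions, not part of
# the original benchmark set):
#   'gzip' => {
#     url: 'https://ftp.gnu.org/gnu/gzip/gzip-1.13.tar.gz',
#     build_dir: 'gzip-1.13',
#     build_commands: ['./configure --prefix=$PWD/install CFLAGS="-O3 -march=native"', "make -j #{JOBS} install"],
#     levels: 1..9,
#     compress: ->exe, lvl, i, o, thr { "#{exe} -#{lvl} -c '#{i}' > '#{o}'" },
#     decompress: ->exe, i, o, thr { "#{exe} -d -c '#{i}' > '#{o}'" },
#     supports_threading: false
#   }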

class MemoryMonitor
  def self.measure_command(cmd)
    begin
      if RUBY_PLATFORM.include?('linux')
        return measure_with_time_linux(cmd)
      elsif RUBY_PLATFORM.include?('darwin')
        return measure_with_time_macos(cmd)
      else
        raise "Unsupported platform: #{RUBY_PLATFORM}"
      end
    rescue => e
      # Fallback: run without memory measurement
      { success: system(cmd, out: File::NULL, err: File::NULL), memory_kb: nil }
    end
  end

  private

  def self.measure_with_time_linux(cmd)
    # Use GNU time -v to get max RSS in KB.
    # Note: cmd already contains single-quoted paths; the outer single quotes below
    # rely on those paths being free of spaces and shell metacharacters.
    time_cmd = "/usr/bin/time -v sh -c '#{cmd}' 2>&1"
    output, status = Open3.capture2e(time_cmd)
    if status.success?
      # Parse "Maximum resident set size" from verbose output
      if match = output.match(/Maximum resident set size \(kbytes\):\s*(\d+)/)
        memory_kb = match[1].to_i
        { success: true, memory_kb: memory_kb > 0 ? memory_kb : nil }
      else
        { success: true, memory_kb: nil }
      end
    else
      { success: false, memory_kb: nil }
    end
  end

  def self.measure_with_time_macos(cmd)
    time_cmd = "/usr/bin/time -l #{cmd} 2>&1"
    output, status = Open3.capture2e(time_cmd)
    if status.success?
      # Parse "maximum resident set size" from output (in bytes on macOS)
      if match = output.match(/(\d+)\s+maximum resident set size/)
        memory_bytes = match[1].to_i
        memory_kb = memory_bytes / 1024
        { success: true, memory_kb: memory_kb > 0 ? memory_kb : nil }
      else
        { success: true, memory_kb: nil }
      end
    else
      { success: false, memory_kb: nil }
    end
  end
end
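
# Standalone usage sketch for MemoryMonitor (the command string is hypothetical):
#   result = MemoryMonitor.measure_command("gzip -9 -c input > input.gz")
#   result[:success]    #=> true or false
#   result[:memory_kb]  #=> peak RSS in KB, or nil if it could not be measured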

class CompressorBuilder
  def initialize(work_dir)
    @work_dir = File.expand_path(work_dir)
    FileUtils.mkdir_p(@work_dir)
  end

  def build_all
    COMPRESSORS.each do |name, config|
      bin_dir = File.join(@work_dir, config[:build_dir], 'install/bin')
      if begin !Dir.empty?(bin_dir) rescue false end
        puts "✓ #{name} already built"
      else
        download_and_extract(name, config)
        build_compressor(name, config)
      end
      if Dir.exist?(bin_dir)
        program = File.join(bin_dir, name)
        unless File.exist? program
          binaries = Dir.glob(File.join(bin_dir, '*')).select { |f| File.executable?(f) }
          raise "Compressor not found, available executables: #{binaries.map { |b| File.basename(b) }.join(', ')}"
        end
        puts "  Executable: #{program}"
        config[:program] = program
      end
    end
  end

  private

  def download_and_extract(name, config)
    filename = config.fetch(:source_filename, File.basename(config[:url]))
    filepath = File.join(@work_dir, filename)
    unless File.exist?(filepath)
      puts "Downloading #{name}..."
      download_file(config[:url], filepath)
    end
    extract_path = File.join(@work_dir, config[:build_dir])
    unless Dir.exist?(extract_path)
      puts "Extracting #{filename}..."
      if filename.end_with? 'zip'
        system("unzip", filepath, "-d", @work_dir) or
          raise "Failed to extract #{filepath}"
      else
        system("tar", "-xf", filepath, "-C", @work_dir) or
          raise "Failed to extract #{filepath}"
      end
    end
  end

  def download_file(url, filepath)
    uri = URI(url)
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      request = Net::HTTP::Get.new(uri)
      response = http.request(request)
      case response
      when Net::HTTPRedirection
        # Follow redirects recursively.
        download_file(response['location'], filepath)
        return
      when Net::HTTPSuccess
        File.open(filepath, 'wb') { |f| f.write(response.body) }
      else
        raise "Failed to download #{url}: #{response.code} #{response.message}"
      end
    end
  end

  def build_compressor(name, config)
    build_path = File.join(@work_dir, config[:build_dir])
    unless Dir.exist?(build_path)
      raise "Build directory not found: #{build_path}"
    end
    puts "Building #{name}..."
    Dir.chdir(build_path) do
      config[:build_commands].each do |command|
        puts "Running: #{command}"
        success = system(command)
        unless success
          raise "Build command failed: #{command}"
        end
      end
    end
  end
end

class CompressorBenchmark
  def initialize(input_file, options)
    @input_file = input_file
    @invalidate_cache = options[:invalidate_cache]
    @test_threading = options[:test_threading]
    @thread_counts = options[:thread_counts]
    unless File.exist?(@input_file)
      puts "Error: Input file '#{@input_file}' not found!"
      exit 1
    end
    puts "Input file: #{@input_file}"
    @input_size = File.size(@input_file)
    @input_hash = Digest::SHA256.file(@input_file).hexdigest[0..16]
    @cache_file = "cache_#{File.basename(@input_file)}_#{@input_hash}.json"
    puts "File size: #{@input_size} bytes"
    puts "Cache file: #{@cache_file}"
    puts "Thread counts to test: #{@thread_counts.join(', ')}" if @test_threading
    puts "Memory measurement: enabled (default)"
    if @invalidate_cache && File.exist?(@cache_file)
      File.delete(@cache_file)
      puts "Cache invalidated."
    end
    # Load cached results
    @results = {}
    if File.exist?(@cache_file)
      begin
        cached_data = JSON.parse(File.read(@cache_file))
        @results = cached_data.transform_values do |compressor_data|
          compressor_data.map { |point| point.transform_keys(&:to_sym) }
        end
        puts "Loaded cached results for #{@results.keys.join(', ')}"
      rescue JSON::ParserError
        puts "Warning: Corrupted cache file, starting fresh"
      end
    end
    puts
  end

  def save_cache
    File.open(@cache_file, 'w') do |f|
      f.write(JSON.pretty_generate(@results))
    end
  end
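
  # Cache shape: a JSON object keyed by compressor name, each value an array of the
  # result entries built in run_benchmark below, e.g. (numbers illustrative only):
  #   {"zstd": [{"level": 1, "threads": 1, "compress_speed": 412.3, "ratio": 2.9, ...}]}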

  def run_benchmark
    COMPRESSORS.each do |name, config|
      puts "Testing #{name}..."
      program = config[:program]
      compress_method = config[:compress]
      decompress_method = config[:decompress]
      supports_threading = config.fetch(:supports_threading, true)
      executed = false
      @results[name] ||= []
      thread_counts_to_test = @test_threading && supports_threading ? @thread_counts : [1]
      config[:levels].each do |level|
        thread_counts_to_test.each do |threads|
          # Skip configurations whose results (including memory) are already cached.
          if @results[name].any? { |x|
            x[:level] == level && x[:threads] == threads &&
            x[:compress_memory_mb] && x[:decompress_memory_mb]
          }
            next
          end
          thread_info = threads > 1 ? " (#{threads} threads)" : ""
          print "  Level #{level}#{thread_info}... "
          executed = true
          compressed_size = nil
          compress_time = nil
          decompress_time = nil
          compress_memory_kb = nil
          decompress_memory_kb = nil
          # Test compression. zpaq requires the .zpaq extension name.
          Tempfile.create(config.key?(:extension) ? ['compressed', config[:extension]] : 'compressed') do |compressed_file|
            cmd = compress_method.call(program, level, @input_file, compressed_file.path, threads)
            compress_time = Benchmark.realtime do
              result = MemoryMonitor.measure_command(cmd)
              unless result[:success]
                puts "Compression failed: #{cmd}"
                exit 2
              end
              compress_memory_kb = result[:memory_kb]
            end
            compressed_size = File.size(compressed_file.path)
            # Test decompression
            Tempfile.create('decompressed') do |decompressed_file|
              cmd = decompress_method.call(program, compressed_file.path, decompressed_file.path, threads)
              decompress_time = Benchmark.realtime do
                result = MemoryMonitor.measure_command(cmd)
                unless result[:success]
                  puts "Decompression failed: #{cmd}"
                  exit 2
                end
                decompress_memory_kb = result[:memory_kb]
              end
              if config.key?(:decompress_dir)
                FileUtils.mv File.join(config[:decompress_dir], File.basename(@input_file)), decompressed_file.path
              end
              # Verify decompression
              decompressed_size = File.size(decompressed_file.path)
              if decompressed_size != @input_size
                puts "Decompression verification failed: size mismatch (#{decompressed_size} != #{@input_size})"
                exit 2
              end
            end
          end
          compress_speed = @input_size * 1e-6 / compress_time
          decompress_speed = @input_size * 1e-6 / decompress_time
          compression_ratio = @input_size.to_f / compressed_size
          result_entry = {
            level: level,
            threads: threads,
            compress_speed: compress_speed,
            decompress_speed: decompress_speed,
            ratio: compression_ratio,
            compressed_size: compressed_size,
            supports_threading: supports_threading,
            compress_memory_mb: compress_memory_kb ? compress_memory_kb / 1024.0 : nil,
            decompress_memory_mb: decompress_memory_kb ? decompress_memory_kb / 1024.0 : nil
          }
          # Update or add the result
          existing_index = @results[name].find_index { |x| x[:level] == level && x[:threads] == threads }
          if existing_index
            @results[name][existing_index].merge!(result_entry)
          else
            @results[name] << result_entry
          end
          output = "C: #{compress_time.round(3)}s (#{compress_speed.round(1)} MB/s)"
          output += ", Mem: #{result_entry[:compress_memory_mb].round(1)} MB" if result_entry[:compress_memory_mb]
          output += " | D: #{decompress_time.round(3)}s (#{decompress_speed.round(1)} MB/s)"
          output += ", Mem: #{result_entry[:decompress_memory_mb].round(1)} MB" if result_entry[:decompress_memory_mb]
          output += " | Size: #{compressed_size} bytes"
          puts output
          # Save cache after each test to avoid losing work
          save_cache
        end
      end
      unless executed
        puts "Skipping #{name} (using cached results)..."
      end
      puts
    end
  end

  def generate_html_plot
    # Filter results to only include compressors that are defined in COMPRESSORS
    # and only include entries with the specified thread counts.
    filtered_results = {}
    @results.each do |compressor_name, data|
      if COMPRESSORS.key?(compressor_name)
        filtered_data = data.select { |point| @thread_counts.include?(point[:threads] || 1) }
        filtered_results[compressor_name] = filtered_data unless filtered_data.empty?
      end
    end
    data_json = JSON.generate(filtered_results)
    html_content = <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
        <title>Compression Comparison</title>
        <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
        <style>
          .container {
            max-width: 1600px;
            margin: 0 auto;
            background: white;
            padding: 30px;
            border-radius: 15px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
          }
          .chart-row {
            display: flex;
            gap: 20px;
            margin-bottom: 30px;
          }
          .chart-container {
            flex: 1;
            height: 400px;
            position: relative;
          }
          .stats-table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 20px;
          }
          .stats-table th, .stats-table td {
            padding: 8px 12px;
            text-align: left;
            border-bottom: 1px solid #ddd;
            font-size: 0.9em;
          }
          .stats-table th {
            background-color: #34495e;
            color: white;
            font-weight: bold;
          }
          .stats-table tr:nth-child(even) {
            background-color: #f2f2f2;
          }
          .cache-info {
            background: #d5dbdb;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
            font-size: 0.9em;
          }
        </style>
      </head>
      <body>
        <div class="container">
          <h1>Compression & Decompression Comparison</h1>
          <div class="cache-info">
            <strong>Input file:</strong> #{File.basename(@input_file)} (#{(@input_size / 1024.0 / 1024.0).round(2)} MB)<br>
            <strong>File hash:</strong> #{@input_hash}<br>
            <strong>Cache file:</strong> #{@cache_file}<br>
            <strong>Compressors tested:</strong> #{@results.keys.join(', ')}<br>
            <strong>Thread counts tested:</strong> #{@thread_counts.join(', ')}<br>
            <strong>Memory measurement:</strong> enabled
          </div>
          <div class="chart-row">
            <div class="chart-container">
              <canvas id="compressionChart"></canvas>
            </div>
            <div class="chart-container">
              <canvas id="decompressionChart"></canvas>
            </div>
          </div>
          <div class="chart-container" style="height: 500px;">
            <canvas id="combinedChart"></canvas>
          </div>
          <div class="chart-row">
            <div class="chart-container">
              <canvas id="compressMemoryChart"></canvas>
            </div>
            <div class="chart-container">
              <canvas id="decompressMemoryChart"></canvas>
            </div>
          </div>
          #{@test_threading ? '<div class="chart-row">
            <div class="chart-container">
              <canvas id="threadingChart"></canvas>
            </div>
          </div>' : ''}
          <table class="stats-table">
            <thead>
              <tr>
                <th>Compressor</th>
                <th>Level</th>
                #{@test_threading ? '<th>Threads</th>' : ''}
                <th>Compress Speed (MB/s)</th>
                <th>Decompress Speed (MB/s)</th>
                <th>Compression Ratio</th>
                <th>Compressed Size (KB)</th>
                <th>Compress Memory (MB)</th>
                <th>Decompress Memory (MB)</th>
              </tr>
            </thead>
            <tbody>
              #{@results.flat_map do |compressor, data|
                data.map do |point|
                  size_kb = (point[:compressed_size] / 1024.0).round(1)
                  threads_cell = @test_threading ? "<td>#{point[:threads] || 1}</td>" : ""
                  memory_cells = "<td>#{point[:compress_memory_mb] ? point[:compress_memory_mb].round(1) : 'N/A'}</td><td>#{point[:decompress_memory_mb] ? point[:decompress_memory_mb].round(1) : 'N/A'}</td>"
                  "<tr><td style=\"color: var(--#{compressor}-color); font-weight: bold;\">#{compressor}</td><td>#{point[:level]}</td>#{threads_cell}<td>#{point[:compress_speed].round(2)}</td><td>#{point[:decompress_speed].round(2)}</td><td>#{point[:ratio].round(2)}</td><td>#{size_kb}</td>#{memory_cells}</tr>"
                end
              end.join("\n              ")}
            </tbody>
          </table>
        </div>
        <script>
          const rawData = #{data_json};
          const testingThreading = #{@test_threading};
          // One-at-a-time 32-bit hash; used to derive a stable color per compressor name.
          function MurmurOAAT32(key) {
            let h = 3323198485n;
            for (let i = 0; i < key.length; i++) {
              h ^= BigInt(key.charCodeAt(i));
              h = (h * 0x5bd1e995n) & 0xFFFFFFFFn;
              h ^= h >> 15n;
            }
            return h;
          }
          // Generate dynamic colors and create CSS variables
          const compressorNames = Object.keys(rawData);
          const colors = {};
          compressorNames.forEach(name => {
            const rn = MurmurOAAT32(name);
            const sat = 40n + rn % 40n;
            const hue = (rn / 50n) % 360n;
            const color = `hsl(${hue}, ${sat}%, 50%)`;
            colors[name] = color;
            document.documentElement.style.setProperty(`--${name}-color`, color);
          });
          // Filter data for single-threaded results for basic charts
          const singleThreadData = {};
          Object.keys(rawData).forEach(name => {
            singleThreadData[name] = rawData[name].filter(d => (d.threads || 1) === 1);
          });
          // Helper function to create scatter plot datasets
          function createScatterDatasets(dataSource, xField, yField, additionalFields = {}) {
            return compressorNames.map(name => {
              const dataPoints = dataSource[name]
                .filter(d => additionalFields.filter ? additionalFields.filter(d) : true)
                .map(d => {
                  const point = {
                    name: name,
                    x: typeof xField === 'function' ? xField(d) : d[xField],
                    y: typeof yField === 'function' ? yField(d) : d[yField],
                    level: d.level
                  };
                  // Add any additional fields
                  if (additionalFields.extra) {
                    additionalFields.extra.forEach(field => {
                      point[field] = d[field];
                    });
                  }
                  return point;
                });
              return {
                label: name,
                data: dataPoints,
                backgroundColor: colors[name],
                borderColor: colors[name],
                pointRadius: additionalFields.pointRadius || 5,
                pointHoverRadius: additionalFields.pointHoverRadius || 8,
                showLine: false,
                pointStyle: additionalFields.pointStyle || 'circle'
              };
            });
          }
          // Helper function to create a scatter chart with common options
          function createScatterChart(elementId, datasets, config) {
            const defaultOptions = {
              responsive: true,
              maintainAspectRatio: false,
              plugins: {
                title: {
                  display: true,
                  text: config.title,
                  font: { size: 16 }
                },
                legend: { display: true, position: 'top' },
                tooltip: {
                  callbacks: {
                    label: config.tooltipCallback || ((context) => {
                      const point = context.raw;
                      return [`${point.name} L${point.level}`];
                    })
                  }
                }
              },
              scales: {
                x: { title: { display: true, text: config.xLabel } },
                y: { title: { display: true, text: config.yLabel } }
              }
            };
            // Merge with any custom options
            if (config.customOptions) {
              Object.keys(config.customOptions).forEach(key => {
                if (key === 'scales') {
                  Object.assign(defaultOptions.scales.x, config.customOptions.scales.x || {});
                  Object.assign(defaultOptions.scales.y, config.customOptions.scales.y || {});
                } else {
                  defaultOptions[key] = config.customOptions[key];
                }
              });
            }
            return new Chart(document.getElementById(elementId), {
              type: config.type || 'scatter',
              data: { datasets: datasets },
              options: defaultOptions
            });
          }
          // Common tooltip callback for speed/ratio charts
          const speedRatioTooltip = (context) => {
            const point = context.raw;
            return [
              `${point.name} L${point.level}`,
              `Speed: ${point.x?.toFixed(2) || 'N/A'} MB/s`,
              `Ratio: ${point.y?.toFixed(2) || 'N/A'}x`
            ];
          };
          // Create all charts using the helper functions
          // Compression Speed vs Ratio
          createScatterChart('compressionChart',
            createScatterDatasets(singleThreadData, 'compress_speed', 'ratio'),
            {
              title: 'Compression Speed vs Ratio (1 thread)',
              xLabel: 'Compression Speed (MB/s)',
              yLabel: 'Compression Ratio',
              tooltipCallback: speedRatioTooltip
            }
          );
          // Decompression Speed vs Ratio
          createScatterChart('decompressionChart',
            createScatterDatasets(singleThreadData, 'decompress_speed', 'ratio'),
            {
              title: 'Decompression Speed vs Ratio (1 thread)',
              xLabel: 'Decompression Speed (MB/s)',
              yLabel: 'Compression Ratio',
              tooltipCallback: speedRatioTooltip
            }
          );
          // Combined Speed Comparison
          createScatterChart('combinedChart',
            createScatterDatasets(singleThreadData, 'compress_speed', 'decompress_speed',
              { extra: ['ratio'], pointRadius: 6 }),
            {
              title: 'Compression vs Decompression Speed (1 thread)',
              xLabel: 'Compression Speed (MB/s)',
              yLabel: 'Decompression Speed (MB/s)',
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `C: ${point.x.toFixed(2)} MB/s`,
                  `D: ${point.y.toFixed(2)} MB/s`,
                  `Ratio: ${point.ratio.toFixed(2)}x`
                ];
              }
            }
          );
          // Memory vs Compression Ratio
          createScatterChart('compressMemoryChart',
            createScatterDatasets(singleThreadData, 'compress_memory_mb', 'ratio',
              { filter: d => d.compress_memory_mb }),
            {
              title: 'Compression Memory Usage vs Compression Ratio (1 thread)',
              xLabel: 'Compression Memory Usage (MB)',
              yLabel: 'Compression Ratio',
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `Memory: ${point.x.toFixed(1)} MB`,
                  `Ratio: ${point.y.toFixed(2)}x`
                ];
              }
            }
          );
          createScatterChart('decompressMemoryChart',
            createScatterDatasets(singleThreadData, 'decompress_memory_mb', 'ratio',
              { filter: d => d.decompress_memory_mb }),
            {
              title: 'Decompression Memory Usage vs Compression Ratio (1 thread)',
              xLabel: 'Decompression Memory Usage (MB)',
              yLabel: 'Compression Ratio',
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `Memory: ${point.x.toFixed(1)} MB`,
                  `Ratio: ${point.y.toFixed(2)}x`
                ];
              }
            }
          );
          // Threading performance chart (if multi-threading was tested)
          if (testingThreading) {
            const threadingDatasets = compressorNames.filter(name =>
              rawData[name].some(d => d.supports_threading && d.threads > 1)
            ).map(name => {
              const levels = [...new Set(rawData[name].map(d => d.level))];
              return levels.map(level => {
                const levelData = rawData[name].filter(d => d.level === level);
                return {
                  label: `${name} L${level}`,
                  data: levelData.map(d => ({
                    x: d.threads,
                    y: d.compress_speed,
                    name: name,
                    level: level
                  })),
                  backgroundColor: colors[name],
                  borderColor: colors[name],
                  fill: false,
                  tension: 0.1
                };
              });
            }).flat();
            createScatterChart('threadingChart', threadingDatasets, {
              type: 'line',
              title: 'Threading Scaling - Compression Speed',
              xLabel: 'Thread Count',
              yLabel: 'Compression Speed (MB/s)',
              customOptions: {
                scales: { x: { type: 'linear', min: 1 } }
              },
              tooltipCallback: (context) => {
                const point = context.raw;
                return [
                  `${point.name} L${point.level}`,
                  `Threads: ${point.x}`,
                  `Speed: ${point.y.toFixed(2)} MB/s`
                ];
              }
            });
          }
        </script>
      </body>
      </html>
    HTML
    filename = 'compression_comparison.html'
    File.open(filename, 'w') { |f| f.write(html_content) }
    puts "Created #{filename}"
  end
end
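
# The generated compression_comparison.html is self-contained except for the
# Chart.js script pulled from the jsDelivr CDN; open it in any browser.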

def show_usage
  puts "Usage: #{$0} [options] <input_file>"
  puts
  puts "Options:"
  puts "  -i        Delete cached results and rerun all benchmarks"
  puts "  -t N,M    Test multi-threading with specified thread counts (e.g., 1,2,4,8)"
  puts "  --help    Show this help message"
  puts
  puts "Examples:"
  puts "  #{$0} test.txt             # Single-threaded benchmark with memory"
  puts "  #{$0} -t 1,2,4,8 test.txt  # Multi-threaded benchmark"
  puts "  #{$0} -i test.txt          # Invalidate cache and rerun"
end

options = {
  build: File.join(__dir__, 'compressor_builds'),
  invalidate_cache: false,
  test_threading: false,
  thread_counts: [1],
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options] <input_file>"
  opts.separator ''
  opts.separator 'Options:'
  opts.on('-b BUILD', 'Specify compressor build directory') do |build|
    options[:build] = File.expand_path(build)
  end
  opts.on('-i', '--invalidate-cache', 'Delete cached results and rerun all benchmarks') do
    options[:invalidate_cache] = true
  end
  opts.on('-t COUNTS', 'Test multi-threading. Specify thread counts to test (e.g., 1,2,4,8)') do |counts|
    begin
      # Integer() raises ArgumentError on malformed input, unlike String#to_i.
      thread_counts = counts.split(',').map { |t| Integer(t) }
      if thread_counts.any? { |t| t < 1 }
        raise OptionParser::InvalidArgument, 'Thread counts must be positive integers'
      end
      options[:thread_counts] = thread_counts
      options[:test_threading] = true
    rescue ArgumentError
      raise OptionParser::InvalidArgument, 'Invalid thread counts format'
    end
  end
  opts.on('-h', '--help', 'Show this help message') do
    puts opts
    puts ''
    puts 'Examples:'
    puts "  #{$0} test.txt           # Single-threaded benchmark with memory"
    puts "  #{$0} -t 1,2,4 test.txt  # Custom thread counts"
    exit 0
  end
end

begin
  parser.parse!
rescue => e
  puts "Error: #{e.message}"
  puts parser
  exit 1
end

if ARGV.length != 1
  puts 'Error: Specify one input file'
  puts parser
  exit 1
end

CompressorBuilder.new(options[:build]).build_all
benchmark = CompressorBenchmark.new(ARGV[0], options)
benchmark.run_benchmark
benchmark.generate_html_plot
Test suite for the script above (it expects the script saved as bench.rb in the same directory):
#!/usr/bin/env ruby
require 'fileutils'
require 'json'
require 'tempfile'
require 'tmpdir'
require 'test/unit'

class BenchmarkTest < Test::Unit::TestCase
  def setup
    @test_dir = Dir.mktmpdir("bench_test")
    @original_dir = Dir.pwd
    Dir.chdir(@test_dir)
    # Create test input file
    @input_file = 'test.txt'
    File.open(@input_file, 'w') do |f|
      f.puts "Hello world"
    end
    @bench_script = File.join(@original_dir, 'bench.rb')
  end

  def teardown
    Dir.chdir(@original_dir)
    FileUtils.rm_rf(@test_dir)
  end

  def test_cache_reuse
    # First run with threads 1,4
    output1 = `ruby #{@bench_script} #{@input_file} -t 1,4 2>&1`
    assert_equal 0, $?.exitstatus
    cache_file = Dir.glob("cache_*.json").first
    assert cache_file, "Should create cache file"
    assert File.exist?('compression_comparison.html')
    points_1 = JSON.parse(File.read(cache_file)).values.map(&:length).sum
    # Rerun with a subset of the thread counts - everything should come from the cache
    output2 = `ruby #{@bench_script} #{@input_file} -t 1 2>&1`
    assert_equal 0, $?.exitstatus
    assert_match(/cached results/, output2)
    assert_not_match(/Level 1/, output2)
    # Second run with threads 1,2,4 - should reuse 1,4 and add 2
    output3 = `ruby #{@bench_script} #{@input_file} -t 1,2,4 2>&1`
    assert_equal 0, $?.exitstatus
    assert_match(/Level 1/, output3)
    points_3 = JSON.parse(File.read(cache_file)).values.map(&:length).sum
    assert points_3 > points_1, "Should have more data points after adding thread count 2"
  end

  def test_cache_invalidation
    # First run
    `ruby #{@bench_script} #{@input_file} 2>&1`
    cache_file = Dir.glob("cache_*.json").first
    assert cache_file, "Should create cache file"
    original_mtime = File.mtime(cache_file)
    # Run with cache invalidation
    output = `ruby #{@bench_script} #{@input_file} -i 2>&1`
    assert_equal 0, $?.exitstatus
    assert File.mtime(cache_file) > original_mtime, "Cache should be regenerated"
  end
end

exit Test::Unit::AutoRunner.run
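
To run the tests, save the suite next to bench.rb (the file name bench_test.rb is an assumption) and execute `ruby bench_test.rb`. Each test shells out to bench.rb from a temporary working directory; since the build directory defaults to compressor_builds next to bench.rb, the first run downloads and builds every compressor, and later runs reuse those builds.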