Skip to content

Instantly share code, notes, and snippets.

@cbankston
Last active May 5, 2016 17:18
Show Gist options
  • Save cbankston/226e9dc566cc8730102f54b8fa5f20a9 to your computer and use it in GitHub Desktop.
Save cbankston/226e9dc566cc8730102f54b8fa5f20a9 to your computer and use it in GitHub Desktop.
de-dupe algorithm benchmark for teammates
# Size of each generated chunk of benchmark input: the fixtures below are
# built from integer ranges whose bounds are multiples of MAX.
MAX = 10_000
# De-duplicates a flat list of results, keeping the first occurrence of each
# value in its original order.
#
# The linear `include?` scan is kept on purpose: it is the approach this
# script benchmarks, so it is not swapped for `Array#uniq`.
#
# @param results [Array] results that may contain duplicates
# @return [Array] a new array without duplicates; the input is not modified
def dedupe_alltogether(results)
  # Accumulate into a separate array. Reassigning `results` here would throw
  # away the input and leave the loop iterating over an empty array.
  unique_results = []
  results.each do |result|
    unique_results.push(result) unless unique_results.include?(result)
  end
  unique_results
end
# De-duplicates results that are grouped per vendor, producing one flat list
# that keeps the first occurrence of each value in iteration order.
#
# The linear `include?` scan is kept on purpose: it is the approach this
# script benchmarks.
#
# @param results_by_vendor [Hash{Object => Array}] vendor => that vendor's results
# @return [Array] a new flat array of unique results across all vendors
def dedupe_by_vendor(results_by_vendor)
  unique_results = []
  # The vendor key is not needed for de-duplication, only its results.
  results_by_vendor.each do |_vendor, vendor_results|
    vendor_results.each do |result|
      unique_results.push(result) unless unique_results.include?(result)
    end
  end
  # Return the accumulated list; without this the method would return the
  # receiver of `each` (the input) and the de-duplicated data would be lost.
  unique_results
end
# Flat fixtures, 4 * MAX elements each.
# With dupes: the sequence 1..MAX repeated four times back to back.
results_with_dupes = (1..MAX).to_a * 4
# Without dupes: every integer from 1 to 4 * MAX exactly once.
results_without_dupes = (1..(MAX * 4)).to_a

# Per-vendor fixtures, keyed by vendor ids 1..4 with MAX elements per vendor.
# With dupes: every vendor holds its own copy of 1..MAX.
vendor_results_with_dupes = (1..4).each_with_object({}) do |vendor, by_vendor|
  by_vendor[vendor] = (1..MAX).to_a
end
# Without dupes: vendor k holds the k-th consecutive slice of 1..(4 * MAX).
vendor_results_without_dupes = (1..4).each_with_object({}) do |vendor, by_vendor|
  slice_start = MAX * (vendor - 1) + 1
  by_vendor[vendor] = (slice_start..(MAX * vendor)).to_a
end
# Print the total element count of every fixture, each preceded by a divider,
# then a closing divider and a blank line.
divider = '#' * 40
fixture_sizes = {
  'alltogether with dupes results length:' => results_with_dupes.length,
  'alltogether without dupes results length:' => results_without_dupes.length,
  'by_vendor with dupes results length:' =>
    vendor_results_with_dupes.values.map(&:length).inject(:+),
  'by_vendor without dupes results length:' =>
    vendor_results_without_dupes.values.map(&:length).inject(:+)
}
fixture_sizes.each do |label, total|
  puts divider
  puts label
  puts total
end
puts divider
puts ''
require 'benchmark'

# Report label => the work to time. Hash order is the order the reports run in.
benchmark_cases = {
  "dedupe alltogether with dupes" => -> { dedupe_alltogether(results_with_dupes) },
  "dedupe alltogether without dupes" => -> { dedupe_alltogether(results_without_dupes) },
  "dedupe by vendor with dupes" => -> { dedupe_by_vendor(vendor_results_with_dupes) },
  "dedupe by vendor without dupes" => -> { dedupe_by_vendor(vendor_results_without_dupes) }
}

# bmbm runs a rehearsal pass before the measured pass.
Benchmark.bmbm do |x|
  benchmark_cases.each do |label, work|
    x.report(label) { work.call }
  end
end
root@0460be381725:/myapp# ruby tmp.rb
########################################
alltogether with dupes results length:
40000
########################################
alltogether without dupes results length:
40000
########################################
by_vendor with dupes results length:
40000
########################################
by_vendor without dupes results length:
40000
########################################
Rehearsal --------------------------------------------------------------------
dedupe alltogether with dupes 0.000000 0.000000 0.000000 ( 0.000003)
dedupe alltogether without dupes 0.000000 0.000000 0.000000 ( 0.000002)
dedupe by vendor with dupes 5.040000 0.000000 5.040000 ( 5.043795)
dedupe by vendor without dupes 20.090000 0.010000 20.100000 ( 20.117038)
---------------------------------------------------------- total: 25.140000sec
user system total real
dedupe alltogether with dupes 0.000000 0.000000 0.000000 ( 0.000004)
dedupe alltogether without dupes 0.000000 0.000000 0.000000 ( 0.000004)
dedupe by vendor with dupes 5.030000 0.000000 5.030000 ( 5.036925)
dedupe by vendor without dupes 20.920000 0.000000 20.920000 ( 20.929647)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment