Skip to content

Instantly share code, notes, and snippets.

@knugie
Last active May 25, 2022 19:47
Show Gist options
  • Save knugie/49b3598095a3fb2cf0fd to your computer and use it in GitHub Desktop.
Save knugie/49b3598095a3fb2cf0fd to your computer and use it in GitHub Desktop.
Count unique elements in Array
require 'benchmark'
require 'open3'
require 'histogram/array'
# Shared benchmark input: ten million integers, each produced by rand(60),
# so the array holds at most 60 distinct values with many repeats of each.
array = Array.new(10_000_000) { rand(60) }
# Approach: collect the distinct values first, then rescan the whole array
# once per distinct value with Array#count.
puts 'uniq, count:'
uniq_count = Benchmark.measure do
  distinct = array.uniq
  distinct.map { |value| array.count(value) }
end
puts uniq_count
# Approach: pipe the values to an external awk process and let it tally the
# occurrences of each distinct value. The measured time covers serializing
# the array, running awk, and reading its output back.
puts 'system call (awk):'
system_call = Benchmark.measure do
  sys_count = "awk '{dups[$1]++} END{for (num in dups) {print num,dups[num]}}'"
  # awk tallies field $1 of every input line, so each element must be sent on
  # its own line. Writing the Array object directly would send its #to_s form
  # ("[1, 2, ...]") as one single record and nothing would be counted.
  # capture2 writes stdin, closes it, drains stdout and waits for the child;
  # stderr is inherited, so no pipe is left unread.
  Open3.capture2(sys_count, stdin_data: array.join("\n"))
end
puts system_call
# Approach: call #histogram on the array, a method expected to come from the
# `histogram/array` require at the top of the file.
# NOTE(review): presumably this bins the values rather than counting each
# distinct value exactly -- confirm against the gem's documentation.
puts 'histogram:'
histogram = Benchmark.measure { array.histogram }
puts histogram
# Approach: build a plain Hash of value => occurrences in a single pass,
# checking explicitly whether the value has been seen before.
puts 'each_with_object:'
each_with_object = Benchmark.measure do
  array.each_with_object({}) do |element, tallies|
    if tallies.key?(element)
      tallies[element] += 1
    else
      tallies[element] = 1
    end
  end
end
puts each_with_object
# Approach: single pass over the array, incrementing a Hash whose default
# value is 0 so unseen values need no special case.
puts 'count hash:'
count_hash = Benchmark.measure do
  tallies = Hash.new(0)
  array.each do |element|
    tallies[element] += 1
  end
  tallies
end
puts count_hash
# Approach: bucket equal values together with group_by, then reduce every
# bucket to a [value, bucket size] pair.
puts 'group_by:'
group_by = Benchmark.measure do
  buckets = array.group_by { |element| element }
  buckets.map { |value, members| [value, members.size] }
end
puts group_by
# sample output:
# uniq, count:
# 17.740000 0.010000 17.750000 ( 17.761145)
# system call (awk):
# 1.750000 0.030000 7.400000 ( 7.404668)
# histogram:
# 4.690000 0.000000 4.690000 ( 4.706780)
# each_with_object:
# 1.740000 0.000000 1.740000 ( 1.742432)
# count hash:
# 1.110000 0.000000 1.110000 ( 1.125301)
# group_by:
# 1.000000 0.050000 1.050000 ( 1.049218)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment