Skip to content

Instantly share code, notes, and snippets.

@leucos
Last active August 29, 2015 14:10
Show Gist options
  • Save leucos/1cbe95154118d96fe447 to your computer and use it in GitHub Desktop.
Save leucos/1cbe95154118d96fe447 to your computer and use it in GitHub Desktop.
Unioning & sorting (redis + ruby)
#!/usr/bin/env ruby
#
# Benchmark script: fills per-category/per-day Redis sorted sets with random
# page hits, then times a ZUNIONSTORE over all of them followed by a ZREVRANGE
# to fetch the most viewed articles.
#
require 'hiredis'
require 'redis'
require 'benchmark'
require 'pp'
# Benchmark dimensions.
TOTAL_PAGES_PER_WINDOW = 15_000_000 # basis for the per-day hit count
CATEGORY_COUNT = 100                # categories are numbered 0...CATEGORY_COUNT
ARTICLES_COUNT = 150                # article ids are drawn from 0...ARTICLES_COUNT
DAYS_WINDOW = 30                    # days are numbered 0...DAYS_WINDOW

# Single connection shared by the rest of the script, using the hiredis driver.
redis = Redis.new(driver: :hiredis)
# Cleanup: remove the merged set and every per-category/per-day sorted set left
# over from a previous run.
puts "Cleaning up keys"
# All DELs are queued on the pipeline object yielded by #pipelined and sent in
# one batch. Calling the commands on `redis` itself inside the block is
# deprecated since redis-rb 4.6 and is no longer pipelined in 5.x.
redis.pipelined do |pipeline|
  pipeline.del "es_merged"
  CATEGORY_COUNT.times do |category|
    DAYS_WINDOW.times do |day|
      pipeline.del "es_categorie_#{category}_#{day}"
    end
  end
end
# Number of hits inserted for each day of the window.
# NOTE: the total is divided by CATEGORY_COUNT as well as DAYS_WINDOW, while
# each hit below goes to a single random category. The run therefore inserts
# DAYS_WINDOW * views_per_day hits overall (150_000 with the default
# constants), not TOTAL_PAGES_PER_WINDOW.
views_per_day = TOTAL_PAGES_PER_WINDOW / (CATEGORY_COUNT * DAYS_WINDOW)
total_hits = views_per_day * DAYS_WINDOW

# Adding some page hits
puts "Randomly incrementing page hits (#{total_hits} hits inserted)"
puts Benchmark.measure {
  # Every day...
  DAYS_WINDOW.times do |day|
    # ...gets "views_per_day" hits, sent as one pipeline per day so the
    # commands are actually batched (a pipeline holding a single command costs
    # one round trip per hit, exactly like no pipeline at all).
    redis.pipelined do |pipeline|
      views_per_day.times do
        # Let's dispatch each hit randomly in articles.
        # Each category has a key for every day in the sliding window, so the
        # key looks like: es_categorie_<category_id>_<day_number>
        # For each category/day key, we have one member per article, whose
        # score is its hit count.
        category = rand(CATEGORY_COUNT)
        pipeline.zincrby "es_categorie_#{category}_#{day}", 1, "category_#{category}_article_#{rand(ARTICLES_COUNT)}"
      end
    end
    print "#{day}.."
  end
}
puts "Getting 25 highest scores across categories"
# Label column width: length of the longest report label below.
label_width = "unioning and sorting".length
Benchmark.bm(label_width) do |bench|
  bench.report("unioning and sorting") {
    # We have to merge all sets to find the best hits: one key per
    # (day, category) pair. Both ranges are exclusive of their upper bound, so
    # only keys that the insertion step can have created are listed.
    sorted_sets = DAYS_WINDOW.times.flat_map do |day|
      CATEGORY_COUNT.times.map { |category| "es_categorie_#{category}_#{day}" }
    end
    # The unionstore will merge keys, and sum the scores of members that have
    # the same name.
    redis.zunionstore "es_merged", sorted_sets
    # ZREVRANGE bounds are inclusive: 0..24 yields the 25 best members.
    redis.zrevrange "es_merged", 0, 24, with_scores: true
  }
  bench.report("sorting only") {
    redis.zrevrange "es_merged", 0, 24, with_scores: true
  }
end
puts "25 highest scores across categories :"
# ZREVRANGE bounds are inclusive, so 0..24 returns exactly 25 members. With
# scores requested, each entry is a [member, score] pair.
redis.zrevrange("es_merged", 0, 24, with_scores: true).each do |member, score|
  puts "#{member} => #{score} total views"
end
$ ruby redis-sorted-set-tests.rb
Cleaning up keys
Randomly incrementing page hits (15M hits inserted)
0..1..2..3..4..5..6..7..8..9..10..11..12..13..14..15..16..17..18..19..20..21..22..23..24..25..26..27..28..29..
5.640000 2.180000 7.820000 ( 9.850677)
Getting 25 highest scores across categories
user system total real
unioning and sorting 0.000000 0.000000 0.000000 ( 0.079645)
sorting only 0.000000 0.000000 0.000000 ( 0.000284)
25 highest scores across categories :
category_15_article_45 => 24.0 total views
category_6_article_18 => 23.0 total views
category_32_article_63 => 23.0 total views
category_25_article_24 => 23.0 total views
category_95_article_89 => 22.0 total views
category_84_article_147 => 22.0 total views
category_81_article_118 => 22.0 total views
category_52_article_71 => 22.0 total views
category_29_article_99 => 22.0 total views
category_20_article_111 => 22.0 total views
$
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment