Last active
August 29, 2015 14:10
-
-
Save leucos/1cbe95154118d96fe447 to your computer and use it in GitHub Desktop.
Unioning & sorting (redis + ruby)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
# Benchmark script: stores random page hits in per-category, per-day Redis
# sorted sets, then measures how long it takes to union them and read the
# highest-scored members.
require 'hiredis'
require 'redis'
require 'benchmark'
require 'pp'

# Target number of page hits for the whole sliding window; the per-day hit
# count is derived from it.
TOTAL_PAGES_PER_WINDOW = 15_000_000
# Number of categories; category ids are drawn with rand(CATEGORY_COUNT).
CATEGORY_COUNT = 100
# Number of distinct article ids per category (ids are drawn with
# rand(ARTICLES_COUNT)).
ARTICLES_COUNT = 150
# Number of days in the sliding window; each category has one key per day.
DAYS_WINDOW = 30

# Connect with default connection settings, using the hiredis driver.
redis = Redis.new(driver: :hiredis)
# Cleanup: remove the merged result key and every per-category/per-day key so
# that each run starts from empty sorted sets.
puts "Cleaning up keys"
redis.del "es_merged"
# Queue the deletions on the yielded pipeline object: commands issued on the
# client itself inside the block are deprecated (redis-rb 4.6) and are no
# longer pipelined in redis-rb 5.
redis.pipelined do |pipeline|
  CATEGORY_COUNT.times do |category|
    DAYS_WINDOW.times do |day|
      pipeline.del "es_categorie_#{category}_#{day}"
    end
  end
end
# How many views do we get each day: the window total spread evenly over the
# days (integer division, any remainder is dropped). Dividing by the category
# count as well would insert only a fraction of the announced total, because
# each hit below already lands in a single random category.
views_per_day = TOTAL_PAGES_PER_WINDOW / DAYS_WINDOW
# Number of ZINCRBY commands sent per pipeline round trip. Batching keeps the
# pipeline useful (one command per pipeline saves nothing) without buffering a
# whole day of commands in memory.
pipeline_batch_size = 10_000

# Adding some page hits
puts "Randomly incrementing page hits (15M hits inserted)"
puts Benchmark.measure {
  # Every day...
  DAYS_WINDOW.times do |day|
    # ...articles get "views_per_day" hits
    views_per_day.times.each_slice(pipeline_batch_size) do |batch|
      redis.pipelined do |pipeline|
        batch.each do
          # Let's dispatch each hit randomly in articles.
          # Each category has a key for every day in the sliding window, so
          # the key looks like: es_categorie_<category_id>_<day_number>
          # For each category/day key, we have one member per article, whose
          # score holds the hit count.
          category = rand(CATEGORY_COUNT)
          pipeline.zincrby "es_categorie_#{category}_#{day}", 1, "category_#{category}_article_#{rand(ARTICLES_COUNT)}"
        end
      end
    end
    # Progress marker, one per completed day
    print "#{day}.."
  end
}
puts "Getting 25 highest scores across categories"
# The argument to Benchmark.bm is the label column width, not an iteration
# count; 20 fits the longest label ("unioning and sorting").
Benchmark.bm(20) do |bench|
  bench.report("unioning and sorting") {
    # We have to merge all sets to find the best hits: one key per
    # category/day pair, categories numbered 0...CATEGORY_COUNT.
    sorted_sets = DAYS_WINDOW.times.flat_map do |day|
      CATEGORY_COUNT.times.map { |category| "es_categorie_#{category}_#{day}" }
    end
    # The unionstore will merge keys, and sum the scores of members with the
    # same name
    redis.zunionstore "es_merged", sorted_sets
    # ZREVRANGE bounds are inclusive, so 0..24 yields the 25 best members
    redis.zrevrange "es_merged", 0, 24, withscores: true
  }
  # Same read again, without paying for the union
  bench.report("sorting only") {
    redis.zrevrange "es_merged", 0, 24, withscores: true
  }
end
# Print the best members of the merged set, highest score first.
puts "25 highest scores across categories :"
# ZREVRANGE bounds are inclusive: 0..24 returns exactly 25 [member, score] pairs
redis.zrevrange("es_merged", 0, 24, withscores: true).each do |member, score|
  puts "#{member} => #{score} total views"
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ruby redis-sorted-set-tests.rb | |
Cleaning up keys | |
Randomly incrementing page hits (15M hits inserted) | |
0..1..2..3..4..5..6..7..8..9..10..11..12..13..14..15..16..17..18..19..20..21..22..23..24..25..26..27..28..29.. | |
5.640000 2.180000 7.820000 ( 9.850677) | |
Getting 25 highest scores across categories | |
user system total real | |
unioning and sorting 0.000000 0.000000 0.000000 ( 0.079645) | |
sorting only 0.000000 0.000000 0.000000 ( 0.000284) | |
25 highest scores across categories : | |
category_15_article_45 => 24.0 total views | |
category_6_article_18 => 23.0 total views | |
category_32_article_63 => 23.0 total views | |
category_25_article_24 => 23.0 total views | |
category_95_article_89 => 22.0 total views | |
category_84_article_147 => 22.0 total views | |
category_81_article_118 => 22.0 total views | |
category_52_article_71 => 22.0 total views | |
category_29_article_99 => 22.0 total views | |
category_20_article_111 => 22.0 total views | |
$ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment