Skip to content

Instantly share code, notes, and snippets.

@jhsu
Forked from qrush/bench.rb
Created March 20, 2010 19:05
Show Gist options
  • Save jhsu/338835 to your computer and use it in GitHub Desktop.
Save jhsu/338835 to your computer and use it in GitHub Desktop.
# Trying to optimize the gemcutter indexing process.
# Since we're dumping out the index ~150 times daily, this is going to help the server load/cpu out immensely.
require 'config/environment'
require 'benchmark'
require 'redis'
# Redis client read by the 'redis' benchmark report further down; it is
# created with no arguments, so the client library's own defaults apply.
redis = Redis.new
# Serializes +value+ with Marshal and gzip-compresses the resulting bytes.
#
# @param value [Object] any object that Marshal.dump accepts
# @return [String] the gzip-compressed Marshal dump of +value+
# @raise [TypeError] if +value+ cannot be marshalled (e.g. it contains a Proc)
def pack(value)
  final = StringIO.new
  # The block form closes the writer even when Marshal.dump or the write
  # raises, so a failed call no longer leaves an unclosed GzipWriter behind.
  Zlib::GzipWriter.wrap(final) { |gzip| gzip.write(Marshal.dump(value)) }
  # Closing the writer flushes the gzip trailer into +final+; the buffer is
  # still readable through StringIO#string afterwards.
  final.string
end
# Compares three ways of producing the packed gem index. Each report builds
# an array of index entries and hands it to pack, so the timings cover both
# fetching the data and marshalling/compressing it.
Benchmark.bmbm do |bm|
  bm.report('with_indexed') do
    # this is how the index works now
    value = Version.with_indexed(true).map(&:to_index)
    pack(value)
  end

  bm.report('all') do
    # trying to optimize the above query by selecting only the listed columns
    versions = Version.all(:select => "number, platform, position, rubygem_id, indexed, rubygems.name",
                           :joins => :rubygem,
                           :order => "rubygems.name asc, position desc",
                           :conditions => {:indexed => true})
    indexed = versions.map { |v| [v['name'], v.to_gem_version, v.platform] }
    pack(indexed)
  end

  bm.report('redis') do
    # index:all is a set of all Versions' full names... so something like: [rails-2.3.5, rack-1.0.0...]
    members = redis.smembers('index:all')
    # at each key is a Marshalled version of the index entry ( [rubygem name, Gem::Version, platform] )
    # NOTE(review): Marshal.load trusts whatever is stored under these keys;
    # confirm that only this application writes to them.
    marshals = redis.mget(members).map { |m| Marshal.load(m) }
    # Pack the array of entries itself. Joining it first collapsed every entry
    # into one flat string, so this report packed a different payload than the
    # two reports above and its timing was not comparable.
    pack(marshals)
  end
end
## from Version.with_indexed
Version Load Including Associations (2695.5ms) SELECT "versions"."id" AS t0_r0, "versions"."authors" AS t0_r1, "versions"."description" AS t0_r2, "versions"."number" AS t0_r3, "versions"."rubygem_id" AS t0_r4, "versions"."built_at" AS t0_r5, "versions"."updated_at" AS t0_r6, "versions"."rubyforge_project" AS t0_r7, "versions"."summary" AS t0_r8, "versions"."platform" AS t0_r9, "versions"."created_at" AS t0_r10, "versions"."indexed" AS t0_r11, "versions"."prerelease" AS t0_r12, "versions"."position" AS t0_r13, "versions"."downloads_count" AS t0_r14, "versions"."latest" AS t0_r15, "versions"."full_name" AS t0_r16, "rubygems"."id" AS t1_r0, "rubygems"."name" AS t1_r1, "rubygems"."created_at" AS t1_r2, "rubygems"."updated_at" AS t1_r3, "rubygems"."downloads" AS t1_r4, "rubygems"."slug" AS t1_r5 FROM "versions" LEFT OUTER JOIN "rubygems" ON "rubygems".id = "versions".rubygem_id WHERE ("versions"."indexed" = 't') ORDER BY position
## from trying to optimize it
Version Load (696.6ms) SELECT number, platform, position, rubygem_id, indexed, rubygems.name FROM "versions" INNER JOIN "rubygems" ON "rubygems".id = "versions".rubygem_id WHERE ("versions"."indexed" = 't') ORDER BY rubygems.name asc, position desc, position
gemcutter master % ruby bench.rb
** [NewRelic] Cannot find newrelic.yml file at /Users/qrush/Dev/rails/gemcutter/config/newrelic.yml.
Rehearsal ------------------------------------------------
with_indexed 21.610000 0.440000 22.050000 ( 24.618721)
all 8.670000 0.060000 8.730000 ( 9.381824)
redis 4.570000 0.080000 4.650000 ( 5.481812)
-------------------------------------- total: 35.430000sec
user system total real
with_indexed 21.010000 0.180000 21.190000 ( 23.692446)
all 8.350000 0.050000 8.400000 ( 9.042466)
redis 4.390000 0.070000 4.460000 ( 5.317549)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment