Skip to content

Instantly share code, notes, and snippets.

@janko
Last active September 16, 2018 23:01
Show Gist options
  • Save janko/ca5158d8d804479bd42d68d51005d46f to your computer and use it in GitHub Desktop.
Save janko/ca5158d8d804479bd42d68d51005d46f to your computer and use it in GitHub Desktop.
Benchmarking different ways of filtering by the associated dataset in SQL
require "sequel"
require "benchmark"
system "createdb testing"
DB = Sequel.postgres("testing")
at_exit do
DB.disconnect
system "dropdb testing"
end
DB.create_table :artists do
primary_key :id
end
DB.create_table :albums do
primary_key :id
foreign_key :artist_id, :artists
Integer :year
end
class Artist < Sequel::Model
one_to_many :albums
end
class Album < Sequel::Model
end
artist_values = []
album_values = []
10_000.times do |idx|
artist_values << { id: idx }
album_values << { artist_id: idx, year: (1960..2018).to_a.sample }
end
DB[:artists].multi_insert(artist_values)
DB[:albums].multi_insert(album_values)
def benchmark(name)
duration = Benchmark.realtime { yield }
puts "#{name} (#{duration})"
end
benchmark("JOIN + DISTINCT") do
Artist
.association_join(:albums)
.where{albums[:year] >= 2000}
.select_all(:artists)
.distinct
.each { |artist| } # load all records
end
benchmark("COUNT subquery") do
new_album_count = Album
.where{year >= 2000}
.where{albums[:artist_id] =~ artists[:id]}
.select{count.function.*}
Artist
.where(new_album_count > 0)
.each { |artist| } # load all records
end
benchmark("IN subquery") do
new_artist_ids = Album
.where{year >= 2000}
.select(:artist_id)
Artist
.where(id: new_artist_ids)
.each { |artist| } # load all records
end
# >> JOIN + DISTINCT (0.012601999915204942)
# >> COUNT subquery (10.89080599998124)
# >> IN subquery (0.008620999986305833)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment