Skip to content

Instantly share code, notes, and snippets.

@dhoss
Last active August 29, 2015 14:02
Show Gist options
  • Save dhoss/e9788b0155cb399109b2 to your computer and use it in GitHub Desktop.
Save dhoss/e9788b0155cb399109b2 to your computer and use it in GitHub Desktop.
with recursive and with recursive inside select .. in (...)
devin@predaking:~/projects/web/treeify$ ruby bin/bm_in_query_vs_execute.rb
-- drop_table(:nodes)
-> 0.0163s
-- create_table(:nodes)
-> 0.0058s
-- add_index(:nodes, [:parent_id, :id], {:unique=>true})
-> 0.0037s
user system total real
in 0.000000 0.000000 0.000000 ( 0.000090)
raw 0.000000 0.000000 0.000000 ( 0.003632)
require "benchmark"
require_relative "../lib/treeify"
include Benchmark
ActiveRecord::Base.establish_connection(
:adapter => 'postgresql',
:database => 'tree_test',
:username => 'postgres'
)
class Node < ActiveRecord::Base
include Treeify
config({:cols => [:name]})
validates_uniqueness_of :name
validates_uniqueness_of :parent_id, :scope=> :id
end
class NodeSetup < ActiveRecord::Migration
class << self
def up
create_table :nodes do |t|
t.text :name
t.integer :parent_id
t.references :parent
end
add_index :nodes, [:parent_id, :id], :unique => true
end
def down
drop_table :nodes
end
end
end
NodeSetup.down
NodeSetup.up
nodes = []
30.times do |i|
nodes[i] = []
parent = Node.create(:name => "root_#{i}")
100.times do |j|
node = Node.new(:name => "node_#{i}_#{j}")
_parent = nodes[i][rand(nodes[i].size)] || parent
node.parent_id = _parent.id
node.save
nodes[i] << node
end
end
node = Node.first
puts "IN(..) SQL"
puts node.self_and_descendents.to_sql
puts "Raw"
puts Node.tree_sql_for(node)
Benchmark.bm(100) do |q|
q.report("in") { node.self_and_descendents }
q.report("raw") { node.self_and_descendents_raw }
end
WITH RECURSIVE cte (id, path) AS (
SELECT id,
array[id] AS path
FROM nodes
WHERE id = 1
UNION ALL
SELECT nodes.id,
cte.path || nodes.id
FROM nodes
JOIN cte ON nodes.parent_id = cte.id
)
SELECT id FROM cte
ORDER BY path
require 'active_record'
require "active_support/concern"
require "active_support/core_ext/module/attribute_accessors"
require "active_support/core_ext/class/attribute"
module Treeify
extend ActiveSupport::Concern
included do
has_many :children,
class_name: self,
foreign_key: "parent_id"
belongs_to :parent,
class_name: self,
foreign_key: "parent_id"
class_attribute :cols
scope :roots, -> { where(parent_id: nil) }
## pertinent select ... in(..) query
scope :tree_for, ->(instance) { where("#{table_name}.id IN (#{tree_sql_for(instance)})").order("#{table_name}.id") }
scope :tree_for_ancestors, ->(instance) {
where("#{table_name}.id IN (#{tree_sql_for_ancestors(instance)})")
.order("#{table_name}.id")
}
end
module ClassMethods
def config(hash = {})
# apparently columns is a reserved word in rails
self.cols = hash[:cols]
end
## pertinent raw query
def tree_for_raw(instance)
query = tree_sql_for(instance)
ActiveRecord::Base.connection.execute(query)
end
def tree_sql(instance)
"WITH RECURSIVE cte (id, path) AS (
SELECT id,
array[id] AS path
FROM #{table_name}
WHERE id = #{instance.id}
UNION ALL
SELECT #{table_name}.id,
cte.path || #{table_name}.id
FROM #{table_name}
JOIN cte ON #{table_name}.parent_id = cte.id
)"
end
def tree_sql_for(instance)
"#{tree_sql(instance)}
SELECT id FROM cte
ORDER BY path"
end
def tree_sql_for_ancestors(instance)
"WITH RECURSIVE cte (id, path) AS (
SELECT id,
array[id] AS path
FROM #{table_name}
WHERE id = #{instance.id}
UNION ALL
SELECT #{table_name}.id,
cte.path || #{table_name}.id
FROM #{table_name}
JOIN cte ON #{table_name}.parent_id = cte.id
)
SELECT cte.id FROM cte WHERE cte.id != #{instance.id}"
end
end
def descendents
self_and_descendents - [self]
end
def ancestors
self.class.tree_for_ancestors(self)
end
def self_and_descendents
self.class.tree_for(self)
end
def self_and_descendents_raw
self.class.tree_for_raw(self)
end
def is_root?
self.parent_id != nil
end
def siblings
self.class.where(parent_id: self.parent_id) - [self]
end
end
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------
Sort (cost=646.37..646.87 rows=201 width=36) (actual time=3.681..3.685 rows=101 loops=1)
Sort Key: cte.path
Sort Method: quicksort Memory: 35kB
CTE cte
-> Recursive Union (cost=0.00..634.66 rows=201 width=36) (actual time=0.016..3.551 rows=101 loops=1)
-> Index Scan using nodes_pkey on nodes (cost=0.00..8.27 rows=1 width=4) (actual time=0.015..0.016 rows=1 loops=1)
Index Cond: (id = 1)
-> Hash Join (cost=0.33..62.24 rows=20 width=36) (actual time=0.063..0.439 rows=12 loops=8)
Hash Cond: (public.nodes.parent_id = cte.id)
-> Seq Scan on nodes (cost=0.00..50.30 rows=3030 width=8) (actual time=0.003..0.186 rows=3030 loops=8)
-> Hash (cost=0.20..0.20 rows=10 width=36) (actual time=0.003..0.003 rows=13 loops=8)
Buckets: 1024 Batches: 1 Memory Usage: 1kB
-> WorkTable Scan on cte (cost=0.00..0.20 rows=10 width=36) (actual time=0.000..0.002 rows=13 loops=8)
-> CTE Scan on cte (cost=0.00..4.02 rows=201 width=36) (actual time=0.018..3.573 rows=101 loops=1)
Total runtime: 3.724 ms
(15 rows)
SELECT "nodes".* FROM "nodes" WHERE (nodes.id IN (WITH RECURSIVE cte (id, path) AS (
SELECT id,
array[id] AS path
FROM nodes
WHERE id = 1
UNION ALL
SELECT nodes.id,
cte.path || nodes.id
FROM nodes
JOIN cte ON nodes.parent_id = cte.id
)
SELECT id FROM cte
ORDER BY path)) ORDER BY nodes.id
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------
Merge Semi Join (cost=656.57..779.86 rows=1515 width=18) (actual time=5.616..5.678 rows=101 loops=1)
Merge Cond: (public.nodes.id = "ANY_subquery".id)
-> Index Scan using nodes_pkey on nodes (cost=0.00..112.70 rows=3030 width=18) (actual time=0.030..0.047 rows=102 loops=1)
-> Sort (cost=656.57..657.08 rows=201 width=4) (actual time=5.579..5.586 rows=101 loops=1)
Sort Key: "ANY_subquery".id
Sort Method: quicksort Memory: 29kB
-> Subquery Scan on "ANY_subquery" (cost=646.37..648.88 rows=201 width=4) (actual time=5.518..5.537 rows=101 loops=1)
-> Sort (cost=646.37..646.87 rows=201 width=36) (actual time=5.518..5.524 rows=101 loops=1)
Sort Key: cte.path
Sort Method: quicksort Memory: 35kB
CTE cte
-> Recursive Union (cost=0.00..634.66 rows=201 width=36) (actual time=0.011..5.303 rows=101 loops=1)
-> Index Scan using nodes_pkey on nodes (cost=0.00..8.27 rows=1 width=4) (actual time=0.010..0.011 rows=1 loops=1)
Index Cond: (id = 1)
-> Hash Join (cost=0.33..62.24 rows=20 width=36) (actual time=0.089..0.655 rows=12 loops=8)
Hash Cond: (public.nodes.parent_id = cte.id)
-> Seq Scan on nodes (cost=0.00..50.30 rows=3030 width=8) (actual time=0.003..0.239 rows=3030 loops=8)
-> Hash (cost=0.20..0.20 rows=10 width=36) (actual time=0.005..0.005 rows=13 loops=8)
Buckets: 1024 Batches: 1 Memory Usage: 1kB
-> WorkTable Scan on cte (cost=0.00..0.20 rows=10 width=36) (actual time=0.000..0.002 rows=13 loops=8)
-> CTE Scan on cte (cost=0.00..4.02 rows=201 width=36) (actual time=0.013..5.346 rows=101 loops=1)
Total runtime: 5.756 ms
(22 rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment