-
-
Save FND/1244491 to your computer and use it in GitHub Desktop.
Gemfile.lock |
#!/usr/bin/env ruby
# encoding: UTF-8

# test case for eager loading of nested associations with DataMapper

require 'rubygems'
require 'dm-core'
require 'dm-constraints'
require 'dm-migrations'
# Resolve the helper relative to this script: Ruby 1.9.2+ no longer has "." on
# the load path, so a bare `require 'eager_loading'` raises LoadError there.
require File.expand_path('../eager_loading', __FILE__)

# Log at :debug level to stdout so the queries issued by each test case are visible.
DataMapper::Logger.new($stdout, :debug)

# The SQLite database file lives in the current working directory.
DataMapper.setup(:default, "sqlite3://#{Dir.pwd}/db.sqlite")
# Top level of the nested association chain: a person has many vehicles.
class Person
  include DataMapper::Resource

  property :id,   Serial
  property :name, String, :required => true

  has n, :vehicles
end
# Second level of the nested association chain: a vehicle has many components.
class Vehicle
  include DataMapper::Resource

  property :id,   Serial
  property :name, String, :required => true

  has n, :components
end
# Third level of the nested association chain: a component references the
# manufacturer that produced it.
class Component
  include DataMapper::Resource

  property :id,   Serial
  property :name, String, :required => true

  belongs_to :manufacturer
end
# Leaf of the association chain; referenced by Component via belongs_to.
class Manufacturer
  include DataMapper::Resource

  property :id,   Serial
  property :name, String, :required => true
end
# Set up the schema for the models declared above via DataMapper's auto-migration.
DataMapper.auto_migrate!

# generate test data: two people, three vehicles, five components
Person.create(:name => "FND", :vehicles => [
  Vehicle.create(:name => "Taurus", :components => [
    Component.create(:name => "engine",
                     :manufacturer => Manufacturer.create(:name => "Ford")),
    Component.create(:name => "radio",
                     :manufacturer => Manufacturer.create(:name => "Bose"))
  ]),
  Vehicle.create(:name => "fixie", :components => [
    Component.create(:name => "frame",
                     :manufacturer => Manufacturer.create(:name => "Campagnolo")),
    Component.create(:name => "breaks",
                     :manufacturer => Manufacturer.create(:name => "Shimano"))
  ])
])

Person.create(:name => "tillsc", :vehicles => [
  Vehicle.create(:name => "Golf", :components => [
    Component.create(:name => "engine",
                     :manufacturer => Manufacturer.create(:name => "VW"))
  ])
])
# retrieve data

# Case A: walk the whole association chain starting from a single person.
puts "", "[INFO] test case A"
first_person = Person.get!(1)
puts first_person.vehicles.components.manufacturer.map(&:name).join(", ")

# Case B: iterate one association level below the people collection.
puts "", "[INFO] test case B"
people = Person.all
people.each do |person|
  person.vehicles.each do |vehicle|
    puts format("%-10s %-10s", person.name, vehicle.name)
  end
end

# Case C: iterate two association levels without any explicit eager loading.
# The backslash is escaped so that the heading really prints "/!\".
puts "", "[INFO] test case C ===== /!\\ n+1 hazard ===="
people = Person.all
people.each do |person|
  person.vehicles.each do |vehicle|
    vehicle.components.each do |component|
      puts format("%-10s %-10s %-10s", person.name, vehicle.name, component.name)
    end
  end
end

# Case D: same iteration as case C, but the nested associations are pre-loaded
# through the eager_load helper before iterating.
puts "", "[INFO] test case D"
people = Person.all
people.eager_load(Person.vehicles.components).each do |person|
  person.vehicles.each do |vehicle|
    vehicle.components.each do |component|
      puts format("%-10s %-10s %-10s", person.name, vehicle.name, component.name)
    end
  end
end
# manual eager loading for DataMapper
# adapted from Chris Corbyn: https://gist.github.com/1244491#gistcomment-56797
module EagerLoading
  # Pre-load every relationship along +query_path+ into the receiver, issuing
  # one query per relationship instead of one query per parent resource.
  #
  # Only the first property of each relationship's source/target key is used,
  # so composite keys are not supported by this version. Parents without any
  # related resource are left untouched.
  #
  # @param query_path
  #   an object whose #relationships lists the relationships to walk, outermost first
  #
  # @return the receiver, so the call can be chained (e.g. with #each)
  def eager_load(query_path)
    scope = self

    query_path.relationships.each do |relation|
      source_key = relation.source_key.first # TODO: rename
      target_key = relation.target_key.first # TODO: rename

      # for each level in the query path, collect all the resources referencing
      # keys at the current scope
      parent_keys = scope.collect(&:"#{source_key.name}")
      next_scope  = relation.target_model.all(target_key.name => parent_keys)

      # map target keys to the resources that exist for them (single pass,
      # instead of building a new merged hash for every resource)
      links = next_scope.group_by { |resource| target_key.get(resource) }

      # now pre-load those from the map
      scope.each do |parent|
        key = source_key.get(parent)
        next unless links.key?(key)

        parent.instance_variable_set(:"@#{relation.name}", links[key])
      end

      # and step into the next nesting level
      scope = next_scope
    end

    self
  end
end
# Make #eager_load available on every DataMapper collection.
DataMapper::Collection.send(:include, EagerLoading)
# Gemfile for the DataMapper eager-loading test case.
# The :rubygems symbol source is deprecated; name the HTTPS endpoint explicitly.
source 'https://rubygems.org'

# One shared constraint so all DataMapper gems resolve to matching versions.
DM_VERSION = '~> 1.2.0.rc2'

gem 'dm-core',           DM_VERSION
gem 'dm-constraints',    DM_VERSION
gem 'dm-migrations',     DM_VERSION
gem 'dm-sqlite-adapter', DM_VERSION
#!/usr/bin/env sh

# start from a fresh database; -f keeps a missing file (first run) from being an error
rm -f db.sqlite

# reformat SQL queries for readability
bundle exec ./dm_el.rb | perl -pe 's#SELECT .*? (FROM ".*?" )(.*)#\1SELECT ... \2#'
In a perfect world, this would be totally transparent to the user (as it currently works for a single level), but I think there are some pretty tricky issues involved with doing it transparently. To be honest, I've never looked that closely at the eager loading code, so I may just be spreading my own ignorance (and fear) here :).
That said, arbitrary depth eager loading gets my vote as a feature to roll into dm-core. /cc @dkubb, @solnic—any thoughts about prospects for rolling a polished, tested implementation of this into dm-core?
In my head it wouldn't be too difficult to do this transparently. You'd create a thread-global stack, since you can only be iterating one collection at a time, right? So for each nested loop, the stack grows by 1 and as the loop ends and, either, another one begins, or the outer one ends, the stack continues to either grow or shrink. The stack just contains what I'm storing in the scope variable, so you know what model the parent is at any given point. Seems like it should work with any nesting level.
I'm clearly over-simplifying a complex situation, but, ya know, "in my head" it works like this :P
Here's a version that has been split into a class and a wrapper module, supports composite keys (I think), uses real collections exactly the same as if they were not eager-loaded and has been refactored into small methods (yet seems to feel more complex? o_O).
module DataMapper
  class Collection
    # EagerLoader takes a QueryPath object and loads all relationships referenced in the path, into an existing Collection.
    #
    # Using eager-loading allows you to optimize out the classic "n+1" query problem, when you intend to iterate over several
    # arbitrarily deeply nested relationships. One query per relationship is executed, vs one query per record.
    class EagerLoader
      # Initialize the EagerLoader to pre-load all relationships as deep as +query_path+ into +collection+.
      #
      # @param [Collection] collection
      #   the source collection to load related resources into
      #
      # @param [QueryPath] query_path
      #   a valid QueryPath for the target model in the collection
      def initialize(collection, query_path)
        @collection = collection
        @query_path = query_path
      end

      # Perform eager loading immediately and return the collection.
      #
      # At most one query is executed per relationship in the query path; loading stops early
      # once a level yields no resources, because there is nothing left to load into.
      # This method should not be invoked before query-building is complete (e.g. before limits have been applied)
      #
      # @return [Collection]
      #   the source collection resources were loaded into
      def load
        scope = @collection

        @query_path.relationships.each do |relation|
          # An empty scope would produce empty conditions, and querying with
          # empty conditions would select every record of the target model.
          break if scope.empty?

          next_scope = relation.target_model.all(target_conditions(scope, relation))
          load_into_collection(scope, next_scope, relation)
          scope = next_scope
        end

        @collection
      end

      private

      # Map target key names to source key values, to create valid query conditions for finding all related resources.
      #
      # Every target key name maps to an Array of the distinct, non-nil source key
      # values found in +collection+ (in first-seen order).
      #
      # @return [Hash]
      #   target key name => Array of source key values
      def target_conditions(collection, relationship)
        key_names  = relationship.target_key.collect(&:name)
        conditions = {}
        key_names.each { |name| conditions[name] = [] }

        collection.each do |resource|
          key_names.zip(relationship.source_key.get(resource)).each do |name, value|
            conditions[name] << value unless value.nil?
          end
        end

        conditions.each_value(&:uniq!)
        conditions
      end

      # Given a set of resources and the relationship from which they derive, map them, referenced by the target key
      #
      # @return [Hash]
      #   target key values => Array of resources carrying those values
      def key_mappings(collection, relationship)
        collection.group_by { |resource| relationship.target_key.get(resource) }
      end

      # For each of the pre-loaded resources in +related_resources+, re-establish their relationships in +collection+.
      #
      # Resources in +collection+ without any related resource are left untouched.
      def load_into_collection(collection, related_resources, relationship)
        map = key_mappings(related_resources, relationship)

        collection.each do |resource|
          key = relationship.source_key.get(resource)
          next unless map.key?(key)

          resource.instance_variable_set(
            :"@#{relationship.name}",
            relationship.collection_for(resource).set(map[key])
          )
        end
      end
    end

    # Adds #eager_load to Collection as a thin wrapper around EagerLoader.
    module EagerLoading
      # Pre-load all relationships in +query_path+ into this collection.
      #
      # @return [Collection]
      #   the receiver, with related resources loaded
      def eager_load(query_path)
        EagerLoader.new(self, query_path).load
      end
    end

    include EagerLoading
  end
end
It occurs to me that this is very focused on 1:n relationships. It will need changes to work for other types of relationship. I won't have time to look at it further tonight as there are some other things I want to get started, but hopefully tomorrow I can pick this back up and make sure it works with other types of relationships, plus get some spec coverage on it.
I agree, this does look a little more complicated than the last iteration, but I'll chalk that up to the collections and composite key support.
Did it not work to use a Relationship as a query predicate? (E.g., instead of `Hash[relationship.target_key.collect(&:name).zip(relationship.source_key.get(resource))]`, do `relationship => resource`.) There's definitely code in place to support that API (`DataMapper::Query::Conditions::Comparison::RelationshipHandler#foreign_key_mapping`), but I'm not sure if it will work here.
One teeny thing: I think you can use `Relationship#set` instead of directly setting the instance variable on resource in `#load_into_collection`. Currently, it's:
resource.instance_variable_set(
:"@#{relationship.name}",
relationship.collection_for(resource).set(map[relationship.source_key.get(resource)])
)
But I think it can be (I also introduced a couple of explanatory temporary variables):
collection = relationship.collection_for(resource)
eager_loaded_resources = map[relationship.source_key.get(resource)]
collection.set(eager_loaded_resources)
relationship.set(resource, collection)
Re: relationship as a predicate, I tried something (I forget what) and while it didn't error, it didn't produce the correct results. I almost certainly did the wrong thing though. Are you able to check out the original gist, drop in my code then see if you get the correct output with your suggested simplification?
You should see the following under the "test case C" heading:
[INFO] test case C ===== /!\ n+1 hazard ====
~ (0.000043) SELECT "id", "name" FROM "people" ORDER BY "id"
~ (0.000055) SELECT "id", "name", "person_id" FROM "vehicles" WHERE "person_id" IN (1, 2) ORDER BY "id"
~ (0.000086) SELECT "id", "name", "manufacturer_id", "vehicle_id" FROM "components" WHERE "vehicle_id" IN (1, 2, 3) ORDER BY "id"
FND Taurus engine
FND Taurus radio
FND fixie frame
FND fixie breaks
tillsc Golf engine
I always get a bit lost poking around in dm-core, but I always learn something new. It would be good to clear that "FIXME" :)
Awesome advice on `Relationship#set`! Will try that. Classic example of getting a bit lost :P (Excuse the fragmented response... I'm a bit distracted with food and another coding problem simultaneously ;))
Hmm, I was just looking through some of `Relationship`, and I noticed `#eager_load`, which looks like exactly the API for this situation. So if I'm reading it correctly, you could simplify the whole thing to:
# Eager-load every relationship in +query_path+ into @collection by delegating
# each level to the relationship's own #eager_load.
#
# Each call receives the scope produced by the previous level, starting with
# @collection. The source collection is returned (not the deepest scope the
# fold ends on), so callers can keep iterating the collection they started from.
#
# @param query_path
#   an object whose #relationships lists the relationships to walk, outermost first
#
# @return the source collection (@collection)
def load(query_path)
  query_path.relationships.inject(@collection) do |scope, relationship|
    relationship.eager_load(scope)
  end
  @collection
end
(BTW, I've vacillated, and I now prefer passing the `Query::Path` to `#load` instead of `#initialize` (reverting to how you had it before). It seems more coherent, even though EagerLoader is probably still a single-use object.)
It's too late here for me to start hacking on this right now, but I'm going to try to make time to poke at it tomorrow.
@d11wtq—looks like `Relationship#eager_load` does work; check out my fork of this gist: https://gist.github.com/1297105
Hey guys, I know this is an old thread, but check this out: https://gist.github.com/3100034
Is it worth any of this being pulled back into dm-core, if it's solid enough after a bit of input from several devs? It's almost certainly worth making into a little gem if nothing else. It would need to behave the same as a non-eager-loaded collection first though (i.e. load `Collection.new(query, resources)` instead of just an Array).