Skip to content

Instantly share code, notes, and snippets.

@RISCfuture
Created May 1, 2009 23:30
Show Gist options
  • Save RISCfuture/105318 to your computer and use it in GitHub Desktop.
Save RISCfuture/105318 to your computer and use it in GitHub Desktop.
Reimplementation of find_in_batches that fixes bugs and works with composite_primary_keys
module ActiveRecord
  # Reimplementation of +find_in_batches+ that pages through a relation by
  # primary key and also handles multi-column (composite) primary keys.
  module Batches
    # Yields the records of this relation to the given block, one batch at a
    # time, ordered by primary key ascending.
    #
    # Any order or limit already present on the relation is ignored (a warning
    # is logged); passing +:order+ or +:limit+ in +options+ raises.
    #
    # @param options [Hash] finder options plus the batch-specific keys below;
    #   the hash itself is not modified
    # @option options [Object, Array] :start primary key value to start from,
    #   inclusive; one value per primary key column. Defaults to 1 for every
    #   column.
    # @option options [Integer] :batch_size maximum records per batch
    #   (default 1000)
    # @yield [records] the records of the current batch
    # @raise [ArgumentError] if the number of +:start+ values differs from the
    #   number of primary key columns
    # @raise [RuntimeError] if +:order+ or +:limit+ is given, or if a loaded
    #   record does not expose its primary key
    def find_in_batches(options = {})
      if arel.orders.present? || arel.taken.present?
        ActiveRecord::Base.logger.warn("Scoped order and limit are ignored, it's forced to be batch order and batch size")
      end

      relation = self
      finder_options = options.except(:start, :batch_size)
      if finder_options.present?
        raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order].present?
        raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit].present?
        relation = apply_finder_options(finder_options)
      end

      # primary_key is a plain String for ordinary models, so it must be
      # wrapped before counting columns (String#size is the name's length).
      pkey = Array.wrap(primary_key)

      # Work on a copy so a caller-supplied :start array is never modified.
      start = Array.wrap(options[:start] || [1] * pkey.size).dup
      unless start.size == pkey.size
        raise ArgumentError,
              "expected #{pkey.size} :start value(s), one per primary key column, got #{start.size}"
      end

      # Batches are fetched with a strict "greater than" comparison, so the
      # last start value is lowered by one to make the starting key inclusive.
      # NOTE(review): this relies on the last key column being numeric.
      start[-1] = start.last - 1

      batch_size = options[:batch_size] || 1000
      relation = relation.reorder(batch_order).limit(batch_size)

      records = batch_records_after(relation, pkey, start)
      while records.any?
        # Capture these before yielding: the block is free to modify the array.
        batch_count = records.size
        last_record = records.last

        yield records

        break if batch_count < batch_size

        start = pkey.map { |key| last_record.send(key) }
        raise "Primary key not included in the custom select clause" unless start.all?

        records = batch_records_after(relation, pkey, start)
      end
    end

    private

    # SQL ORDER BY fragment: every primary key column, ascending.
    def batch_order
      Array.wrap(primary_key).map { |pk| "#{quoted_batch_column(pk)} ASC" }.join(', ')
    end

    # Loads the records of +relation+ whose primary key sorts strictly after
    # +cursor+ (an array with one value per column in +pkey+).
    def batch_records_after(relation, pkey, cursor)
      id_constraints, key_vals = build_id_constraints(pkey, cursor)
      relation.where(id_constraints, *key_vals).all
    end

    # Builds the WHERE fragment selecting rows whose primary key tuple is
    # strictly greater than +values+ when compared column by column.
    #
    # For keys (a, b) the result is
    #   "(b > ? AND a = ?) OR (a > ?)" with binds [b_val, a_val, a_val]
    #
    # Neither argument is modified.
    #
    # @param keys [Array] primary key column names, most significant first
    # @param values [Array] one value per entry of +keys+, in the same order
    # @return [Array(String, Array)] the SQL fragment and its bind values
    def build_id_constraints(keys, values)
      return ["#{quoted_batch_column(keys.first)} > ?", [values.first]] if keys.size == 1

      leading_keys = keys[0...-1]
      leading_values = values[0...-1]

      # Rows that tie on every leading column and exceed the last one...
      ties = leading_keys.reverse.map { |subkey| "#{quoted_batch_column(subkey)} = ?" }.join(' AND ')
      # ...or rows that already exceed the cursor on the leading columns.
      subquery, subvalues = build_id_constraints(leading_keys, leading_values)

      ["(#{quoted_batch_column(keys.last)} > ? AND #{ties}) OR (#{subquery})",
       [values.last, *(leading_values.reverse + subvalues)]]
    end

    # Fully qualified, quoted column reference for +key+ on this table.
    def quoted_batch_column(key)
      "#{quoted_table_name}.#{connection.quote_column_name key.to_s}"
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment