Created
May 23, 2016 07:53
-
-
Save eliank/f6b30f7ad20567573f86d33738f99ac7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module ActiveRecord
  module Batches
    # Iterates over records one at a time, loading them in batches through
    # find_in_batches_with_order.
    #
    # options - Hash forwarded unchanged to find_in_batches_with_order
    #           (:start, :batch_size, :additional_order_column).
    #
    # With a block, yields every record of every batch. Without a block,
    # returns an Enumerator over this same method (so that
    # :additional_order_column is honoured when it is iterated); its lazy size
    # is the number of records, counted from options[:start] when given.
    def find_each_with_order(options = {})
      if block_given?
        find_in_batches_with_order(options) do |records|
          records.each { |record| yield record }
        end
      else
        # Must target this method, not the stock find_each: the stock method
        # knows nothing about :additional_order_column.
        enum_for(:find_each_with_order, options) do
          options[:start] ? where(table[primary_key].gteq(options[:start])).size : size
        end
      end
    end

    # Sometimes the query planner avoids using indexes in favor of sequential scans.
    # When a LIMIT clause is present the planner assumes a distribution of rows matching
    # a condition that doesn't correspond with the actual contents of the table.
    # In some cases the assumption about the distribution of values differs in such a way
    # that the sequential scan ends up iterating over millions of rows, lasting minutes,
    # whilst an index scan would have returned a result in mere seconds.
    # Specifying an additional column for ordering forces the planner to consider using an index
    # that contains the specific column, even though the primary_key ASC order is unique
    # and the additional order column won't be used for sorting at all.
    #
    # The code below is nearly identical to:
    # https://github.com/rails/rails/blob/2a7cf24cb7aab28f483a6772b608e2868a9030ba/activerecord/lib/active_record/relation/batches.rb#L98
    # It is changed in such a way to allow for an additional_order_column. The column to
    # specify is the first column of the desired index.
    #
    # options - Hash, validated with assert_valid_keys:
    #           :start                   - primary key value to start from (inclusive).
    #           :batch_size              - records per batch (default 1000).
    #           :additional_order_column - column appended to the batch ORDER BY.
    #
    # With a block, yields each batch as an Array of records; any existing
    # order and limit on the relation are replaced by the batch order and
    # batch size. Without a block, returns an Enumerator over this same
    # method whose lazy size is the number of batches.
    #
    # Raises RuntimeError when the loaded records do not expose a primary key
    # value (e.g. a custom select that leaves it out).
    def find_in_batches_with_order(options = {})
      options.assert_valid_keys(:start, :batch_size, :additional_order_column)

      relation = self
      start = options[:start]
      batch_size = options[:batch_size] || 1000
      additional_order_column = options[:additional_order_column]

      unless block_given?
        # Must target this method, not the stock find_in_batches: the stock
        # method does not accept :additional_order_column.
        return to_enum(:find_in_batches_with_order, options) do
          total = start ? where(table[primary_key].gteq(start)).size : size
          (total - 1).div(batch_size) + 1
        end
      end

      order_for_batch =
        if additional_order_column.present?
          batch_with_additional_order(additional_order_column)
        else
          batch_order
        end

      relation = relation.reorder(order_for_batch).limit(batch_size)
      records = start ? relation.where(table[primary_key].gteq(start)).to_a : relation.to_a

      while records.any?
        # Capture size and offset before yielding: the caller's block may
        # mutate the records array.
        records_size = records.size
        primary_key_offset = records.last.id
        raise "Primary key not included in the custom select clause" unless primary_key_offset

        yield records

        # A short batch means the table has been exhausted.
        break if records_size < batch_size

        records = relation.where(table[primary_key].gt(primary_key_offset)).to_a
      end
    end

    private

    # Builds the ORDER BY fragment used for batching: the primary key ascending,
    # followed by the given column (quoted through the connection, no explicit
    # direction).
    def batch_with_additional_order(additional_order_column)
      quoted_additional_column_name = connection.quote_column_name(additional_order_column)
      "#{quoted_table_name}.#{quoted_primary_key} ASC, #{quoted_table_name}.#{quoted_additional_column_name}"
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment