Created
February 23, 2019 22:12
-
-
Save jeremylenz/c26ca975fb9ed995faafc5405b240a72 to your computer and use it in GitHub Desktop.
Prevent duplicate record creation by using a hash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Filters a batch of would-be VehiclePosition attribute hashes down to the ones
# that are not duplicates, and purges duplicates found among existing records.
#
# Two entries count as duplicates when they share the same whole-second
# timestamp (`timestamp.to_i`), `vehicle_ref` and `stop_ref`. Existing records
# are scanned first, so an incoming object that matches an existing record is
# dropped rather than the record being replaced.
#
# @param objects_to_be_added [Array<Hash>] attribute hashes for new records;
#   keys may be strings or symbols (they are normalised to strings).
# @param existing_vehicle_positions [Enumerable] objects responding to
#   `attributes` (per the original comments, an ActiveRecord::Relation).
# @return [Array<Hash>] the unique, string-keyed hashes with no "id" — i.e.
#   only objects that do not yet exist — in first-seen order.
#
# Side effect: calls `delete` with the ids of any existing entries that
# duplicate an earlier entry (presumably ActiveRecord's class-level `delete`,
# which skips callbacks — confirm against the enclosing model).
def self.prevent_duplicates(objects_to_be_added, existing_vehicle_positions)
  # Monotonic clock: elapsed-time measurement must not be affected by wall-clock changes.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  logger.info "VehiclePosition prevent_duplicates starting..."

  # One flat list of string-keyed hashes, existing records first so that they
  # win over incoming objects carrying the same key.
  candidates = existing_vehicle_positions.map(&:attributes) +
               objects_to_be_added.map { |attrs| attrs.transform_keys(&:to_s) }

  first_seen = {}   # composite key => first hash seen with that key
  ids_to_purge = [] # ids of existing records that repeat an earlier entry
  dup_count = 0

  candidates.each do |attrs|
    # Array key instead of a space-joined string: refs that themselves contain
    # spaces ("A B"/"C" vs "A"/"B C") can no longer collide. Refs are still
    # stringified so that e.g. 123 and "123" keep matching as before.
    key = [attrs["timestamp"].to_i, attrs["vehicle_ref"].to_s, attrs["stop_ref"].to_s]

    if first_seen.key?(key)
      dup_count += 1
      # Only entries that carry an id can be deleted; id-less incoming
      # duplicates are simply not returned.
      ids_to_purge << attrs["id"] unless attrs["id"].nil?
    else
      first_seen[key] = attrs
    end
  end
  logger.info "#{dup_count} duplicates found"

  # Delete pre-existing duplicates.
  unless ids_to_purge.empty?
    logger.info "prevent_duplicates: Deleting #{ids_to_purge.length} duplicate VehiclePositions"
    delete(ids_to_purge)
  end

  # Keep only id-less entries so existing records are never re-created.
  result = first_seen.values.select { |attrs| attrs["id"].nil? }

  prevented_count = objects_to_be_added.length - result.length
  unless prevented_count.zero?
    logger.info "prevent_duplicates: Prevented #{prevented_count} duplicate VehiclePositions"
    logger.info "prevent_duplicates: Filtered to #{result.length} unique objects"
  end

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  logger.info "prevent_duplicates complete after #{elapsed.round(2)} seconds"
  result
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment