Last active
December 3, 2021 17:50
-
-
Save krsyoung/ee171d283628f7d8689ba2e7eb5064b1 to your computer and use it in GitHub Desktop.
Helper tasks for migrating Ruby on Rails audited gem from YAML to JSONB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'benchmark' | |
# This file includes several tasks to help with the management of the audited gem. The two main tasks are: | |
# | |
# yaml_to_jsonb - this will migrate yaml data stored in a column named audited_changes_yaml to jsonb data stored | |
# in a column named audited_changes. | |
# | |
# scrub - this will look at all of the columns that have been excluded from audited classes and ensure that | |
# they are all removed from the jsonb data. | |
# | |
# Here is a sample of the migration needed: | |
# # 20211126022727_change_audited_changes_from_yaml_to_json.rb | |
# # The general approach is: | |
# # 1. Quick migration, change the schema | |
# # 2. Slow task, migrate the old YAML column to the new JSONB column | |
# # 3. Quick migration, drop the old YAML column once the data migration is trusted | |
# # | |
# class ChangeAuditedChangesFromYamlToJson < ActiveRecord::Migration[6.1] | |
# def up | |
# # rename the current column | |
# rename_column :audits, :audited_changes, :audited_changes_yaml | |
# # add a new column to catch everything going forward, this will make it look like all | |
# # audits are empty ... so best to run the data migration soon after | |
# add_column :audits, :audited_changes, :jsonb, default: {} | |
# end | |
# def down | |
# remove_column :audits, :audited_changes | |
# rename_column :audits, :audited_changes_yaml, :audited_changes | |
# end | |
# end | |
# prepare a string for a SQL statement (double up single quotes) | |
def escape_sql(string) | |
return string.gsub("'","''") | |
end | |
# converts a yaml column (audited_changes_yaml) to jsonb (audited_changes) | |
def convert_yaml_to_json(r) | |
# parse the yaml text into an object | |
parsed_yaml = YAML.safe_load(r['audited_changes_yaml'], permitted_classes: [Date, Time, ActiveSupport::HashWithIndifferentAccess, ActiveSupport::TimeZone, ActiveSupport::TimeWithZone, BigDecimal], aliases: true) | |
# dump the object to json using Oj (faster!?) | |
json_string = Oj.dump(parsed_yaml, mode: :compat) | |
# avoid any conflicts with sql (must escape single quotes) | |
escaped_json = escape_sql(json_string) | |
# Run a straight SQL update to avoid instantiating the model | |
ActiveRecord::Base.connection.execute( | |
""" | |
UPDATE audits | |
SET audited_changes = '#{escaped_json}'::jsonb | |
WHERE id = #{r['id']} | |
""" | |
) | |
end | |
namespace :audited do | |
desc "Transform Audited YAML to JSONB" | |
task yaml_to_jsonb: :environment do | |
Rails.logger = Logger.new(STDOUT) | |
# number of records to execute in a batch, locally 100,000 takes about 7 minutes | |
BATCH_SIZE = 100000 | |
Rails.logger.info "About to process #{BATCH_SIZE} records" | |
# track the conversion and insertion time of all records | |
time = Benchmark.realtime { | |
ActiveRecord::Base.connection.execute('SELECT id, audited_changes_yaml FROM audits WHERE audited_changes = \'{}\' LIMIT ' + BATCH_SIZE.to_s).map do |r| | |
convert_yaml_to_json(r) | |
end | |
} | |
# track how long the overall batch took to complete | |
Rails.logger.info "Completed in: #{time/60}m" | |
# count the number of records remaining to process | |
count = Audited::Audit.where(audited_changes: {}).count | |
Rails.logger.info "Records remaining: #{count}" | |
# calculate an estimate of the remaining time based on the current processing time | |
remaining_time = (time / BATCH_SIZE * count) / 60 | |
Rails.logger.info "Approx remaining time: #{remaining_time}" | |
end | |
desc "Remove sensitive data from JSONB audited fields based on non-audited fields defined" | |
task scrub: :environment do | |
Rails.logger = Logger.new(STDOUT) | |
# max number of records to process | |
BATCH_SIZE = 1000 | |
Rails.logger.info "About to process #{BATCH_SIZE} records" | |
# get a list of audit records we're looking at based on all existing audited records | |
auditable_types = Audited::Audit.group(:auditable_type).count | |
# track the overall runtime | |
time = Benchmark.realtime { | |
# loop over the different audited classes | |
auditable_types.each do |key, value| | |
Rails.logger.info " - processing #{key}" | |
# if a class no longer exists, it either means we renamed it or it was removed. | |
begin | |
non_audited_columns = key.constantize.non_audited_columns | |
rescue => e | |
# could possibly just delete all of the records here | |
next | |
end | |
non_audited_columns.each do |column| | |
Rails.logger.info " - hunting for #{column}" | |
# confirmation_token | |
ActiveRecord::Base.connection.execute('SELECT id, audited_changes FROM audits WHERE auditable_type = \'' + key + '\' AND audited_changes -> \'' + column + '\' is not null LIMIT ' + BATCH_SIZE.to_s).map do |r| | |
object = Oj.load(r['audited_changes']) | |
# again try Oj to speed things up as much as possible | |
json_string = Oj.dump(object.except(*non_audited_columns), mode: :compat) | |
# escape for sql | |
escaped_json = escape_sql(json_string) | |
# update | |
ActiveRecord::Base.connection.execute( | |
""" | |
UPDATE audits | |
SET audited_changes = '#{escaped_json}'::jsonb | |
WHERE id = #{r['id']} | |
""" | |
) | |
end | |
end | |
end | |
} | |
Rails.logger.info "Completed in: #{time/60}m" | |
count = Audited::Audit.where("audited_changes -> 'encrypted_password' is not null").count | |
Rails.logger.info "Records remaining: #{count}" | |
remaining_time = (time / BATCH_SIZE * count) / 60 | |
Rails.logger.info "Approx remaining time: #{remaining_time}" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment