Skip to content

Instantly share code, notes, and snippets.

@MatMoore
Last active April 25, 2016 12:52
Show Gist options
  • Save MatMoore/e07fee7f34a41ed21708d07a78fe25ed to your computer and use it in GitHub Desktop.
Save MatMoore/e07fee7f34a41ed21708d07a78fe25ed to your computer and use it in GitHub Desktop.
Verifier script WIP
require 'gds_api/rummager'
require 'set'
# Rake entry point that delegates to
# Tasks::DataSanitizer.delete_access_limited, handing it STDOUT
# (presumably the stream the sanitizer reports to -- confirm in DataSanitizer).
# Depends on :environment, so that task runs first.
desc "Sanitize access limited data"
task :sanitize_data => [:environment] do
  Tasks::DataSanitizer.delete_access_limited(STDOUT)
end
# For every policy returned by the content collection query, compares the
# organisation, working group and people links from Queries::GetLinkSet with
# the values in the policy's rummager document (raw_source "organisations",
# "policy_groups" and "people").
#
# Output: one "." per comparison that matches; otherwise the set difference is
# printed in both directions. A policy whose rummager document cannot be
# fetched is reported and skipped.
desc "Compare policy links in the publishing api with rummager"
task compare_policies_to_rummager: :environment do
  policy_query = Queries::GetContentCollection.new(
    document_type: 'policy',
    fields: %w(content_id base_path),
    pagination: NullPagination.new
  )
  rummager = GdsApi::Rummager.new(Plek.find('rummager'))

  # Rummager values are used as the last path segment of a base_path; each is
  # replaced by the content_id of the published content item at
  # "<path_prefix>/<slug>". A slug with no published item becomes nil, so it
  # still shows up as a difference in the report.
  content_ids_for = lambda do |slugs, path_prefix|
    (slugs || []).to_set.map! do |slug|
      ContentItemFilter
        .filter(state: "published", base_path: "#{path_prefix}/#{slug}")
        .pluck(:content_id)
        .first
    end
  end

  # Prints "." when both sets agree, otherwise both set differences.
  report_differences = lambda do |label, policy_id, api_ids, rummager_ids|
    if api_ids == rummager_ids
      print '.'
    else
      puts
      puts "#{label} in rummager but not in publishing api: #{policy_id}"
      puts((rummager_ids - api_ids).inspect)
      puts "#{label} in publishing api but not in rummager: #{policy_id}"
      puts((api_ids - rummager_ids).inspect)
    end
  end

  policy_query.call.each do |policy|
    policy_id = "#{policy['content_id']}: #{policy['base_path']}"

    links = Queries::GetLinkSet.call(policy['content_id'])[:links]
    orgs = (links[:organisations] || []).to_set
    people = (links[:people] || []).to_set
    working_groups = (links[:working_groups] || []).to_set

    begin
      doc = rummager.get_content!(policy['base_path'])
    rescue GdsApi::HTTPNotFound
      # NOTE(review): assumes get_content! raises GdsApi::HTTPNotFound for a
      # missing document -- confirm against the gds-api-adapters version in use.
      puts
      puts "Doc not in rummager: #{policy_id}"
      next
    rescue StandardError => e
      # Any other failure (timeout, 5xx, ...) is not evidence that the document
      # is missing, so report it as an error instead of mislabelling it.
      puts
      puts "Error fetching doc from rummager: #{policy_id} (#{e.class}: #{e.message})"
      next
    end

    raw_source = doc['raw_source']
    rummager_people = content_ids_for.call(raw_source['people'], "/government/people")
    rummager_orgs = content_ids_for.call(raw_source['organisations'], "/government/organisations")
    rummager_working_groups = content_ids_for.call(raw_source['policy_groups'], "/government/groups")

    report_differences.call("Orgs", policy_id, orgs, rummager_orgs)
    report_differences.call("WG", policy_id, working_groups, rummager_working_groups)
    report_differences.call("People", policy_id, people, rummager_people)
  end
end
# Replays link events for every policy that has a PatchLinkSet event dated
# 2016-03-14 (the "destructive" events), skipping those events themselves.
#
# For each affected policy the log is walked backwards from the newest
# PutContentWithLinks / PatchLinkSet event to the most recent
# PutContentWithLinks event (or to the start of that history when there is
# none), then re-applied oldest-first through Commands::V2::PatchLinkSet.
#
# Arguments:
#   action - pass "apply" to actually call the command; any other value
#            (including none) only prints what would be replayed.
#
# Raises RuntimeError if a policy with a destructive event has no events in
# its log.
desc "Replay policy link events, skipping the 2016-03-14 PatchLinkSet events ([apply] to write)"
task :restore_policy_links, [:action] => [:environment] do |_, args|
  dry_run = args["action"] != 'apply'
  puts "Dry run" if dry_run

  policy_query = Queries::GetContentCollection.new(
    document_type: 'policy',
    fields: %w(content_id),
    pagination: NullPagination.new
  )
  policy_content_ids = policy_query.call.map { |item| item["content_id"] }

  # Identify the destructive events. Loaded once so the count and the
  # iteration below see the same rows.
  policy_wipe_events = Event
    .where("created_at::date = '2016-03-14' and action = 'PatchLinkSet'")
    .where(content_id: policy_content_ids)
    .order("content_id, created_at asc")
    .to_a

  wipe_event_ids_by_content_id = Hash.new { |h, k| h[k] = [] }
  puts "Found #{policy_wipe_events.size} destructive events"
  policy_wipe_events.each do |event|
    puts "#{event.content_id} #{event.action} #{event.created_at} #{event.payload[:links]}"
    wipe_event_ids_by_content_id[event.content_id] << event.id
  end
  puts

  # Restore the links. Iterate per policy rather than per destructive event:
  # a policy with several destructive events must still be replayed only once.
  wipe_event_ids_by_content_id.each do |content_id, wipe_event_ids|
    puts "Restoring #{content_id}..."
    policy_events = Event.where(
      content_id: content_id,
      action: %w(PutContentWithLinks PatchLinkSet)
    ).order("created_at desc")

    # Newest-first slice of the log, ending at (and including) the most
    # recent PutContentWithLinks event when one exists.
    policy_log = []
    policy_events.each do |event|
      policy_log << event
      break if event.action == 'PutContentWithLinks'
    end
    raise "Event log error: no events found for #{content_id}" if policy_log.empty?

    if policy_log.last.action != 'PutContentWithLinks'
      # Policy created since Publishing API V2
      puts "New policy #{content_id}"
    end

    policy_log.reverse_each do |event|
      next if wipe_event_ids.include?(event.id)
      puts "Reapplying #{event.created_at} #{event.action}: #{event.payload[:links]}"
      Commands::V2::PatchLinkSet.call(event.payload) unless dry_run
    end
    puts
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment