Last active
April 25, 2016 12:52
-
-
Save MatMoore/e07fee7f34a41ed21708d07a78fe25ed to your computer and use it in GitHub Desktop.
Verifier script WIP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'gds_api/rummager' | |
require 'set' | |
# Rake task: sanitize_data
#
# Loads the application environment (via the :environment prerequisite) and
# then delegates to Tasks::DataSanitizer.delete_access_limited, handing it
# STDOUT (presumably so the sanitizer can report progress there -- confirm
# against Tasks::DataSanitizer).
desc "Sanitize access limited data"
task sanitize_data: :environment do
  Tasks::DataSanitizer.delete_access_limited(STDOUT)
end
# Rake task: compare_policies_to_rummager
#
# For every policy returned by Queries::GetContentCollection, compares the
# organisations / people / working_groups held in the publishing api link set
# with the organisations / people / policy_groups stored in the rummager
# document's 'raw_source'.
#
# Rummager values are interpolated into /government/... base paths (i.e. they
# are treated as slugs) and looked up through ContentItemFilter to obtain the
# content id of the published item, so that both sides can be compared as
# Sets of content ids.
#
# Output: one '.' per matching link type; for a mismatch, both set
# differences are printed. Policies that cannot be fetched from rummager are
# reported and skipped.
desc "Compare policy links in the publishing api against rummager"
task compare_policies_to_rummager: :environment do
  # Turns a (possibly nil) list of slugs into a Set of content ids by looking
  # up the published content item at "<path_prefix>/<slug>".
  # A slug with no published item maps to nil, which then stays in the Set
  # and shows up as a mismatch in the report.
  content_ids_for = lambda do |slugs, path_prefix|
    (slugs || []).to_set.map! do |slug|
      ContentItemFilter
        .filter(state: "published", base_path: "#{path_prefix}/#{slug}")
        .pluck(:content_id)
        .first
    end
  end

  # Prints '.' when both Sets agree, otherwise the two set differences.
  report = lambda do |label, policy, publishing_api_ids, rummager_ids|
    if publishing_api_ids == rummager_ids
      print '.'
    else
      subject = "#{policy['content_id']}: #{policy['base_path']}"
      puts
      puts "#{label} in rummager but not in publishing api: #{subject}"
      puts((rummager_ids - publishing_api_ids).inspect)
      puts "#{label} in publishing api but not in rummager: #{subject}"
      puts((publishing_api_ids - rummager_ids).inspect)
    end
  end

  policy_query = Queries::GetContentCollection.new(
    document_type: 'policy',
    fields: %w(content_id base_path),
    pagination: NullPagination.new
  )
  rummager = GdsApi::Rummager.new(Plek.find('rummager'))

  policy_query.call.each do |policy|
    links = Queries::GetLinkSet.call(policy['content_id'])[:links]
    orgs = (links[:organisations] || []).to_set
    people = (links[:people] || []).to_set
    working_groups = (links[:working_groups] || []).to_set

    begin
      doc = rummager.get_content!(policy['base_path'])
    rescue StandardError => error
      # Best effort: keep going with the next policy, but show what actually
      # went wrong -- not every failure means the document is missing.
      puts
      puts "Doc not in rummager: #{policy['content_id']}: #{policy['base_path']}"
      puts "  (#{error.class}: #{error.message})"
      next
    end

    raw_source = doc['raw_source']
    rummager_people = content_ids_for.call(raw_source['people'], "/government/people")
    rummager_orgs = content_ids_for.call(raw_source['organisations'], "/government/organisations")
    rummager_wg = content_ids_for.call(raw_source['policy_groups'], "/government/groups")

    report.call("Orgs", policy, orgs, rummager_orgs)
    report.call("WG", policy, working_groups, rummager_wg)
    report.call("People", policy, people, rummager_people)
  end
end
# Rake task: restore_policy_links[action]
#
# Treats every PatchLinkSet event recorded for a policy on 2016-03-14 as a
# destructive "wipe" event and rebuilds each affected policy's links by
# replaying its event history without those wipe events.
#
# For each affected policy the replayed history starts at the most recent
# PutContentWithLinks event (or, if there is none, at the oldest
# PutContentWithLinks/PatchLinkSet event found) and runs forward in time.
#
# Arguments:
#   action - pass "apply" to actually call Commands::V2::PatchLinkSet;
#            anything else (or nothing) is a dry run that only prints.
#
# Raises RuntimeError if no events at all are found for an affected policy.
desc "Restore policy links wiped on 2016-03-14 (dry run unless called with [apply])"
task :restore_policy_links, [:action] => [:environment] do |_, args|
  dry_run = args["action"] != 'apply'
  puts "Dry run" if dry_run

  policy_query = Queries::GetContentCollection.new(
    document_type: 'policy',
    fields: %w(content_id),
    pagination: NullPagination.new
  )
  policy_content_ids = policy_query.call.map { |item| item["content_id"] }

  # Identify the destructive events
  policy_wipe_events = Event
    .where("created_at::date = '2016-03-14' and action = 'PatchLinkSet'")
    .where(content_id: policy_content_ids)
    .order("content_id, created_at asc")

  puts "Found #{policy_wipe_events.size} destructive events"

  # content_id => ids of the wipe events for that policy (insertion order
  # follows the query order, i.e. sorted by content_id).
  wipe_event_ids_by_content_id = Hash.new { |hash, key| hash[key] = [] }
  policy_wipe_events.each do |event|
    puts "#{event.content_id} #{event.action} #{event.created_at} #{event.payload[:links]}"
    wipe_event_ids_by_content_id[event.content_id] << event.id
  end
  puts

  # Restore the links -- once per policy, not once per wipe event. A policy
  # with several wipe events would otherwise be replayed several times (and
  # later passes would also pick up events created by earlier ones).
  wipe_event_ids_by_content_id.each do |content_id, wipe_event_ids|
    puts "Restoring #{content_id}..."

    policy_events = Event.where(
      content_id: content_id,
      action: %w(PutContentWithLinks PatchLinkSet)
    ).order("created_at desc")

    # Walk backwards in time up to and including the latest
    # PutContentWithLinks event.
    policy_log = []
    policy_events.each do |event|
      policy_log << event
      break if event.action == 'PutContentWithLinks'
    end

    raise "Event log error: no events found for #{content_id}" if policy_log.empty?

    if policy_log.last.action != 'PutContentWithLinks'
      # Policy created since Publishing API V2
      puts "New policy #{content_id}"
    end

    # Replay oldest-first, leaving out the destructive events.
    policy_log.reverse_each do |event|
      next if wipe_event_ids.include?(event.id)

      puts "Reapplying #{event.created_at} #{event.action}: #{event.payload[:links]}"
      Commands::V2::PatchLinkSet.call(event.payload) unless dry_run
    end
    puts
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment