Created
October 13, 2013 21:36
-
-
Save rob-mcgrail/6967687 to your computer and use it in GitHub Desktop.
Dumping a useful but ugly script for wrangling misbehaving MMS records.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'ostruct'
require 'rubygems'
require 'rest_client'
require 'nokogiri'

# One-off maintenance script: searches Solr for records, keeps those whose id
# matches a pattern, then fetches each record's LOM datastream from Fedora and,
# when the record's status is 'Live', writes the LOM back with every second
# contribute/entry node removed.

SOLR_ROOT = 'http://search.tki.org.nz:8983'
# NOTE(review): credentials are embedded in this URL and there is no scheme;
# presumably rest-client defaults it to http:// - confirm before running.
FEDORA_ROOT = 'username:password@fedora-mms:8080/fedora'

# Namespace binding used by every XPath query against a LOM record.
LOM_NS = { 'ns' => 'http://ltsc.ieee.org/xsd/LOM' }.freeze

# Field name => XPath, evaluated relative to a single Solr <doc> element.
# The leading '.' keeps each query scoped to that element, so the element does
# not have to be re-serialised and re-parsed as a document of its own.
XPATHS = {
  :pid => './/str[@name="id"]',
  :title => './/arr[@name="title_t"]/str[1]',
  :url => './/arr[@name="url"]/*'
}.freeze

# Parses a Solr XML response body into one OpenStruct per <doc> element, with
# one attribute per key of XPATHS holding the text of the matching nodes.
def parse_solr_docs(xml)
  Nokogiri::XML(xml).xpath('//doc').map do |doc|
    fields = XPATHS.map { |field, path| [field, doc.xpath(path).text] }
    OpenStruct.new(Hash[fields])
  end
end

# Removes every second contribute/entry node from the LOM document, in place.
# The index runs over all matching nodes in the document, not per <contribute>.
# NOTE(review): presumably the entries had been duplicated - confirm.
def prune_contribute_entries(record)
  record.xpath('//ns:contribute/ns:entry', LOM_NS).each_with_index do |entry, index|
    entry.remove if index.odd?
  end
end

# Returns something printable for a rescued error: the HTTP response when the
# error carries one, otherwise the error's class and message. Errors without a
# #response method (DNS failure, XML error, ...) must not raise NoMethodError
# from inside the rescue handler.
def describe_error(error)
  response = error.response if error.respond_to?(:response)
  response || "#{error.class}: #{error.message}"
end

# search
response = RestClient.get "#{SOLR_ROOT}/solr/select?q=Kawekawe&rows=42400"

# parse the response and keep only the records whose id matches
results = parse_solr_docs(response.body).select { |r| r.pid =~ /TKI39128/ }
puts results.count

# for every id
results.each do |r|
  puts "\nFinding #{r.pid} in Fedora..."
  # give it a rest...
  sleep 0.2
  begin
    lom_url = "#{FEDORA_ROOT}/objects/MoE:#{r.pid}/datastreams/LOM"

    # get the LOM record and parse it
    response = RestClient.get "#{lom_url}/content"
    record = Nokogiri::XML(response.body)
    status = record.xpath('//ns:status/ns:value', LOM_NS).text

    if status == 'Live'
      # Fix for weird issue...
      prune_contribute_entries(record)

      # Disabled: change the status from 'Live' to 'Rejected' before writing back.
      # puts r.url
      # puts "Status is #{status} - changing to Rejected..."
      # record.xpath('//ns:status/ns:value', LOM_NS).each do |node|
      #   node.content = node.content.gsub('Live', 'Rejected')
      # end
      # puts record.xpath('//ns:status/ns:value', LOM_NS).text

      # PUT the modified record back to update Fedora
      RestClient.put lom_url, record.to_s, :content_type => 'text/xml; charset=utf-8'
    else
      puts "Status was #{status} - Ignoring..."
    end
  rescue => e
    # Best effort: report the failure and carry on with the next record.
    puts describe_error(e)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment