Created
November 11, 2010 21:02
-
-
Save robheittman/673172 to your computer and use it in GitHub Desktop.
Copies assessments from an old working set to a new working set
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'find' | |
require 'rainbow' | |
require 'nokogiri' | |
require 'fileutils' | |
# Walks the tree rooted at +dir+ and yields the path of every entry that is
# not a directory. Directories are descended into but never yielded.
#
# @param dir [String] root of the tree to walk
# @yieldparam path [String] a non-directory path as reported by Find.find
# @return [Enumerator, nil] an Enumerator over the same paths when no block
#   is given; otherwise the result of Find.find
def lsl(dir)
  # Without a block the original raised LocalJumpError on the first file;
  # hand back an Enumerator instead so callers can chain Enumerable methods.
  return enum_for(:lsl, dir) unless block_given?

  Find.find(dir) do |path|
    yield path unless File.directory?(path)
  end
end
# Not needed any more? (No call to node_scan is visible in this script.)
# Records the node id for a taxon node file.
#
# Only paths matching /nodes.*\.xml/ are considered. The id is the file's
# basename with its extension removed. If the document contains at least one
# <species> element, the first one's +parent+ and +name+ attributes are
# joined with a space and used as the key in +mapping+.
#
# @param path [String] candidate file path
# @param mapping [Hash{String => String}] updated in place: "parent name" => id
# @return [String, nil] the id when an entry was stored, otherwise nil
def node_scan(path, mapping)
  # Act only on nodes
  return unless path =~ /nodes.*\.xml/

  id = File.basename(path).chomp(File.extname(path))
  # Block form closes the handle as soon as parsing is done; the original
  # left one file open per node scanned.
  doc = File.open(path) { |io| Nokogiri::XML(io) }
  specie = doc.css("species").first
  return unless specie

  mapping["#{specie[:parent]} #{specie[:name]}"] = id
end
# Indexes a draft assessment file by the taxon it belongs to.
#
# Only paths matching /drafts.*\.xml/ are considered. The key is the
# concatenated text of every <speciesName> element in the document (an empty
# string when there is none); the value is the draft's path.
#
# @param path [String] candidate file path
# @param mapping [Hash{String => String}] updated in place: species name => path
# @return [String, nil] +path+ when an entry was stored, otherwise nil
def draft_scan(path, mapping)
  return unless path =~ /drafts.*\.xml/

  # Block form closes the handle once the document is parsed; the original
  # left one file open per draft scanned.
  doc = File.open(path) { |io| Nokogiri::XML(io) }
  mapping[doc.css("speciesName").inner_text] = path
end
# Working-set directories: "left" holds the drafts to carry over, "right" is
# the set they are remapped onto. Both may be passed on the command line
# (left first, then right); the historical names remain the defaults.
left = ARGV.fetch(0, "madrep_july10")
right = ARGV.fetch(1, "madrep_fixed")
#TODO extract zips
left_drafts = {}
right_drafts = {}
puts "Loading left (bad taxa) data set"
lsl(left) { |path| draft_scan path, left_drafts }
puts "Loading right (good taxa) data set"
lsl(right) { |path| draft_scan path, right_drafts }
count = 0
puts "Remapping drafts from left to right:"
left_drafts.each do |taxon, from_draft|
  unless right_drafts.key?(taxon)
    # NOTE(review): String#color comes from the rainbow gem; newer rainbow
    # releases may need an extra require for it - confirm against the
    # installed version.
    puts "No taxon in right for #{taxon}, will not remap it".color(:red)
    next
  end

  to_draft = right_drafts[taxon]
  # Block form closes each handle right after parsing; the original leaked
  # two open files per remapped draft.
  doc = File.open(from_draft) { |io| Nokogiri::XML(io) }
  to_draft_doc = File.open(to_draft) { |io| Nokogiri::XML(io) }
  remap_asm = to_draft_doc.css("assessmentID").first.content
  remap_species = to_draft_doc.css("speciesID").first.content
  # Stamp the right-hand species and assessment IDs onto the left-hand document
  doc.css("speciesID").first.content = remap_species
  doc.css("assessmentID").first.content = remap_asm
  doc.css("assessment").first["id"] = remap_asm
  # Keep the right-hand draft's own RegionInformation field: it replaces the
  # one carried in the left-hand document.
  doc.css('field[id="RegionInformation"]').first.replace(
    to_draft_doc.css('field[id="RegionInformation"]').first.to_xml
  )
  # Overwrite the right-hand draft with the remapped left-hand content
  File.open(to_draft, 'w') { |f| f.write(doc.to_xml) }
  count += 1
end
puts "Remapped #{count} drafts"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This shouldn't be necessary any more, because I think I taught the software how to deal with a normal zip, but here it is anyway:
find . -type f -print | zip ../madrep-remapped3 -@
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment