Created
August 15, 2012 21:06
-
-
Save kardeiz/3363686 to your computer and use it in GitHub Desktop.
UMI ETD XML to MARCXML with Ruby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true

# Converts UMI ETD XML files into a single MARCXML collection.
#
# Usage: pass the directory that holds the ETD XML files as the first
# command-line argument; the MARCXML document is written to standard output.
# Files without a primary author or a title are skipped with a warning on
# standard error.

require 'nokogiri'

MARC_NAMESPACE = 'http://www.loc.gov/MARC21/slim'

# Returns the text of the first node matching +xpath+ below +node+, or an
# empty string when +node+ is nil or nothing matches.
def text_at(node, xpath)
  match = node && node.at_xpath(xpath)
  match ? match.text : ''
end

# Emits a MARC datafield. Both indicators default to a blank because the
# MARC21 slim schema requires the ind1 and ind2 attributes on every datafield.
def add_datafield(xml, tag, ind1 = ' ', ind2 = ' ', &block)
  xml.datafield(tag: tag, ind1: ind1, ind2: ind2, &block)
end

# Emits a single subfield with the given code and text content.
def add_subfield(xml, code, content)
  xml.subfield(code: code) { xml.text content }
end

# Emits a datafield that carries only a subfield "a" (used for note fields).
def add_note(xml, tag, content)
  add_datafield(xml, tag) { add_subfield(xml, 'a', content) }
end

# Collects the author's name parts into a hash keyed by :surname, :fname,
# :middle and :suffix. Parts missing from the source become empty strings.
def author_name(author)
  %w[surname fname middle suffix].each_with_object({}) do |part, parts|
    parts[part.to_sym] = text_at(author, "./DISS_#{part}")
  end
end

# Splits a title at its first colon into [main title, subtitle], both
# stripped. The subtitle is empty when there is no colon or nothing follows it.
def split_title(title_text)
  main, _colon, rest = title_text.partition(':')
  [main.strip, rest.strip]
end

# Builds the "by First Middle Last, Suffix" text for 245 subfield c,
# leaving out the middle name and suffix when they are empty.
def responsibility_statement(name)
  statement = "by #{name[:fname]}"
  statement += " #{name[:middle]}" unless name[:middle].empty?
  statement += " #{name[:surname]}"
  statement += ", #{name[:suffix]}" unless name[:suffix].empty?
  statement
end

# Emits field 100: "Surname, First Middle" ending in "." when there is no
# suffix, or in "," followed by a subfield c that holds the suffix.
def add_author_entry(xml, name)
  middle = name[:middle].empty? ? '' : " #{name[:middle]}"
  has_suffix = !name[:suffix].empty?
  add_datafield(xml, '100', '1') do
    add_subfield(xml, 'a', "#{name[:surname]}, #{name[:fname]}#{middle}#{has_suffix ? ',' : '.'}")
    add_subfield(xml, 'c', name[:suffix]) if has_suffix
  end
end

# Emits field 245 and, when the title has a subtitle, field 246 holding the
# subtitle with its first character capitalized.
def add_title_fields(xml, title_text, name)
  main_title, subtitle = split_title(title_text)
  add_datafield(xml, '245', '1', '0') do
    add_subfield(xml, 'a', main_title)
    add_subfield(xml, 'b', subtitle) unless subtitle.empty?
    add_subfield(xml, 'c', responsibility_statement(name))
    add_subfield(xml, 'h', '[electronic resource]')
  end
  return if subtitle.empty?

  add_datafield(xml, '246', '3', '0') do
    add_subfield(xml, 'a', subtitle[0].capitalize + subtitle[1..-1])
  end
end

# Emits field 260 (place, institution name, completion date) and field 490
# ("<completion date> dissertation").
def add_publication_fields(xml, doc)
  completion_date = text_at(doc, '//DISS_dates/DISS_comp_date')
  add_datafield(xml, '260') do
    add_subfield(xml, 'a', '[Fort Worth, Tex.]')
    add_subfield(xml, 'b', text_at(doc, '//DISS_institution/DISS_inst_name'))
    add_subfield(xml, 'c', completion_date)
  end
  add_datafield(xml, '490', '0') do
    add_subfield(xml, 'a', "#{completion_date} dissertation")
  end
end

# Builds the 500 note made of the institution contact followed by the first
# advisor's name. The advisor part is omitted when the source has no advisor.
def contact_note(doc)
  note = text_at(doc, '//DISS_institution/DISS_inst_contact')
  advisor = doc.at_xpath('//DISS_advisor/DISS_name')
  return note unless advisor

  "#{note}; advisor, #{text_at(advisor, './DISS_fname')} #{text_at(advisor, './DISS_surname')}"
end

# Emits the 500 notes, one 520 per abstract paragraph, and the 538 notes.
def add_note_fields(xml, doc)
  paragraphs = doc.xpath('//DISS_abstract/DISS_para')
  first_paragraph = paragraphs.first

  add_note(xml, '500', 'Title from dissertation title page')
  add_note(xml, '500', 'Includes abstract') if first_paragraph && !first_paragraph.text.empty?
  add_note(xml, '500', contact_note(doc))
  paragraphs.each { |paragraph| add_note(xml, '520', paragraph.text) }
  add_note(xml, '538', 'Mode of access: World Wide Web')
  return unless text_at(doc, '//DISS_binary/@type') == 'PDF'

  add_note(xml, '538', 'System requirements: Adobe Acrobat reader')
end

# Emits one MARC record for a parsed ETD document.
def add_record(xml, doc, author, title)
  name = author_name(author)
  xml.record do
    add_author_entry(xml, name)
    add_title_fields(xml, title.text, name)
    add_publication_fields(xml, doc)
    add_note_fields(xml, doc)
  end
end

source_dir = ARGV[0]
abort "Usage: #{File.basename($PROGRAM_NAME)} DIRECTORY" unless source_dir && File.directory?(source_dir)

# Only regular files are converted; subdirectories are ignored. Sorting keeps
# the record order stable between runs.
etd_paths = Dir.chdir(source_dir) { Dir.glob('./*').map { |entry| File.expand_path(entry) } }
etd_paths = etd_paths.select { |path| File.file?(path) }.sort

builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
  xml.collection(xmlns: MARC_NAMESPACE) do
    etd_paths.each do |path|
      # The block form closes the file handle as soon as parsing is done.
      doc = File.open(path) { |io| Nokogiri::XML(io) }
      author = doc.at_xpath("//DISS_author[@type='primary']/DISS_name")
      title = doc.at_xpath('//DISS_description/DISS_title')
      unless author && title
        warn "Skipping #{path}: no primary author or title found"
        next
      end

      add_record(xml, doc, author, title)
    end
  end
end

puts builder.to_xml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment