Skip to content

Instantly share code, notes, and snippets.

@kardeiz
Created August 30, 2012 21:46
Show Gist options
  • Save kardeiz/3542022 to your computer and use it in GitHub Desktop.
Save kardeiz/3542022 to your computer and use it in GitHub Desktop.
Ugly code to get rough CSV into wrapped EAD XML chunks
#!/usr/bin/env ruby
# encoding: utf-8
require 'csv'
require 'pp'
require 'active_support/core_ext'
require 'nokogiri'
# Read the source spreadsheet export and parse it into `f`, an array of rows
# where each row is an array of cells. The text is passed through
# String#encode targeting UTF-8 with :invalid => :replace before parsing.
# Parse options that were tried and left disabled (rows are used positionally):
# , :headers => true, :header_converters => :symbol)
raw_csv = File.open('/home/jhbrown/Dropbox/kaye.csv') { |file| file.read }
f = CSV.parse(raw_csv.encode('UTF-8', :invalid => :replace))
# Normalise every row of `f` in place, in two steps per row:
#
#   1. Fill-down: a blank cell takes the value of the cell directly above it
#      (the previous row has already been processed, so values cascade down
#      through runs of blank cells).
#   2. Folder count: the first four cells are concatenated and searched for a
#      parenthesised group that starts with digits, e.g. "(3 folders)". One
#      extra cell is appended to the row: "1 folder", "N folders", or "" when
#      no such group is found.
f.each_with_index do |line, index|
  # Fill-down only applies from row 2 onwards. Row 0 must be excluded because
  # f[index - 1] would be f[-1] -- the LAST row of the file -- and blank cells
  # in the first row would silently pick up unrelated values from the end of
  # the data. Row 1 is left untouched, as before, so it never copies from
  # row 0 (presumably the header row; it is the row skipped when the XML is
  # built further down).
  if index > 1
    previous_row = f[index - 1]
    line.each_with_index do |line_el, line_el_index|
      # `line` is the same object as f[index], so this updates `f` directly.
      line[line_el_index] = previous_row[line_el_index] if line_el.blank?
    end
  end

  # nil cells become "" via to_s so they do not break the join.
  folder_match = line[0..3].map(&:to_s).join.match(/\((?<folders>\d+)[^\)]*?\)/)
  folder_count = folder_match && folder_match[:folders]

  line << if folder_count.nil?
            ""
          elsif folder_count == "1"
            "#{folder_count} folder"
          else
            "#{folder_count} folders"
          end
end
# Serialise the processed rows back into a single CSV document and echo it to
# stdout so the intermediate result can be inspected or saved.
csv_string = CSV.generate(:encoding => 'UTF-8') { |csv| f.each { |row| csv << row } }
puts csv_string
# Re-parse the generated CSV text (alternatively, a previously saved copy can
# be read instead):
# f = CSV.read('/home/jhbrown/projects/kaye_parsed.txt')
f = CSV.parse(csv_string)

# Clean each cell: strip surrounding whitespace (nil cells stay nil), then
# turn the literal placeholder "none" into an empty string.
f.map! do |row|
  row.map do |cell|
    trimmed = cell && cell.strip
    trimmed == "none" ? "" : trimmed
  end
end
# Build one <c02 level="item"> element per usable row, all wrapped in a single
# <root> element, and print the resulting XML document.
#
# Cell positions used for each row:
#   0..1  -> joined with "--" into <scopecontent>/<list>/<item> (omitted when
#            both are blank)
#   2     -> <unitdate>
#   3     -> <unittitle> ("General" when blank)
#   4     -> <container type="Box">
#   5     -> <physdesc> (omitted when blank)
xdoc = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
  xml.root do
    f.each_with_index do |line, index|
      # The first row and any row without a value in column 2 produce no output.
      next if index == 0 || line[2].nil?

      subjects = line[0..1].reject(&:blank?)
      title = line[3]
      title = "General" if title.blank?

      xml.c02(:level => 'item') do
        xml.did do
          unless subjects.empty?
            xml.scopecontent(:encodinganalog => "subject") do
              xml.list do
                xml.item { xml.text subjects.join("--") }
              end
            end
          end

          xml.unitdate(:encodinganalog => "date") { xml.text line[2] }
          xml.unittitle(:encodinganalog => "title") { xml.text title }
          xml.container(:type => "Box") { xml.text line[4] }

          unless line[5].blank?
            xml.physdesc(:encodinganalog => "format") { xml.text line[5] }
          end
        end
      end
    end
  end
end
puts xdoc.to_xml
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment