Created
September 16, 2011 14:44
-
-
Save agmcleod/1222265 to your computer and use it in GitHub Desktop.
Parses an xml file with the given Object node name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# If you have an XML representation of a data entity and need to convert it to CSV, this script works pretty well.
# Specify the file name of an XML file and the node name of the object. For example, if you have an XML file of <Person> objects whose fields are all stored in child elements,
# specify the Person node and the script will iterate through the objects to create a CSV. Each person will be a separate row.
require 'rubygems' | |
require 'nokogiri' | |
# Ask for the XML file to convert and parse it into a Nokogiri document.
puts 'type xml file name'
xml_name = gets.chomp
# The block form of File.open closes the handle even when parsing raises,
# so the file descriptor is never leaked.
doc = File.open(xml_name) { |xml_file| Nokogiri::XML(xml_file) }
# Makes a value safe to use as a single CSV field.
#
# The value is wrapped in double quotes when it contains a comma, a double
# quote, or a line break. Embedded double quotes are doubled so that a CSV
# reader recovers the original text. Any other value is returned unchanged.
#
# @param str [String] the raw field value
# @return [String] the field, quoted and escaped when necessary
def enclose(str)
  return str unless str =~ /[",\r\n]/

  # Double every embedded quote before wrapping the whole field in quotes.
  "\"#{str.gsub('"', '""')}\""
end
# Cleans a raw value with `strip` and then hands the cleaned value to
# `enclose`, returning whatever `enclose` produces.
#
# @param str [String] the raw field value
# @return [String] the result of `enclose` applied to the stripped value
def enclose_and_strip(str)
  cleaned = strip(str)
  enclose(cleaned)
end
# Removes every newline and tab character from the value, then trims
# leading and trailing whitespace from what is left.
#
# @param str [String] the raw text
# @return [String] a new string without newlines, tabs, or outer whitespace
def strip(str)
  without_breaks = str.delete("\n\t")
  without_breaks.strip
end
# Ensures a record node has a child for every known header.
#
# Collects the names of the node's current children (ignoring children named
# "text") and, for each header that has no matching child, appends an empty
# element with that name via `add_child`.
#
# @param headers [Array<String>] every column name that should be present
# @param location [#children, #add_child] the record node to complete
# @return [Object] the same node that was passed in, possibly with children added
def populate_missing(headers, location)
  present = location.children.map(&:name).reject { |name| name == 'text' }
  # Array difference ignores ordering, so no sort or equality check is needed;
  # the difference is simply empty when nothing is missing.
  (headers - present).each { |missing| location.add_child("<#{missing} />") }
  location
end
# Column names, in the order they are first encountered, and the finished
# CSV lines (one per record).
headers = []
rows = []

puts "Enter in node name to traverse. If it's an xml table of <Person> objects, type \"Person\" (without quotes)"
node_to_traverse = gets.chomp

# Select the record nodes once; both passes below walk the same set.
records = doc.css(node_to_traverse)

# First pass: gather the distinct child names of all records as headers,
# skipping children named "text".
records.each do |record|
  record.children.each do |field|
    headers << field.name if field.name != 'text' && !headers.include?(field.name)
  end
end

# Second pass: build one CSV line per record, placing each child's value in
# the column that matches its name.
records.each do |record|
  fields = Array.new(headers.size)
  record = populate_missing(headers, record)
  record.children.each do |field|
    idx = headers.index(field.name)
    # Children named "text" are never registered as headers, so they have no
    # column. Skip them instead of indexing the row with nil, which raises.
    next if idx.nil?

    if field.element?
      # Join the non-empty text of each child node with '|'.
      parts = field.children.map { |child| strip(child.text) }.reject(&:empty?)
      fields[idx] = enclose(parts.join('|'))
    else
      value = enclose_and_strip(field.text)
      fields[idx] = value unless value.empty?
    end
  end
  rows << fields.join(',')
end

# Write the header line followed by every record line next to the input file.
File.open("#{xml_name}.csv", 'w+') do |file|
  file.write("#{headers.join(',')}\n")
  rows.each { |row| file.write("#{row}\n") }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment