Created
October 28, 2011 22:58
-
-
Save tenderlove/1323792 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### | |
# Read in XML as a stream, write out JSON as a stream. As little information | |
# is kept in memory as possible. | |
require 'nokogiri' | |
require 'psych' | |
###
# SAX document handler that turns XML parse events into emitter events, so
# output is produced while the XML is still being read.
#
# Every element is written as a single-key object:
#
#   { "name": { "attributes": { ... }, "children": [ ... ] } }
#
# The "attributes" key appears only when the element has attributes. Text
# and CDATA content become string entries of the enclosing "children" array.
class JSONTranslator < Nokogiri::XML::SAX::Document
  # The event sink handed to the constructor.
  attr_reader :emitter

  # +emitter+ receives the stream, document, mapping, sequence and scalar
  # events generated by the callbacks below.
  def initialize emitter
    @emitter = emitter
    super()
  end

  # Opens the output stream and the single document it contains.
  def start_document
    emitter.start_stream Psych::Nodes::Stream::UTF8
    emitter.start_document [], [], true
  end

  # Closes the document and then the stream, mirroring start_document.
  def end_document
    emitter.end_document
    emitter.end_stream
  end

  # Opens the object for element +name+, writes its attributes (if any) and
  # leaves the "children" array open until end_element_namespace is called.
  # +prefix+, +uri+ and +ns+ are accepted but not used.
  def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
    start_map
    scalar name
    start_map

    unless attrs.empty?
      scalar 'attributes'
      mapping do
        attrs.each do |attr|
          scalar attr.localname
          scalar attr.value
        end
      end
    end

    scalar 'children'
    start_sequence
  end

  # Emits a run of text as one string inside the open "children" array.
  def characters string
    scalar string
  end

  # CDATA sections are character data as well; without this override the
  # inherited no-op callback would silently drop their content.
  def cdata_block string
    scalar string
  end

  # Closes, in order, the "children" array, the element body object and the
  # single-key wrapper object opened by start_element_namespace.
  def end_element_namespace name, prefix = nil, uri = nil
    end_sequence
    end_map
    end_map
  end

  private

  # Emits +string+ as a double-quoted scalar. JSON strings must be
  # double-quoted with escapes; asking for PLAIN here lets the emitter fall
  # back to single quotes or raw line breaks, which is not valid JSON.
  def scalar string
    emitter.scalar string, nil, nil, false, true, Psych::Nodes::Scalar::DOUBLE_QUOTED
  end

  # NOTE(review): BLOCK is the same value (1) the code always passed; the
  # JSON emitter presumably substitutes its own flow style - confirm.
  def start_map
    emitter.start_mapping nil, nil, false, Psych::Nodes::Mapping::BLOCK
  end

  def end_map
    emitter.end_mapping
  end

  # NOTE(review): same style remark as start_map applies here.
  def start_sequence
    emitter.start_sequence nil, nil, false, Psych::Nodes::Sequence::BLOCK
  end

  def end_sequence
    emitter.end_sequence
  end

  # Wraps whatever the block emits in a mapping.
  def mapping
    start_map
    yield
    end_map
  end
end
###
# Wire the pipeline together: the SAX parser calls XML events on the
# translator, and the translator calls JSON events on the emitter, which
# writes to $stdout.
json_emitter = Psych::JSON::Stream::Emitter.new($stdout)
sax_handler  = JSONTranslator.new(json_emitter)
sax_parser   = Nokogiri::XML::SAX::Parser.new(sax_handler)

# The XML input is the file named by the first command-line argument.
File.open(ARGV[0], 'rb') { |xml_io| sax_parser.parse(xml_io) }
__END__ | |
<one two="three"> | |
four | |
<ul> | |
<li> | |
hi | |
<div>lol</div> | |
mom! | |
</li> | |
<li>hello world</li> | |
</ul> | |
</one> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment