require 'rexml/document'

# Namespace for an XML loader that builds a REXML document while tagging the
# nodes it builds with line information reported by the parser's source.
module XML
  # Parses +file+ into a fresh REXML::Document using LineNoTreeParser.
  #
  # @param file the XML source; it is handed unchanged to
  #   LineNoTreeParser.new (i.e. the REXML::Parsers::TreeParser constructor)
  # @return [REXML::Document] the populated document
  # @raise [REXML::ParseException] when parsing fails (see LineNoTreeParser#parse)
  def self.parse_xml(file)
    doc = REXML::Document.new
    LineNoTreeParser.new(file, doc).parse
    doc
  end

  # Mixin that LineNoTreeParser extends onto individual nodes so they can
  # carry the line information captured during parsing.
  module LineNo
    attr_accessor :start_line, :end_line
  end

  # Tree parser with the same event handling as its REXML superclass, plus a
  # step that records line information on the current build context after
  # every event.
  class LineNoTreeParser < REXML::Parsers::TreeParser
    # Pulls events from the underlying parser until :end_document and builds
    # the tree under the build context given to the constructor.
    #
    # After each event other than :end_document, the node that is the build
    # context at that moment is extended with LineNo and receives the line
    # information sampled just before the event was pulled.
    #
    # NOTE(review): because this happens on every event, a node's
    # start_line/end_line are overwritten by each later event handled while
    # that node is the build context. Confirm this is the intended meaning.
    #
    # @return [nil]
    # @raise [REXML::Validation::ValidationException] re-raised untouched
    # @raise [REXML::UndefinedNamespaceException] re-raised untouched
    # @raise [REXML::ParseException] wraps any other StandardError
    def parse
      tag_stack = []
      in_doctype = false
      entities = nil
      begin
        while true
          # Sample the position BEFORE pulling, so it belongs to the event
          # that is about to be read.
          line = @parser.source.current_line

          event = @parser.pull
          case event[0]
          when :end_document
            unless tag_stack.empty?
              raise REXML::ParseException.new("No close tag for #{@build_context.xpath}")
            end
            return
          when :start_element
            tag_stack.push(event[1])
            el = @build_context = @build_context.add_element(event[1])
            event[2].each do |key, value|
              el.attributes[key] = REXML::Attribute.new(key, value, self)
            end
          when :end_element
            tag_stack.pop
            @build_context = @build_context.parent
          when :text
            unless in_doctype
              last_child = @build_context[-1]
              if last_child.instance_of?(REXML::Text)
                # Merge consecutive text events into one Text node.
                last_child << event[1]
              else
                skip_blank = @build_context.ignore_whitespace_nodes &&
                             event[1].strip.size == 0
                unless skip_blank
                  @build_context.add(
                    REXML::Text.new(event[1], @build_context.whitespace, nil, true)
                  )
                end
              end
            end
          when :comment
            @build_context.add(REXML::Comment.new(event[1]))
          when :cdata
            @build_context.add(REXML::CData.new(event[1]))
          when :processing_instruction
            @build_context.add(REXML::Instruction.new(event[1], event[2]))
          when :end_doctype
            in_doctype = false
            entities.each_key { |k| entities[k] = @build_context.entities[k].value }
            @build_context = @build_context.parent
          when :start_doctype
            doctype = REXML::DocType.new(event[1..-1], @build_context)
            @build_context = doctype
            entities = {}
            in_doctype = true
          when :attlistdecl
            @build_context.add(REXML::AttlistDecl.new(event[1..-1]))
          when :externalentity
            @build_context.add(REXML::ExternalEntity.new(event[1]))
          when :elementdecl
            @build_context.add(REXML::ElementDecl.new(event[1]))
          when :entitydecl
            entities[event[1]] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
            @build_context.add(REXML::Entity.new(event))
          when :notationdecl
            @build_context.add(REXML::NotationDecl.new(*event[1..-1]))
          when :xmldecl
            @build_context.add(REXML::XMLDecl.new(event[1], event[2], event[3]))
          end

          record_line_info(@build_context, line)
        end
      rescue REXML::Validation::ValidationException
        raise
      rescue REXML::UndefinedNamespaceException
        raise
      rescue
        raise REXML::ParseException.new($!.message, @parser.source, @parser, $!)
      end
    end

    private

    # Extends +node+ with LineNo and stores the sampled line information.
    #
    # When +line+ is an Array, elements 1 and 2 become start_line and
    # end_line (presumably the lineno/line entries of the triple that REXML's
    # IO-backed source reports -- confirm against the installed rexml).
    # Any other value (an Integer or nil, as non-IO sources may report) is
    # stored as-is in both attributes; indexing it would either read single
    # bits of an Integer or raise NoMethodError on nil.
    def record_line_info(node, line)
      node.extend(LineNo)
      first, last = line.is_a?(Array) ? line.values_at(1, 2) : [line, line]
      node.start_line = first
      node.end_line = last
    end
  end
end