Skip to content

Instantly share code, notes, and snippets.

@zachpendleton
Created August 9, 2011 14:02
Show Gist options
  • Select an option

  • Save zachpendleton/1134120 to your computer and use it in GitHub Desktop.

Select an option

Save zachpendleton/1134120 to your computer and use it in GitHub Desktop.
# Turns XML documents into model objects using a registry of named
# "interfaces" (services).
#
# Each interface is registered with Parser.define_interface and is described
# by a component hash with these keys:
#
#   :identifier - XPath; a document is handled by the first registered
#                 interface whose identifier matches at least one node.
#   :finder     - XPath selecting the nodes that each become one model object.
#   :fields     - Hash of model attribute name => extractor, where the
#                 extractor is one of:
#                   nil        -> the attribute is left untouched
#                   callable   -> called with the node; its result is assigned
#                   "@name"    -> text of the node's attribute "name"
#                   other      -> text of node.xpath(extractor)
#
# Registering an interface also defines an instance method
# parse_<name>(document) on Parser.
class Parser
  # Model objects accumulated by every parse_<name> call on this instance.
  attr_accessor :results

  # Registry and result model are class variables, so they are shared by
  # Parser and any subclasses.
  @@default_services = {}
  @@result_model = nil

  # Parses +document+ with the first registered interface whose :identifier
  # XPath matches, appending the extracted models to #results.
  #
  # document - anything accepted by Nokogiri::XML(); namespaces are removed
  #            before matching.
  #
  # Returns nil; read the parsed objects from #results.
  # Raises RuntimeError ("Parser not found") when no interface matches.
  def parse(document)
    document = Nokogiri::XML(document).remove_namespaces!
    match = @@default_services.find do |_service, content|
      !document.xpath(content[:identifier]).empty?
    end
    raise "Parser not found" unless match

    send("parse_#{match.first}", document)
    nil
  end

  # Sets the class instantiated (with no arguments) for every parsed result.
  # The model must expose a writer for each non-nil key used in :fields.
  def self.define_model(model)
    @@result_model = model
  end

  # Registers +component+ under +name+ and defines parse_<name>(document).
  #
  # The generated method accepts either a Nokogiri::XML::Document or raw input
  # for Nokogiri::XML(), appends one model per :finder node to #results and
  # returns the total number of accumulated results.
  #
  # Raises RuntimeError when +name+ is already registered.
  def self.define_interface(name, component)
    raise "Interface name already used" unless @@default_services[name].nil?

    @@default_services[name] = component
    define_method("parse_#{name}".to_sym) do |document|
      unless document.is_a?(Nokogiri::XML::Document)
        document = Nokogiri::XML(document).remove_namespaces!
      end
      @results ||= []
      document.xpath(component[:finder]).each do |node|
        @results << build_result(node, component[:fields])
      end
      @results.length
    end
  end

  private

  # Builds one model instance from +node+ by applying every extractor in
  # +fields+; entries whose extractor is nil are skipped.
  def build_result(node, fields)
    if @@result_model.nil?
      raise "Result model not defined; call Parser.define_model first"
    end

    result = @@result_model.new
    fields.each do |key, path|
      next if path.nil?

      result.send("#{key}=".to_sym, extract_value(node, path))
    end
    result
  end

  # Resolves a single field extractor against +node+ (see the class comment
  # for the accepted extractor forms).
  def extract_value(node, path)
    return path.call(node) if path.respond_to?(:call)
    # \A anchors at the start of the string; ^ would also match after a newline.
    return node.xpath(path).text unless path.match(/\A@/)

    # A missing attribute yields "" so it behaves like a missing element
    # (whose node-set text is also "") instead of failing on nil.
    attribute = node.attribute(path.sub(/\A@/, ""))
    attribute ? attribute.text : ""
  end
end
# Example default services.
#
# Every entry maps a service name to the component hash that is handed to
# Parser.define_interface: :identifier and :finder are XPath strings and
# :fields maps result attributes to extractors (nil entries are placeholders
# that the parser skips).

# Builds the article body: the text of every lead paragraph followed by the
# text of every tail paragraph under the given result node.
factiva_content = lambda do |result|
  paragraph_paths = ["leadParagraph/paragraph", "tailParagraphs/paragraph"]
  sections = paragraph_paths.map do |search|
    result.xpath(search).map { |paragraph| paragraph.text }.join
  end
  sections.join
end

default_services = {
  :factiva => {
    :finder => "//result",
    :identifier => "/PPSResponse/ppsresultResponse",
    :fields => {
      :source => "sourceName",
      :title => "headline/paragraph",
      :content => factiva_content,
      :date => "publicationDate/date",
      :url => nil,
      :custom_id => "accessionNo",
      :language => nil
    }
  }
}

# Article is expected to be defined elsewhere and to expose a writer for each
# non-nil field above.
Parser.define_model(Article)

# Register each example service, which also defines Parser#parse_<name>.
default_services.each_pair { |name, component| Parser.define_interface(name, component) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment