Created
March 28, 2012 17:31
-
-
Save harlanbarnes/2228474 to your computer and use it in GitHub Desktop.
XML filter for Logstash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
input { | |
stdin { | |
debug => true | |
type => default | |
add_field => [ "xml", "%{@message}" ] | |
} | |
} | |
filter { | |
xml { | |
type => default | |
xml => data | |
xpath => [ | |
"local-name(/conf/*)", "msg_type", | |
"local-name(/BAD/*)", "bad_function", | |
"/conf/participant_request/*/@last", "part_lastname", | |
"/conf/participant_request/*/@email", "part_email", | |
"/conf/participant_request/*/@NOT_IN_THE_XML", "part_NOT_IN_THE_XML", | |
"count(/conf/participant_request/person)", "person_count", | |
"/conf/*/@id", "type_id", | |
"/conf/@id", "msg_id", | |
"/conf/*", "another_xml" | |
] | |
store_xml => false | |
add_field => [ "xml_type", "axis" ] | |
} | |
} | |
output { | |
stdout { debug => true type => default } | |
elasticsearch { type => default embedded => true } | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<conf id='123'><participant_request id='456'><person phone='15551234567' first="John" last="Doe" email="[email protected]" /></participant_request></conf> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<conf id='123'><participant_request id='456'><person phone='15551234567' first="John" last="Doe" email="[email protected]" /><person phone='15557654321' first="Jane" last="Doe" email="[email protected]" /></participant_request></conf> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<conf id="foo123"><dialout confid="9876"><contact phone="6789894785"/></dialout></conf> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "logstash/filters/base" | |
require "logstash/namespace" | |
require "xmlsimple" | |
require "rexml/document" | |
include REXML | |
# XML filter. Takes a field that contains XML and expands it into | |
# an actual datastructure. | |
class LogStash::Filters::Xml < LogStash::Filters::Base | |
config_name "xml" | |
plugin_status "experimental" | |
# Config for xml to hash is: | |
# | |
# source => dest | |
# | |
# XML in the value of the source field will be expanded into a | |
# datastructure in the "dest" field. Note: if the "dest" field | |
# already exists, it will be overridden. | |
config /[A-Za-z0-9_-]+/, :validate => :string | |
# xpath will additionally select string values (.to_s on whatever is selected) | |
# from parsed XML (using each source field defined using the method above) | |
# and place those values in the destination fields. Configuration: | |
# | |
# xpath => [ "xpath-syntax", "destination-field" ] | |
# | |
# Values returned by XPath parsring from xpath-synatx will be put in the | |
# destination field. Multiple values returned will be pushed onto the | |
# destination field as an array. As such, multiple matches across | |
# multiple source fields will produce duplicate entries in the field | |
# | |
# More on xpath: http://www.w3schools.com/xpath/ | |
# | |
# The xpath functions are particularly powerful: | |
# http://www.w3schools.com/xpath/xpath_functions.asp | |
# | |
config :xpath, :validate => :hash, :default => {} | |
# By default the filter will store the whole parsed xml in the destination | |
# field as described above. Setting this to false will prevent that. | |
config :store_xml, :validate => :boolean, :default => true | |
public | |
def register | |
@xml = {} | |
@config.each do |field, dest| | |
next if ( RESERVED + ["xpath","store_xml"] ).member?(field) | |
@xml[field] = dest | |
end | |
end # def register | |
public | |
def filter(event) | |
return unless filter?(event) | |
matched = false | |
@logger.debug("Running xml filter", :event => event) | |
@xml.each do |key, dest| | |
if event.fields[key] | |
if event.fields[key].is_a?(String) | |
event.fields[key] = [event.fields[key]] | |
end | |
if event.fields[key].length > 1 | |
@logger.warn("XML filter only works on fields of length 1", | |
:key => key, :value => event.fields[key]) | |
next | |
end | |
raw = event.fields[key].first | |
# for some reason, an empty string is considered valid XML | |
next if raw.strip.length == 0 | |
if @xpath | |
begin | |
doc = Document.new(raw) | |
rescue => e | |
event.tags << "_xmlparsefailure" | |
@logger.warn("Trouble parsing xml with REXML::Document", :key => key, :raw => raw, | |
:exception => e, :backtrace => e.backtrace) | |
next | |
end | |
@xpath.each do |xpath_src, xpath_dest| | |
XPath.each(doc, xpath_src).each do |value| | |
# some XPath functions return empty arrays as string | |
if value.is_a?(Array) | |
next if value.length == 0 | |
end | |
unless value.nil? | |
matched = true | |
event[xpath_dest] ||= [] | |
event[xpath_dest] << value.to_s | |
end | |
end | |
end | |
end | |
if @store_xml | |
begin | |
event[dest] = XmlSimple.xml_in(raw) | |
matched = true | |
rescue => e | |
event.tags << "_xmlparsefailure" | |
@logger.warn("Trouble parsing xml with XmlSimple", :key => key, :raw => raw, | |
:exception => e, :backtrace => e.backtrace) | |
next | |
end | |
end | |
filter_matched(event) if matched | |
end | |
end | |
@logger.debug("Event after xml filter", :event => event) | |
end # def filter | |
end # class LogStash::Filters::Xml |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
can anyone tell me that can i add jtl files for xml type input in logstash?