Created
June 6, 2013 19:14
-
-
Save timconradinc/5724099 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
input {
  # Source of the Proofpoint info- and notice- log events.
  #
  # For the filters in this file to work properly, events need both the type
  # 'proofpoint_raw_type' and the tag 'proofpoint_raw'.
  #
  # Alternative inputs for local testing, left disabled:
  #file {
  #  type => "proofpoint_raw_type"
  #  tags => "proofpoint_raw"
  #  path => ["/home/conrad/testlog.log"]
  #}
  #stdin {
  #  type => "proofpoint_raw_type"
  #  tags => "proofpoint_raw"
  #}

  # Read events from the "logstash" list on the redis instance.
  # NOTE(review): this stanza sets type "RawLog" and no tags; presumably the
  # events already carry proofpoint_raw_type / proofpoint_raw from the
  # shipper -- confirm.
  redis {
    host      => "10.99.99.66"
    db        => 0
    data_type => "list"
    key       => "logstash"
    type      => "RawLog"
  }
}
filter { | |
# | |
# There is some semblance of order to this, at least for the first few grok filters.
# | |
# The general design is once a line matches, the proofpoint_raw tag is removed so | |
# no more processing will happen on that particular line. | |
# | |
grok {
  # Sendmail "User unknown" messages, for example:
  #   May 21 00:01:18 pp-serve01 sendmail[16212]: r4L41GBT016212: <user@example.com>... User unknown
  # On a match the event is tagged user_unknown and loses proofpoint_raw, so the
  # later proofpoint_raw groks skip it; on a miss it is tagged not_user_unknown.
  # NOTE(review): the pattern ends at the closing '>' and never checks for the
  # literal "User unknown" text -- confirm that is intended.
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}<%{GREEDYDATA:user_unknown}>"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["user_unknown"]
  tag_on_failure => ["not_user_unknown"]
}
grok {
  # Proofpoint filter-instance ("rprt") logs. In the notice logs this is about
  # 99% of the data, so this filter should stay near the top. Example:
  #   May 21 00:00:00 pp-serve01 filter_instance1[5614]: rprt s=1cdv4jjmcx mod=session cmd=dispose module=access rule=netmlx action=reject value="550 5.7.0 Local Policy Violation - ${DnsblResult_netmlx}"
  # Everything after "rprt" is captured into tokvparse for the kv filter.
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}rprt%{SPACE}%{GREEDYDATA:tokvparse}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["tokvparse_pre", "instance_message"]
  tag_on_failure => ["not_instance_message"]
}
grok {
  # Sendmail to=/from=/etc. messages, for example:
  #   May 30 23:59:52 pp-serve03 sendmail[26858]: r4V3xpH3026858: from=<user@example.com>, size=1852, class=0, nrcpts=1, msgid=<id@example.com>, proto=ESMTP, daemon=MTA-v6, relay=mail.example.net [11.22.33.44]
  # The word before the first '=' becomes sendmail_type and is also turned into
  # a tag (smtp_from, smtp_to, ...). The smtp_kv_string tag hands the event to
  # the following grok, which extracts the key/value string.
  # The program name is captured as "logtype", the same field name the other
  # grok filters in this file use (this stanza previously spelled it "logstype").
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}%{WORD:sendmail_type}="
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["smtp_%{sendmail_type}", "smtp_kv_string"]
  tag_on_failure => ["not_smtp_type"]
}
grok {
  # Second pass over events tagged smtp_kv_string. This pattern exists solely to
  # find the string to pass to the kv filter. The leading captures are left
  # unnamed on purpose: naming them would add duplicate values.
  tags           => ["smtp_kv_string"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}%{GREEDYDATA:tokvparse}"
  remove_tag     => ["smtp_kv_string"]
  add_tag        => ["sendmail_message", "sendmail_catchall_kv", "tokvparse_pre"]
  tag_on_failure => ["not_sendmail_message"]
}
grok {
  # Finds the milter-specific sendmail lines and tags them smtp_milter so the
  # milter sub-filters below can pick them apart.
  # The program name is captured as "logtype", the same field name the other
  # grok filters in this file use (this stanza previously spelled it "logstype").
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}Milter"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["smtp_milter"]
  tag_on_failure => ["not_smtp_milter"]
}
grok {
  # "Milter: connect:" lines -- the connect message from remote relays. This is
  # also where the reject message lives for message bounces. The remainder of
  # the line goes to tokvparse for key/value parsing.
  tags           => ["smtp_milter"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}Milter:%{SPACE}connect:%{SPACE}%{GREEDYDATA:tokvparse}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["smtp_milter_connect", "tokvparse_pre"]
  tag_on_failure => ["not_smtp_milter_connect"]
}
grok {
  # "Milter add: header:" lines, i.e. headers added by the milter:
  #   X-Proofpoint-Virus-Version
  #   X-Proofpoint-Spam-Details
  #   ...probably others...
  # The header name is captured as added_header and also added as a tag; the
  # header value goes to tokvparse for key/value parsing.
  tags           => ["smtp_milter"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}Milter%{SPACE}add:%{SPACE}header:%{SPACE}%{DATA:added_header}:%{SPACE}%{GREEDYDATA:tokvparse}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["smtp_milter_add_header", "%{added_header}", "tokvparse_pre"]
  tag_on_failure => ["not_smtp_milter_add_header"]
}
grok {
  # "Milter insert (N): header:" lines -- the DKIM header.
  # The header has a great many fields, so the whole value is stored in the
  # single field dkim_header. It is deliberately NOT sent to kv parsing: '=' is
  # valid in base64, so kv parsing would create a lot of weird fields.
  tags           => ["smtp_milter"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}Milter%{SPACE}insert%{SPACE}\(%{NUMBER}\):%{SPACE}header:%{SPACE}%{DATA:added_header}:%{SPACE}%{GREEDYDATA:dkim_header}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["smtp_milter_add_header", "%{added_header}"]
  tag_on_failure => ["not_smtp_milter_add_header_dkim"]
}
grok {
  # Finds the STARTTLS messages and tags them as such. The word after
  # "STARTTLS=" is stored in starttls; the rest of the line goes to tokvparse.
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}STARTTLS=%{WORD:starttls},%{SPACE}%{GREEDYDATA:tokvparse}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["smtp_queue_tls", "smtp_queue", "tokvparse_pre"]
  tag_on_failure => ["not_smtp_queue_tls"]
}
grok {
  # The various queued-reinject / queued-default / etc. messages. The suffix
  # after "queued-" is stored in qtype; the message body goes to tokvparse.
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}queued-%{WORD:qtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}%{GREEDYDATA:tokvparse}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["smtp_queue", "tokvparse_pre"]
  tag_on_failure => ["not_smtp_queue"]
}
grok {
  # SENDMAIL CATCHALL #1 -- keep this near the bottom of the groks.
  # It would be too easy if sendmail emitted a single log format. This one picks
  # up sendmail logs of the form "<messageid>: some message", like this:
  #   May 30 01:03:36 pp-serve03 sendmail[29138]: r4U53aMU029138: nagios.example.org [1.2.3.4] did not issue MAIL/EXPN/VRFY/ETRN during connection to MTA-v6
  # The failure tag is numbered (a bit silly, since technically they are all
  # sendmail messages) so that, if the final cleanup mutate is removed, you can
  # see which specific filter missed.
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}%{GREEDYDATA:sendmail_message}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["sendmail_message", "sendmail_catchall_1"]
  tag_on_failure => ["not_sendmail_message2"]
}
grok {
  # SENDMAIL CATCHALL #2 -- keep this near the bottom of the groks.
  # This should pick up sendmail logs that don't have a messageid. Sample given
  # by the original author:
  #   May 30 12:36:29 pp-serve03 sendmail[15949]: r4UGaR2h015949: lost input channel from [22.22.22.22] to MTA-v6 after rcpt
  # NOTE(review): that sample does contain a queue id, so it looks like it would
  # already be caught by catchall #1 -- confirm with a line that truly lacks one.
  tags           => ["proofpoint_raw"]
  pattern        => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{GREEDYDATA:sendmail_message}"
  remove_tag     => ["proofpoint_raw"]
  add_tag        => ["sendmail_message", "sendmail_catchall_2"]
  tag_on_failure => ["not_sendmail_message3"]
}
splitter {
  # Custom (non-stock) filter.
  # The stat= and reject= fields both contain '=' in their values, which ends up
  # adding awkward fields during kv parsing. This simply removes those parts
  # from tokvparse before the kv filter runs.
  # NOTE(review): presumably returnwhich => 0 keeps the piece before the
  # "stat=" separator -- verify against the splitter plugin source.
  tags        => ["tokvparse_pre"]
  splitme     => "tokvparse"
  spliton     => "stat="
  returnwhich => 0
  add_tag     => ["tokvparse"]
}
splitter {
  # Custom (non-stock) filter: same treatment as the "stat=" splitter, this time
  # for "reject=", whose value also contains '=' characters.
  # NOTE(review): presumably returnwhich => 0 keeps the piece before the
  # separator -- verify against the splitter plugin source.
  tags        => ["tokvparse_pre"]
  splitme     => "tokvparse"
  spliton     => "reject="
  returnwhich => 0
  add_tag     => ["tokvparse"]
}
mutate {
  # Remove newlines from tokvparse, since Proofpoint seems to like putting the
  # literal header messages in there.
  # NOTE(review): "\\n" vs "\n" decides whether a backslash-n text sequence or a
  # real newline character is stripped; the alternative is kept below -- confirm
  # which one the data needs.
  tags => ["tokvparse"]
  gsub => [ "tokvparse", "\\n", "" ]
  # gsub => ["tokvparse", "\n",""]
}
kv {
  # Break the tokvparse field into individual key/value fields. Pairs are
  # written key=value; the characters '<', '>' and ',' are trimmed.
  type        => "proofpoint_raw_type"
  source      => "tokvparse"
  trim        => "<>,"
  value_split => "="
}
mutate {
  # The key/value bits of the Proofpoint message have been parsed by now, so the
  # raw tokvparse field is no longer needed; drop it to save space.
  remove => [ "tokvparse" ]
  type   => "proofpoint_raw_type"
}
mutate {
  # Overwrite @source_host with the hostname field captured by the grok filters.
  # NOTE(review): there is no type/tags restriction here, so this runs on every
  # event; an event with no hostname field may end up with the unexpanded
  # "%{hostname}" text -- confirm.
  replace => ["@source_host", "%{hostname}"]
}
mutate {
  # At this point the only events still tagged proofpoint_raw should be logs
  # that none of the filters above matched; flag them as logstash_missed.
  add_tag => ["logstash_missed"]
  tags    => ["proofpoint_raw"]
}
mutate {
  # Strip the bookkeeping not_* tags created above, just to keep things neat at
  # this point. _grokparsefailure tags should still bubble up.
  # NOTE(review): not_proofpoint_spam_details is not added anywhere in this
  # file; kept in the list as in the original.
  type       => "proofpoint_raw_type"
  remove_tag => [
    "not_user_unknown", "not_instance_message", "not_sendmail_message", "not_sendmail_message2", "not_sendmail_message3",
    "not_proofpoint_spam_details", "not_smtp_queue_tls", "not_smtp_type", "not_smtp_queue", "not_smtp_milter",
    "not_smtp_milter_connect", "not_smtp_milter_add_header", "not_smtp_milter_add_header_dkim"
  ]
}
} | |
output {
  # Console debugging output, left disabled:
  #stdout {
  #  debug => true
  #  debug_format => "json"
  #}

  # Send events of type proofpoint_raw_type to Elasticsearch at the given host
  # (embedded instance disabled), using a date-stamped index name.
  elasticsearch {
    host     => "10.55.11.55"
    index    => "logstash-proofpoint-%{+YYYY.MM.dd}"
    embedded => false
    type     => "proofpoint_raw_type"
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment