Skip to content

Instantly share code, notes, and snippets.

Created June 6, 2013 19:14
Show Gist options
  • Save timconradinc/5724099 to your computer and use it in GitHub Desktop.
Save timconradinc/5724099 to your computer and use it in GitHub Desktop.
input {
  # This configuration handles both the info- and notice- log files that
  # Proofpoint writes.
  # Every event needs BOTH the type 'proofpoint_raw_type' and the tag
  # 'proofpoint_raw', because the filters in this file select on them.
  #
  # Alternative inputs for local testing (disabled):
  #file {
  #  type => "proofpoint_raw_type"
  #  tags => "proofpoint_raw"
  #  path => ["/home/conrad/testlog.log"]
  #}
  #stdin {
  #  type => "proofpoint_raw_type"
  #  tags => "proofpoint_raw"
  #}
  redis {
    # Read events from a list on the redis instance.
    # NOTE(review): this input declares type "RawLog" and no tags, while the
    # filters select on "proofpoint_raw_type" / "proofpoint_raw". Presumably
    # the shipper that pushes into redis already sets those on each event;
    # confirm.
    # NOTE(review): host is empty here; fill in the redis server address.
    type => "RawLog"
    host => ""
    db => 0
    data_type => "list"
    key => "logstash"
  }
}
filter {
# The order of the grok filters matters, at least for the first few.
# The general design: once a line matches a grok filter, the proofpoint_raw tag
# is removed so that no later proofpoint_raw filter processes that line again.
grok {
  # Classify the 'user unknown' messages from sendmail.
  # Example line:
  #   May 21 00:01:18 pp-serve01 sendmail[16212]: r4L41GBT016212: <[email protected]>... User unknown
  # The pattern keys on a '<' directly after the "<messageid>: " prefix and
  # captures everything up to the last '>' on the line into user_unknown.
  # On a match the event is tagged user_unknown and loses proofpoint_raw, so
  # later proofpoint_raw filters skip it.
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}<%{GREEDYDATA:user_unknown}>"
  add_tag => "user_unknown"
  tag_on_failure => "not_user_unknown"
  remove_tag => "proofpoint_raw"
}
grok {
  # Match Proofpoint filter-instance ("rprt") logs. In the notice logs this is
  # 99% of the data, so this filter should stay near the top.
  # Example line:
  #   May 21 00:00:00 pp-serve01 filter_instance1[5614]: rprt s=1cdv4jjmcx mod=session cmd=dispose module=access rule=netmlx action=reject value="550 5.7.0 Local Policy Violation - ${DnsblResult_netmlx}"
  # Everything after "rprt" is captured into tokvparse, and the event is tagged
  # tokvparse_pre so the key/value stages further down pick it up.
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}rprt%{SPACE}%{GREEDYDATA:tokvparse}"
  add_tag => "tokvparse_pre"
  add_tag => "instance_message"
  tag_on_failure => "not_instance_message"
  remove_tag => "proofpoint_raw"
}
grok {
  # Match the to=/from=/etc. sendmail messages, i.e. lines where the message id
  # is followed directly by "<word>=".
  # Example line:
  #   May 30 23:59:52 pp-serve03 sendmail[26858]: r4V3xpH3026858: from=<[email protected]>, size=1852, class=0, nrcpts=1, msgid=<[email protected]>, proto=ESMTP, daemon=MTA-v6, []
  # The word before '=' is captured as sendmail_type and used to build a
  # per-type tag (e.g. smtp_from). The smtp_kv_string tag hands the event to
  # the grok filter that selects on it.
  # The program name is captured as "logtype", the field name used by the
  # other labelled patterns in this file (it was previously misspelled
  # "logstype" here).
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}%{WORD:sendmail_type}="
  add_tag => "smtp_%{sendmail_type}"
  add_tag => "smtp_kv_string"
  tag_on_failure => "not_smtp_type"
  remove_tag => "proofpoint_raw"
}
grok {
  # Runs only on events tagged smtp_kv_string.
  # This pattern exists solely to find the string to pass to the kv filter:
  # everything after "<messageid>: " goes into tokvparse. The leading pieces
  # are deliberately left unlabelled; labelling them here would add duplicate
  # values to fields already captured for the event.
  # NOTE(review): no pattern setting was present in this stanza, so it could
  # never match. The pattern below is reconstructed from the comment above and
  # from the unlabelled Milter patterns in this file; verify it against real
  # log lines.
  tags => "smtp_kv_string"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}%{GREEDYDATA:tokvparse}"
  add_tag => "sendmail_message"
  add_tag => "sendmail_catchall_kv"
  add_tag => "tokvparse_pre"
  remove_tag => "smtp_kv_string"
  tag_on_failure => "not_sendmail_message"
}
grok {
  # Find the Milter-specific lines (message id followed by "Milter") and tag
  # them smtp_milter. The grok filters that select on smtp_milter then refine
  # the classification.
  # The program name is captured as "logtype", the field name used by the
  # other labelled patterns in this file (it was previously misspelled
  # "logstype" here).
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}Milter"
  add_tag => "smtp_milter"
  tag_on_failure => "not_smtp_milter"
  remove_tag => "proofpoint_raw"
}
grok {
  # Runs only on events tagged smtp_milter.
  # Matches the "Milter: connect:" message logged for remote relays. This is
  # also where the reject message lives for message bounces.
  # The leading pieces are unlabelled; only the text after "connect:" is
  # captured, into tokvparse, for key/value parsing.
  tags => "smtp_milter"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}Milter:%{SPACE}connect:%{SPACE}%{GREEDYDATA:tokvparse}"
  add_tag => "smtp_milter_connect"
  add_tag => "tokvparse_pre"
  tag_on_failure => "not_smtp_milter_connect"
  # Normally a no-op: the filter that adds smtp_milter already removes
  # proofpoint_raw.
  remove_tag => "proofpoint_raw"
}
grok {
  # Runs only on events tagged smtp_milter.
  # Matches "Milter add: header: <name>: <value>" lines, i.e. headers added by
  # the milter:
  #   X-Proofpoint-Virus-Version
  #   X-Proofpoint-Spam-Details
  #   ...probably others...
  # The header name is captured as added_header and also added as a tag; the
  # header value goes into tokvparse for key/value parsing.
  tags => "smtp_milter"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}Milter%{SPACE}add:%{SPACE}header:%{SPACE}%{DATA:added_header}:%{SPACE}%{GREEDYDATA:tokvparse}"
  add_tag => "smtp_milter_add_header"
  add_tag => "%{added_header}"
  add_tag => "tokvparse_pre"
  tag_on_failure => "not_smtp_milter_add_header"
  # Normally a no-op: the filter that adds smtp_milter already removes
  # proofpoint_raw.
  remove_tag => "proofpoint_raw"
}
grok {
  # Runs only on events tagged smtp_milter.
  # Matches "Milter insert (N): header: <name>: <value>" lines (the DKIM
  # header).
  # There seem to be a gazillion fields in this header, so the whole value is
  # stored in the single field dkim_header.
  # Keep in mind that '=' is valid in base64, so enabling kv parsing on the
  # dkim_header field would create a lot of weird fields. That is why this
  # stanza does not add the tokvparse_pre tag.
  tags => "smtp_milter"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME}%{SPACE}%{DATA}\[%{INT}\]:%{SPACE}%{WORD}:%{SPACE}Milter%{SPACE}insert%{SPACE}\(%{NUMBER}\):%{SPACE}header:%{SPACE}%{DATA:added_header}:%{SPACE}%{GREEDYDATA:dkim_header}"
  add_tag => "smtp_milter_add_header"
  add_tag => "%{added_header}"
  tag_on_failure => "not_smtp_milter_add_header_dkim"
  # Normally a no-op: the filter that adds smtp_milter already removes
  # proofpoint_raw.
  remove_tag => "proofpoint_raw"
}
grok {
  # Find the STARTTLS messages and tag them as such.
  # The word after "STARTTLS=" is captured as starttls; the remainder of the
  # line (after the comma) goes into tokvparse for key/value parsing.
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}STARTTLS=%{WORD:starttls},%{SPACE}%{GREEDYDATA:tokvparse}"
  add_tag => "smtp_queue_tls"
  add_tag => "smtp_queue"
  add_tag => "tokvparse_pre"
  tag_on_failure => "not_smtp_queue_tls"
  remove_tag => "proofpoint_raw"
}
grok {
  # Handle the various queued-reinject / queued-default / etc. messages.
  # The part of the program name after "queued-" is captured as qtype, and the
  # text after the message id goes into tokvparse for key/value parsing.
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}queued-%{WORD:qtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}%{GREEDYDATA:tokvparse}"
  add_tag => "smtp_queue"
  add_tag => "tokvparse_pre"
  tag_on_failure => "not_smtp_queue"
  remove_tag => "proofpoint_raw"
}
grok {
  # Catch-all #1. This should stay near the bottom of the grok filters.
  # It would be too easy if sendmail emitted a single log format.
  # This picks up the remaining sendmail logs of the form
  # "<messageid>: some message".
  # Example line:
  #   May 30 01:03:36 pp-serve03 sendmail[29138]: r4U53aMU029138: [] did not issue MAIL/EXPN/VRFY/ETRN during connection to MTA-v6
  # The free text after the message id is stored in sendmail_message. It is
  # not sent through key/value parsing, since tokvparse_pre is not added.
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{WORD:smtp_messageid}:%{SPACE}%{GREEDYDATA:sendmail_message}"
  add_tag => "sendmail_message"
  add_tag => "sendmail_catchall_1"
  # Yes, a distinct failure tag is a bit silly, since technically these are
  # all sendmail messages. But if you remove the mutate that strips the not_*
  # tags, you can see which specific filter failed.
  tag_on_failure => "not_sendmail_message2"
  remove_tag => "proofpoint_raw"
}
grok {
  # Catch-all #2. This should stay near the bottom of the grok filters.
  # It is meant to pick up sendmail logs that don't have a message id:
  # everything after "program[pid]: " is stored in sendmail_message.
  # Example line given by the original author:
  #   May 30 12:36:29 pp-serve03 sendmail[15949]: r4UGaR2h015949: lost input channel from [] to MTA-v6 after rcpt
  # NOTE(review): that example does contain a message id, so it looks like it
  # would already be consumed by the "<messageid>: message" catch-all before
  # reaching this filter; confirm with a real id-less line.
  tags => "proofpoint_raw"
  pattern => "%{SYSLOGTIMESTAMP}%{SPACE}%{HOSTNAME:hostname}%{SPACE}%{DATA:logtype}\[%{INT:process_pid}\]:%{SPACE}%{GREEDYDATA:sendmail_message}"
  add_tag => "sendmail_message"
  add_tag => "sendmail_catchall_2"
  tag_on_failure => "not_sendmail_message3"
  remove_tag => "proofpoint_raw"
}
splitter {
  # This is a custom filter (not part of stock Logstash).
  # The stat= and reject= values both contain '=' themselves, which ends up
  # adding awkward fields during kv parsing. This stanza cuts the tokvparse
  # field at "stat=" so that part is dropped before kv parsing.
  # NOTE(review): returnwhich => 0 presumably keeps the piece before the split
  # token; confirm against the splitter plugin source.
  tags => "tokvparse_pre"
  splitme => "tokvparse"
  spliton => "stat="
  returnwhich => 0
  add_tag => "tokvparse"
}
splitter {
  # Custom filter (not part of stock Logstash). Cuts the tokvparse field at
  # "reject=" for events tagged tokvparse_pre, because the reject= value
  # contains '=' itself and would create awkward fields during kv parsing.
  # NOTE(review): returnwhich => 0 presumably keeps the piece before the split
  # token; confirm against the splitter plugin source.
  tags => "tokvparse_pre"
  splitme => "tokvparse"
  spliton => "reject="
  returnwhich => 0
  add_tag => "tokvparse"
}
mutate {
  # Runs on events tagged tokvparse.
  # Remove newline sequences from tokvparse before kv parsing, since
  # Proofpoint seems to put the literal header messages in here.
  tags => "tokvparse"
  gsub => ["tokvparse", "\\n",""]
  # Alternative form, left disabled:
  # gsub => ["tokvparse", "\n",""]
}
kv {
  # Parse the tokvparse field into individual key/value fields.
  # Keys and values are separated by '='; the characters '<', '>' and ',' are
  # trimmed from the values.
  type => "proofpoint_raw_type"
  source => "tokvparse"
  value_split => "="
  trim => "<>,"
}
mutate {
  # The key/value part of the Proofpoint message has been parsed by now, so
  # drop the tokvparse field. It is no longer needed and removing it saves
  # space.
  type => "proofpoint_raw_type"
  remove => ["tokvparse"]
}
mutate {
  # Overwrite @source_host with the hostname captured by the grok patterns.
  # NOTE(review): this stanza has no type/tags selector, so it applies to
  # every event. For events where no grok set "hostname", the replacement is
  # presumably the literal text "%{hostname}"; confirm, and scope if needed.
  replace => [ "@source_host", "%{hostname}" ]
}
mutate {
  # At this point the only events still tagged proofpoint_raw are lines that
  # none of the grok filters matched. Flag them so they can be found and a
  # pattern added for them.
  tags => "proofpoint_raw"
  add_tag => "logstash_missed"
}
mutate {
  # Remove the not_* tags created by tag_on_failure in the grok filters, just
  # to keep things neat at this point. _grokparsefailure-style tags should
  # still bubble up because they are not listed here.
  type => "proofpoint_raw_type"
  remove_tag => ["not_user_unknown", "not_instance_message", "not_sendmail_message", "not_sendmail_message2", "not_sendmail_message3"]
  remove_tag => ["not_proofpoint_spam_details", "not_smtp_queue_tls", "not_smtp_type", "not_smtp_queue", "not_smtp_milter"]
  remove_tag => ["not_smtp_milter_connect", "not_smtp_milter_add_header", "not_smtp_milter_add_header_dkim"]
}
# This is the last filter stanza; the brace below closes the filter section.
}
output {
  # Debug output to the console (disabled):
  #stdout {
  #  debug => true
  #  debug_format => "json"
  #}
  elasticsearch {
    # Send events of type proofpoint_raw_type to a non-embedded elasticsearch,
    # into one index per day.
    # NOTE(review): host is empty here; fill in the elasticsearch address.
    type => "proofpoint_raw_type"
    embedded => false
    index => "logstash-proofpoint-%{+YYYY.MM.dd}"
    host => ""
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment