Logstash configuration example for munging logs generated by Infinite Campus
###########################################################################################################
# I have this file saved as iclogs.yml, but you could name the file anything you want. When you start
# up logstash you need to tell it where this file is using the -f flag on the command line, like:
#
#   logstash -f iclogs.yml
#
# There are still some bugs in the parsing that result in keys with no name, which it may be possible
# to clean up with additional gsub rules in the mutate filter.
###########################################################################################################
# Define what logstash will be looking for in terms of input
input {
  # Since we are throwing existing files at it we use the file input plugin, which ships with
  # standard Logstash distributions; if yours is missing it, install it from the command line using:
  # logstash-plugin install logstash-input-file
  file {
    # This tells logstash where to find the files and the type of files to look for
    path => "/Location/Where/logFiles/areStored/**/*.log"
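    # (the ** glob descends into subdirectories, so any .log file nested anywhere under
    #  /Location/Where/logFiles/areStored/ gets picked up)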
    # And where to start the processing from
    start_position => "beginning"
  } # End of the file configuration block
  # If we were handling the log data in real time we could instead include an IP address and
  # port that logstash would "listen" to for the data to stream in, as sketched below
} # End of the input block
# This is one of the more important sections since it essentially defines how the data will be parsed
filter {
  # The mutate filter is another bundled plugin; if missing, install it using:
  # logstash-plugin install logstash-filter-mutate
  mutate {
    # Make sure it is reading all the data as strings
    convert => { "message" => "string" }
  } # End of the first mutate block
  # The grok plugin defines a bunch of preformatted parsers (basically makes life easier)
  # If missing, install it using: logstash-plugin install logstash-filter-grok
  grok {
    # Defines one or more patterns that an input should match. "message" is the default
    # reference to the incoming data, and the "%{COMBINEDAPACHELOG}" pattern handles
    # separating the majority of the data into distinct fields for us
    match => ["message", "%{COMBINEDAPACHELOG}"]
  } # End of the grok block
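  # As a rough illustration (this sample line is made up, not from the real logs),
  # %{COMBINEDAPACHELOG} would take something like:
  #   10.0.0.1 - - [24/Sep/2020:12:29:00 -0500] "GET /campus/index.jsp HTTP/1.1" 200 2326 "https://campus/portal/?x.module=grades" "Mozilla/5.0"
  # and split it into fields such as clientip, timestamp, verb, request, response,
  # bytes, referrer, and agent.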
  # Next mutate block
  mutate {
    # Now we're going to use some regular expressions to clean up the referrer string
    # (i.e., the page making the request to the server). The URL-encoded space (%20)
    # makes for not-so-friendly fields/values, so every time it appears in the referrer
    # string it gets replaced with an underscore; then the pattern "x." is removed (the
    # period has to be escaped, which is why it is \. below); and finally "/?" is
    # collapsed to "/"
    gsub => [ "referrer", "(%20)", "_", "referrer", "x\.", "", "referrer", "/\?", "/" ]
  } # End second mutate block
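  # For example (hypothetical referrer, for illustration only), these substitutions
  # would turn:
  #   "https://campus/portal/?x.student%20name=doe"
  # into:
  #   "https://campus/portal/student_name=doe"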
  # The kv filter is another bundled plugin (logstash-plugin install logstash-filter-kv
  # if it is missing). This is used to parse the query fields into key-value pairs
  kv {
    # Any time a & or ? occurs in the URL string it will be treated as the start of a
    # new key-value pair
    field_split => "&?"
    # But we are only going to do this to the referrer string
    source => "referrer"
    # And will only return unique key-value pairs from an entry
    allow_duplicate_values => false
    # And will trim spaces and commas from the keys (this option is spelled trimkey in
    # some older releases of the plugin)
    trim_key => " ,"
    # And lowercase the keys
    transform_key => "lowercase"
    # And lowercase the values as well
    transform_value => "lowercase"
  } # End of kv block
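  # For example (again a made-up referrer), the settings above would turn a referrer of
  #   "https://campus/portal/index?first=Anna&first=Anna&Grade=10"
  # into the key-value pairs first => "anna" and grade => "10" (the duplicate pair is
  # dropped and the keys and values are lowercased).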
} # End of filter block
# This is the last block, where we tell logstash where to put the data
output {
  # The elasticsearch output is also a bundled plugin (install with
  # logstash-plugin install logstash-output-elasticsearch if it is missing).
  # This handles a few different things and tells logstash that it is going to be
  # pushing the data into an instance of elasticsearch (extremely fast search
  # engine/NoSQL database)
  elasticsearch {
    # Location of the elasticsearch server
    hosts => ["127.0.0.1:9200"]
    # An index in elasticsearch is a place where documents get stored; it isn't the same
    # as an index on an SQL table, but it allows different documents to have distinct
    # schemas and/or multiple types related to a single index. Leaving the date out of
    # the name puts all of the data into a single index, so queries will be easier to
    # write, but with a single node (instance) of elasticsearch and no sharding
    # (splitting the files across multiple instances) the searching will probably be
    # noticeably slower
    index => "iclogs"
    # A template is also needed to avoid the error that would otherwise be triggered by
    # the number of fields this process generates. This should be the path to the
    # template file below in this Gist
    template => "/Location/ofThe/Template/File/iclogtemplate.yml"
    # As an alternative we could also create the indices based on the date of the log
    # event (taken from its @timestamp field):
    # index => "logstash-%{+YYYY.MM.dd}"
  } # End of elasticsearch block
} # End of output block
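# Once logstash has been running for a bit you can sanity-check that documents are
# actually landing in the index, e.g. (assuming the host/port configured above):
#   curl 'http://127.0.0.1:9200/iclogs/_count?pretty'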
iclogtemplate.yml
# Creates an index template for IC log files
# (Note: strict JSON does not allow comments, so if Logstash/Elasticsearch rejects this
# file, the # lines will need to be stripped out first.)
{
  # Defines the index (pattern) this template applies to
  "template": "iclogs",
  # Now provide some settings for the index
  "settings" : {
    # Number of shards to create from the data
    "number_of_shards" : 5,
    # Number of replicas to create (0, since this is a single-node setup)
    "number_of_replicas": 0,
    # Raise the maximum number of fields allowed in the index, since the kv parsing
    # generates a very large number of distinct keys
    "index.mapping.total_fields.limit": 10000
  } # End of settings block
} # End of iclogs index template
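If you would rather load the template into Elasticsearch by hand instead of letting the
elasticsearch output push it, something along these lines should work against the legacy
template API (after stripping the comments so the body is valid JSON; the file name is
just the one referenced in the config above):

  curl -XPUT 'http://127.0.0.1:9200/_template/iclogs' \
       -H 'Content-Type: application/json' \
       -d @iclogtemplate.yml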
Here is an example of part of a query string that causes the no-name values to appear:

Need to include a regular expression that tests for values on both sides of the = sign
when splitting into key-value pairs. Also need to learn the mapping used by the
application to consolidate the number of fields/object references into a more
parsimonious and meaningful set.
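One possible (untested) way to sketch that: drop malformed pairs from the referrer with
an extra gsub pass before the kv filter runs, e.g.

  mutate {
    # strip pairs with an empty key, like "&=value" or "?=value" (hypothetical rule,
    # not part of the original config)
    gsub => [ "referrer", "[&?]=[^&]*", "" ]
  }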