Last active
August 29, 2015 14:01
-
-
Save jp/2d6e4c000d43a02b0088 to your computer and use it in GitHub Desktop.
morphline basic apache log conf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
morphlines : [ | |
{ | |
# Name used to identify a morphline. E.g. used if there are multiple | |
# morphlines in a morphline config file | |
id : morphline1 | |
# Import all morphline commands in these java packages and their | |
# subpackages. Other commands that may be present on the classpath are | |
# not visible to this morphline. | |
importCommands : ["com.cloudera.**", "org.apache.solr.**", "org.kitesdk.**"] | |
commands : [ | |
# Read the input attachment line by line, emitting one record per input line.
{ readLine { charset : UTF-8 } }
{
  grok {
    # A grok dictionary is a file of named, prefabricated regular expressions.
    # Expressions refer to them as %{REGEX_NAME:OUTPUT_FIELD_NAME}, where the
    # output field name is optional.
    # The grok-patterns dictionary used here can be downloaded from:
    # https://github.com/kite-sdk/kite/blob/master/kite-morphlines/kite-morphlines-core/src/test/resources/grok-dictionaries/grok-patterns
    dictionaryFiles : [grok-patterns]

    # The line to parse is read from the "message" field of the record.
    # Named captures below (clientip, ident, auth, timestamp, verb, request,
    # httpversion, rawrequest, response, bytes, referrer, agent) become
    # output fields.
    expressions : {
      message : """%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) %{QS:referrer} %{QS:agent}"""
    }
  }
}
# Consume the output record of the previous command and pipe another | |
# record downstream. | |
# | |
# Convert the Apache access-log timestamp (e.g. 06/Sep/2012:09:14:34 +0200)
# to the native Solr timestamp format (e.g. 2012-09-06T07:14:34.000Z).
{
  convertTimestamp {
    # Field rewritten in place; it holds the HTTPDATE captured by the grok command.
    field : timestamp

    # Apache HTTPDATE has a four-digit year, e.g. 29/Aug/2015:14:01:00 +0200.
    # Use "yyyy" explicitly rather than relying on the "yy" parser falling
    # back to a literal year when it sees more than two digits.
    inputFormats : ["dd/MMM/yyyy:HH:mm:ss Z"]

    # NOTE(review): the input pattern already carries an explicit UTC offset
    # ("Z"), so this zone presumably only matters for inputs without an
    # offset - confirm before relying on it.
    inputTimezone : Europe/Paris

    # Solr date format: ISO-8601 with milliseconds, always expressed in UTC.
    outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
    outputTimezone : UTC
  }
}
# Generate a UUID and store it in the record's id field
# The generated identifier is written to the "id" field of the record.
{ generateUUID { field : id } }
# Consume the output record of the previous command, transform it | |
# and pipe the record downstream. | |
# | |
# This command deletes record fields that are unknown to Solr | |
# schema.xml. Recall that Solr throws an exception on any attempt to | |
# load a document that contains a field that isn't specified in | |
# schema.xml. | |
{
  sanitizeUnknownSolrFields {
    # Where the Solr schema is fetched from.
    # NOTE(review): the same locator is repeated in the loadSolr command of
    # this file; keep the two in sync.
    solrLocator : {
      # Name of the Solr collection
      collection : collection1
      # ZooKeeper ensemble
      zkHost : "ip-10-140-162-135:2181/solr"
    }
  }
}
# log the record at INFO level to SLF4J | |
{
  logInfo {
    # "{}" in the format string is replaced by the corresponding entry of args.
    format : "output record: {}"
    args : ["@{}"]
  }
}
# load the record into a Solr server or MapReduce Reducer | |
{
  loadSolr {
    # Target Solr collection.
    # NOTE(review): the same locator is repeated in the
    # sanitizeUnknownSolrFields command of this file; keep the two in sync.
    solrLocator : {
      # Name of the Solr collection
      collection : collection1
      # ZooKeeper ensemble
      zkHost : "ip-10-140-162-135:2181/solr"
    }
  }
}
] | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment