Enhanced NGINX logstash parser to include upstream response time and request length fields
Enhanced NGINX logstash parser:

NGINX log format:

log_format enhanced '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent $request_length "$http_referer" "$http_user_agent" $request_time $upstream_response_time';
access_log /var/log/nginx/access.log enhanced;
error_log /var/log/nginx/error.log;
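For reference, a request logged with this format looks like the following line (taken verbatim from the indexed document shown at the end); the fields appear in the same order as in the log_format directive:

123.123.123.123 - - [02/Jul/2015:10:59:20 +0200] "POST /kibana/elasticsearch/_mget?timeout=0&ignore_unavailable=true&preference=1435827494093 HTTP/1.1" 200 161 912 "https://confidential.host.name/kibana/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.30 Safari/537.36" 0.003 0.003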
logstash pattern (/opt/logstash/pattern/nginx):

NGUSERNAME [a-zA-Z\.\@\-\+_%]+
NGUSER %{NGUSERNAME}
NGINXACCESS %{IPORHOST:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:timestamp_nginx_access}\] "%{WORD:verb} %{URIPATHPARAM:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} (?:%{NUMBER:bytes_sent}|-) %{NUMBER:bytes_received} (?:"(?:%{URI:referrer}|-)"|%{QS:referrer}) %{QS:agent} %{NUMBER:request_time} (?:%{NUMBER:upstream_time:float}|-)
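To sanity-check the NGINXACCESS pattern before wiring it into the main pipeline, a throwaway configuration along these lines can help (a sketch only; it assumes the pattern file above is readable under /opt/logstash/pattern and that sample log lines are pasted on stdin):

input { stdin { } }

filter {
  grok {
    # directory that holds the custom NGINXACCESS pattern file
    patterns_dir => ["/opt/logstash/pattern"]
    match => { "message" => "%{NGINXACCESS}" }
  }
}

output { stdout { codec => rubydebug } }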
logstash configuration:

output {
  elasticsearch {
    host => "localhost"
    port => 9200
    protocol => http
    template => "/etc/logstash/elasticsearch-template.json"
    template_overwrite => true
  }
}

filter {
  if [type] == "nginx_access" {
    grok {
      match => { "message" => "%{NGINXACCESS}" }
    }
    geoip {
      source => "client_ip"
      target => "geo_ip"
      database => "/etc/logstash/GeoLiteCity.dat"
      add_field => [ "[geo_ip][coordinates]", "%{[geo_ip][longitude]}" ]
      add_field => [ "[geo_ip][coordinates]", "%{[geo_ip][latitude]}" ]
    }
    mutate {
      convert => [ "[geo_ip][coordinates]", "float" ]
      convert => [ "bytes_received", "integer" ]
      convert => [ "bytes_sent", "integer" ]
      convert => [ "upstream_time", "float" ]
      convert => [ "request_time", "float" ]
    }
    date {
      match => [ "timestamp_nginx_access", "dd/MMM/yyyy:HH:mm:ss Z" ]
    }
  }
}
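The input side of the pipeline is not shown in the gist. Judging by the file, type and offset fields in the indexed document below, the events arrive already tagged with type nginx_access; a minimal sketch of a plain file input that would produce the same tagging (an assumption on my part, the original setup may have used a log shipper instead) could be:

input {
  file {
    # read the access log written by the enhanced log_format above
    path => "/var/log/nginx/access.log"
    # set the type so the "nginx_access" conditional in the filter applies
    type => "nginx_access"
  }
}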
elasticsearch schema (/etc/logstash/elasticsearch-template.json):
{
  "template" : "logstash-*",
  "settings" : {
    "index.refresh_interval" : "5s"
  },
  "mappings" : {
    "_default_" : {
      "_all" : { "enabled" : true },
      "dynamic_templates" : [ {
        "string_fields" : {
          "match" : "*",
          "match_mapping_type" : "string",
          "mapping" : {
            "type" : "string", "index" : "analyzed", "omit_norms" : true,
            "fields" : {
              "raw" : { "type" : "string", "index" : "not_analyzed", "ignore_above" : 256 }
            }
          }
        }
      } ],
      "properties" : {
        "@version" : { "type" : "string", "index" : "not_analyzed" },
        "client_ip" : { "type" : "ip" },
        "geo_ip" : {
          "type" : "object",
          "dynamic" : true,
          "path" : "full",
          "properties" : {
            "location" : { "type" : "geo_point" }
          }
        }
      }
    }
  }
}
Result (JSON):

{
  "_index": "logstash-2015.07.02",
  "_type": "nginx_access",
  "_id": "AU5N_enTNNthUvRmEi_U",
  "_score": 1,
  "_source": {
    "message": "123.123.123.123 - - [02/Jul/2015:10:59:20 +0200] \"POST /kibana/elasticsearch/_mget?timeout=0&ignore_unavailable=true&preference=1435827494093 HTTP/1.1\" 200 161 912 \"https://confidential.host.name/kibana/\" \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.30 Safari/537.36\" 0.003 0.003",
    "@version": "1",
    "@timestamp": "2015-07-02T08:59:20.000Z",
    "type": "nginx_access",
    "file": "/var/log/nginx/access.log",
    "host": "confidential",
    "offset": "2719123",
    "client_ip": "123.123.123.123",
    "ident": "-",
    "auth": "-",
    "timestamp": "02/Jul/2015:10:59:20 +0200",
    "verb": "POST",
    "request": "/kibana/elasticsearch/_mget?timeout=0&ignore_unavailable=true&preference=1435827494093",
    "httpversion": "1.1",
    "response": "200",
    "bytes_sent": "161",
    "bytes_received": 912,
    "referrer": "https://confidential.host.name/kibana/",
    "agent": "\"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.30 Safari/537.36\"",
    "request_time": 0.003,
    "upstream_time": 0.003,
    "geoip": {
      "ip": "123.123.123.123",
      "country_code2": "DE",
      "country_code3": "DEU",
      "country_name": "Germany",
      "continent_code": "EU",
      "region_name": "07",
      "city_name": "Aachen",
      "postal_code": "52068",
      "latitude": 50.77080000000001,
      "longitude": 6.1053,
      "timezone": "Europe/Berlin",
      "real_region_name": "Nordrhein-Westfalen",
      "location": [
        6.1053,
        50.77080000000001
      ],
      "coordinates": [
        6.1053,
        50.77080000000001
      ]
    }
  },
  "fields": {
    "@timestamp": [
      1435827560000
    ]
  }
}
Thanks, where is the documentation related to this?
%{NUMBER:upstream_time:float}
Obviously the most important part for me is the :float.
Looks good, but this configuration does not consider the case where a request is processed by several upstream servers. In that case the $upstream_response_time variable contains several values, separated by commas, each corresponding to a different upstream server.
See the nginx docs: https://www.nginx.com/resources/admin-guide/logging-and-monitoring/.
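One possible way to handle that (a sketch only, not part of the original gist): have grok capture the last field as a plain string, for example by ending the pattern with %{GREEDYDATA:upstream_time_raw} instead of (?:%{NUMBER:upstream_time:float}|-), and then split it inside the existing "nginx_access" conditional with a ruby filter. The upstream_time_raw name is made up here, and the event['...'] syntax matches the Logstash 1.x era this gist targets:

ruby {
  # $upstream_response_time may hold several comma-separated values (one per
  # upstream tried) and " : " separated server groups; "-" means no upstream
  # was contacted. Sum the numeric parts into a single float.
  code => "
    raw = event['upstream_time_raw'].to_s
    parts = raw.split(/[,:]/).map(&:strip).reject { |v| v.empty? || v == '-' }
    event['upstream_time'] = parts.map(&:to_f).reduce(0.0, :+) unless parts.empty?
  "
}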