Skip to content

Instantly share code, notes, and snippets.

@portante
Last active January 11, 2018 14:11
Show Gist options
  • Select an option

  • Save portante/f59e36e5d149f81f8cac4476099e1d3c to your computer and use it in GitHub Desktop.

Select an option

Save portante/f59e36e5d149f81f8cac4476099e1d3c to your computer and use it in GitHub Desktop.
Hacky set of Python and Bash scripts that format the output of an Elasticsearch _search query paged through the "scroll" API (see https://www.elastic.co/guide/en/elasticsearch/reference/2.4/search-request-scroll.html)
#!/usr/bin/env python
"""Format one page of an Elasticsearch ``_search`` / scroll response as a table.

The JSON response is read from the file named by the first command-line
argument, or from stdin when no argument is given or the argument is ``-``.

Use via: curl -X GET http://localhost:9200/<index>/_search/?fields=@timestamp,level,hostname,kubernetes.host,kubernetes.pod_name,kubernetes.container_name,message&q=message:<string>

Scroll hand-off: when the response carries a ``_scroll_id`` it is written to
``/tmp/scroll-logs.scroll_id``; when such a response has no hits left, that
file is removed again.

The code deliberately sticks to constructs that run on both Python 2.7 and
Python 3.
"""
import sys
import os
import json

SCROLL_ID_FILE = '/tmp/scroll-logs.scroll_id'

# Columns printed for every hit, in display order.
fields = ['@timestamp', 'level', 'hostname', 'kubernetes.pod_name',
          'kubernetes.container_name', 'message']
# Friendlier column titles; fields not listed here use their own name.
headermap = {'@timestamp': 'timestamp',
             'kubernetes.pod_name': 'pod name',
             'kubernetes.container_name': 'container name'}

# Fixed-width columns shared by the header line and the data rows; only the
# trailing message conversion differs (%s for the header, %r for rows so
# embedded control characters stay visible on a single line).
_COLUMNS = ("%(@timestamp)-34s %(level)-10s %(hostname)-45s "
            "%(kubernetes.pod_name)-20s %(kubernetes.container_name)-20s ")
_HEADER_FORMAT = _COLUMNS + "%(message)s"
_ROW_FORMAT = _COLUMNS + "%(message)r"


def _load_document(argv):
    """Return the parsed JSON from the file argv[0], or stdin if absent or '-'."""
    if not argv or argv[0] == '-':
        return json.load(sys.stdin)
    # Context manager so the input file is closed once parsed.
    with open(argv[0]) as fp:
        return json.load(fp)


def _flatten_hit(hit):
    """Turn one search hit into a flat {field: value} dict ready for printing."""
    # Each entry under "fields" is indexed with [0], i.e. treated as a list
    # whose first element is the value. A hit may lack "fields" altogether
    # (presumably when the document has none of the requested fields), so
    # default to an empty mapping instead of raising KeyError.
    row = dict((name, values[0])
               for name, values in hit.get("fields", {}).items())
    if 'hostname' not in row and 'kubernetes.host' in row:
        row['hostname'] = row['kubernetes.host']
    for name in fields:
        row.setdefault(name, "")
    # Drop a single trailing newline. Slicing (rather than indexing [-1])
    # keeps this safe for the empty message filled in just above.
    message = row['message']
    if message[-1:] == '\n':
        row['message'] = message[:-1]
    return row


def main(argv=None):
    """Print the hits of one response as a table and maintain the scroll-id file.

    argv -- command-line arguments without the program name; defaults to
            sys.argv[1:].

    Returns 0, the process exit status.
    """
    if argv is None:
        argv = sys.argv[1:]
    json_doc = _load_document(argv)

    try:
        scroll_id = json_doc['_scroll_id']
    except (KeyError, TypeError):
        # No scroll id in the response, or the document is not an object.
        scroll_id = ""
    else:
        with open(SCROLL_ID_FILE, "w") as fp:
            fp.write(scroll_id)

    try:
        hits = json_doc['hits']['hits']
    except (KeyError, TypeError):
        hits = []

    if not hits:
        if scroll_id:
            # A scroll page without hits: remove the id file written above.
            os.unlink(SCROLL_ID_FILE)
        else:
            print("*** nothing to report ***")
        return 0

    header = dict((name, headermap.get(name, name)) for name in fields)
    print(_HEADER_FORMAT % header)
    for hit in hits:
        print(_ROW_FORMAT % _flatten_hit(hit))
    return 0


if __name__ == "__main__":
    sys.exit(main())
{
"size": 10,
"fields": [
"@timestamp",
"level",
"hostname",
"kubernetes.host",
"kubernetes.pod_name",
"kubernetes.container_name",
"message"
],
"query": {
"filtered": {
"filter": {
"and": {
"filters": [
{
"range": {
"@timestamp": {
"gt": "2017-12-28T00:00:00",
"lt": "2017-12-28T00:01:00"
}
}
}
]
}
}
}
}
}
#!/bin/bash
#
# usage: scroll_logs <PROJECT>
#
# Runs the search in ./query.json against the "project.<PROJECT>.*" and
# ".operations.*" indices and pages through the results with the scroll API,
# piping every page through the format-es-logs helper that sits next to this
# script.
#
# Environment:
#   DEBUG  non-empty: trace every command (set -x)
#   RAW    non-empty: print the raw JSON of one plain (non-scroll) search
#          instead of formatted, scrolled output
#
set -eo pipefail

if [ -n "${DEBUG:-}" ]; then
    set -x
fi

# File through which format-es-logs hands the next scroll id back to us.
scroll_id_file=/tmp/scroll-logs.scroll_id
rm -f "$scroll_id_file"

format_es_logs="$(dirname "$0")/format-es-logs"

ES_URL='https://localhost:9200'

# Client-certificate options shared by every request. Arrays keep each option
# a separate word without relying on unquoted word splitting.
curl_auth=(--cacert /etc/elasticsearch/secret/admin-ca
           --cert /etc/elasticsearch/secret/admin-cert
           --key /etc/elasticsearch/secret/admin-key)
curl_get=(curl -s -X GET "${curl_auth[@]}")
curl_del=(curl -s -X DELETE "${curl_auth[@]}")

project=$1
SEARCH="project.$project.*,.operations.*/_search"

if [ -z "${RAW:-}" ]; then
    "${curl_get[@]}" "${ES_URL}/${SEARCH}?scroll=1m" -d @query.json | "$format_es_logs"
else
    "${curl_get[@]}" "${ES_URL}/${SEARCH}" -d @query.json
fi

if [ -f "$scroll_id_file" ]; then
    while [ -f "$scroll_id_file" ]; do
        scroll_id="$(cat "$scroll_id_file")"
        # Remove the file before fetching the next page: the loop then only
        # continues if the formatter writes a fresh id. Without this, a
        # response that carries no scroll id (e.g. an error reply) would leave
        # the old file in place and repeat the same request forever.
        rm -f "$scroll_id_file"
        "${curl_get[@]}" "${ES_URL}/_search/scroll?scroll=1m" -d "$scroll_id" | "$format_es_logs"
    done
    # Best-effort release of the server-side scroll context; a failure here
    # must not abort the script (set -e) before the final cleanup.
    "${curl_del[@]}" "${ES_URL}/_search/scroll" -d "$scroll_id" > /dev/null 2>&1 || true
    rm -f "$scroll_id_file"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment