Skip to content

Instantly share code, notes, and snippets.

@slackorama
Last active December 23, 2015 17:36
Show Gist options
  • Save slackorama/58b9cbf3a51392e1de92 to your computer and use it in GitHub Desktop.
Output top consumers of Elasticsearch fielddata by each node in a CSV format.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""es-fielddata
Usage:
es-fielddata [options]
Options:
-h --help show this
-v --version show version info
-o FORMAT, --output FORMAT output format [default: json]
--top N top N field records [default: 5]
--host HOST elasticsearch host
--port PORT elasticsearch port [default: 9200]
"""
from collections import OrderedDict
import csv
import json
import signal
import StringIO
from docopt import docopt
import requests
from edgecast import common
def _node_summary(node, top_n):
    """Build one {column: value} dict for a node's fielddata stats.

    Columns are 'node', 'total_memory', and one column per field that is
    actually holding fielddata memory, limited to the top_n largest.
    Values are human-formatted via edgecast.common.format_bytes.
    """
    fielddata = node['indices']['fielddata']
    data = {
        'node': node['name'],
        'total_memory': common.format_bytes(
            fielddata['memory_size_in_bytes']),
    }
    # Drop fields with zero fielddata memory, then keep the top_n largest.
    # (Original bound `memory` and then re-read the same key; and the sorted
    # result landed in a misspelled `fieldata_memory` local — both fixed.)
    usage = [(field, info['memory_size_in_bytes'])
             for field, info in fielddata['fields'].items()
             if info['memory_size_in_bytes'] > 0]
    usage.sort(key=lambda pair: pair[1], reverse=True)
    for field, memory in usage[:top_n]:
        data[field] = common.format_bytes(memory)
    return data


def _to_csv(results):
    """Render the per-node dicts as CSV text with a unioned header row."""
    csv_out = StringIO.StringIO()
    writer = csv.writer(csv_out)
    # First get the keys for the header row.  'node' and 'total_memory'
    # always lead; remaining field columns appear in first-seen order
    # across all nodes (OrderedDict preserves insertion order).
    headers = OrderedDict()
    headers['node'] = 1
    headers['total_memory'] = 1
    for row in results:
        headers.update(OrderedDict.fromkeys(
            filter(lambda x: x != 'node', row.keys())))
    writer.writerow(headers.keys())
    for row in results:
        # Nodes missing a field column get 0 so every row is rectangular.
        writer.writerow([row.get(header, 0) for header in headers])
    return csv_out.getvalue()


def main():
    """Fetch per-node fielddata stats from Elasticsearch and print them.

    Reads host/port/top/output options from the docopt'd module docstring,
    GETs /_nodes/stats/indices/fielddata?fields=*, and prints either a JSON
    array or CSV (one row per node).  A non-200 response yields empty output.
    """
    # Restore default SIGPIPE handling so piping into e.g. `head` exits
    # quietly instead of raising IOError.
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    arguments = docopt(__doc__, version="0.1")
    host = arguments['--host']
    port = int(arguments['--port'])
    top_n = int(arguments['--top'])
    url = 'http://{0}:{1}/_nodes/stats/indices/fielddata?fields=*'.format(
        host, port)
    resp = requests.get(url)
    results = []
    if resp.status_code == requests.codes.ok:
        for node in resp.json()['nodes'].values():
            results.append(_node_summary(node, top_n))
    output = arguments['--output']
    if output == 'json':
        # Python 2 print statement; trailing comma suppresses the newline.
        print(json.dumps(results)),
    elif output == 'csv':
        print(_to_csv(results)),


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment