-
-
Save clcollins/4fd9c0ab3a3041b7dc360c78eba78acc to your computer and use it in GitHub Desktop.
A script to generate a report of Elasticsearch index usage (from _cat/indices?v&bytes=b) by prefix for a set of known date suffixes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# A script to generate a report of Elasticsearch index usage | |
# (from _cat/indices?v&bytes=b) by prefix for a set of known | |
# date suffixes. | |
# | |
# E.g. | |
# $ curl -X GET http://localhost:9200/_cat/indices?v\&bytes=b -o indices.lis | |
# $ ./sum-es-indices.py indices.lis | |
# | |
# A second argument of b, k, m, or g can be given to specify the | |
# units in which the "size" of an index is reported. | |
import sys | |
import re | |
import collections | |
import operator | |
import locale | |
locale.setlocale(locale.LC_ALL, 'en_US') | |
open_indices = [] | |
closed_indices = [] | |
prefixes = collections.defaultdict(int) | |
_factors = { 'b': 1, 'k': 1024, 'm': 1024*1024, 'g': 1024*1024*1024 } | |
try: | |
units = sys.argv[2] | |
except IndexError: | |
units = 'b' | |
try: | |
factor = _factors[units] | |
except KeyError: | |
factor = 1 | |
# Parse the _cat/indices?v&bytes=b capture named by the first CLI argument
# into open_indices / closed_indices.
with open(sys.argv[1], "r") as fp:
    header = fp.readline()
    header_parts = header.split()
    # Newer Elasticsearch releases insert a "uuid" column after "index",
    # shifting every numeric column right by one.  Hoisted out of the loop
    # (the original also indexed header_parts[3] per line, which would
    # IndexError on a short header).
    has_uuid = len(header_parts) > 3 and header_parts[3] == 'uuid'
    for line in fp:
        # split() discards any trailing newline by itself; the original
        # line[:-1].split() silently chopped the last character of a file
        # whose final line has no newline.
        parts = line.split()
        if not parts:
            continue  # tolerate blank lines
        if len(parts) == 2 and parts[0] == "close":
            # Closed indices are listed as just "close <name>".
            closed_indices.append({ 'index': parts[1] })
            continue
        base = 4 if has_uuid else 3  # first numeric column
        open_indices.append({
            'index': parts[2],
            'status': parts[1],
            'health': parts[0],
            'pri': int(parts[base]),
            'rep': int(parts[base + 1]),
            'docs.count': int(parts[base + 2]),
            'docs.deleted': int(parts[base + 3]),
            # Sizes were captured in bytes (bytes=b).  NOTE(review): they are
            # *multiplied* by the unit factor here and divided back at print
            # time — looks inverted, but preserved from the original.
            'store.size': int(parts[base + 4]) * factor,
            'pri.store.size': int(parts[base + 5]) * factor,
        })
# Regexes that recognize a trailing date in an index name; group(1) is the
# date-stripped prefix.  Tried in order, most specific first.
_date_suffix_res = [re.compile(rx) for rx in (
    r"(.+)([0-9]{4,})\.([0-9]{2,})\.([0-9]{2,})$",  # name-YYYY.MM.DD
    r"(.+)([0-9]{4,})-([0-9]{2,})-([0-9]{2,})$",    # name-YYYY-MM-DD
    r"(.+)([0-9]{4,})-([0-9]{2,})$",                # name-YYYY-MM
    r"(.+)([0-9]{4,})([0-9]{2,})([0-9]{2,})$",      # name-YYYYMMDD
    r"(.+)([0-9]{4,})([0-9]{2,})$",                 # name-YYYYMM
)]
dotdate_r, dashdate_r, dashdateym_r, numdate_r, numdateym_r = _date_suffix_res
patterns = list(_date_suffix_res)
def domatch(pat, index_name):
    """Try *pat* against *index_name*.

    On a hit, bump the module-level `prefixes` counter for the captured
    prefix (group 1) and return the prefix; otherwise return None.
    """
    hit = pat.match(index_name)
    if not hit:  # match objects are always truthy, so this means "no match"
        return None
    found = hit.group(1)
    prefixes[found] += 1
    return found
def _tag_prefixes(indices):
    """Attach a 'prefix' key (date suffix stripped) to each index dict.

    An index whose name matches none of the date patterns keeps no
    'prefix' key; its full name is counted in `prefixes` instead
    (matching names are counted inside domatch).
    """
    for idx in indices:
        name = idx['index']
        for pat in patterns:
            prefix = domatch(pat, name)
            if prefix:
                idx['prefix'] = prefix
                break
        else:
            # No date suffix found: the whole name is its own "prefix".
            prefixes[name] += 1

# The original duplicated this loop verbatim for both lists.
_tag_prefixes(open_indices)
_tag_prefixes(closed_indices)
# Aggregate per-prefix statistics in one pass over each index list.
# (The original rescanned both full lists once per prefix —
# O(prefixes * indices); this is O(prefixes + indices) with identical
# resulting dicts, since every index's effective prefix is a key of
# `prefixes` by construction.)
stats = {}
for pre in prefixes:
    stats[pre] = { 'closed': 0, 'opened': 0, 'green': 0, 'yellow': 0,
                   'red': 0, 'docs': 0, 'deleted': 0, 'size': 0,
                   'max_pri': 0 }
for idx in closed_indices:
    # Indices that matched no date pattern carry no 'prefix' key and are
    # counted under their full name.
    stats[idx.get('prefix', idx['index'])]['closed'] += 1
for idx in open_indices:
    stat = stats[idx.get('prefix', idx['index'])]
    stat['opened'] += 1
    health = idx['health']
    if health == 'green':
        stat['green'] += 1
    elif health == 'yellow':
        stat['yellow'] += 1
    else:
        # Fail loudly on unexpected health values (as the original did).
        assert health == 'red'
        stat['red'] += 1
    stat['docs'] += idx['docs.count']
    stat['deleted'] += idx['docs.deleted']
    stat['size'] += idx['store.size']
    if idx['pri'] > stat['max_pri']:
        stat['max_pri'] = idx['pri']
def n(val):
    """Format *val* as a locale-grouped integer string (e.g. "1,234")."""
    # locale.format() was deprecated and removed in Python 3.12;
    # format_string() is the long-standing equivalent (present in 2.7 too).
    return locale.format_string("%d", val, grouping=True)
def f(val):
    """Format *val* with one decimal place, locale-grouped."""
    # locale.format() was deprecated and removed in Python 3.12;
    # format_string() is the long-standing equivalent (present in 2.7 too).
    return locale.format_string("%0.1f", val, grouping=True)
# Column layout shared by the header row and every data row.
format_str = "%9s %6s %6s %5s %6s %5s %20s %10s %7s %20s %10s %s"

print(format_str % ("indices", 'closed', 'opened', 'green', 'yellow', 'red',
                    'docs', 'deleted', 'max_pri', 'size', 'avg/sz', 'prefix'))

# Most-populous prefixes first.
sorted_prefixes = sorted(prefixes.items(), key=operator.itemgetter(1),
                         reverse=True)
for pre, count in sorted_prefixes:
    stat = stats[pre]
    avg_sz = (stat['size'] / float(stat['docs'])) if stat['docs'] else 0
    print(format_str % (n(count), n(stat['closed']), n(stat['opened']),
                        n(stat['green']), n(stat['yellow']), n(stat['red']),
                        n(stat['docs']), n(stat['deleted']), stat['max_pri'],
                        f(stat['size'] / factor), f(avg_sz), pre))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment