Skip to content

Instantly share code, notes, and snippets.

@cevaris
Last active September 16, 2015 22:02
Show Gist options
  • Save cevaris/8ffbcda6e4ed53480d67 to your computer and use it in GitHub Desktop.
Save cevaris/8ffbcda6e4ed53480d67 to your computer and use it in GitHub Desktop.
Kafka Partition Lag Analysis Tools
#!/usr/bin/env python
import re
# One lag report looks like:
#   "Lag for group <group> on partition <partition> (<count>)"
# Raw string so the regex escapes (\w, \(, \)) are not treated as string
# escapes by the Python parser.
BASE_REGEX = r"Lag for group ([_\w]+) on partition ([-\w]+) \(([,\w]+)\)"
LAG_LOG_PATH = 'kafka-lag-logs.txt'


def extractData(row):
    """Print "<partition> <count>" for the first lag report found in *row*.

    Rows that do not match BASE_REGEX are ignored silently.  The consumer
    group is captured by the pattern but is not part of the output.

    Args:
        row: One line of text from the lag log.

    Raises:
        ValueError: If the partition (after stripping its ``audit...-``
            prefix) or the count (after stripping commas) is not an integer.
    """
    match = re.search(BASE_REGEX, row)
    if match is None:
        return
    _group, partition, count = match.groups()
    # Drop everything from "audit" through the last "-" so that only the
    # trailing partition number remains.
    partition = int(re.sub(r'audit.*-', '', partition))
    # Counts are logged with thousands separators, e.g. "1,234".
    count = int(count.replace(',', ''))
    print('{} {}'.format(partition, count))


def main(path=LAG_LOG_PATH):
    """Run extractData over every line of the log file at *path*."""
    # The context manager closes the file; an explicit loop (rather than
    # map) guarantees the rows are actually processed on Python 3 too.
    with open(path, 'r') as log_file:
        for row in log_file:
            extractData(row)


if __name__ == '__main__':
    main()
#!/usr/bin/env python
import os
from itertools import groupby

import matplotlib.pyplot as plt
import numpy as np
DATA_PATH = 'data.txt'
PLOT_DIR = 'plots'
PERCENTILES = (25, 50, 75, 95)
# Histogram bin edges: 1000-wide bins whose first edge is 1.  np.histogram
# does not count values that fall outside the outermost edges.
HISTOGRAM_BINS = np.arange(1, 400000, 1000)


def group_lags(lines):
    """Group lag counts by partition number.

    Args:
        lines: Iterable of text lines; the first two whitespace-separated
            fields of each line are read as integers (partition, lag).
            Blank lines are skipped.

    Returns:
        Dict mapping each partition number to a numpy array of its lags,
        in input order.
    """
    groups = {}
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        groups.setdefault(int(fields[0]), []).append(int(fields[1]))
    return {partition: np.array(lags) for partition, lags in groups.items()}


def format_stats(title, values):
    """Return the report lines (title, percentiles, summary) for one partition."""
    lines = [title]
    for p in PERCENTILES:
        lines.append("\t{}%: {}".format(p, np.percentile(values, p)))
    lines.append("\tAverage: {}".format(np.average(values)))
    lines.append("\tMin: {}".format(np.min(values)))
    lines.append("\tMax: {}".format(np.max(values)))
    lines.append("\tCounts: {}".format(np.size(values)))
    return lines


def save_histogram(title, values):
    """Save a histogram of *values* as ``plots/<title>.pdf``."""
    hist, bin_edges = np.histogram(values, bins=HISTOGRAM_BINS)
    # Draw each bar across its whole bin, anchored at the bin's left edge;
    # a fixed width of 1 is all but invisible on an axis ~400000 units wide.
    plt.bar(bin_edges[:-1], hist, width=np.diff(bin_edges), align='edge')
    plt.xlim(min(bin_edges), max(bin_edges))
    plt.title(title)
    plt.savefig('plots/{}.pdf'.format(title))
    # Reset the current figure so the next partition starts from a clean plot.
    plt.clf()


def main():
    """Print per-partition lag statistics and write one histogram PDF each."""
    with open(DATA_PATH, 'r') as data_file:
        groups = group_lags(data_file)

    # savefig does not create missing directories.
    if not os.path.isdir(PLOT_DIR):
        os.makedirs(PLOT_DIR)

    # Numeric order keeps the report deterministic.
    for partition in sorted(groups):
        values = groups[partition]
        title = 'Partition: {}'.format(partition)
        for line in format_stats(title, values):
            print(line)
        save_histogram(title, values)
        print('')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment