Last active
September 16, 2015 22:02
-
-
Save cevaris/8ffbcda6e4ed53480d67 to your computer and use it in GitHub Desktop.
Kafka Partition Lag Analysis Tools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
# Read every log line up front. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open('kafka-lag-logs.txt', 'r') as log_file:
    data = log_file.readlines()
# Matches "Lag for group <group> on partition <partition> (<count>)".
# Captures the group name, the partition name and the parenthesised count
# (which may contain comma separators). Raw string so the regex escapes
# (\w, \() are not interpreted as string escapes.
BASE_REGEX = r"Lag for group ([_\w]+) on partition ([-\w]+) \(([,\w]+)\)"


def extractData(row):
    """Print "<partition> <count>" for a log line matching BASE_REGEX.

    Only the first match in ``row`` is used. The partition number is what
    remains of the captured partition name after removing everything from
    ``audit`` through the last ``-``; the count has its commas stripped.
    Lines that do not match print nothing.

    Raises:
        ValueError: if the remaining partition text or the count is not a
            valid integer.
    """
    matches = re.findall(BASE_REGEX, row)
    if not matches:
        return

    # The group name is captured by the pattern but not reported.
    _group, partition, count = matches[0]
    partition = int(re.sub(r'audit.*-', '', partition))
    count = int(count.replace(',', ''))

    # Single formatted string so the output is identical on Python 2 and 3.
    print("%d %d" % (partition, count))
# Run extractData on every line for its printing side effect. An explicit
# loop is used instead of map(): on Python 3 map() is lazy, so the function
# would never be called.
for row in data:
    extractData(row)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from itertools import groupby | |
import matplotlib.pyplot as plt | |
import numpy as np | |
# Load the whitespace-separated rows of data.txt as lists of string fields.
# The context manager closes the file; the list comprehension yields a real
# list on both Python 2 and 3 (map() returns an iterator on Python 3, which
# has no .sort()). Blank lines are skipped so they do not produce empty rows
# that would fail when the fields are indexed later.
with open('data.txt', 'r') as data_file:
    data = [line.split() for line in data_file if line.strip()]

# Fields are still strings here, so this orders rows lexicographically.
data.sort()
# Group by the first column: map each integer key (the partition) to the
# list of integer values from the second column.
groups = {}
for fields in data:
    partition = int(fields[0])
    value = int(fields[1])
    groups.setdefault(partition, []).append(value)

# To numpy array. items() (rather than the Python-2-only iteritems()) and a
# fresh dict keep this working on both Python 2.7 and Python 3.
groups = {
    partition: np.array(values)
    for partition, values in groups.items()
}
# Print stats per partition and save one histogram PDF per partition.
percents = [25, 50, 75, 95]

# Histogram bin edges: 1, 1001, ..., 399001 (1000-wide bins). Values outside
# that range, including 0, are not counted by np.histogram.
bins = np.arange(1, 400000, 1000)

# Sorted keys give a deterministic, ascending-by-partition report order.
for k in sorted(groups):
    v = groups[k]
    title = 'Partition: {}'.format(k)
    print(title)
    for p in percents:
        print("\t{}%: {}".format(p, np.percentile(v, p)))
    print("\tAverage: {}".format(np.average(v)))
    print("\tMin: {}".format(np.min(v)))
    print("\tMax: {}".format(np.max(v)))
    print("\tCounts: {}".format(np.size(v)))

    hist, bin_edges = np.histogram(v, bins=bins)
    # Each bar starts at its bin's left edge and spans the whole bin; a
    # fixed width of 1 would make bars 1/1000 of a bin wide and effectively
    # invisible at this x-axis scale.
    plt.bar(bin_edges[:-1], hist, width=np.diff(bin_edges), align='edge')
    plt.xlim(bin_edges[0], bin_edges[-1])
    plt.title(title)
    # NOTE: the 'plots' directory must already exist; savefig does not
    # create it.
    plt.savefig('plots/{}.pdf'.format(title))
    # Clear the figure so the next partition starts from an empty plot.
    plt.clf()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment