Last active
August 29, 2015 14:12
-
-
Save rhzs/f2a4ae24ceac9b9a8d74 to your computer and use it in GitHub Desktop.
Heroku log report statistics (number of calls per URL, response times, and most-responding dyno)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import time | |
import resource | |
import re | |
from collections import Counter | |
from threading import Thread | |
class ThreadLogFile(Thread):
    """Worker thread that scans a log file and gathers per-URL statistics.

    For every configured URL the thread counts the matching log lines,
    collects the response time (connect + service, in ms) and counts which
    dyno answered.  When the whole file has been read, one report dict per
    URL is handed to ``finishedCallback``.

    NOTE(review): the patterns below look like they target Heroku router
    log lines (``method=GET path=/x dyno=web.1 connect=1ms service=2ms``)
    -- confirm against a real log sample.
    """

    def __init__(self, fileName, urlsToBeProceed, finishedCallback):
        """Initialize the thread.

        Args:
            fileName: path of the log file to scan.
            urlsToBeProceed: iterable of dicts with the keys ``'method'``,
                ``'url'`` and ``'dynamicParam'`` (placeholder name -> type).
            finishedCallback: callable invoked once per URL with the report
                dict built by ``makeReportDict``.
        """
        Thread.__init__(self)
        self.finishedCallback = finishedCallback
        self.fileName = fileName
        self.urlsDict = {}
        for urlDict in urlsToBeProceed:
            pattern = self.buildRegexPattern(
                urlDict['method'], urlDict['url'], urlDict['dynamicParam'])
            self.urlsDict[urlDict['method'] + ' ' + urlDict['url']] = {
                'noOfCalls': 0,
                'responseList': [],
                'dynoDict': {},
                # Compiled once here so the per-line scan never has to go
                # through the re module's pattern cache.
                'matcher': re.compile(pattern),
            }
        # Each pattern captures the interesting value directly, so no second
        # regex pass is needed to pull the number / dyno name out.
        self.regexConnect = re.compile(r' connect=(\d+)ms ')
        self.regexService = re.compile(r' service=(\d+)ms ')
        self.regexDyno = re.compile(r' dyno=(web\.\d+) ')

    def buildRegexPattern(self, method, url, params):
        """Build the regex source used to recognise one URL in a log line.

        Every ``{name}`` placeholder in ``url`` whose type in ``params`` is
        ``'number'`` is replaced (case-insensitively) by ``\\d+``.
        Placeholders of any other type are left untouched.  The rest of
        ``url`` is used verbatim as a regex fragment.

        Args:
            method: HTTP method, e.g. ``'GET'``.
            url: URL template, e.g. ``'/api/users/{user_id}'``.
            params: dict mapping placeholder name to its type.

        Returns:
            The pattern ``'<method> (\\w+=)<url pattern> '`` as a string.
        """
        urlPattern = url
        for name, kind in params.items():
            if kind == 'number':
                placeholder = re.compile(
                    re.escape('{' + name + '}'), re.IGNORECASE)
                # A callable replacement is used because a '\d' escape in a
                # replacement *template* is rejected by Python 3.7+.
                urlPattern = placeholder.sub(lambda _match: r'\d+', urlPattern)
        # The trailing space keeps '/api/users/1' from also matching
        # '/api/users/1/more'.
        return method + r' (\w+=)' + urlPattern + ' '

    def find(self, keys, line, fn):
        """Call ``fn(key, line)`` for every key whose matcher hits ``line``."""
        for key in keys:
            if self.urlsDict[key]['matcher'].search(line) is not None:
                fn(key, line)

    def onSuccess(self, key, line):
        """Record one matching log line in the statistics of ``key``.

        The call counter is always incremented.  The response time is only
        recorded when both ``connect=`` and ``service=`` are present, and the
        dyno only when ``dyno=`` is present, so an incomplete line is counted
        instead of crashing the thread.
        """
        stats = self.urlsDict[key]
        stats['noOfCalls'] += 1

        connect = self.regexConnect.search(line)
        service = self.regexService.search(line)
        if connect is not None and service is not None:
            stats['responseList'].append(
                int(connect.group(1)) + int(service.group(1)))

        dyno = self.regexDyno.search(line)
        if dyno is not None:
            dynoName = dyno.group(1)
            stats['dynoDict'][dynoName] = stats['dynoDict'].get(dynoName, 0) + 1

    def makeReportDict(self, key):
        """Summarise the statistics gathered for ``key``.

        Returns:
            A dict with the keys ``'url'``, ``'num_of_calls'``,
            ``'res_mean'``, ``'res_median'``, ``'res_mode'`` and
            ``'dyno_name'``.  The response figures are ``0`` when no response
            time was recorded.  ``'dyno_name'`` is ``0`` when no dyno was
            seen, a single name when one dyno answered most often, and a
            sorted list of names on a tie.
        """
        stats = self.urlsDict[key]
        responses = stats['responseList']
        responses.sort()  # in place; the median lookup needs sorted data
        count = len(responses)

        if responses:
            mean = sum(responses) / float(count)
            # For an even count both middle elements are averaged; for an
            # odd count both indexes point at the same element.
            median = 0.5 * (responses[(count - 1) // 2] + responses[count // 2])
            frequencies = Counter(responses)
            highest = max(frequencies.values())
            # On a tie take the smallest value so the result is deterministic.
            mode = min(value for value, seen in frequencies.items()
                       if seen == highest)
        else:
            mean = median = mode = 0

        dynoDict = stats['dynoDict']
        if dynoDict:
            mostHits = max(dynoDict.values())  # computed once, not per item
            busiest = sorted(name for name, hits in dynoDict.items()
                             if hits == mostHits)
            dynoName = busiest[0] if len(busiest) == 1 else busiest
        else:
            dynoName = 0

        return {
            'url': key,
            'num_of_calls': stats['noOfCalls'],
            'res_mean': mean,
            'res_median': median,
            'res_mode': mode,
            'dyno_name': dynoName,
        }

    def run(self):
        """Scan the log file, then report the statistics of every URL."""
        keys = list(self.urlsDict)
        with open(self.fileName, "r") as inFile:
            for line in inFile:
                self.find(keys, line, self.onSuccess)
        for key in keys:
            self.finishedCallback(self.makeReportDict(key))
def chunk(list, num):
    """Split ``list`` into ``num`` consecutive slices of near-equal length.

    The slice boundaries are ``floor(i * len(list) / num)``, computed with
    integer arithmetic so that floating-point rounding can never add, drop
    or duplicate an element.

    Args:
        list: the sequence to split (the name shadows the builtin, but is
            kept because it is part of the signature).
        num: number of slices, a positive integer.

    Returns:
        A list of ``num`` slices, or ``[]`` when ``list`` is empty.  Some
        slices are empty when ``num`` exceeds ``len(list)``.

    Raises:
        ValueError: if ``num`` is smaller than 1.
    """
    if num < 1:
        raise ValueError('num must be a positive integer')
    size = len(list)
    if size == 0:
        return []
    bounds = [(index * size) // num for index in range(num + 1)]
    return [list[bounds[index]:bounds[index + 1]] for index in range(num)]
def main(fileName, urls, concurrency=2):
    """Scan a log file with several threads and print a statistics table.

    Rheza Satria, 2014-2015

    The URL descriptors are split into ``concurrency`` groups; each group is
    handled by its own ``ThreadLogFile`` that reads the whole file.  After
    all threads have finished, a table with one row per URL is printed,
    followed by the running time and memory usage.

    Args:
        fileName: path of the log file to analyse.
        urls: list of URL descriptor dicts (``'url'``, ``'method'``,
            ``'dynamicParam'``).  The list is not modified.
        concurrency: number of worker threads; must be between 1 and
            ``len(urls)``.

    Raises:
        SystemExit: with status 1, after printing an error message, when
            ``concurrency`` is out of range.
    """
    startTime = time.time()
    urlLen = len(urls)
    result = []
    threads = []

    if concurrency < 1:
        print('ERROR: Number of concurrency should be at least 1.')
        raise SystemExit(1)
    if concurrency > urlLen:
        print('ERROR: Number of concurrency should be less than number of processed urls.')
        raise SystemExit(1)

    def prettyPrintResult():
        """Print the collected report dicts as a tab-aligned table."""
        urlColWidth = 7
        callsColWidth = 3
        responseColWidth = 3
        totalWidth = 180
        columnGap = '\t' * responseColWidth

        print('=' * totalWidth)
        print('\t' * (urlColWidth + (responseColWidth + responseColWidth - 1))
              + 'URL Response Time (in ms)')
        print(' URL'
              + '\t' * urlColWidth + '# num. of calls'
              + '\t' * callsColWidth
              + columnGap.join(['Mean', 'Median', 'Mode', 'Most Responded Dyno']))
        print('=' * totalWidth)

        lastIndex = len(result) - 1
        for index, res in enumerate(result):
            # Short URLs need extra tabs to reach the "# num. of calls" column.
            if len(res['url']) < 30:
                extraTabs = 2
            elif len(res['url']) < 40:
                extraTabs = 1
            else:
                extraTabs = 0
            print(res['url']
                  + '\t' * (callsColWidth + extraTabs)
                  + columnGap.join([
                      str(res['num_of_calls']),
                      '%.2f' % res['res_mean'],
                      str(res['res_median']),
                      str(res['res_mode']),
                      str(res['dyno_name']),
                  ]))
            if index != lastIndex:
                print('-' * totalWidth)
        print('=' * totalWidth)

    # One thread per group.  With concurrency == len(urls) every group holds
    # exactly one URL, so no special case is needed for that.
    for urlGroup in chunk(urls, concurrency):
        # list.append serves as the callback; rows arrive in completion order.
        thread = ThreadLogFile(fileName, urlGroup, result.append)
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    prettyPrintResult()

    print('Total running time: {seconds} seconds'.format(seconds=time.time() - startTime))
    # NOTE(review): ru_maxrss is documented in kilobytes on Linux and bytes
    # on macOS; the "bytes" label is kept unchanged -- confirm the platform.
    print('Memory usage: {bytes} bytes'.format(bytes=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
    print('We used {concurrency} thread(s) to process {numOfUrls} url(s), '
          'edit the source code to change number of running threads.'.format(concurrency=concurrency, numOfUrls=urlLen))
    print('')
    print('Created by Rheza Satria - 27 December 2014')
if __name__ == "__main__":
    # Script entry point: analyse 'sample.log' for the endpoints listed
    # below.  Every endpoint has a single numeric {user_id} placeholder.
    endpoints = [
        ('GET', '/api/users/{user_id}/count_pending_messages'),
        ('GET', '/api/users/{user_id}/get_messages'),
        ('GET', '/api/users/{user_id}/get_friends_progress'),
        ('GET', '/api/users/{user_id}/get_friends_score'),
        ('POST', '/api/users/{user_id}'),
        ('GET', '/api/users/{user_id}'),
    ]
    urls = []
    for httpMethod, path in endpoints:
        # A fresh placeholder dict per entry, as in the hand-written list.
        urls.append({
            'url': path,
            'method': httpMethod,
            'dynamicParam': {'user_id': 'number'},
        })
    main('sample.log', urls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment