Skip to content

Instantly share code, notes, and snippets.

@shrkw
Created September 17, 2013 07:52
Show Gist options
  • Save shrkw/6591267 to your computer and use it in GitHub Desktop.
Save shrkw/6591267 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# gunzip -c proxy_access_8844_log-20130720.gz | python ~/ResponseTimeParser.py >> /tmp/proxy_access_8844_log
import sys
import math
class ResponseResult(object):
    """Running response-time statistics for a single URL.

    An instance is created from the first observed value and further
    samples are fed in with :meth:`add`.  :meth:`result` returns the
    summary row ``[count, is_image, min, ave, med, max]``.

    Attributes:
        url: The URL the samples belong to.
        is_image: True when the path part of ``url`` (everything before
            ``?``) ends with a dot followed by one of ``RESOURCE``.
        count: Number of samples seen so far.
        min, max, sum: Smallest, largest and total of the samples.
        ave, med: Mean and median; refreshed by :meth:`result`.
        values: Every sample, in arrival order (needed for the median).
    """

    # Class-level defaults; __init__ rebinds all of them per instance.
    count = 1
    min, max, ave, med, sum = 0, 0, 0, 0, 0
    values = ()  # immutable on purpose: never share a mutable list on the class
    url = ''
    is_image = False
    # File extensions (without the dot) that mark a static resource.
    RESOURCE = ('gif', 'jpg', 'png', 'css', 'js')

    def __init__(self, url, val):
        """Start the statistics for ``url`` with the first sample ``val``."""
        self.url = url
        # Compare against ".ext" case-insensitively so that "/nodejs" is not
        # mistaken for a script while "/logo.PNG" is still recognised.
        path = url.split('?')[0].lower()
        suffixes = tuple('.' + ext for ext in self.RESOURCE)
        self.is_image = path.endswith(suffixes)
        self.count = 1
        self.min = self.max = self.ave = self.med = self.sum = val
        self.values = [val]

    def add(self, val):
        """Record one more sample."""
        if val < self.min:
            self.min = val
        if self.max < val:
            self.max = val
        self.sum += val
        self.count += 1
        self.values.append(val)

    def result(self):
        """Refresh ``ave``/``med`` and return the summary row.

        Returns:
            ``[count, is_image, min, ave, med, max]``.  ``ave`` is always a
            float (true division, identical on Python 2 and 3).
        """
        self.ave = float(self.sum) / self.count
        self.med = self.median()
        return [self.count, self.is_image, self.min, self.ave, self.med, self.max]

    def median(self):
        """Return the median of the recorded samples.

        For an odd number of samples this is the middle value itself; for an
        even number it is the (float) mean of the two middle values.
        """
        ordered = sorted(self.values)
        # Floor division keeps the index an int on Python 3 as well.
        mid = len(ordered) // 2
        if len(ordered) % 2:
            return ordered[mid]
        return (ordered[mid - 1] + ordered[mid]) / 2.0
class ApacheResponseTimeParser(object):
    """Parse response time in apache combined log

    Sample log:
    "172.0.2.19" - - [31/Jul/2013:03:39:01 +0900] 183862 "POST /foo/bar.html HTTP/1.1" 200 10486 "https://example.com/bar/baz.html" "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0" -

    Each line is split on single spaces (double-quoted fields are kept
    together); field 5 is read as the integer response time and field 6 as
    the request line, whose second word is the URL.  Samples are grouped
    per URL and a tab-separated summary is written to the destination.
    """

    # Class-level fallbacks so close()/__del__ are safe on an instance whose
    # __init__ did not complete.
    src = None
    dest = None
    _owns_dest = False
    _closed = True
    header = ["Count", "Resource", "Min", "Ave", "Med", "Max", ]

    def __init__(self, src_file, dest_filename=''):
        """Create a parser.

        Args:
            src_file: Open file-like object yielding log lines as text.
            dest_filename: Path of the report file to create.  An empty
                value selects ``sys.stdout``; an object with a ``write``
                method is used as the destination directly (and is left
                open by :meth:`close`).
        """
        self.src = src_file
        # Per-instance map url -> ResponseResult; a class-level dict would be
        # shared by every parser.
        self.res_dict = {}
        if hasattr(dest_filename, 'write'):
            self.dest = dest_filename
            self._owns_dest = False
        elif dest_filename:
            self.dest = open(dest_filename, "w")
            self._owns_dest = True
        else:
            self.dest = sys.stdout
            self._owns_dest = False
        self._closed = False
        self.clean()
        # Diagnostic only: goes to stderr so it never ends up in the report.
        sys.stderr.write("%s\n" % getattr(self.dest, 'name', repr(self.dest)))

    def close(self):
        """Release the streams; safe to call more than once.

        The source is closed unless it is ``sys.stdin``.  The destination is
        closed only when this parser opened it from a file name; otherwise
        it is just flushed.
        """
        if self._closed:
            return
        self._closed = True
        if self.src is not None and self.src is not sys.stdin:
            self.src.close()
        if self.dest is None:
            return
        if self._owns_dest:
            self.dest.close()
        elif hasattr(self.dest, 'flush') and not getattr(self.dest, 'closed', False):
            self.dest.flush()

    def __del__(self):
        self.close()

    def log(self, val, flags):
        """Write ``val`` to the destination followed by a terminator.

        ``flags`` of ``'t'`` appends a tab, ``'n'`` a newline, anything else
        nothing.
        """
        tail = {'t': '\t', 'n': '\n'}.get(flags, '')
        self.dest.write(val)
        self.dest.write(tail)

    def _warn(self, message, row):
        """Report a line that could not be used, without touching the report."""
        sys.stderr.write("%s: %r\n" % (message, row))

    def parse(self):
        """Read every log line, write the summary, then close the streams.

        Lines with a missing or non-integer response time, or whose request
        field has no second word, are reported on stderr and skipped.
        """
        import csv
        try:
            for row in csv.reader(self.src, delimiter=' '):
                try:
                    res_time = int(row[5])
                except (IndexError, ValueError):
                    self._warn("cannot read response time", row)
                    continue
                try:
                    url = row[6].split(' ')[1]
                except IndexError:
                    self._warn("cannot split by white space", row)
                    continue
                # Drop the session id so all sessions share one entry.
                url = url.split(";jsessionid=")[0]
                stats = self.res_dict.get(url)
                if stats is None:
                    self.res_dict[url] = ResponseResult(url, res_time)
                else:
                    stats.add(res_time)
            self.out()
        finally:
            self.close()

    def out(self):
        """Write the header and one tab-separated row per URL, sorted by URL."""
        self.log('URL\t' + '\t'.join(self.header), 'n')
        for url in sorted(self.res_dict):
            self.log(url, 't')
            self.log('\t'.join([str(x) for x in self.res_dict[url].result()]), 'n')
        self.clean()

    def clean(self):
        """Hook called after construction and after output; currently a no-op."""
        pass
if __name__ == '__main__':
    # Usage: ResponseTimeParser.py [access_log [output_file]]
    #   no arguments  -> read stdin,  write stdout
    #   one argument  -> read file,   write stdout
    #   two arguments -> read file,   write output_file
    args = sys.argv[1:]
    if len(args) > 2:
        # Previously this fell through with `parser` unbound (NameError).
        sys.stderr.write("usage: %s [access_log [output_file]]\n" % sys.argv[0])
        sys.exit(2)
    try:
        if not args:
            source = sys.stdin
        elif sys.version_info[0] < 3:
            # Python 2's csv module wants the file opened in binary mode.
            source = open(args[0], "rb")
        else:
            # Python 3's csv module wants text with newline translation off.
            source = open(args[0], "r", newline="")
        if len(args) == 2:
            parser = ApacheResponseTimeParser(source, args[1])
        else:
            # Omit the destination so the parser falls back to stdout;
            # passing sys.stdout as a "file name" raised TypeError.
            parser = ApacheResponseTimeParser(source)
        parser.parse()
    except KeyboardInterrupt:
        sys.stderr.write("Stopped by user.\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment