Last active
December 4, 2018 10:41
-
-
Save morontt/ef7b9196f3461bcc373caaa4f1f563be to your computer and use it in GitHub Desktop.
log parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Summarise the cumulative request time per URL found in a request log.

Each log line is split on whitespace; the request URL is read from field 4
and the request time from field 7.  URLs that embed ids are collapsed onto a
placeholder form so that all requests to the same endpoint are summed
together.  The 50 endpoints with the largest cumulative time are printed.
"""
import re
import pprint

# Kept for ad-hoc debugging, e.g. pp.pprint(report[:50]).
pp = pprint.PrettyPrinter()

# CustomLog /path/to/request.log "%t %h \"%r\" %b %D"
# NOTE(review): the field indexes below assume %t renders as two
# whitespace-separated tokens and %r as three ("METHOD URL PROTOCOL").
log_file = 'requests.log'

URL_FIELD = 4    # index of the request URL after line.split()
TIME_FIELD = 7   # index of the request time (%D) after line.split()
TOP_N = 50       # number of report rows printed

# (pattern, placeholder) pairs.  Patterns are anchored at the start of the
# path and only have to match a prefix of it.  Raw strings keep "\d" from
# being treated as an (invalid) string escape.
URL_RULES = [
    (re.compile(r'^/ifw/[a-f0-9]+/\d+/new'),
     '/ifw/xxxx/yyyy/new/'),
    (re.compile(r'^/public/doccontent/\d+'),
     '/public/doccontent/xxxx'),
    (re.compile(r'^/ajax/applicationSubmitted/\d+'),
     '/ajax/applicationSubmitted/xxxx'),
    (re.compile(r'^/ajax/getApplicant/\d+'),
     '/ajax/getApplicant/xxxx'),
    (re.compile(r'^/ajax/sign/\d+'),
     '/ajax/sign/xxxx'),
    (re.compile(r'^/ajax/ziplookup/\d+'),
     '/ajax/ziplookup/xxxx'),
    (re.compile(r'^/applications/edit/\d+'),
     '/applications/edit/xxxx'),
    (re.compile(r'^/applications/ajaxGetReportStatus/\d+'),
     '/applications/ajaxGetReportStatus/xxxx'),
    (re.compile(r'^/applications/mergedreport/\d+/0'),
     '/applications/mergedreport/xxxx/0'),
    (re.compile(r'^/applications/print/\d+'),
     '/applications/print/xxxx'),
    (re.compile(r'^/applications/view/\d+'),
     '/applications/view/xxxx'),
    (re.compile(r'^/ifw/[a-f0-9]+/\d+/sign/\d+/'),
     '/ifw/xxxx/yyyy/sign/zzzz/'),
    (re.compile(r'^/prospectapplications/approve/\d+'),
     '/prospectapplications/approve/xxxx'),
    (re.compile(r'^/prospectapplications/view/\d+'),
     '/prospectapplications/view/xxxx'),
    (re.compile(r'^/ifw/[a-f0-9]+/\d+/renderinvoicemodel'),
     '/ifw/xxxx/yyyy/renderinvoicemodel/zzzz'),
    (re.compile(r'^/reports/print/\d+'),
     '/reports/print/xxxx'),
]


def normalize_url(raw_url):
    """Return *raw_url* without its query string and with ids collapsed.

    Everything from the first '?' onwards is dropped.  If the remaining path
    matches one of URL_RULES, the rule's placeholder is returned instead of
    the path.  Returns None when there is no path before the '?'.
    """
    path = raw_url.partition('?')[0]
    if not path:
        return None
    for pattern, placeholder in URL_RULES:
        if pattern.match(path):
            return placeholder
    return path


def parse_line(line):
    """Parse one log line into a ``(normalized_url, time)`` tuple.

    Returns None for lines that cannot be parsed: too few fields (blank
    lines, or requests logged without a full request line), a non-integer
    time field, or a URL with no path.
    """
    fields = line.split()
    if len(fields) <= TIME_FIELD:
        return None
    try:
        elapsed = int(fields[TIME_FIELD])
    except ValueError:
        return None
    url = normalize_url(fields[URL_FIELD])
    if url is None:
        return None
    return url, elapsed


def aggregate(lines):
    """Sum request times per normalized URL over an iterable of log lines.

    Unparseable lines are skipped.  Returns ``(totals, total_time)`` where
    *totals* maps each normalized URL to its summed time and *total_time* is
    the sum over all parsed lines.
    """
    totals = {}
    total_time = 0
    for line in lines:
        parsed = parse_line(line)
        if parsed is None:
            continue
        url, elapsed = parsed
        totals[url] = totals.get(url, 0) + elapsed
        total_time += elapsed
    return totals, total_time


def build_report(totals, total_time):
    """Return report rows sorted by cumulative time, largest first.

    Each row is a dict with the keys 'url', 'time' and 'percent' (share of
    *total_time*).  The percentage is 0.0 when *total_time* is 0, so a log
    whose times are all zero does not raise ZeroDivisionError.
    """
    rows = []
    for url, spent in totals.items():
        percent = 100.0 * spent / total_time if total_time else 0.0
        rows.append({'url': url, 'time': spent, 'percent': percent})
    return sorted(rows, key=lambda row: row['time'], reverse=True)


def main(path=None):
    """Read the log at *path* (default: ``log_file``) and print the report."""
    if path is None:
        path = log_file
    with open(path) as f:
        totals, total_time = aggregate(f)
    for row in build_report(totals, total_time)[:TOP_N]:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('{}\t{:2.3f}\turl: {}'.format(
            row['time'], row['percent'], row['url']))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Print the path part (everything before '?') of each URL in a file.

Intended usage, taken from the original notes:

    cat access.log | awk '{ print $7 }' > urls.log
    python rep.py | sort | uniq -ci | sort -nr | head -n 10
"""
import re
import pprint

pp = pprint.PrettyPrinter()

# Compiled once instead of on every line.
URL_PATH = re.compile(r'^([^?]+)')


def strip_query(line):
    """Return the part of *line* before the first '?', or None if empty.

    The trailing line break is removed first.  Otherwise a URL without a
    query string would keep its newline while the same URL with a query
    string would not, and the two would be counted as different paths by
    the sort/uniq pipeline.
    """
    match = URL_PATH.match(line.rstrip('\r\n'))
    if match is None:
        return None
    return match.group(1)


def print_paths(path='urls_21.log'):
    """Pretty-print the query-less path of every line in the file *path*.

    Lines that have no path (empty, or starting with '?') are skipped.
    """
    with open(path) as f:
        for line in f:
            url_path = strip_query(line)
            if url_path is not None:
                pp.pprint(url_path)


if __name__ == '__main__':
    print_paths()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment