Last active
October 11, 2019 19:08
-
-
Save siddhesh/3db65dc06f2fe003e115ca4dcd873dc0 to your computer and use it in GitHub Desktop.
Read one or more httpd access.log files and print statistics I care about.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simple script to read httpd access.log and print a couple of statistics I care
# about. Usage:
#
#   python3 process_accesslog.py [--top N] <one or more log files>
#
# Copyright (c) 2019 Siddhesh Poyarekar
#
# This code is released under the MIT license:
# http://www.opensource.org/licenses/mit-license.php
import argparse
import re
import sys

import pandas as pd
# Section headings for the report.  They double as the DataFrame column labels
# and follow the order of the (date, page, referrer) tuples from parse_line().
_COLUMNS = ('Total Hits', 'Top Pages', 'Top Referrers')

# Rewrites a "[dd/Mon/yyyy:hh:mm:ss" timestamp field as "dd-Mon-yyyy".
_DATE_RE = re.compile(r'\[(\d+)/(\w+)/(\d+):.*')

# The referrer is read from index 10, so shorter lines cannot be parsed.
_MIN_FIELDS = 11


def parse_line(line: str):
    """Extract ``(date, page, referrer)`` from one access-log line.

    The line is split on single spaces; index 3 is taken as the timestamp,
    index 6 as the requested page and index 10 as the quoted referrer
    (presumably httpd's "combined" log layout -- confirm against the server
    configuration).

    Returns ``None`` when the line has fewer than 11 fields or when the page
    contains neither ``/posts/`` nor ``.pdf``.
    """
    # Drop the line terminator first so it cannot end up inside the last field.
    fields = line.rstrip('\r\n').split(' ')
    if len(fields) < _MIN_FIELDS:
        # Malformed or truncated entry: skip it instead of raising IndexError.
        return None

    page = fields[6]
    if '/posts/' not in page and '.pdf' not in page:
        return None

    # If the field does not look like a timestamp, sub() leaves it unchanged.
    date = _DATE_RE.sub(r'\1-\2-\3', fields[3])

    # Cap the referrer at 100 characters so long URLs stay printable.
    referrer = fields[10].strip('"')[:100]
    if referrer == '_REFERRER_':
        # Fold the literal placeholder into '-' (presumably "no referrer").
        referrer = '-'
    return (date, page, referrer)


def read_hits(paths) -> list:
    """Return the ``(date, page, referrer)`` tuples found in the files *paths*.

    Files are streamed line by line.  Bytes that cannot be decoded are
    replaced rather than aborting the whole run, since log lines carry
    client-supplied text.
    """
    hits = []
    for path in paths:
        with open(path, errors='replace') as fd:
            for line in fd:
                hit = parse_line(line)
                if hit is not None:
                    hits.append(hit)
    return hits


def print_report(hits, top: int = 10) -> None:
    """Print the total hit count and the *top* most frequent value per column.

    *hits* is a list of ``(date, page, referrer)`` tuples.  One table is
    printed for each heading in ``_COLUMNS``.  With no hits only the totals
    line and the separator are printed.
    """
    print('Total Hits: %s' % '{:,}'.format(len(hits)))
    print('+' * 80)
    if not hits:
        return

    # Label the columns directly.  Assigning ``df[i].name`` and reading it back
    # later only works while pandas hands back the same cached Series object.
    df = pd.DataFrame(hits, columns=list(_COLUMNS))
    for title in _COLUMNS:
        print('\n%s' % title)
        print('-' * len(title))
        # rename_axis(None) keeps to_string() from repeating the heading as an
        # index header on pandas versions that name the value_counts() index.
        counts = df[title].value_counts().rename_axis(None)
        tab = counts.apply(lambda x: "{:,}".format(x))
        print(tab.head(top).to_string())


def main(argv=None) -> None:
    """Parse the command line and print the report.

    *argv* is the argument list without the program name; ``None`` makes
    argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Parse httpd access.log files.')
    parser.add_argument('files', nargs='+',
                        help='One or more files to parse')
    parser.add_argument('--top', type=int, default=10,
                        help='Limit results to this number')
    args = parser.parse_args(argv)

    # Kept from the original script.  The printed tables hold pre-formatted
    # strings, so this only matters if float columns are ever displayed.
    pd.options.display.float_format = '{:,}'.format

    print_report(read_hits(args.files), args.top)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment