Created
May 20, 2013 11:19
-
-
Save nbari/5611685 to your computer and use it in GitHub Desktop.
nginx log parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
nginx log format

log_format main '$server_name | $host | $remote_addr | $bytes_sent | $time_local | $request_method | $request_uri | $request_time | $status | $http_referer | $http_x_forwarded_for | $http_user_agent | $gzip_ratio | $connection';
"""
import math
import os
import sys
import time

import GeoIP
# Wall-clock timestamp captured when the module is loaded; main() subtracts
# it from the current time to report how long the run took.
start_time = time.time()
def main(log_file):
    """Print the total bytes sent per country for an nginx access log.

    Every line of ``log_file`` is split on ``' | '``.  Following the
    ``log_format main`` layout in the module docstring, field 2 is
    ``$remote_addr`` and field 3 is ``$bytes_sent``.  Bytes are summed per
    address, addresses are mapped to a country name through GeoIP, and the
    per-country totals are printed in descending order, followed by the
    elapsed time since the module was loaded.

    Args:
        log_file: Path of the access log to read.
    """
    bytes_by_ip = {}
    traffic_by_country = {}

    # The with statement closes the file even if an exception is raised, and
    # iterating the file object reads it line by line, so large logs are not
    # loaded into memory at once.
    with open(log_file) as f:
        for line in f:
            fields = line.split(' | ')
            try:
                addr = fields[2]
                sent = int(fields[3])
            except (IndexError, ValueError):
                # Truncated line or non-numeric byte count: skip this line
                # only, without hiding unrelated errors such as Ctrl-C.
                continue
            # Keep only fields whose length fits a dotted IPv4 address
            # (7 to 15 characters).
            if 6 < len(addr) <= 15:
                bytes_by_ip[addr] = bytes_by_ip.get(addr, 0) + sent

    gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
    for addr, sent in bytes_by_ip.items():
        # NOTE(review): the lookup presumably yields None for addresses that
        # are not in the database; such traffic is then grouped under the
        # key None and printed as "None" -- confirm against the GeoIP API.
        country = gi.country_name_by_addr(addr)
        traffic_by_country[country] = traffic_by_country.get(country, 0) + sent

    ranked = sorted(traffic_by_country, key=traffic_by_country.get,
                    reverse=True)
    for country in ranked:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s -- bytes sent: %s'
              % (country, convertSize(traffic_by_country[country])))
    print('%s sec' % (time.time() - start_time))
def convertSize(size):
    """Return ``size`` (a byte count) as a human-readable string.

    The value is scaled by powers of 1024, rounded to two decimals and
    suffixed with the matching unit, e.g. ``1536`` -> ``'1.5 KB'``.

    Args:
        size: Number of bytes (int or float), expected to be non-negative.

    Returns:
        A string such as ``'500.0 B'`` or ``'1.0 MB'``; ``'0B'`` when the
        size is zero or rounds to zero.

    Raises:
        ValueError: If ``size`` is negative.
    """
    # The table starts at plain bytes so index 0 means "not scaled at all".
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    if size < 0:
        raise ValueError('size must be non-negative, got %r' % (size,))
    if size == 0:
        return '0B'

    # Repeated division avoids the float rounding that floor(log(size, 1024))
    # is exposed to at exact powers of 1024, and the index bound keeps
    # values beyond the largest unit from running off the table.
    value = float(size)
    i = 0
    while value >= 1024 and i < len(size_name) - 1:
        value /= 1024
        i += 1

    s = round(value, 2)
    if s > 0:
        return '%s %s' % (s, size_name[i])
    return '0B'
if __name__ == "__main__":
    # Script entry point: expects the access-log path as the first
    # command-line argument.
    arguments = sys.argv
    if len(arguments) < 2:
        sys.exit('Usage: %s access.log' % arguments[0])

    log_path = arguments[1]
    if os.path.exists(log_path):
        main(log_path)
    else:
        sys.exit('ERROR: log file %s was not found!' % log_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment