Created
June 2, 2014 11:42
-
-
Save Duologic/979345d7559597d781a9 to your computer and use it in GitHub Desktop.
Quick-and-dirty JSON-to-CSV log parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import csv
import datetime
import json
import os
import sys

from dateutil import parser as dateparser
def read_json_lines(path):
    """Parse a log file that holds one JSON document per line.

    Args:
        path: Path of the log file to read.

    Returns:
        A ``(records, errors)`` tuple: the decoded JSON values in file
        order, and the number of lines that could not be decoded (blank
        lines count as undecodable).
    """
    records = []
    errors = 0
    # The context manager closes the file even if reading fails part-way.
    with open(path) as jfile:
        for line in jfile:
            try:
                records.append(json.loads(line))
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass: skip the
                # malformed line and keep a count for the summary.
                errors += 1
    return records, errors


def simplify_requests(records):
    """Reduce decoded log records to date/uri rows grouped by calendar day.

    Args:
        records: Iterable of mappings with an ``"@timestamp"`` string and
            an ``"@fields"`` mapping whose ``"request"`` value is a
            whitespace-separated request line.

    Returns:
        A dict mapping ``str(date)`` to the list of row dicts for that day.
        Each row has ``'date'`` (the full parsed timestamp as a string) and
        ``'uri'`` (second token of the request line, query string removed).

    Raises:
        KeyError, IndexError: If a record lacks the expected fields or its
            request line has fewer than two tokens.
    """
    by_day = {}
    for request in records:
        timestamp = dateparser.parse(request["@timestamp"])
        row = {
            'date': str(timestamp),
            # Second token of the request line, cut at the first '?'.
            'uri': request["@fields"]["request"].split()[1].split("?")[0],
        }
        by_day.setdefault(str(timestamp.date()), []).append(row)
    return by_day


def write_daily_csv(by_day):
    """Append each day's rows to ``output/output-<day>.csv``.

    Rows are written ';'-delimited without a header line. The 'user'
    column is declared but never populated by ``simplify_requests``, so it
    is emitted as an empty field.

    Args:
        by_day: Mapping of day string to a list of row dicts.
    """
    if not by_day:
        return
    # Create the target directory instead of failing on a fresh checkout.
    os.makedirs('output', exist_ok=True)
    for day, rows in by_day.items():
        # newline='' lets the csv module control line endings itself.
        with open('output/output-{}.csv'.format(day), 'a', newline='') as csvfile:
            dwriter = csv.DictWriter(
                csvfile,
                fieldnames=['date', 'user', 'uri'],
                delimiter=';',
                quotechar='"',
            )
            dwriter.writerows(rows)


def main(argv=None):
    """Convert JSON-per-line log files into per-day CSV files.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when a file has at least as many
            unreadable lines as readable ones.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', metavar='N', nargs='+')
    args = parser.parse_args(argv)

    for logfile in args.logfiles:
        sys.stdout.write("Reading file {}\n".format(logfile))
        jdata, errors = read_json_lines(logfile)
        sys.stdout.write("Lines read: {}\nLines not read: {}\n\n".format(len(jdata), errors))

        # Same threshold as the old errors/len(jdata) >= 1 test, written
        # without a division so an empty file no longer raises
        # ZeroDivisionError.
        if errors and errors >= len(jdata):
            sys.stderr.write("Too many unreadable lines in {}\n".format(logfile))
            sys.exit(1)

        sys.stdout.write("Simplifying request data...\n")
        simpelrequests = simplify_requests(jdata)

        sys.stdout.write("Writing to files...\n")
        write_daily_csv(simpelrequests)

    sys.stdout.write("Done.\n")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment