Last active
May 6, 2024 01:58
-
-
Save tomatohater/8853161 to your computer and use it in GitHub Desktop.
parsehar.py - Reads a har file from the filesystem, converts to CSV, then dumps to stdout.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Reads a har file from the filesystem, converts to CSV, then dumps to | |
stdout. | |
""" | |
import argparse | |
import json | |
from urlparse import urlparse | |
def main(harfile_path):
    """Read a HAR file and print one CSV row per entry to stdout.

    Each row holds, in order: a running index starting at 1, the request
    URL (always wrapped in double quotes), the hostname parsed from that
    URL, the response body size in bytes, the same size in kilobytes, and
    the response MIME type ('unknown' when the entry does not provide one).

    Args:
        harfile_path: Path of the HAR (JSON) file to read.
    """
    # The context manager closes the file even if JSON parsing fails.
    with open(harfile_path) as harfile:
        harfile_json = json.load(harfile)

    for i, entry in enumerate(harfile_json['log']['entries'], 1):
        url = entry['request']['url']
        hostname = urlparse(url).hostname
        size_bytes = entry['response']['bodySize']
        size_kilobytes = float(size_bytes) / 1024
        mimetype = entry['response']['content'].get('mimeType', 'unknown')
        # The URL is emitted inside double quotes, so any embedded quote is
        # doubled to keep the row parseable as CSV.
        quoted_url = url.replace('"', '""')
        # A single parenthesised argument keeps this line valid both as a
        # Python 2 print statement and as a Python 3 print() call.
        print('%s,"%s",%s,%s,%s,%s' % (i, quoted_url, hostname, size_bytes,
                                       size_kilobytes, mimetype))
if __name__ == '__main__':
    # Command-line entry point: takes exactly one positional argument, the
    # path of the HAR file, and hands it to main().
    parser = argparse.ArgumentParser(
        prog='parsehar',
        description='Parse .har files into comma separated values (csv).',
    )
    parser.add_argument(
        'harfile',
        type=str,
        nargs=1,
        help='path to harfile to be processed.',
    )
    cli_args = parser.parse_args()
    # nargs=1 stores the value as a one-element list; pass its only item.
    main(cli_args.harfile[0])
Hi, thank you for this code !
What about actually saving the CSV to a file instead of printing it?
"""Reads a har file from the filesystem, converts to CSV, then writes the
result to a .csv file alongside the input file.
"""
import argparse
import csv
import json
import os
from urlparse import urlparse
def main(harfile_path):
    """Read a HAR file and write its entries to a CSV file.

    The CSV is written alongside the input, using the input path with its
    extension replaced by ``.csv`` (``page.har`` becomes ``page.csv``).
    The first row is a header; each following row holds a running index
    starting at 1, the request URL, the hostname parsed from that URL, the
    response body size in bytes, the same size in kilobytes, and the
    response MIME type ('unknown' when the entry does not provide one).

    Args:
        harfile_path: Path of the HAR (JSON) file to read.
    """
    # The context manager closes the input even if JSON parsing fails.
    with open(harfile_path) as harfile:
        harfile_json = json.load(harfile)

    # splitext drops the real extension, whatever its length; slicing off
    # a fixed three characters left the dot behind ("page..csv").
    csv_path = os.path.splitext(harfile_path)[0] + '.csv'
    with open(csv_path, 'w') as f:
        csv_file = csv.writer(f)
        csv_file.writerow(['id', 'url', 'hostname', 'size (bytes)',
                           'size (kilobytes)', 'mimetype'])
        for i, entry in enumerate(harfile_json['log']['entries'], 1):
            url = entry['request']['url']
            hostname = urlparse(url).hostname
            size_bytes = entry['response']['bodySize']
            size_kilobytes = float(size_bytes) / 1024
            mimetype = entry['response']['content'].get('mimeType',
                                                        'unknown')
            csv_file.writerow([i, url, hostname, size_bytes,
                               size_kilobytes, mimetype])
if __name__ == '__main__':
    # Script entry point: parse the single required HAR path from the
    # command line and convert that file.
    cli = argparse.ArgumentParser(
        description='Parse .har files into comma separated values (csv).',
        prog='parsehar',
    )
    cli.add_argument(
        'harfile',
        nargs=1,
        type=str,
        help='path to harfile to be processed.',
    )
    parsed = cli.parse_args()
    # With nargs=1 the parsed value is a list holding exactly one path.
    main(parsed.harfile[0])
Very useful, indeed. Thank you very much!
I have also included the time in the csv:
csv_file.writerow(['id', 'url', 'time','hostname', 'size (bytes)', 'size (kilobytes)', 'mimetype'])
...
time = entry['time']
...
csv_file.writerow([i, url, time, urlparts.hostname, size_bytes, size_kilobytes, mimetype])
Best.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Very useful code.
Thanks!