-
-
Save craigds/00331c6ff8fd2334de68a52ef0cd79c2 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
""" | |
Converts Internet Explorer 'capture network traffic' XML to a HAR file.
Turns out that XML is just a HAR file anyway, but in XML form. So this
just converts it to JSON, and Bob's your uncle.
Requires Python 2.7+ and lxml.
""" | |
from __future__ import unicode_literals | |
import argparse | |
import json | |
from lxml import objectify | |
import sys | |
# Text type applied to every leaf value: the built-in ``str`` on Python 3,
# ``unicode`` on Python 2 (the name is only looked up when that branch runs).
str_type = str if sys.version_info[0] >= 3 else unicode

# Tags whose children are emitted as a JSON array rather than a JSON object.
list_things = {
    'pages',
    'entries',
    'cookies',
    'queryString',
    'headers',
}
def xml_to_dict(element):
    """Recursively convert an objectify element into JSON-serialisable data.

    Args:
        element: An ``lxml.objectify`` element. Only its ``tag``, ``text``
            and ``getchildren()`` are used.

    Returns:
        A list of the converted children when the element's tag is in
        ``list_things``. Otherwise a dict mapping each child's tag to its
        converted value, or, for an element without children, its text as a
        ``str_type`` string (the empty string when it has no text).
    """
    # getchildren() is deliberate: plain iteration over an objectify element
    # walks same-named siblings rather than its children.
    children = element.getchildren()

    if element.tag in list_things:
        return [xml_to_dict(child) for child in children]

    if children:
        # Children that share a tag overwrite one another in this dict;
        # repeated elements only survive under the list_things containers.
        return {child.tag: xml_to_dict(child) for child in children}

    # Take the raw text instead of objectify's type-inferred ``pyval``:
    # round-tripping through pyval rewrites values ("007" -> "7",
    # "true" -> "True", "0.10" -> "0.1") before they are stringified.
    text = element.text
    return str_type(text) if text is not None else str_type()
def main():
    """Convert an IE network-capture XML file to HAR JSON from the command line.

    Both positional arguments are optional: ``infile`` falls back to standard
    input and ``outfile`` to standard output. The XML root element becomes
    the single top-level key of the JSON document, which is written with a
    two-space indent and sorted keys.
    """
    parser = argparse.ArgumentParser(description="Convert IE's crazy XML-HAR into a real HAR file")
    # nargs='?' is what makes a positional optional; without it argparse
    # requires both arguments and the stdin/stdout defaults are never used.
    parser.add_argument(
        'infile',
        nargs='?',
        # Hand the parser raw bytes so the document is not first decoded
        # with the platform's locale encoding.
        type=argparse.FileType('rb'),
        # Python 3 exposes the byte stream as sys.stdin.buffer; Python 2's
        # sys.stdin has no such attribute and is used directly.
        default=getattr(sys.stdin, 'buffer', sys.stdin),
    )
    parser.add_argument(
        'outfile',
        nargs='?',
        type=argparse.FileType('w'),
        default=sys.stdout,
    )
    args = parser.parse_args()

    tree = objectify.parse(args.infile)
    root = tree.getroot()
    d = {root.tag: xml_to_dict(root)}
    json.dump(d, args.outfile, indent=2, sort_keys=True)


if __name__ == '__main__':
    main()
For anyone struggling with file encodings on the Windows (MSYSGIT) command line like me, I created a fork that forces UTF-8 when reading and writing the files. This small fix cost me almost three hours today, since I was puzzled why lxml or an extra open() would ignore my encoding (unless I specified 'ascii'). Turns out argparse has a bit more magic in it than I thought.
Coming from PHP: I had to do the following to the JSON in order for it to validate in http://www.softwareishard.com/har/viewer/. The source XML came from IE11.
// Post-process the converted HAR file: cast the timing fields to integers
// and fill in fields that are absent, then write the result to a second file.
$har = json_decode(file_get_contents('NetworkData.har'), TRUE);

foreach ($har['log']['entries'] as &$entry) {
    // These values arrive as strings; store them as integers instead.
    $entry['time'] = (int) $entry['time'];
    foreach (array('receive', 'send', 'wait') as $phase) {
        $entry['timings'][$phase] = (int) $entry['timings'][$phase];
    }

    // Supply an empty redirect URL and an empty cache entry when missing.
    if (!isset($entry['response']['redirectURL'])) {
        $entry['response']['redirectURL'] = '';
    }
    if (!isset($entry['cache'])) {
        $entry['cache'] = array();
    }
}

$encoded = json_encode($har, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT | JSON_PRETTY_PRINT);
file_put_contents('NetworkData2.har', $encoded);
Does it work in Python 3, or only in 2.7?
Thanks a lot for this handy tool, much appreciated!
No Bob is not my uncle :P
I want to record a Chrome session as a HAR file and play it back in IE. Is there an option for that?
So glad I landed here, it's very useful :D Thank you so much!
YES, thanks a lot! The other alternatives were getting Fiddler to work on Linux or getting a Windows VM....