Last active
December 20, 2015 19:19
-
-
Save mdaniel/6182528 to your computer and use it in GitHub Desktop.
Convert the .har files saved by Chrome into an OWASP WebScarab session. This is obviously an imperfect conversion, since Chrome chooses to omit the response body more often than not.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import calendar
import json
import os
import re
import sys
import time
import urllib
class Conversation(object):
    """One HAR entry rendered as a WebScarab "conversation".

    Files produced in the current working directory (WebScarab session
    layout, as observed from a real session):

    conversationlog:
        ### Conversation : 1
        RESPONSE_SIZE: 151
        WHEN: 1375942513708
        COOKIE: JIMBO=/
        METHOD: GET
        STATUS: 200 OK
        URL: http://atesis.local:80/
        ORIGIN: Proxy
    cookies:
        ### Cookie : atesis.local/ JIMBO
        1375942925153 JIMBO=/; Domain=atesis.local; Path=/
    urlinfo
        ### URL : http://atesis.local:80/
        METHODS: GET
        SIGNATURE: GET http://atesis.local:80/ (null)
        STATUS: 200 OK
        CHECKSUM: 5b6d74f1453e20c09d6a20d909779ad7
        ### URL : http://atesis.local:80/fred/
        REFERER: http://atesis.local:80/
    fragments/
    conversations/
        %d-request / %d-response

    Only ``conversationlog``, ``conversations/%d-request`` and
    ``conversations/%d-response`` are written by default; ``urlinfo`` is
    written only when ``want_urlinfo`` is switched on.
    """

    # startedDateTime is ISO 8601: seconds, optional fraction, optional zone
    # ("Z" or a numeric UTC offset).
    _STARTED_RE = re.compile(
        r'^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})'
        r'(?:\.(\d+))?'
        r'(?:Z|([+-])(\d{2}):?(\d{2}))?$')

    def __init__(self, num, entry):
        """Wrap HAR ``entry``; ``num`` is its 0-based index in the HAR file.

        Conversation numbers are 1-based, hence the ``1 + num``.
        """
        self.num = 1 + num
        self.entry = entry
        self.req = entry['request']
        self.resp = entry['response']
        ## this is causing some kind of Scarab collision on load so just omit it
        self.want_urlinfo = False

    def write(self):
        """Append this conversation to the session in the working directory.

        Raises ValueError when ``startedDateTime`` cannot be parsed, and
        KeyError when a mandatory HAR field is missing.
        """
        # Checked on every call (not only for conversation 1) so a single
        # Conversation can be written without relying on call order.
        if not os.path.isdir('conversations'):
            os.mkdir('conversations')
        body = self._response_body()
        if self.want_urlinfo:
            self._append_urlinfo()
        self._append_conversationlog(body is not None)
        self._write_request()
        self._write_response(body)

    def _started_millis(self):
        """Return ``startedDateTime`` as Java-style epoch milliseconds."""
        started = self.entry['startedDateTime']
        match = self._STARTED_RE.match(started)
        if match is None:
            raise ValueError('unparseable startedDateTime: %r' % (started,))
        stamp, fraction, sign, hours, minutes = match.groups()
        seconds = calendar.timegm(time.strptime(stamp, '%Y-%m-%dT%H:%M:%S'))
        if sign:
            # A "+hh:mm" zone is ahead of UTC, so subtract it to reach UTC.
            offset = int(hours) * 3600 + int(minutes) * 60
            seconds -= offset if sign == '+' else -offset
        # struct_time has no sub-second field, so the milliseconds are taken
        # from the text directly (truncated/padded to three digits).
        millis = int((fraction or '0')[:3].ljust(3, '0'))
        return seconds * 1000 + millis

    def _response_body(self):
        """Return the response body as bytes, or None when the HAR has none."""
        content = self.resp['content']
        if 'text' not in content:
            return None
        text = content['text']
        encoding = content.get('encoding')
        if encoding is None:
            # No explicit marker: fall back to treating images as base64.
            mime_type = content.get('mimeType') or u''
            is_base64 = mime_type.lower().startswith('image/')
        else:
            # HAR flags base64 bodies via content.encoding, whatever the MIME
            # type (fonts, archives, ...).
            is_base64 = encoding.lower() == 'base64'
        if is_base64:
            return base64.b64decode(text.encode('ascii'))
        return text.encode('utf-8')

    def _append_urlinfo(self):
        """Append this conversation's URL record to ``urlinfo``."""
        record = [
            u'### URL : %s\n' % self.req['url'],
            u'METHODS: %s\n' % self.req['method'],
            u'STATUS: %d %s\n' % (
                self.resp['status'], self.resp['statusText']),
            u'SIGNATURE: %s %s (null)\n' % (
                self.req['method'], self.req['url']),
            u'\n',
        ]
        with open('urlinfo', 'ab') as urlinfo:
            urlinfo.write(u''.join(record).encode('utf-8'))

    def _append_conversationlog(self, have_content):
        """Append this conversation's summary record to ``conversationlog``."""
        # Without a captured body the size is reported as zero.
        resp_size = self.resp['content']['size'] if have_content else 0
        record = [
            u'### Conversation : %d\n' % self.num,
            u'RESPONSE_SIZE: %d\n' % resp_size,
            u'WHEN: %d\n' % self._started_millis(),
            u'METHOD: %s\n' % self.req['method'],
            u'STATUS: %d %s\n' % (
                self.resp['status'], self.resp['statusText']),
            u'URL: %s\n' % self.req['url'],
            u'\n',
        ]
        with open('conversationlog', 'ab') as c_log:
            c_log.write(u''.join(record).encode('utf-8'))

    def _write_request(self):
        """Write ``conversations/<num>-request`` as a raw HTTP request."""
        ## Scarab always thinks it is a proxy request, so the request line
        ## carries the absolute URL
        head = [u'%s %s %s\r\n' % (
            self.req['method'], self.req['url'], self.req['httpVersion'])]
        head.extend(u'%s: %s\r\n' % (h['name'], h['value'])
                    for h in self.req['headers'])
        head.append(u'\r\n')

        payload = b''
        if 'postData' in self.req:
            payload = self.req['postData'].get('text', u'').encode('utf-8')
            declared = self.req.get('bodySize', -1)
            # bodySize counts bytes and is -1 when unknown; a mismatch is
            # worth reporting but should not abort the whole conversion.
            if declared >= 0 and declared != len(payload):
                sys.stderr.write(
                    'warning: conversation %d: postData.text is %d bytes '
                    'but bodySize is %d\n'
                    % (self.num, len(payload), declared))

        with open('conversations/%d-request' % self.num, 'wb') as fh:
            fh.write(u''.join(head).encode('utf-8'))
            # The body is written verbatim: no trailing newline is added.
            fh.write(payload)

    def _write_response(self, body):
        """Write ``conversations/<num>-response`` as a raw HTTP response.

        ``body`` is the decoded body bytes, or None when the HAR has none.
        """
        body_len = 0 if body is None else len(body)
        head = [u'%s %d %s\r\n' % (
            self.resp['httpVersion'], self.resp['status'],
            self.resp['statusText'])]
        for h in self.resp['headers']:
            h_name = h['name']
            h_val = h['value']
            lowered = h_name.lower()
            ## eat the C-E and T-E all the time, because har doesn't encode
            ## that way but Scarab will try to interpret them
            if lowered in ('content-encoding', 'transfer-encoding'):
                continue
            if lowered == 'content-length':
                # Advertise the length of what is actually written: zero
                # when the body is missing, otherwise the decoded size (the
                # server's figure may describe the compressed payload).
                h_val = u'%d' % body_len
            head.append(u'%s: %s\r\n' % (h_name, h_val))
        head.append(u'\r\n')

        with open('conversations/%d-response' % self.num, 'wb') as fh:
            fh.write(u''.join(head).encode('utf-8'))
            if body is not None:
                fh.write(body)
def main( argv ):
    """Convert the HAR file named by ``argv[1]`` into a WebScarab session.

    ``argv`` is a command-line style list: program name first, then the path
    of the ``.har`` file.  One Conversation is written, in file order, for
    every entry in the HAR; the output lands in the current working
    directory.  Exits with a usage message when the path is missing.

    The parts of the HAR structure that matter here:

    log
        version : string
        creator
            name : string
            version : string
        pages : list
        entries : list
            request
                method : string
                url : string
                httpVersion : string
                headers : list
                    name : string
                    value : string
                queryString : list
                cookies : list
                bodySize : number
            response
                status : number
                statusText : string
                httpVersion : string
                headers : list
                cookies : list
                content
                    size : number
                    compression : number
                redirectURL : string
                headerSize : number
                bodySize : number
    """
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        raise SystemExit('usage: %s <file.har>' % prog)
    # Honour the argv that was passed in rather than reaching for sys.argv,
    # so the function can be driven programmatically.
    har_filename = argv[1]
    with open( har_filename, 'rb' ) as fh:
        har = json.load( fh )
    # "page" is a destination that the user saw
    # "entry" is something Chrome loaded
    for index, entry in enumerate(har['log']['entries']):
        Conversation(index, entry).write()
# Script entry point: hand the raw command line to main() when this file is
# executed directly rather than imported.
if '__main__' == __name__:
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment