Created May 5, 2012 11:49
Parse a HAR (HTTP Archive) and return URLs which do not match a specific domain
#!/usr/bin/env python3
"""
Parse a HAR (HTTP Archive) and return URLs which do not match a specific domain

HAR Spec: http://groups.google.com/group/http-archive-specification/web/har-1-2-spec
HAR files can be saved, for example, from Chrome Developer Tools

Copyleft 2012 Andrei Kost <[email protected]>
based on Ian Gallagher's <[email protected]> script https://gist.github.com/892479

Example usage: ./har_request_urls.py foo.har
"""
import json
import sys
from urllib.parse import urlparse

except_domain = 'example.com'  # domain to exclude

if '__main__' == __name__:
    har_file = sys.argv[1]

    # Read the HAR archive (skip the UTF-8 BOM if present - Fiddler2 exports contain it)
    with open(har_file, 'rb') as f:
        har_data = f.read()
    skip = 3 if har_data[:3] == b'\xef\xbb\xbf' else 0
    har = json.loads(har_data[skip:])

    # Collect the unique request URLs whose hostname differs from the excluded domain
    matching_urls = {
        entry['request']['url']
        for entry in har['log']['entries']
        if urlparse(entry['request']['url']).hostname != except_domain
    }
    for url in sorted(matching_urls):
        print(url)
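For a quick sanity check without a real capture, the same filtering logic can be exercised on a minimal in-memory HAR-shaped document (the entry URLs below are made up for illustration):

```python
import json
from urllib.parse import urlparse

# Minimal HAR-shaped JSON: only log.entries[].request.url is needed here.
# The URLs are hypothetical, chosen to show filtering and deduplication.
sample_har = json.loads("""
{
  "log": {
    "entries": [
      {"request": {"url": "http://example.com/index.html"}},
      {"request": {"url": "http://cdn.other.net/app.js"}},
      {"request": {"url": "http://cdn.other.net/app.js"}}
    ]
  }
}
""")

except_domain = 'example.com'

# Same set comprehension as the script: keep URLs from other hostnames,
# collapse duplicates, and sort for stable output.
urls = sorted({
    entry['request']['url']
    for entry in sample_har['log']['entries']
    if urlparse(entry['request']['url']).hostname != except_domain
})
print(urls)  # -> ['http://cdn.other.net/app.js']
```

Note that the filter compares the parsed hostname, not the raw URL string, so `http://example.com.evil.net/` would still be listed (its hostname is `example.com.evil.net`, not `example.com`).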