Forked from christopherhesse/extract_chrome_cache_html.py
Created
October 14, 2020 06:38
-
-
Save h1code2/d9906e011d0be560227703c05b372d04 to your computer and use it in GitHub Desktop.
Extract the original content from the HTML source of a Chrome about:cache page.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip
import io
import mimetypes
import re
import sys
from email import message_from_string
from mimetools import Message
from StringIO import StringIO
def parse_headers(raw_headers):
    """Parse a raw HTTP response header block into a dict.

    The first line of the block (the response/status line) is discarded and
    the remaining ``Name: value`` lines are parsed as message headers.

    Args:
        raw_headers: Text of the header block, response line first.

    Returns:
        A dict mapping lower-cased header names to their values. When a
        header name is repeated, the last occurrence wins. A block that
        holds only the response line yields an empty dict.
    """
    # partition() never raises when there is no newline, unlike unpacking
    # the result of split('\n', 1).
    _response_line, _sep, headers_text = raw_headers.partition('\n')
    message = message_from_string(headers_text)
    # Lower-case the names so lookups such as headers.get('content-encoding')
    # succeed whatever capitalisation the server used.
    return dict((name.lower(), value) for name, value in message.items())
def filter_blank(lines):
    """Yield each entry of *lines* with surrounding whitespace stripped.

    Entries that are empty once stripped are skipped.
    """
    for entry in lines:
        stripped = entry.strip()
        if stripped == '':
            continue
        yield stripped
def convert_cache_line(cache_line):
    """Decode one hex-dump line into the raw bytes it encodes.

    The line is split on single spaces. The first token is skipped and up to
    16 following tokens are read as two-digit hexadecimal byte values.
    Decoding stops at the first empty token.
    NOTE(review): presumably the skipped token is the offset column and the
    empty token marks the padding of a short final row -- confirm against
    the about:cache dump format.

    Args:
        cache_line: One line of the hex dump, already stripped.

    Returns:
        The decoded bytes (``str`` on Python 2, ``bytes`` on Python 3).

    Raises:
        ValueError: If a non-empty token in the byte columns is not valid
            hexadecimal.
    """
    values = []
    for token in cache_line.split(' ')[1:17]:
        if token == '':
            break
        values.append(int(token, 16))
    # bytearray -> bytes yields a real byte string on both Python 2 and 3,
    # and avoids growing a string one character at a time.
    return bytes(bytearray(values))
def gzip_decompress(data):
    """Return the decompressed contents of the gzip stream *data*.

    Args:
        data: A complete gzip stream as a byte string.

    Returns:
        The decompressed bytes.
    """
    # BytesIO keeps the in-memory buffer binary-safe, and the with-block
    # makes sure the GzipFile is closed once the payload has been read.
    with gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb') as gzip_file:
        return gzip_file.read()
def decode_cache_html(cache_html):
    """Extract the response headers and data segments from cache-page HTML.

    Every ``<pre>...</pre>`` element is collected. The first one is parsed
    as the raw response headers; each later one that is not blank is treated
    as a hex dump and decoded line by line into one byte string.

    Args:
        cache_html: HTML source of the cache page, as text.

    Returns:
        A ``(headers, segments)`` tuple. ``headers`` is the dict produced by
        ``parse_headers``; ``segments`` is a list with one decoded byte
        string per non-blank ``<pre>`` element after the first.

    Raises:
        IndexError: If *cache_html* contains no ``<pre>`` element.
    """
    pre_blocks = re.findall(r'<pre>(.*?)</pre>', cache_html, re.DOTALL)
    headers = parse_headers(pre_blocks[0])
    segments = []
    for raw_content in filter_blank(pre_blocks[1:]):
        # filter_blank already drops empty lines, so no second check is
        # needed; join once instead of concatenating per line.
        dump_lines = filter_blank(raw_content.split('\n'))
        segments.append(b''.join(convert_cache_line(line) for line in dump_lines))
    return headers, segments
def main():
    """Decode each cache-page HTML file named on the command line.

    For every path in ``sys.argv[1:]`` the file is read and decoded, the
    second data segment is taken as the response body (gunzipped when the
    ``content-encoding`` header is ``gzip``), and the result is written next
    to the input as ``<filename>-decoded<extension>``. The extension is
    guessed from the ``content-type`` header and is empty when the header is
    missing or the type is unknown. Files without a second segment are
    reported on stderr and skipped.
    """
    for filename in sys.argv[1:]:
        with open(filename) as input_file:
            cache_html = input_file.read()
        headers, segments = decode_cache_html(cache_html)

        # segments[0] is usually the header + certificate
        if len(segments) < 2:
            sys.stderr.write(
                'skipping {0}: no content segment found\n'.format(filename))
            continue
        data = segments[1]
        if headers.get('content-encoding') == 'gzip':
            data = gzip_decompress(data)

        # Drop any parameters such as "; charset=utf-8" before the lookup.
        content_type = headers.get('content-type', '').split(';')[0].strip()
        extensions = mimetypes.guess_all_extensions(content_type)
        extension = extensions[0] if extensions else ''
        output_filename = filename + '-decoded' + extension
        print('writing to {0}'.format(output_filename))
        # The payload is raw bytes, so write in binary mode to avoid newline
        # translation or encoding errors.
        with open(output_filename, 'wb') as output_file:
            output_file.write(data)
# Run the command-line entry point only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment