-
-
Save Lewiscowles1986/645e79295efa84698f4e45cd06d610ea to your computer and use it in GitHub Desktop.
import json | |
import base64 | |
import os | |
import pathlib | |
from urllib.parse import urlparse | |
# Supported image MIME types, mapped to the file extension used when an
# image of that type is written to disk.
#
# Each MIME type must appear exactly once: a dict literal silently keeps only
# the LAST value given for a repeated key, so listing alternative extensions
# as separate entries does not work. Alternatives are noted in comments.
#
# Special thanks to https://gist.github.com/FurloSK/0477e01024f701db42341fc3223a5d8c
# Special mention, and thanks to MDN
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
mimetypes = {
    "image/webp": ".webp",
    "image/jpeg": ".jpeg",  # .jpg is also common, but .jpeg is official and thus preferred
    "image/png": ".png",
    "image/svg+xml": ".svg",
    "image/avif": ".avif",
    "image/bmp": ".bmp",
    "image/gif": ".gif",
    "image/vnd.microsoft.icon": ".ico",
    "image/tiff": ".tiff",  # .tif is also common, but .tiff is preferred here
}
def _bare_mimetype(raw):
    """Return *raw* lower-cased, without parameters such as ``; charset=utf-8``.

    A missing (``None``) or empty value yields ``""``, which matches nothing.
    """
    return (raw or "").split(";", 1)[0].strip().lower()


def _target_path(out_dir, url_path, ext):
    """Map a URL path to a file path below *out_dir* that ends in *ext*.

    The URL's directory structure is mirrored, but empty, ``.`` and ``..``
    segments are dropped so a crafted URL cannot climb out of *out_dir*.
    The last segment's own suffix is replaced by *ext*; when the URL has no
    usable last segment (e.g. ``/``) the file is named ``index``.
    """
    # URL paths always use "/", whatever the local OS separator is.
    segments = [s for s in url_path.split("/") if s not in ("", ".", "..")]
    stem = pathlib.PurePosixPath(segments[-1]).stem if segments else ""
    return os.path.join(out_dir, *segments[:-1], f"{stem or 'index'}{ext}")


def _decode_body(text, encoding):
    """Turn a HAR ``content.text`` value into the bytes to write to disk.

    With no encoding (or ``"literal"``) the text is the body itself and is
    encoded as UTF-8; any other encoding is treated as base64.
    """
    if not encoding or encoding == "literal":
        return text.encode(encoding="UTF-8", errors="strict")
    return base64.b64decode(text)


def extract_images(har_path, out_dir, ext_by_mimetype):
    """Write every image response stored in a HAR file below *out_dir*.

    Args:
        har_path: Path of the HAR (JSON) file to read.
        out_dir: Directory that receives the images; it and any needed
            sub-directories are created on demand.
        ext_by_mimetype: Mapping of lower-case MIME type to file extension
            (including the leading dot). Responses whose MIME type is not a
            key of this mapping are skipped.

    Returns:
        The list of file paths written, in HAR entry order.

    Raises:
        OSError: If the HAR file cannot be read or an image cannot be written.
        KeyError: If the HAR file has no ``log.entries`` list.
        ValueError: If the file is not valid JSON or a base64 body is invalid
            (``json.JSONDecodeError`` and ``binascii.Error`` are subclasses).
    """
    # Read as bytes so the json module detects the file's UTF encoding itself.
    with open(har_path, "rb") as har_file:
        har = json.load(har_file)

    written = []
    for entry in har["log"]["entries"]:
        content = (entry.get("response") or {}).get("content") or {}
        body = content.get("text")
        if not body:
            # Nothing was captured for this response (e.g. cached or failed).
            continue

        ext = ext_by_mimetype.get(_bare_mimetype(content.get("mimeType")))
        if ext is None:
            continue

        url = urlparse((entry.get("request") or {}).get("url", ""))
        target = _target_path(out_dir, url.path, ext)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        print(target)
        with open(target, "wb") as image_file:
            image_file.write(_decode_body(body, content.get("encoding")))
        written.append(target)
    return written


# Images are written below ./imgs; the directory is created when needed.
folder = os.path.join(os.getcwd(), "imgs")

if __name__ == "__main__":
    extract_images("src.har", folder, mimetypes)
Another update: I'm tracking this at https://github.com/Lewiscowles1986/har-img-extract/tree/python from now on. The latest update will likely be the last one made via Gist.
It doesn't have linting, instructions, automated checks, or anything else I'd really like. Lovely little hack, though.
@MrCheatEugene you say they tried; it's not the worst start. If I may ask: did they explain why C?
IDK, just for fun
@MrCheatEugene I've uploaded a working C example https://github.com/Lewiscowles1986/har-img-extract I've only built it on OSX, but I'd welcome contributions and feedback if your friend struggles to get it to build or encounters errors that I have not.
he managed to get it working yesterday, https://github.com/OhMyCatile/ExtractHar
Yeah, I saw the Rust edition. I don't know enough Rust to comment on it. Very cool that there are now so many forks of this code.
Would you like to please include an open-source or other type of license so that we know how we are legally allowed to use your code?
dnk8n, however the heck you like; use it to burn baby sheep for all I care.
@MrCheatEugene I've uploaded a working C example https://github.com/Lewiscowles1986/har-img-extract
I've only built it on OSX, but I'd welcome contributions and feedback if your friend struggles to get it to build or encounters errors that I have not.