Skip to content

Instantly share code, notes, and snippets.

@eslof
Last active December 10, 2022 13:34
Show Gist options
  • Save eslof/48252860901bcb82a8e7456ccf65cb50 to your computer and use it in GitHub Desktop.
Save eslof/48252860901bcb82a8e7456ccf65cb50 to your computer and use it in GitHub Desktop.
Script that takes an .mht file created via right click -> Save as (Web page, Single File) and saves it as a PNG image.
from base64 import b64decode
from io import BytesIO
from pathlib import Path
from tkinter import Tk
from tkinter.filedialog import askopenfilename, asksaveasfilename

from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Output settings: the screenshot's width and height are each divided by this
# value before the image is saved (1 keeps the original size).
resize_divisor = 2

# We don't want a full GUI, so keep the root window from appearing.
root = Tk()
root.withdraw()

# Show an "Open" dialog box and return the path to the selected file.
# The result is empty (falsy) when the dialog is cancelled.
openType = [("Web page, Single File", "*.mht")]
html_file_path = askopenfilename(defaultextension='.mht', filetypes=openType)
if not html_file_path:
    root.destroy()
    # `raise SystemExit` instead of `exit()`: the latter is injected by the
    # `site` module for interactive use and is not always available.
    raise SystemExit("No file selected")

# Show a "Save" dialog box and return the path chosen for the output image.
# Like the open dialog, it yields an empty (falsy) value on cancel.
saveType = [("PNG Image", "*.png")]
output_file_path = asksaveasfilename(defaultextension='.png', filetypes=saveType)

# Both dialogs are done; release the hidden root window.
root.destroy()

if not output_file_path:
    raise SystemExit("No save selected")

print("Converting", html_file_path, "to", output_file_path)
# Render the selected page in headless Chrome, reduce it to its main content
# element, screenshot it, and save the (optionally downscaled) PNG.

# Validate the resize setting before paying the cost of starting a browser.
# A divisor of 1 is valid and simply means "save at full size".
if resize_divisor <= 0:
    raise SystemExit("Invalid resize divisor")

# Create a new Chrome webdriver running in headless mode.
options = webdriver.ChromeOptions()
# Use the command-line flag: the `options.headless` property is no longer
# available in current Selenium 4 releases.
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)
try:
    # Navigate to the HTML file. `as_uri()` builds a well-formed file:// URL
    # (handles Windows drive letters and percent-encodes special characters).
    driver.get(Path(html_file_path).as_uri())

    # Clean the page so that it only contains the content element.
    content = driver.find_element(
        'css selector',
        "[class^='react-scroll-to-bottom--css-'] > *:first-child",
    )
    body = driver.find_element('css selector', "body")

    # Park the element on the document root, clear the body, then move the
    # element back so it becomes the body's only child.
    driver.execute_script("document.documentElement.appendChild(arguments[0])", content)
    driver.execute_script("arguments[0].innerHTML = ''", body)
    driver.execute_script("arguments[0].appendChild(arguments[1])", body, content)

    # Get the total width and height of the content element, including overflow.
    height = driver.execute_script('return arguments[0].scrollHeight;', content)
    width = driver.execute_script('return arguments[0].scrollWidth;', content)

    # Size the window to the content so the screenshot covers all of it.
    driver.set_window_size(width, height)

    # Grab the screenshot (base64-encoded PNG data).
    screenshot = driver.get_screenshot_as_base64()
finally:
    # Always shut the browser down, even when a step above fails, so no
    # Chrome/chromedriver process is left behind.
    driver.quit()

im = Image.open(BytesIO(b64decode(screenshot)))
if resize_divisor != 1:
    # Resize the screenshot to its size divided by resize_divisor, never
    # letting a dimension collapse to zero. LANCZOS is the filter formerly
    # exposed as ANTIALIAS, which newer Pillow versions no longer provide.
    new_size = (
        max(1, int(im.width / resize_divisor)),
        max(1, int(im.height / resize_divisor)),
    )
    im = im.resize(new_size, Image.LANCZOS)
im.save(output_file_path)
print("Saved screenshot to", output_file_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment