Last active
December 10, 2022 13:34
-
-
Save eslof/48252860901bcb82a8e7456ccf65cb50 to your computer and use it in GitHub Desktop.
Script that takes an .mht file (saved via right-click -> "Save as" -> "Web page, Single File") and renders its content to a PNG image.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
from base64 import b64decode
from io import BytesIO
from pathlib import Path
from tkinter import Tk
from tkinter.filedialog import askopenfilename, asksaveasfilename

from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# output settings
# The saved image is the screenshot's size divided by this value
# (1 keeps the original size; must be greater than 0).
resize_divisor = 2


def scaled_size(width, height, divisor):
    """Return ``(width, height)`` with both dimensions divided by *divisor*.

    Results are truncated to integers and clamped to at least 1 pixel so a
    large divisor can never produce an empty (0-pixel) image size.

    Raises:
        ValueError: if *divisor* is not greater than zero.
    """
    if divisor <= 0:
        raise ValueError("Invalid resize divisor")
    return max(1, int(width / divisor)), max(1, int(height / divisor))


def choose_paths():
    """Ask the user for the input .mht file and the output .png file.

    Returns:
        A ``(html_file_path, output_file_path)`` tuple of strings.

    Exits the program with a message if either dialog is cancelled.
    """
    # we don't want a full GUI, so keep the root window from appearing
    root = Tk()
    root.withdraw()
    try:
        # show an "Open" dialog box and return the path to the selected file
        open_type = [("Web page, Single File", "*.mht")]
        html_file_path = askopenfilename(defaultextension='.mht', filetypes=open_type)
        if not html_file_path:
            sys.exit("No file selected")

        # show a "Save" dialog box and return the path to the selected file
        save_type = [("PNG Image", "*.png")]
        output_file_path = asksaveasfilename(defaultextension='.png', filetypes=save_type)
        # asksaveasfilename returns an empty string if the dialog is cancelled
        if not output_file_path:
            sys.exit("No save selected")
    finally:
        # release the hidden root window once the dialogs are done
        root.destroy()
    return html_file_path, output_file_path


def render_content_screenshot(html_file_path):
    """Load *html_file_path* in headless Chrome and screenshot its content.

    The page is reduced to the first child of the element whose class starts
    with ``react-scroll-to-bottom--css-``, the window is resized to that
    element's full scroll size, and a screenshot is taken.

    Returns:
        The screenshot as PNG-encoded bytes.
    """
    # create a new Chrome webdriver running in headless mode; the command-line
    # switch is used because the `options.headless` setter was removed from
    # recent Selenium releases
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    try:
        # navigate to the HTML file; as_uri() builds a well-formed, percent-
        # encoded file:// URL (also correct for Windows drive paths)
        driver.get(Path(html_file_path).resolve().as_uri())

        # clean the html page to only contain the content
        content = driver.find_element('css selector', "[class^='react-scroll-to-bottom--css-'] > *:first-child")
        body = driver.find_element('css selector', "body")
        # move the element into root, then clear body and move back element
        driver.execute_script("document.documentElement.appendChild(arguments[0])", content)
        driver.execute_script("arguments[0].innerHTML = ''", body)
        driver.execute_script("arguments[0].appendChild(arguments[1])", body, content)

        # get the total width and height of the content element, including overflow
        height = driver.execute_script('return arguments[0].scrollHeight;', content)
        width = driver.execute_script('return arguments[0].scrollWidth;', content)
        # set the window size to the full size of the content
        driver.set_window_size(width, height)

        # grab screenshot
        return b64decode(driver.get_screenshot_as_base64())
    finally:
        # always shut the browser down, even if loading or scripting failed
        driver.quit()


def main():
    """Convert a user-selected .mht file into a (optionally downscaled) PNG."""
    # validate the setting before doing any expensive browser work
    if resize_divisor <= 0:
        sys.exit("Invalid resize divisor")

    html_file_path, output_file_path = choose_paths()
    print("Converting", html_file_path, "to", output_file_path)

    im = Image.open(BytesIO(render_content_screenshot(html_file_path)))
    # resize screenshot to its size divided by resize_divisor (1 = keep as is)
    if resize_divisor != 1:
        # Image.LANCZOS is the filter formerly aliased as Image.ANTIALIAS
        im = im.resize(scaled_size(im.width, im.height, resize_divisor), Image.LANCZOS)
    im.save(output_file_path)
    print("Saved screenshot to", output_file_path)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment