Last active
October 23, 2023 19:47
-
-
Save royshil/504b3178f68d28b0cfe20b85af64d20d to your computer and use it in GitHub Desktop.
Export Tweet as Image (screenshot embed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import html
import logging
import os
import subprocess
import sys
import urllib.parse
from pathlib import Path

import gradio as gr
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Command-line interface. Arguments are parsed at import time because the
# script entry point reads the module-level ``args``.
parser = argparse.ArgumentParser(description="Create a screenshot from twitter embed")
# optional argument for multiple urls to get an image for (only used in headless mode)
parser.add_argument(
    "--urls", nargs="+", help="URLs to get images for (only used in headless mode)"
)
# allow running as gradio interface, add an argument to opt for headless mode
parser.add_argument(
    "--headless", action="store_true", help="Run in headless mode (no GUI)"
)
args = parser.parse_args()

# Make the INFO-level progress messages emitted by this script visible.
logging.getLogger().setLevel(logging.INFO)

# Absolute directory containing this script. ``__file__`` can be a bare
# relative name (e.g. "script.py"), in which case ``dirname`` alone returns
# "" and paths built as f"{script_directory}/..." would point at the
# file-system root; ``abspath`` prevents that.
script_directory = os.path.dirname(os.path.abspath(__file__))

def _build_chrome_options():
    """Return the ChromeOptions used for the headless 1280x1280 browser."""
    options = webdriver.ChromeOptions()
    options.add_argument("--allow-insecure-localhost")
    options.add_experimental_option(
        "excludeSwitches", ["ignore-certificate-errors", "enable-automation"]
    )
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-web-security")
    options.add_argument("--autoplay-policy=no-user-gesture-required")
    options.add_argument("--headless")
    options.add_argument("--nogpu")
    options.add_argument("--disablegpu")
    options.add_argument("--window-size=1280,1280")
    options.add_experimental_option("useAutomationExtension", False)
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": script_directory,
            "profile.default_content_setting_values.automatic_downloads": 2,
        },
    )
    options.add_argument("--mute-audio")
    return options


def _output_stem(url_for_embed_tool):
    """Turn a twitter.com URL into a file-safe base name (alphanumerics and '_')."""
    stem = url_for_embed_tool.replace("https://twitter.com/", "")
    # replace any non alphanumeric characters with underscores
    # https://stackoverflow.com/a/295146/149721
    return "".join(c if c.isalnum() else "_" for c in stem)


def get_photo_from_tweet(tweet_url):
    """Render a tweet's official embed in headless Chrome and return it as an image.

    The tweet URL is submitted to publish.twitter.com to obtain the embed
    snippet, the snippet is written to a temporary local HTML page, that page
    is loaded and screenshotted, and the screenshot is trimmed with
    ImageMagick's ``convert``.

    Side effects: writes ``screenshot.png`` and ``<sanitised-url>.png`` into
    the current working directory, and temporarily writes
    ``embed_twitter.html`` next to this script.

    Args:
        tweet_url: URL of the tweet; ``x.com`` is rewritten to ``twitter.com``.

    Returns:
        The trimmed screenshot opened as a ``PIL.Image``.

    Raises:
        selenium.common.exceptions.TimeoutException: if the embed code, the
            embed iframe, or its images do not appear within 10 seconds.
        RuntimeError: if the browser could not write the screenshot file.
        subprocess.CalledProcessError: if ``convert`` exits with an error.
    """
    # replace x.com with twitter.com
    url_for_embed_tool = tweet_url.replace("x.com", "twitter.com")
    output_filename = _output_stem(url_for_embed_tool)
    # Resolved so that an empty or relative script_directory still yields an
    # absolute path, which Path.as_uri() requires.
    embed_page = Path(script_directory).resolve() / "embed_twitter.html"

    logging.info("Create environment")
    driver = webdriver.Chrome(options=_build_chrome_options())
    try:
        logging.info("Loading embed tool page")
        # add the embed url to the query string, url-escape it
        driver.get(
            "https://publish.twitter.com/?query="
            + urllib.parse.quote(url_for_embed_tool)
        )

        logging.info("Waiting for page to load")
        # until() raises TimeoutException instead of returning None, so no
        # explicit "not found" check is needed here.
        embed_code = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "EmbedCode-code"))
        )
        embed_code_html = embed_code.get_attribute("innerHTML")
        logging.info("Embed code loaded")
        # the snippet is shown HTML-escaped on the page; decode it
        embed_code_html = html.unescape(embed_code_html)
        logging.info("Embed code decoded")

        logging.info("Create embed code HTML")
        embedding_html = (
            "<html>\n"
            '<head><meta charset="utf-8"></head>\n'
            "<body>\n"
            f"{embed_code_html}\n"
            "</body>\n"
            "</html>\n"
        )
        # Tweets routinely contain non-ASCII text, so fix the encoding rather
        # than relying on the platform default.
        embed_page.write_text(embedding_html, encoding="utf-8")

        # load the embed code
        driver.get(embed_page.as_uri())
        logging.info("Creating video...")
        # log the current html
        logging.info(driver.page_source)

        # wait until the iframe is loaded, then look inside it
        iframe = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "iframe"))
        )
        driver.switch_to.frame(iframe)
        # wait until the <img> elements of the embed are present
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.TAG_NAME, "img"))
        )
        # switch back to the main frame so the whole page is captured
        driver.switch_to.default_content()
        logging.info("Done.")

        # save_screenshot returns False when the file could not be written;
        # without this check a stale screenshot.png could be converted.
        if not driver.save_screenshot("screenshot.png"):
            raise RuntimeError("Could not save screenshot.png")
    finally:
        # Always end the browser session and drop the temporary page, even
        # when a wait timed out or another step failed.
        driver.quit()
        if embed_page.exists():
            embed_page.unlink()

    logging.info("Converting...")
    # trim whitespace with imagemagick; argument list (no shell) and
    # check=True so a failed conversion is reported instead of ignored
    subprocess.run(
        ["convert", "-trim", "screenshot.png", f"{output_filename}.png"],
        check=True,
    )
    logging.info("Done.")

    # read the trimmed screenshot with PIL
    return Image.open(f"{output_filename}.png")


if __name__ == "__main__":
    # Script entry point: serve the Gradio UI by default, or process the
    # URLs given on the command line when --headless is passed.
    if not args.headless:
        # interactive mode - expose get_photo_from_tweet as a text -> image app
        gr.Interface(
            fn=get_photo_from_tweet,
            inputs="text",
            outputs="image",
            title="Twitter Embed Screenshot",
            description="Create a screenshot from a Twitter embed",
            allow_flagging=False,
            allow_screenshot=False,
            allow_download=False,
        ).launch()
    elif args.urls is not None:
        # headless mode - one screenshot per URL, in the order given
        for tweet in args.urls:
            get_photo_from_tweet(tweet)
    else:
        # headless mode needs at least one URL to work on
        logging.error("URL not specified")
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment