Forked from ilovefreesw/bulk_webpage_screenshots.py
Last active
October 5, 2022 15:13
-
-
Save chiraagshah-qa/e35c06438228b12f77b1deff597dd71a to your computer and use it in GitHub Desktop.
A Python/Selenium script that takes screenshots of web pages in bulk using headless Chrome, reading the URLs from a text file (one URL per line). Screenshots are saved in a timestamped directory. Before running it, set your own paths for sites_list, screenshots_dir_path and options.binary_location.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import time
from pathlib import Path

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from tqdm import tqdm
# Bulk-screenshot script: read URLs from a text file, load each one in
# headless Chrome, and save a JPEG of the page into a timestamped directory.

# Timestamp suffix so that every run writes into its own output directory.
timestr = time.strftime("%Y%m%d-%H%M%S")
sites_list = 'Location of Text File'
screenshots_dir_path = 'Location where screenshots should be saved'
screenshots_dir = f'{screenshots_dir_path}_{timestr}'
output_dir = Path(screenshots_dir)
output_dir.mkdir(parents=True, exist_ok=True)


def s(x):
    """Return ``document.body.parentNode.scroll<x>`` from the current page.

    ``x`` is the property suffix appended to ``scroll`` (for example
    ``'Width'`` or ``'Height'``). The lookup runs in the module-level
    ``driver``, so it can only be called once the driver below exists.
    The script itself does not call this helper; it is kept for manual
    window-size adjustment.
    """
    return driver.execute_script('return document.body.parentNode.scroll' + x)


with open(sites_list, "r") as f:
    # Drop blank lines so they are never handed to the browser as URLs.
    lines = [line.strip() for line in f if line.strip()]

options = Options()
# Pass the flag directly: newer Selenium 4 releases removed the
# `Options.headless` setter, so assigning to it no longer enables headless.
options.add_argument('--headless')
options.add_argument('--log-level=3')
options.binary_location = "Location of chrome.exe"
driver = webdriver.Chrome(options=options)

try:
    # Size the window once, before any page loads, so every page is laid
    # out at the capture size instead of being resized just before the shot.
    driver.set_window_size(2560, 1440)  # May need manual adjustment
    user_agent = driver.execute_script("return navigator.userAgent;")
    print(user_agent)

    # Output files are numbered consecutively; the counter only advances
    # after a screenshot has been written successfully.
    i = 1
    for link in tqdm(lines, ncols=65):
        png_path = output_dir / f'{i}.png'
        jpg_path = output_dir / f'{i}.jpg'
        try:
            driver.get(link)
            time.sleep(5)  # presumably lets the page finish rendering
            # get_screenshot_as_file reports a failed write by returning
            # False instead of raising, so turn that into an error here.
            if not driver.get_screenshot_as_file(str(png_path)):
                raise OSError(f'could not write {png_path}')
            # Optional file conversion to save space.
            with Image.open(png_path) as img:
                img.convert('RGB').save(jpg_path)
            png_path.unlink()
        except (WebDriverException, OSError):
            print("Error getting screenshot of " + link + ".")
            continue
        i = i + 1
finally:
    # Always shut the browser down, even on an unexpected error or Ctrl-C,
    # so no headless Chrome process is left running.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment