Last active
May 4, 2023 09:54
-
-
Save ilovefreesw/36587762f3239162a4c1acef5e759822 to your computer and use it in GitHub Desktop.
A Python-Selenium script that takes full-page screenshots of web pages in bulk using headless Chrome, reading the URLs from a text file. Tutorial: https://www.ilovefreesoftware.com/26/tutorial/how-to-take-full-page-screenshot-in-bulk-from-multiple-urls.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from tqdm import tqdm
# --- Configuration: fill these in before running ----------------------------
Links_File = r''  # path to a text file containing one URL per line
OP_DIR = r''  # directory that receives the numbered PNG screenshots

# User agent reported to the visited sites instead of the headless default.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/107.0.4103.97 Safari/537.36'
)
PAGE_LOAD_WAIT_SECONDS = 5  # fixed pause after navigation so the page can render
WINDOW_WIDTH = 1024  # browser window width in pixels used for every screenshot


def read_links(path):
    """Return the URLs listed in the text file *path*, one per line.

    Surrounding whitespace is stripped and blank lines are skipped so that
    empty strings are never handed to the browser. The file is decoded as
    ``utf-8-sig`` so a leading byte-order mark does not end up glued to the
    first URL.
    """
    with open(path, "r", encoding="utf-8-sig") as f:
        return [url for url in (line.strip() for line in f) if url]


def page_height(driver, extra=0):
    """Return the current page's ``document.body.scrollHeight`` plus *extra*."""
    return driver.execute_script('return document.body.scrollHeight') + extra


def main():
    """Screenshot every URL in ``Links_File`` into ``OP_DIR`` as 1.png, 2.png, ...

    Numbers are consumed only by screenshots that were actually saved; the
    URL of every page that fails to load or save is printed instead.
    """
    if not Links_File:
        raise SystemExit("Links_File is empty: set it to the path of the URL list.")
    links = read_links(Links_File)
    if OP_DIR:
        os.makedirs(OP_DIR, exist_ok=True)

    options = webdriver.ChromeOptions()
    # Pass the flag directly: the ``options.headless`` property was deprecated
    # and later removed in newer Selenium 4 releases.
    options.add_argument('--headless')
    options.add_argument('--log-level=3')

    driver = webdriver.Chrome(options=options)
    try:
        driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": USER_AGENT})
        print(driver.execute_script("return navigator.userAgent;"))

        shot_number = 1
        for link in tqdm(links, ncols=65):
            # os.path.join avoids the invalid "\{" escape and the hard-coded
            # Windows separator of the original f-string.
            target = os.path.join(OP_DIR, f'{shot_number}.png')
            try:
                driver.get(link)
                time.sleep(PAGE_LOAD_WAIT_SECONDS)
                # Grow the window to the full page height so the <body>
                # screenshot covers everything. May need manual adjustment.
                driver.set_window_size(WINDOW_WIDTH, page_height(driver))
                saved = driver.find_element(By.TAG_NAME, "body").screenshot(target)
            except WebDriverException:
                print(link)
                continue
            if saved:
                shot_number += 1
            else:
                # screenshot() reports a failed file write by returning False
                # (per the Selenium API docs) rather than raising.
                print(link)
    finally:
        # Always shut the browser down, even on Ctrl+C or unexpected errors.
        driver.quit()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you! Following your suggestions worked fine!
Thank you! Following your suggestions worked fine!