Last active
October 6, 2022 10:25
-
-
Save chiraagshah-qa/d32530bd3ee91262d72a6526df8f4a6f to your computer and use it in GitHub Desktop.
Grab screenshots of the sites listed in a CSV file (which must have "Site Name" and "URL" column headers) and save disk space by converting each .png screenshot to .jpg.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import os
import re
import time

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from tqdm import tqdm
# ---------------------------------------------------------------------------
# Configuration: replace the placeholder strings below before running.
# ---------------------------------------------------------------------------
# CSV file with a header row containing the columns "Site Name" and "URL".
sites_list = 'Location of .csv File'
# Prefix of the output directory; a "_<timestamp>" suffix is appended per run.
screenshots_dir_path = 'Location where screenshots should be saved'
# Path to the Chrome executable that Selenium should drive.
chrome_binary_location = "Location of chrome.exe"

# Seconds to wait after navigation so the page can finish rendering.
PAGE_LOAD_WAIT_SECONDS = 15  # May need manual adjustment
# Browser window (width, height) in pixels used for every screenshot.
WINDOW_SIZE = (2560, 5000)  # May need manual adjustment

# Characters that are invalid in file names on Windows and/or POSIX.
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def safe_filename(name, fallback='unnamed'):
    """Return *name* made safe for use as a single file-name component.

    Path separators, reserved punctuation and control characters are replaced
    with ``_``; leading/trailing spaces and dots are stripped.

    Args:
        name: The raw name (may be ``None`` or empty).
        fallback: Value returned when nothing usable remains.

    Returns:
        A non-empty string containing no path separators.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', name or '').strip(' .')
    return cleaned or fallback


def read_sites(csv_path):
    """Read the site CSV once and return its data rows as a list of dicts.

    The file is opened in a ``with`` block so the handle is always closed.
    ``csv.DictReader`` consumes the header row and skips blank lines, so the
    length of the returned list is the exact number of sites to process.
    """
    # newline='' is what the csv module documentation asks for on input files.
    with open(csv_path, newline='') as csv_file:
        return list(csv.DictReader(csv_file))


def build_driver():
    """Create a headless Chrome WebDriver using the configured binary."""
    options = Options()
    # Pass the flag directly: the ``Options.headless`` setter is deprecated
    # and absent from newer Selenium releases, where assigning it is a no-op.
    options.add_argument('--headless')
    options.add_argument('--log-level=3')
    options.binary_location = chrome_binary_location
    return webdriver.Chrome(options=options)


def capture_site(driver, site_url, output_stem):
    """Screenshot *site_url* and store it as ``<output_stem>.jpg``.

    The screenshot is first written as PNG, converted to an RGB JPEG to save
    space, and the intermediate PNG is then removed.

    Raises:
        WebDriverException: If navigation or the screenshot fails in Selenium.
        OSError: If the PNG cannot be written, read back or converted.
    """
    png_path = f'{output_stem}.png'
    jpg_path = f'{output_stem}.jpg'

    driver.get(site_url)
    time.sleep(PAGE_LOAD_WAIT_SECONDS)

    # get_screenshot_as_file reports a write failure by returning False.
    if not driver.get_screenshot_as_file(png_path):
        raise OSError(f'Could not write screenshot to {png_path}')

    # Optional file conversion to save space.
    with Image.open(png_path) as png_image:
        png_image.convert('RGB').save(jpg_path)
    os.remove(png_path)


def main():
    """Capture a JPEG screenshot of every site listed in ``sites_list``."""
    sites = read_sites(sites_list)

    # creation of screenshot directory
    timestr = time.strftime("%Y%m%d-%H%M%S")
    screenshots_dir = f'{screenshots_dir_path}_{timestr}'
    os.makedirs(screenshots_dir)

    driver = build_driver()
    try:
        print(driver.execute_script("return navigator.userAgent;"))
        # Size the window once, before any page loads, so every site
        # (including the first) is rendered at the same dimensions.
        driver.set_window_size(*WINDOW_SIZE)

        for row_number, row in enumerate(tqdm(sites), start=1):
            placeholder = f'site_{row_number}'
            site_name = (row.get('Site Name') or '').strip() or placeholder
            site_url = (row.get('URL') or '').strip()
            output_stem = os.path.join(
                screenshots_dir, safe_filename(site_name, fallback=placeholder)
            )
            try:
                if not site_url:
                    raise OSError(f'Row {row_number} has no URL')
                capture_site(driver, site_url, output_stem)
            except (WebDriverException, OSError):
                # One bad site must not abort the whole run.
                print("Error getting screenshot of " + site_name + ".")
                continue
    finally:
        # Always shut the browser down, even on an unexpected error.
        driver.quit()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment