Last active
January 21, 2019 10:39
-
-
Save joaoescribano/b9c34e7a07fb450ae7747e582dd4e6ba to your computer and use it in GitHub Desktop.
Python script that takes a full-page screenshot of a URL and saves it to a file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import datetime | |
import math | |
import os | |
import sys | |
import tempfile | |
# third-party imports | |
from PIL import Image | |
from selenium import webdriver | |
from time import sleep | |
def get_chrome_drive(driver_path=None):
    """Create a headless Chrome WebDriver.

    Args:
        driver_path: Path to the chromedriver executable. When None,
            '/usr/bin/chromedriver' is used.

    Returns:
        A ``webdriver.Chrome`` instance. This function does not quit the
        driver; the caller must call ``quit()`` on it when done.
    """
    if driver_path is None:
        driver_path = '/usr/bin/chromedriver'

    options = webdriver.ChromeOptions()
    options.headless = True
    # Keep scrollbars out of the captured screenshots.
    options.add_argument('--hide-scrollbars')
    options.add_argument('--no-sandbox')

    # `options=` is the supported keyword; `chrome_options=` is deprecated.
    return webdriver.Chrome(
        executable_path=driver_path,
        options=options,
    )
def get_firefox_drive(driver_path=None):
    """Create a headless Firefox WebDriver.

    Args:
        driver_path: Path to the geckodriver executable. When None,
            '/usr/bin/geckodriver' is used.

    Returns:
        A ``webdriver.Firefox`` instance. This function does not quit the
        driver; the caller must call ``quit()`` on it when done.
    """
    if driver_path is None:
        driver_path = '/usr/bin/geckodriver'

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    # `options=` is the supported keyword; `firefox_options=` is deprecated.
    return webdriver.Firefox(
        executable_path=driver_path,
        options=options,
    )
def save_fullpage_screenshot(driver, url, output_path, tmp_prefix='selenium_screenshot', tmp_suffix='.png'):
    """Save a full-page screenshot of ``url`` to ``output_path``.

    The page is loaded, then scrolled one viewport at a time with a
    screenshot taken at each position. The tiles are written to temporary
    files, stitched vertically into one image, and the temporary files are
    removed afterwards, whether or not stitching succeeded.

    The stitching places tile ``i`` at ``y = i * window.innerHeight`` on a
    canvas ``scrollHeight`` pixels tall, i.e. it treats screenshot pixels and
    CSS pixels as the same unit.

    Args:
        driver: Selenium WebDriver used to load and capture the page.
        url: Address of the page to capture.
        output_path: Destination path of the stitched image.
        tmp_prefix: Filename prefix for the temporary tile files.
        tmp_suffix: Filename suffix for the temporary tile files.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If the browser reports a non-positive viewport or page
            height, in which case there is nothing to capture.
    """
    driver.get(url)

    window_height = driver.execute_script('return window.innerHeight')
    scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    if window_height <= 0 or scroll_height <= 0:
        raise ValueError(
            'cannot capture page: window height {0!r}, scroll height {1!r}'.format(
                window_height, scroll_height))

    num = math.ceil(scroll_height / window_height)
    # Height of the part of the page that only the last tile covers.
    # Zero means the page is an exact multiple of the viewport height and
    # the last tile must be used whole.
    remainder = scroll_height % window_height

    tempfiles = []
    try:
        # Created inside the try so a failure partway through still cleans
        # up the files that were already made.
        for i in range(num):
            fd, path = tempfile.mkstemp(
                prefix='{0}-{1:02}-'.format(tmp_prefix, i + 1),
                suffix=tmp_suffix,
            )
            os.close(fd)
            tempfiles.append(path)

        # Take one screenshot per viewport, scrolling between them.
        for i, path in enumerate(tempfiles):
            if i > 0:
                driver.execute_script('window.scrollBy(%d,%d)' % (0, window_height))
            driver.save_screenshot(path)

        # Stitch the tiles together top to bottom.
        stitched = None
        last_index = num - 1
        for i, path in enumerate(tempfiles):
            # Close each tile's file handle before the cleanup removes it.
            with Image.open(path) as img:
                w, h = img.size
                tile = img
                if i == last_index and num > 1 and remainder:
                    # The final scroll stops at the page bottom, so the last
                    # screenshot overlaps the previous one; keep only the
                    # bottom `remainder` rows that have not been pasted yet.
                    tile = img.crop((0, h - remainder, w, h))
                if stitched is None:
                    stitched = Image.new('RGB', (w, scroll_height))
                stitched.paste(tile, (0, i * window_height))
        stitched.save(output_path)
    finally:
        # Remove the temporary tiles.
        for path in tempfiles:
            if os.path.isfile(path):
                os.remove(path)
    return output_path
def main():
    """Capture the URL given on the command line into the given file.

    Expects two command-line arguments: the URL to capture and the output
    image path. Exits with a usage message when either is missing.
    """
    if len(sys.argv) < 3:
        sys.exit('Usage: {0} URL OUTPUT_FILE'.format(sys.argv[0]))
    url = sys.argv[1]
    filename = sys.argv[2]

    driver = get_chrome_drive()
    try:
        driver.set_window_size(1280, 768)
        save_fullpage_screenshot(driver, url, filename)
    finally:
        # Always shut the browser down, even if the capture failed, so no
        # driver process is left running.
        driver.quit()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage:
python3 urlcapture.py https://google.com google-screenshot.png