Created
September 19, 2018 11:59
-
-
Save dcragusa/a78748cfd72bbac2cebddb259535c584 to your computer and use it in GitHub Desktop.
A Firefox instance for Python Selenium, including a general retry decorator, utilities for downloading PDF files, and custom expected conditions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import sys | |
import time | |
import shutil | |
import datetime as dt | |
from functools import wraps | |
from typing import Optional as O, Callable as C, List as L | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary | |
from selenium.common.exceptions import ( | |
NoSuchElementException, StaleElementReferenceException, | |
JavascriptException, TimeoutException | |
) | |
join = os.path.join | |
listdir = os.listdir | |
getsize = os.path.getsize | |
def get_browser(
        download_dir: str,
        firefox_path: str = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe',
        timeout: float = 30,
) -> (webdriver.Firefox, WebDriverWait):
    """Start a Firefox session set up to save downloads without prompting.

    Args:
        download_dir: Directory passed to Firefox as its download location.
        firefox_path: Path of the Firefox executable (has to be more recent
            than v52). Defaults to the Windows install location this
            function was originally tied to, so existing callers are
            unaffected.
        timeout: Number of seconds the returned ``WebDriverWait`` waits for
            a condition before giving up.

    Returns:
        A ``(browser, wait)`` pair: the Firefox driver and a
        ``WebDriverWait`` bound to it.
    """
    binary = FirefoxBinary(firefox_path, log_file=sys.stdout)

    profile = webdriver.FirefoxProfile()
    # Auto config proxy
    profile.set_preference('network.proxy.type', 4)
    # Disable auto pdf parsing so PDFs are downloaded instead of rendered
    profile.set_preference('pdfjs.disabled', True)
    profile.set_preference('plugin.scan.plid.all', False)
    profile.set_preference('plugin.scan.Acrobat', '99.0')
    # Set download location
    profile.set_preference('browser.download.folderList', 2)
    profile.set_preference('browser.download.dir', download_dir)
    # Content types that are saved straight to disk without a dialog
    profile.set_preference(
        'browser.helperApps.neverAsk.saveToDisk', 'application/pdf;text/html;application/vnd.ms-excel'
    )
    profile.accept_untrusted_certs = True
    profile.update_preferences()

    browser = webdriver.Firefox(firefox_binary=binary, firefox_profile=profile)
    browser.delete_all_cookies()

    # browser will wait up to `timeout` seconds for various conditions
    wait = WebDriverWait(browser, timeout)
    return browser, wait
def retry_exception(func: O[C] = None, *, tries: int = 5, delay: float = 0.5) -> O[C]:
    """Retry a callable when Selenium raises a transient exception.

    The wrapped callable is re-invoked when it raises
    ``StaleElementReferenceException``, ``JavascriptException`` or
    ``TimeoutException``. Any other exception propagates immediately. When
    the final attempt also fails, the exception from that attempt is
    re-raised unchanged, so its message and traceback are preserved and the
    wrapper never silently returns ``None`` after exhausting its attempts.

    Usable both as ``@retry_exception`` and as
    ``@retry_exception(tries=3, delay=1)``.

    Args:
        func: The callable to wrap (supplied implicitly when used as a bare
            decorator).
        tries: Maximum number of calls, including the first one.
        delay: Seconds to sleep between attempts.

    Returns:
        The wrapped callable, or a decorator when called with keywords only.

    Raises:
        ValueError: If ``tries`` is smaller than 1.
    """
    if tries < 1:
        raise ValueError('tries must be at least 1')

    def decorate(target: C) -> C:
        @wraps(target)
        def func_wrapper(*args, **kwargs):
            for attempt in range(1, tries + 1):
                try:
                    return target(*args, **kwargs)
                except (StaleElementReferenceException, JavascriptException, TimeoutException) as exc:
                    if attempt == tries:
                        # Out of attempts: surface the real error.
                        raise
                    if isinstance(exc, StaleElementReferenceException):
                        label = 'Stale element'
                    elif isinstance(exc, JavascriptException):
                        label = 'JS'
                    else:
                        label = 'Timeout'
                    print('%s exception in %s: retrying' % (label, target.__name__))
                    time.sleep(delay)
        return func_wrapper

    if func is None:
        # Called as @retry_exception(...): hand back the real decorator.
        return decorate
    return decorate(func)
class PDFUtilities:
    """Helpers for collecting PDFs that the browser saves into a temp directory."""

    @staticmethod
    def get_existing_dates(download_dir: str) -> L[str]:
        """Return the ``YYYY-MM-DD`` string found in each ``.pdf`` file name.

        Only the first date-like substring of each name is used. PDF files
        whose name contains no such substring are skipped rather than
        raising ``IndexError``.
        """
        date_strs = []
        for name in os.listdir(download_dir):
            if not name.endswith('.pdf'):
                continue
            match = re.search(r'\d{4}-\d{2}-\d{2}', name)
            if match:
                date_strs.append(match.group())
        return date_strs

    @classmethod
    def process_file(cls, tmp_dir: str, download_dir: str, date: dt.date) -> O[bool]:
        """Move a finished download from ``tmp_dir`` to ``download_dir/<date>.pdf``.

        A file counts as finished when it has a non-zero size and its name
        does not end in ``.part`` (the suffix Firefox gives to in-progress
        downloads).

        Returns:
            ``True`` when a file was moved, otherwise ``None``.
        """
        # List the directory once so the check and the move see the same entries.
        for name in os.listdir(tmp_dir):
            if name.endswith('.part'):
                continue
            fp = os.path.join(tmp_dir, name)
            try:
                size = os.path.getsize(fp)
            except OSError:
                # The entry vanished between listing and stat; try again later.
                continue
            # we need the file to exist and be non-zero size
            if size:
                shutil.move(fp, os.path.join(download_dir, '%s.pdf' % date))
                print('Moved %s to %s.pdf' % (fp, date))
                return True
        return None

    @classmethod
    def download_and_process_file(
            cls, wait: 'WebDriverWait', tmp_dir: str, download_dir: str, date: dt.date, iterator: int):
        """Click the page's download element until a file can be processed.

        Each round first checks whether a file is already waiting, then
        clicks the element with id ``download``, sleeps five seconds and
        checks again.

        Args:
            wait: ``WebDriverWait`` used to locate the download element.
            tmp_dir: Directory the browser downloads into.
            download_dir: Directory the renamed PDF is moved to.
            date: Date used as the final file name.
            iterator: Round counter supplied by the caller; rounds stop once
                it reaches 4.

        Raises:
            Exception: If no file could be processed before the counter ran out.
        """
        while True:
            # download, rename to date and move to actual dir
            if cls.process_file(tmp_dir, download_dir, date):
                return
            download_el = wait.until(EC.presence_of_element_located((By.ID, 'download')))
            download_el.click()
            # wait for the download
            time.sleep(5)
            if cls.process_file(tmp_dir, download_dir, date):
                return
            # `>=` so a counter that starts above 4 still terminates.
            if iterator >= 4:
                raise Exception('PDF has not been downloaded for 20 sec')
            iterator += 1

    @staticmethod
    def clear_directory(dir: str):
        """Delete every entry directly inside ``dir`` (used to clean the tmp dir).

        The parameter keeps its original name, although it shadows the
        builtin, so keyword callers continue to work.
        """
        for file in os.listdir(dir):
            os.remove(os.path.join(dir, file))
class CustomExpectedConditions:
    """Additional expected conditions for use with ``WebDriverWait.until``.

    Every nested class is a callable that receives the driver and returns a
    truthy result once its condition holds, or a falsy one otherwise.
    """

    class presence_of_element_located_with_text:
        """
        An expectation for checking that an element is present and has a certain text.
        locator - used to find the candidate elements
        text - the text to look for
        exact - True to require equality, False to accept a substring match
        returns the first matching WebElement, or False
        """
        def __init__(self, locator, text, exact):
            self.locator = locator
            self.text = text
            self.exact = exact

        def __call__(self, driver):
            try:
                for el in driver.find_elements(*self.locator):
                    found = el.text
                    if (self.text == found) if self.exact else (self.text in found):
                        return el
                return False
            except (NoSuchElementException, StaleElementReferenceException):
                return False

    class presence_of_element_located_with_text_in_value:
        """
        An expectation for checking that an element is present and has a
        certain text in its ``value`` attribute.
        locator - used to find the candidate elements
        text - the text to look for in the attribute
        exact - True to require equality, False to accept a substring match
        returns the first matching WebElement, or False
        """
        def __init__(self, locator, text, exact):
            self.locator = locator
            self.text = text
            self.exact = exact

        def __call__(self, driver):
            try:
                for el in driver.find_elements(*self.locator):
                    value = el.get_attribute('value')
                    if self.exact:
                        if self.text == value:
                            return el
                    # An element without a value attribute yields None, which
                    # cannot be substring-searched; treat it as no match.
                    elif value is not None and self.text in value:
                        return el
                return False
            except (NoSuchElementException, StaleElementReferenceException):
                return False

    class presence_of_element_located_with_different_id:
        """An expectation for finding an element and checking
        it has a different ID to a previous result
        func - factory building the underlying expected condition
        args - positional arguments passed to func
        id_ - the ID of the previous result
        returns the WebElement once it is located, or False
        """
        def __init__(self, func, args, id_):
            self.func = func
            self.args = args
            self.id = id_

        def __call__(self, driver):
            el = self.func(*self.args)(driver)
            # Use the public `id` attribute, matching the plural variant below.
            if el and el.id != self.id:
                return el
            return False

    class presence_of_elements_located_with_different_ids:
        """An expectation for finding a list of elements and checking
        none of the elements has an ID from a previous result
        func - factory building the underlying expected condition
        args - positional arguments passed to func
        ids - the IDs of the previous result
        returns the list of WebElements once it is located, or False
        """
        def __init__(self, func, args, ids):
            self.func = func
            self.args = args
            self.ids = ids

        def __call__(self, driver):
            els = self.func(*self.args)(driver)
            if not els:
                return False
            for el in els:
                if el and el.id in self.ids:
                    return False
            return els

    class number_of_elements_located:
        """
        An expectation for checking that at least a certain number of elements match a locator.
        locator - used to find the elements
        number - the minimum number of elements required
        returns the list of WebElements when there are a sufficient number, or False
        """
        def __init__(self, locator, number):
            self.locator = locator
            self.number = number

        def __call__(self, driver):
            els = driver.find_elements(*self.locator)
            return els if len(els) >= self.number else False

    class absence_of_element_located:
        """
        An expectation for checking that an element is not present.
        locator - used to find the element
        returns True when the element is not present, otherwise False
        """
        def __init__(self, locator):
            self.locator = locator

        def __call__(self, driver):
            try:
                driver.find_element(*self.locator)
            except NoSuchElementException:
                return True
            return False

    class second_window_to_be_opened_and_switch_to_it:
        """
        An expectation for checking whether a second window is available to
        switch to. If exactly two windows are open it switches the given
        driver to the second one and returns True, otherwise returns False.
        """
        def __call__(self, driver):
            handles = driver.window_handles
            if len(handles) != 2:
                return False
            # `switch_to.window` replaces the deprecated `switch_to_window`.
            driver.switch_to.window(handles[1])
            return True

    class presence_of_any_element_located:
        """
        An expectation for checking that any element in a list is present.
        locator_list - list of locators used to find the element
        returns a (WebElement, index) tuple for the first locator that matched, or False
        """
        def __init__(self, locator_list):
            self.locator_list = locator_list

        def __call__(self, driver):
            for index, locator in enumerate(self.locator_list):
                try:
                    return driver.find_element(*locator), index
                except NoSuchElementException:
                    continue
            return False

    class presence_of_all_elements_located:
        """
        An expectation for checking that all elements in a list are present.
        locator_list - list of locators used to find the elements
        returns True when every locator matches an element, otherwise False
        """
        def __init__(self, locator_list):
            # Kept as a public attribute for code that may read it.
            self.num_elements = len(locator_list)
            self.locator_list = locator_list

        def __call__(self, driver):
            for locator in self.locator_list:
                try:
                    driver.find_element(*locator)
                except NoSuchElementException:
                    # One missing element is enough to fail the condition.
                    return False
            return True
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment