Skip to content

Instantly share code, notes, and snippets.

@dcragusa
Created September 19, 2018 11:59
Show Gist options
  • Save dcragusa/a78748cfd72bbac2cebddb259535c584 to your computer and use it in GitHub Desktop.
Save dcragusa/a78748cfd72bbac2cebddb259535c584 to your computer and use it in GitHub Desktop.
A Firefox instance for Python Selenium, including a general retry decorator, utilities for downloading PDF files, and custom expected conditions
import datetime as dt
import os
import re
import shutil
import sys
import time
from functools import wraps
from typing import Optional as O, Callable as C, List as L
from typing import Tuple

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException, StaleElementReferenceException,
    JavascriptException, TimeoutException
)
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Short module-level aliases for the filesystem helpers used by PDFUtilities.
join = os.path.join
listdir = os.listdir
getsize = os.path.getsize
def get_browser(
        download_dir: str,
        firefox_path: str = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe',
        timeout: float = 30,
) -> Tuple[webdriver.Firefox, WebDriverWait]:
    """Start a Firefox instance configured to save downloads without prompting.

    Args:
        download_dir: Directory that Firefox saves downloaded files into.
        firefox_path: Location of the Firefox executable. Defaults to the
            standard Windows install path used by the original script.
        timeout: Number of seconds the returned ``WebDriverWait`` waits for
            a condition before giving up.

    Returns:
        A ``(browser, wait)`` tuple: the Firefox driver and a
        ``WebDriverWait`` bound to it.
    """
    # Firefox, has to be more recent than v52
    binary = FirefoxBinary(firefox_path, log_file=sys.stdout)

    profile = webdriver.FirefoxProfile()
    # Auto config proxy
    profile.set_preference('network.proxy.type', 4)
    # Disable auto pdf parsing so PDFs are downloaded instead of rendered
    profile.set_preference('pdfjs.disabled', True)
    profile.set_preference('plugin.scan.plid.all', False)
    profile.set_preference('plugin.scan.Acrobat', '99.0')
    # Set download location
    profile.set_preference('browser.download.folderList', 2)
    profile.set_preference('browser.download.dir', download_dir)
    # MIME types that are saved to disk without showing a dialog
    profile.set_preference(
        'browser.helperApps.neverAsk.saveToDisk',
        'application/pdf;text/html;application/vnd.ms-excel'
    )
    profile.accept_untrusted_certs = True
    profile.update_preferences()

    browser = webdriver.Firefox(firefox_binary=binary, firefox_profile=profile)
    browser.delete_all_cookies()

    # browser will wait up to `timeout` seconds for various conditions
    wait = WebDriverWait(browser, timeout)
    return browser, wait
def retry_exception(func: C) -> O[C]:
    """Decorate ``func`` so that transient Selenium failures are retried.

    The wrapped function is attempted up to five times. A
    ``StaleElementReferenceException``, ``JavascriptException`` or
    ``TimeoutException`` triggers a retry after a 0.5 second pause; any
    other exception propagates immediately.

    Previously the final-attempt ``raise`` branches were unreachable, so a
    function that kept failing silently returned ``None``. The exception
    from the last attempt is now re-raised unchanged.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapping function, carrying ``func``'s metadata.

    Raises:
        StaleElementReferenceException, JavascriptException, TimeoutException:
            Raised by the wrapper when every attempt failed.
    """
    max_tries = 5
    retry_delay = 0.5  # seconds between attempts

    @wraps(func)
    def func_wrapper(*args, **kwargs):
        for attempt in range(1, max_tries + 1):
            try:
                return func(*args, **kwargs)
            except (StaleElementReferenceException, JavascriptException, TimeoutException) as exc:
                if attempt == max_tries:
                    # Out of attempts: surface the original exception
                    # (message and traceback intact) to the caller.
                    raise
                if isinstance(exc, StaleElementReferenceException):
                    label = 'Stale element'
                elif isinstance(exc, JavascriptException):
                    label = 'JS'
                else:
                    label = 'Timeout'
                print('%s exception in %s: retrying' % (label, func.__name__))
                time.sleep(retry_delay)

    return func_wrapper
class PDFUtilities:
    """Helpers for moving downloaded PDFs from a temp dir into a dated archive."""

    @staticmethod
    def get_existing_dates(download_dir: str) -> L[str]:
        """Return the dates found in the names of the PDFs in ``download_dir``.

        For every file ending in ``.pdf`` the first ``YYYY-MM-DD`` substring
        of its name is collected. PDFs whose name contains no such date are
        skipped (previously they raised ``IndexError``).
        """
        date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
        date_strs = []
        for file in listdir(download_dir):
            if not file.endswith('.pdf'):
                continue
            match = date_pattern.search(file)
            if match:
                date_strs.append(match.group())
        return date_strs

    @classmethod
    def process_file(cls, tmp_dir: str, download_dir: str, date: dt.date) -> O[bool]:
        """Move a finished download from ``tmp_dir`` to ``download_dir/<date>.pdf``.

        Returns:
            ``True`` when a non-empty file was moved, otherwise ``None``.
        """
        # Firefox keeps the data of an in-progress download in a '*.part'
        # file; never treat that as the finished document. Sorting makes the
        # choice deterministic when several files are present.
        candidates = sorted(name for name in listdir(tmp_dir) if not name.endswith('.part'))
        for name in candidates:
            fp = join(tmp_dir, name)
            # we need the file to exist and be non-zero size
            if getsize(fp):
                shutil.move(fp, join(download_dir, '%s.pdf' % date))
                print('Moved %s to %s.pdf' % (fp, date))
                return True
        return None

    @classmethod
    def download_and_process_file(
            cls, wait: 'WebDriverWait', tmp_dir: str, download_dir: str, date: dt.date, iterator: int):
        """Click the page's download element until the PDF arrives, then file it.

        Args:
            wait: ``WebDriverWait`` used to locate the element with id ``download``.
            tmp_dir: Directory the browser downloads into.
            download_dir: Directory the renamed PDF is moved to.
            date: Date used as the final file name (``<date>.pdf``).
            iterator: Attempt counter to start from; attempts stop once it
                reaches 4.

        Raises:
            Exception: If no non-empty file has appeared after the last attempt.
        """
        # A file may already be waiting from an earlier click.
        if cls.process_file(tmp_dir, download_dir, date):
            return
        while True:
            download_el = wait.until(EC.presence_of_element_located((By.ID, 'download')))
            download_el.click()
            # wait for the download
            time.sleep(5)
            # download, rename to date and move to actual dir
            if cls.process_file(tmp_dir, download_dir, date):
                return
            # '>=' rather than '==' so a start value above 4 cannot loop forever.
            if iterator >= 4:
                raise Exception('PDF has not been downloaded for 20 sec')
            iterator += 1

    @staticmethod
    def clear_directory(dir: str):
        """Delete every file directly inside ``dir`` (used to clean up the tmp dir).

        The parameter name shadows the ``dir`` builtin but is kept so that
        existing keyword callers continue to work.
        """
        for file in listdir(dir):
            os.remove(join(dir, file))
class CustomExpectedConditions:
    """Namespace of additional expected conditions for ``WebDriverWait.until``.

    Every nested class is a callable that receives the driver and returns a
    truthy value once its condition holds, or a falsy value otherwise.
    """

    class presence_of_element_located_with_text(object):
        """
        An expectation for checking that an element is present and has a certain text.
        locator - used to find the candidate elements
        text - the text to look for
        exact - if true the element text must equal `text`, otherwise contain it
        returns the first matching WebElement once it is located
        """

        def __init__(self, locator, text, exact):
            self.locator = locator
            self.text = text
            self.exact = exact

        def __call__(self, driver):
            try:
                for el in driver.find_elements(*self.locator):
                    el_text = el.text
                    matched = self.text == el_text if self.exact else self.text in el_text
                    if matched:
                        return el
            except (NoSuchElementException, StaleElementReferenceException):
                return False
            return False

    class presence_of_element_located_with_text_in_value(object):
        """
        An expectation for checking that an element is present and has a
        certain text in its `value` attribute.
        locator - used to find the candidate elements
        text - the text to look for
        exact - if true the value must equal `text`, otherwise contain it
        returns the first matching WebElement once it is located
        """

        def __init__(self, locator, text, exact):
            self.locator = locator
            self.text = text
            self.exact = exact

        def __call__(self, driver):
            try:
                for el in driver.find_elements(*self.locator):
                    value = el.get_attribute('value')
                    # An element without a value attribute can never match;
                    # skipping it also avoids `text in None` raising TypeError.
                    if value is None:
                        continue
                    matched = self.text == value if self.exact else self.text in value
                    if matched:
                        return el
            except (NoSuchElementException, StaleElementReferenceException):
                return False
            return False

    class presence_of_element_located_with_different_id(object):
        """An expectation for finding an element and checking
        it has a different ID to a previous result
        func - factory of the underlying expected condition
        args - positional arguments passed to `func`
        id_ - the ID of the previous result
        returns the WebElement once it is located
        """

        def __init__(self, func, args, id_):
            self.func = func
            self.args = args
            self.id = id_

        def __call__(self, driver):
            el = self.func(*self.args)(driver)
            # Use the public `id` property, as the list variant below does.
            if el and el.id != self.id:
                return el
            return False

    class presence_of_elements_located_with_different_ids(object):
        """An expectation for finding a list of elements and checking
        the elements have different IDs to a previous result
        func - factory of the underlying expected condition
        args - positional arguments passed to `func`
        ids - the IDs of the previous result
        returns the list of WebElements once none of them has a previous ID
        """

        def __init__(self, func, args, ids):
            self.func = func
            self.args = args
            self.ids = ids

        def __call__(self, driver):
            els = self.func(*self.args)(driver)
            if not els:
                return False
            if any(el and el.id in self.ids for el in els):
                return False
            return els

    class number_of_elements_located:
        """
        An expectation for checking that there are at least a certain number
        of elements matching a locator.
        locator - used to find the elements
        number - the minimum number of elements required
        returns the list of WebElements when there are a sufficient number
        """

        def __init__(self, locator, number):
            self.locator = locator
            self.number = number

        def __call__(self, driver):
            els = driver.find_elements(*self.locator)
            if len(els) < self.number:
                return False
            return els

    class absence_of_element_located:
        """
        An expectation for checking that an element is not present.
        locator - used to find the element
        returns True when the element is not present
        """

        def __init__(self, locator):
            self.locator = locator

        def __call__(self, driver):
            try:
                driver.find_element(*self.locator)
            except NoSuchElementException:
                return True
            return False

    class second_window_to_be_opened_and_switch_to_it(object):
        """
        An expectation for checking whether a second window is available to
        switch to. If exactly two windows are open it switches the given
        driver to the second one and returns True.
        """

        def __call__(self, driver):
            handles = driver.window_handles
            if len(handles) != 2:
                return False
            # `driver.switch_to_window` is deprecated; `switch_to.window` is
            # the supported spelling.
            driver.switch_to.window(handles[1])
            return True

    class presence_of_any_element_located:
        """
        An expectation for checking that any element in a list is present.
        locator_list - list of locators used to find the element
        returns a (WebElement, index) tuple for the first locator which matched
        """

        def __init__(self, locator_list):
            self.locator_list = locator_list

        def __call__(self, driver):
            for index, locator in enumerate(self.locator_list):
                try:
                    return driver.find_element(*locator), index
                except NoSuchElementException:
                    continue
            return False

    class presence_of_all_elements_located:
        """
        An expectation for checking that all elements in a list are present.
        locator_list - list of locators used to find the elements
        returns True when all elements are present
        """

        def __init__(self, locator_list):
            self.num_elements = len(locator_list)
            self.locator_list = locator_list

        def __call__(self, driver):
            for locator in self.locator_list:
                try:
                    driver.find_element(*locator)
                except NoSuchElementException:
                    # One missing element is enough to fail the condition.
                    return False
            return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment