dcragusa · September 19, 2018 11:59
diff --git a/selenium_utils.py b/selenium_utils.py
 import re
 import os
 import sys
 import time
 import shutil
 import datetime as dt
 from functools import wraps
 from typing import Optional as O, Callable as C, List as L

 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
 from selenium.common.exceptions import (
    NoSuchElementException, StaleElementReferenceException,
    JavascriptException, TimeoutException
 )

 # Module-level shorthands for common os / os.path helpers.
 join = os.path.join
 listdir = os.listdir
 getsize = os.path.getsize

 def get_browser(
    download_dir: str,
    firefox_path: str = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe',
    timeout: float = 30,
 ) -> (webdriver.Firefox, WebDriverWait):
    """
    Start a Firefox session set up to save downloads without prompting.
    download_dir - directory Firefox is told to download files into
    firefox_path - path of the Firefox executable to launch (defaults to the
                   previously hard-coded Windows install location)
    timeout - seconds the returned WebDriverWait waits before giving up
    returns the (browser, wait) pair
    """

    # Firefox, has to be more recent than v52
    binary = FirefoxBinary(firefox_path, log_file=sys.stdout)

    preferences = {
        # Auto config proxy
        'network.proxy.type': 4,
        # Disable auto pdf parsing
        'pdfjs.disabled': True,
        'plugin.scan.plid.all': False,
        'plugin.scan.Acrobat': '99.0',
        # Set download location
        'browser.download.folderList': 2,
        'browser.download.dir': download_dir,
        # Content types that are saved straight to disk with no dialog
        'browser.helperApps.neverAsk.saveToDisk': 'application/pdf;text/html;application/vnd.ms-excel',
    }

    profile = webdriver.FirefoxProfile()
    for pref_name, pref_value in preferences.items():
        profile.set_preference(pref_name, pref_value)
    profile.accept_untrusted_certs = True
    profile.update_preferences()

    browser = webdriver.Firefox(firefox_binary=binary, firefox_profile=profile)
    browser.delete_all_cookies()

    # browser will wait up to `timeout` seconds for various conditions
    wait = WebDriverWait(browser, timeout)

    return browser, wait


 def retry_exception(func: C) -> O[C]:
    """
    Decorator that retries func when Selenium raises a transient error.
    func - the callable to wrap
    returns a wrapper that calls func up to 5 times, pausing 0.5 sec after a
    StaleElementReferenceException, JavascriptException or TimeoutException;
    the exception from the final attempt is re-raised unchanged. Any other
    exception propagates immediately.
    """
    max_tries = 5

    @wraps(func)
    def func_wrapper(*args, **kwargs):
        for attempt in range(1, max_tries + 1):
            try:
                return func(*args, **kwargs)
            except (StaleElementReferenceException, JavascriptException, TimeoutException) as exc:
                if attempt == max_tries:
                    # Out of attempts: surface the real error (with its
                    # message and traceback) instead of returning None.
                    raise
                if isinstance(exc, StaleElementReferenceException):
                    label = 'Stale element'
                elif isinstance(exc, JavascriptException):
                    label = 'JS'
                else:
                    label = 'Timeout'
                print('%s exception in %s: retrying' % (label, func.__name__))
                time.sleep(0.5)
    return func_wrapper


 class PDFUtilities:
    """
    Helpers for moving a freshly downloaded PDF out of a temporary directory
    and into the real download directory, named after its date.
    """

    @staticmethod
    def get_existing_dates(download_dir: str) -> L[str]:
        """
        Extract the dates from all downloaded files.
        download_dir - directory holding the '<date>.pdf' files
        returns the first YYYY-MM-DD string found in each '.pdf' file name;
        '.pdf' files whose name holds no date are skipped
        """
        date_strs = []
        for file in os.listdir(download_dir):
            if not file.endswith('.pdf'):
                continue
            match = re.search(r'\d{4}-\d{2}-\d{2}', file)
            if match:
                date_strs.append(match.group())
        return date_strs

    @classmethod
    def process_file(cls, tmp_dir: str, download_dir: str, date: dt.date) -> O[bool]:
        """
        Move a finished download from tmp_dir to download_dir as '<date>.pdf'.
        tmp_dir - directory the browser downloads into
        download_dir - directory the renamed file is moved to
        date - date used for the new file name
        returns True when a file was moved, False when nothing was ready
        """
        # List the directory once so the decision is based on one snapshot.
        for name in sorted(os.listdir(tmp_dir)):
            # NOTE(review): assumes in-progress Firefox downloads carry a
            # '.part' suffix; such files must not be moved yet.
            if name.endswith('.part'):
                continue
            fp = os.path.join(tmp_dir, name)
            # we need the file to exist and be non-zero size
            if os.path.isfile(fp) and os.path.getsize(fp):
                shutil.move(fp, os.path.join(download_dir, '%s.pdf' % date))
                print('Moved %s to %s.pdf' % (fp, date))
                return True
        return False

    @classmethod
    def download_and_process_file(
            cls, wait: 'WebDriverWait', tmp_dir: str, download_dir: str, date: dt.date, iterator: int):
        """
        Download, rename to date and move to actual dir.
        wait - WebDriverWait used to locate the element with id 'download'
        tmp_dir - directory the browser downloads into
        download_dir - directory the renamed file is moved to
        date - date used for the new file name
        iterator - attempt counter; attempts stop once it reaches 4
        raises Exception when no file has appeared after the last attempt
        """
        # A file may already be waiting from an earlier click.
        if cls.process_file(tmp_dir, download_dir, date):
            return

        while True:
            download_el = wait.until(EC.presence_of_element_located((By.ID, 'download')))
            download_el.click()
            # wait for the download
            time.sleep(5)

            if cls.process_file(tmp_dir, download_dir, date):
                return

            # '>=' so a counter that starts above 4 still terminates.
            if iterator >= 4:
                raise Exception('PDF has not been downloaded for 20 sec')
            iterator += 1

    @staticmethod
    def clear_directory(dir: str):
        """
        Clean up the tmp dir by removing every entry directly inside it.
        dir - directory to empty (os.remove is used, so entries must be files)
        """
        for file in os.listdir(dir):
            os.remove(os.path.join(dir, file))


 class CustomExpectedConditions:
    """
    Additional expected-condition callables. Each nested class is called with
    a driver and returns a truthy result once its condition holds and a falsy
    one otherwise.
    """

    class presence_of_element_located_with_text(object):
        """
        An expectation for checking that an element is present and has a certain text.
        locator - used to find the candidate elements
        text - the text to look for
        exact - True to require the element text to equal text, False to require it to contain text
        returns the first matching WebElement, or False when none matches
        """

        def __init__(self, locator, text, exact):
            self.locator = locator
            self.text = text
            self.exact = exact

        def __call__(self, driver):
            try:
                for el in driver.find_elements(*self.locator):
                    el_text = el.text
                    if self.exact and self.text == el_text:
                        return el
                    if not self.exact and self.text in el_text:
                        return el
                return False
            except (NoSuchElementException, StaleElementReferenceException):
                return False


    class presence_of_element_located_with_text_in_value(object):
        """
        An expectation for checking that an element is present and has a certain
        text in its 'value' attribute.
        locator - used to find the candidate elements
        text - the text to look for
        exact - True to require the value to equal text, False to require it to contain text
        returns the first matching WebElement, or False when none matches
        """

        def __init__(self, locator, text, exact):
            self.locator = locator
            self.text = text
            self.exact = exact

        def __call__(self, driver):
            try:
                for el in driver.find_elements(*self.locator):
                    value = el.get_attribute('value')
                    if self.exact and self.text == value:
                        return el
                    # A missing attribute comes back as None, which cannot
                    # contain anything and must not be used with 'in'.
                    if not self.exact and value is not None and self.text in value:
                        return el
                return False
            except (NoSuchElementException, StaleElementReferenceException):
                return False


    class presence_of_element_located_with_different_id(object):
        """
        An expectation for finding an element and checking
        it has a different ID to a previous result.
        func - expected-condition factory; func(*args) is called with the driver
        args - positional arguments passed to func
        id_ - the ID of the previous result
        returns the WebElement once it is located with a new ID, else False
        """

        def __init__(self, func, args, id_):
            self.func = func
            self.args = args
            self.id = id_

        def __call__(self, driver):
            el = self.func(*self.args)(driver)
            # Use the public 'id' attribute, like the plural variant below.
            if el and el.id != self.id:
                return el
            return False


    class presence_of_elements_located_with_different_ids(object):
        """
        An expectation for finding a list of elements and checking
        the elements have different IDs to a previous result.
        func - expected-condition factory; func(*args) is called with the driver
        args - positional arguments passed to func
        ids - the IDs of the previous result
        returns the list of WebElements once none of them has a previous ID, else False
        """

        def __init__(self, func, args, ids):
            self.func = func
            self.args = args
            self.ids = ids

        def __call__(self, driver):
            els = self.func(*self.args)(driver)
            if not els:
                return False
            for el in els:
                if el and el.id in self.ids:
                    return False
            return els


    class number_of_elements_located:
        """
        An expectation for checking that there are a certain number of elements matching a locator.
        locator - used to find the elements
        number - minimum number of elements required
        returns the list of WebElements when there are a sufficient number, else False
        """

        def __init__(self, locator, number):
            self.locator = locator
            self.number = number

        def __call__(self, driver):
            els = driver.find_elements(*self.locator)  # Finding the referenced elements
            if len(els) < self.number:
                return False
            return els


    class absence_of_element_located:
        """
        An expectation for checking that an element is not present.
        locator - used to find the element
        returns True when the element is not present, else False
        """

        def __init__(self, locator):
            self.locator = locator

        def __call__(self, driver):
            try:
                driver.find_element(*self.locator)  # Finding the referenced elements
            except NoSuchElementException:
                return True
            return False


    class second_window_to_be_opened_and_switch_to_it(object):
        """
        An expectation for checking whether a second window is available to
        switch to.  If exactly two windows are open it switches the given
        driver to the second one.
        returns True after switching, else False
        """

        def __call__(self, driver):
            handles = driver.window_handles
            if len(handles) != 2:
                return False
            # switch_to.window replaces the deprecated driver.switch_to_window
            driver.switch_to.window(handles[1])
            return True


    class presence_of_any_element_located:
        """
        An expectation for checking that any element in a list is present.
        locator_list - list of locators used to find the element
        returns a (WebElement, index) tuple for the first locator which matched, else False
        """

        def __init__(self, locator_list):
            self.locator_list = locator_list

        def __call__(self, driver):
            for index, locator in enumerate(self.locator_list):
                try:
                    el = driver.find_element(*locator)  # Finding the referenced elements
                except NoSuchElementException:
                    continue
                return el, index
            return False


    class presence_of_all_elements_located:
        """
        An expectation for checking that all elements in a list are present.
        locator_list - list of locators used to find the elements
        returns True when all elements are present, else False
        """

        def __init__(self, locator_list):
            self.num_elements = len(locator_list)
            self.locator_list = locator_list

        def __call__(self, driver):
            for locator in self.locator_list:
                try:
                    driver.find_element(*locator)  # Finding the referenced elements
                except NoSuchElementException:
                    # One missing element is enough to fail the condition.
                    return False
            return True
	import re
	import os
	import sys
	import time
	import shutil
	import datetime as dt
	from functools import wraps
	from typing import Optional as O, Callable as C, List as L

	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
	from selenium.common.exceptions import (
	NoSuchElementException, StaleElementReferenceException,
	JavascriptException, TimeoutException
	)

	# Module-level shorthands for common os / os.path helpers.
	join = os.path.join
	listdir = os.listdir
	getsize = os.path.getsize

	def get_browser(
	    download_dir: str,
	    firefox_path: str = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe',
	    timeout: float = 30,
	) -> (webdriver.Firefox, WebDriverWait):
	    """
	    Start a Firefox session set up to save downloads without prompting.
	    download_dir - directory Firefox is told to download files into
	    firefox_path - path of the Firefox executable to launch (defaults to the
	                   previously hard-coded Windows install location)
	    timeout - seconds the returned WebDriverWait waits before giving up
	    returns the (browser, wait) pair
	    """

	    # Firefox, has to be more recent than v52
	    binary = FirefoxBinary(firefox_path, log_file=sys.stdout)

	    preferences = {
	        # Auto config proxy
	        'network.proxy.type': 4,
	        # Disable auto pdf parsing
	        'pdfjs.disabled': True,
	        'plugin.scan.plid.all': False,
	        'plugin.scan.Acrobat': '99.0',
	        # Set download location
	        'browser.download.folderList': 2,
	        'browser.download.dir': download_dir,
	        # Content types that are saved straight to disk with no dialog
	        'browser.helperApps.neverAsk.saveToDisk': 'application/pdf;text/html;application/vnd.ms-excel',
	    }

	    profile = webdriver.FirefoxProfile()
	    for pref_name, pref_value in preferences.items():
	        profile.set_preference(pref_name, pref_value)
	    profile.accept_untrusted_certs = True
	    profile.update_preferences()

	    browser = webdriver.Firefox(firefox_binary=binary, firefox_profile=profile)
	    browser.delete_all_cookies()

	    # browser will wait up to `timeout` seconds for various conditions
	    wait = WebDriverWait(browser, timeout)

	    return browser, wait


	def retry_exception(func: C) -> O[C]:
	    """
	    Decorator that retries func when Selenium raises a transient error.
	    func - the callable to wrap
	    returns a wrapper that calls func up to 5 times, pausing 0.5 sec after a
	    StaleElementReferenceException, JavascriptException or TimeoutException;
	    the exception from the final attempt is re-raised unchanged. Any other
	    exception propagates immediately.
	    """
	    max_tries = 5

	    @wraps(func)
	    def func_wrapper(*args, **kwargs):
	        for attempt in range(1, max_tries + 1):
	            try:
	                return func(*args, **kwargs)
	            except (StaleElementReferenceException, JavascriptException, TimeoutException) as exc:
	                if attempt == max_tries:
	                    # Out of attempts: surface the real error (with its
	                    # message and traceback) instead of returning None.
	                    raise
	                if isinstance(exc, StaleElementReferenceException):
	                    label = 'Stale element'
	                elif isinstance(exc, JavascriptException):
	                    label = 'JS'
	                else:
	                    label = 'Timeout'
	                print('%s exception in %s: retrying' % (label, func.__name__))
	                time.sleep(0.5)
	    return func_wrapper


	class PDFUtilities:
	    """
	    Helpers for moving a freshly downloaded PDF out of a temporary directory
	    and into the real download directory, named after its date.
	    """

	    @staticmethod
	    def get_existing_dates(download_dir: str) -> L[str]:
	        """
	        Extract the dates from all downloaded files.
	        download_dir - directory holding the '<date>.pdf' files
	        returns the first YYYY-MM-DD string found in each '.pdf' file name;
	        '.pdf' files whose name holds no date are skipped
	        """
	        date_strs = []
	        for file in os.listdir(download_dir):
	            if not file.endswith('.pdf'):
	                continue
	            match = re.search(r'\d{4}-\d{2}-\d{2}', file)
	            if match:
	                date_strs.append(match.group())
	        return date_strs

	    @classmethod
	    def process_file(cls, tmp_dir: str, download_dir: str, date: dt.date) -> O[bool]:
	        """
	        Move a finished download from tmp_dir to download_dir as '<date>.pdf'.
	        tmp_dir - directory the browser downloads into
	        download_dir - directory the renamed file is moved to
	        date - date used for the new file name
	        returns True when a file was moved, False when nothing was ready
	        """
	        # List the directory once so the decision is based on one snapshot.
	        for name in sorted(os.listdir(tmp_dir)):
	            # NOTE(review): assumes in-progress Firefox downloads carry a
	            # '.part' suffix; such files must not be moved yet.
	            if name.endswith('.part'):
	                continue
	            fp = os.path.join(tmp_dir, name)
	            # we need the file to exist and be non-zero size
	            if os.path.isfile(fp) and os.path.getsize(fp):
	                shutil.move(fp, os.path.join(download_dir, '%s.pdf' % date))
	                print('Moved %s to %s.pdf' % (fp, date))
	                return True
	        return False

	    @classmethod
	    def download_and_process_file(
	            cls, wait: 'WebDriverWait', tmp_dir: str, download_dir: str, date: dt.date, iterator: int):
	        """
	        Download, rename to date and move to actual dir.
	        wait - WebDriverWait used to locate the element with id 'download'
	        tmp_dir - directory the browser downloads into
	        download_dir - directory the renamed file is moved to
	        date - date used for the new file name
	        iterator - attempt counter; attempts stop once it reaches 4
	        raises Exception when no file has appeared after the last attempt
	        """
	        # A file may already be waiting from an earlier click.
	        if cls.process_file(tmp_dir, download_dir, date):
	            return

	        while True:
	            download_el = wait.until(EC.presence_of_element_located((By.ID, 'download')))
	            download_el.click()
	            # wait for the download
	            time.sleep(5)

	            if cls.process_file(tmp_dir, download_dir, date):
	                return

	            # '>=' so a counter that starts above 4 still terminates.
	            if iterator >= 4:
	                raise Exception('PDF has not been downloaded for 20 sec')
	            iterator += 1

	    @staticmethod
	    def clear_directory(dir: str):
	        """
	        Clean up the tmp dir by removing every entry directly inside it.
	        dir - directory to empty (os.remove is used, so entries must be files)
	        """
	        for file in os.listdir(dir):
	            os.remove(os.path.join(dir, file))


	class CustomExpectedConditions:
	    """
	    Additional expected-condition callables. Each nested class is called with
	    a driver and returns a truthy result once its condition holds and a falsy
	    one otherwise.
	    """

	    class presence_of_element_located_with_text(object):
	        """
	        An expectation for checking that an element is present and has a certain text.
	        locator - used to find the candidate elements
	        text - the text to look for
	        exact - True to require the element text to equal text, False to require it to contain text
	        returns the first matching WebElement, or False when none matches
	        """

	        def __init__(self, locator, text, exact):
	            self.locator = locator
	            self.text = text
	            self.exact = exact

	        def __call__(self, driver):
	            try:
	                for el in driver.find_elements(*self.locator):
	                    el_text = el.text
	                    if self.exact and self.text == el_text:
	                        return el
	                    if not self.exact and self.text in el_text:
	                        return el
	                return False
	            except (NoSuchElementException, StaleElementReferenceException):
	                return False


	    class presence_of_element_located_with_text_in_value(object):
	        """
	        An expectation for checking that an element is present and has a certain
	        text in its 'value' attribute.
	        locator - used to find the candidate elements
	        text - the text to look for
	        exact - True to require the value to equal text, False to require it to contain text
	        returns the first matching WebElement, or False when none matches
	        """

	        def __init__(self, locator, text, exact):
	            self.locator = locator
	            self.text = text
	            self.exact = exact

	        def __call__(self, driver):
	            try:
	                for el in driver.find_elements(*self.locator):
	                    value = el.get_attribute('value')
	                    if self.exact and self.text == value:
	                        return el
	                    # A missing attribute comes back as None, which cannot
	                    # contain anything and must not be used with 'in'.
	                    if not self.exact and value is not None and self.text in value:
	                        return el
	                return False
	            except (NoSuchElementException, StaleElementReferenceException):
	                return False


	    class presence_of_element_located_with_different_id(object):
	        """
	        An expectation for finding an element and checking
	        it has a different ID to a previous result.
	        func - expected-condition factory; func(*args) is called with the driver
	        args - positional arguments passed to func
	        id_ - the ID of the previous result
	        returns the WebElement once it is located with a new ID, else False
	        """

	        def __init__(self, func, args, id_):
	            self.func = func
	            self.args = args
	            self.id = id_

	        def __call__(self, driver):
	            el = self.func(*self.args)(driver)
	            # Use the public 'id' attribute, like the plural variant below.
	            if el and el.id != self.id:
	                return el
	            return False


	    class presence_of_elements_located_with_different_ids(object):
	        """
	        An expectation for finding a list of elements and checking
	        the elements have different IDs to a previous result.
	        func - expected-condition factory; func(*args) is called with the driver
	        args - positional arguments passed to func
	        ids - the IDs of the previous result
	        returns the list of WebElements once none of them has a previous ID, else False
	        """

	        def __init__(self, func, args, ids):
	            self.func = func
	            self.args = args
	            self.ids = ids

	        def __call__(self, driver):
	            els = self.func(*self.args)(driver)
	            if not els:
	                return False
	            for el in els:
	                if el and el.id in self.ids:
	                    return False
	            return els


	    class number_of_elements_located:
	        """
	        An expectation for checking that there are a certain number of elements matching a locator.
	        locator - used to find the elements
	        number - minimum number of elements required
	        returns the list of WebElements when there are a sufficient number, else False
	        """

	        def __init__(self, locator, number):
	            self.locator = locator
	            self.number = number

	        def __call__(self, driver):
	            els = driver.find_elements(*self.locator)  # Finding the referenced elements
	            if len(els) < self.number:
	                return False
	            return els


	    class absence_of_element_located:
	        """
	        An expectation for checking that an element is not present.
	        locator - used to find the element
	        returns True when the element is not present, else False
	        """

	        def __init__(self, locator):
	            self.locator = locator

	        def __call__(self, driver):
	            try:
	                driver.find_element(*self.locator)  # Finding the referenced elements
	            except NoSuchElementException:
	                return True
	            return False


	    class second_window_to_be_opened_and_switch_to_it(object):
	        """
	        An expectation for checking whether a second window is available to
	        switch to.  If exactly two windows are open it switches the given
	        driver to the second one.
	        returns True after switching, else False
	        """

	        def __call__(self, driver):
	            handles = driver.window_handles
	            if len(handles) != 2:
	                return False
	            # switch_to.window replaces the deprecated driver.switch_to_window
	            driver.switch_to.window(handles[1])
	            return True


	    class presence_of_any_element_located:
	        """
	        An expectation for checking that any element in a list is present.
	        locator_list - list of locators used to find the element
	        returns a (WebElement, index) tuple for the first locator which matched, else False
	        """

	        def __init__(self, locator_list):
	            self.locator_list = locator_list

	        def __call__(self, driver):
	            for index, locator in enumerate(self.locator_list):
	                try:
	                    el = driver.find_element(*locator)  # Finding the referenced elements
	                except NoSuchElementException:
	                    continue
	                return el, index
	            return False


	    class presence_of_all_elements_located:
	        """
	        An expectation for checking that all elements in a list are present.
	        locator_list - list of locators used to find the elements
	        returns True when all elements are present, else False
	        """

	        def __init__(self, locator_list):
	            self.num_elements = len(locator_list)
	            self.locator_list = locator_list

	        def __call__(self, driver):
	            for locator in self.locator_list:
	                try:
	                    driver.find_element(*locator)  # Finding the referenced elements
	                except NoSuchElementException:
	                    # One missing element is enough to fail the condition.
	                    return False
	            return True