Last active
October 11, 2016 01:15
-
-
Save cbiggins/ee540f76d44db3f32ce2af0b3fcc5065 to your computer and use it in GitHub Desktop.
This gist is a Python Selenium test. It loads a page, scrapes its links, selects one at random (making sure it matches the right domain) and then checks that page for a JavaScript object (in this scenario the object is Nielsen analytics, but it could be anything).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ScriptName : spiderTestNielssen.py | |
#--------------------- | |
import unittest | |
import random | |
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
#Following are optional required | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import Select | |
from selenium.common.exceptions import NoSuchElementException | |
class SpiderNielsenTest(unittest.TestCase):
    """Random-walk a site and verify the Nielsen ``NOLCMB`` object on each page.

    Starting from ``TEST_DOMAIN`` the test repeatedly picks a random link on
    the current page, follows it when it stays on the same site and has not
    been visited yet, and checks that ``typeof NOLCMB`` evaluates to
    ``"object"`` on the page it lands on.
    """

    # Start page; only links whose URL begins with this prefix are followed.
    TEST_DOMAIN = "http://www.example.com"
    # Stop once this many distinct pages have been checked.
    VISITED_LINKS_LIMIT = 100
    # Upper bound on random picks, so the walk ends even when every link on
    # a page is off-site or already visited.
    MAX_ATTEMPTS = 1000
    # Seconds a freshly loaded page gets to initialise NOLCMB before failing.
    NIELSEN_TIMEOUT = 10

    def setUp(self):
        """Start a fresh Firefox session for the test."""
        self.driver = webdriver.Firefox()

    def _nielsen_type(self):
        """Return the JavaScript ``typeof NOLCMB`` for the current page."""
        return self.driver.execute_script("return typeof NOLCMB;")

    def _assert_nielsen_present(self, retry=False):
        """Fail the test unless ``typeof NOLCMB`` is ``"object"``.

        With ``retry`` set, a missing object is polled for up to
        ``NIELSEN_TIMEOUT`` seconds before failing, because the script may
        not have been initialised yet right after the page load.
        """
        if self._nielsen_type() == "object":
            return

        url = self.driver.current_url
        failure = "NOLCMB is not an object on %s" % url
        if not retry:
            self.fail(failure)

        # Function-scope imports keep this class self-contained.
        from selenium.common.exceptions import TimeoutException
        from selenium.webdriver.support.ui import WebDriverWait

        print("NO NIELSEN - FIRST RUN!")
        # implicitly_wait() only configures element lookups and returns at
        # once; an explicit wait is what actually polls for the object.
        waiter = WebDriverWait(self.driver, self.NIELSEN_TIMEOUT)
        try:
            waiter.until(lambda _driver: self._nielsen_type() == "object")
        except TimeoutException:
            self.fail(failure)

    def test_for_nielsen_in_site(self):
        """Visit up to ``VISITED_LINKS_LIMIT`` random on-site pages and check each."""
        driver = self.driver
        # Kept as a list so the final printout shows the crawl order.
        visited_links = []

        driver.get(self.TEST_DOMAIN)
        links = driver.find_elements(By.TAG_NAME, "a")
        self._assert_nielsen_present()

        for _ in range(self.MAX_ATTEMPTS):
            if len(visited_links) >= self.VISITED_LINKS_LIMIT:
                break
            if not links:
                # Dead-end page: random.choice() would raise IndexError.
                break

            href = random.choice(links).get_attribute("href")
            # Skip anchors without an href, pages already checked, and
            # off-site URLs. A prefix test is used because a substring test
            # also matches foreign URLs that merely embed the domain
            # (e.g. "share this" links).
            if not href or href in visited_links:
                continue
            if not href.startswith(self.TEST_DOMAIN):
                continue

            print(href)
            driver.get(href)
            self._assert_nielsen_present(retry=True)

            # The old elements belong to the previous page; collect new ones.
            links = driver.find_elements(By.TAG_NAME, "a")
            visited_links.append(href)

        print(visited_links)
        self._assert_nielsen_present()

    def tearDown(self):
        """End the browser session, closing every window it opened."""
        # quit() ends the whole session; close() only closes the current
        # window.
        self.driver.quit()
# Script entry point: when the file is executed directly, hand control to
# unittest's command-line runner, which discovers and runs the tests above.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment