Skip to content

Instantly share code, notes, and snippets.

@fsouza
Created May 7, 2011 01:22
Show Gist options
  • Save fsouza/960106 to your computer and use it in GitHub Desktop.
Save fsouza/960106 to your computer and use it in GitHub Desktop.
A script that navigates through a WordPress blog, visiting every post and a customizable collection of links (using splinter).
from splinter.browser import Browser
def _attempt(message, action, *args):
    """Announce a browser step, run it, and report ``ok`` or ``fail``.

    Writes ``message`` to standard output, calls ``action(*args)`` and then
    finishes the line with ``ok`` on success or ``fail`` if the call raised.

    Returns True when the action completed without raising, False otherwise.
    """
    # Imported locally so this block does not depend on the file's import lines.
    import sys

    # write() + flush() rather than the Python 2 ``print x,`` statement: the
    # progress text becomes visible before the (slow) browser action starts,
    # and the code parses on both Python 2 and Python 3.
    sys.stdout.write(message)
    sys.stdout.flush()
    try:
        action(*args)
    except Exception:
        # Best effort: one broken page must not abort the whole crawl.
        # KeyboardInterrupt and SystemExit are not Exception subclasses, so
        # Ctrl-C and interpreter shutdown still propagate to the caller.
        sys.stdout.write('fail\n')
        return False
    sys.stdout.write('ok\n')
    return True


def navigate_on_blog(blog_url, entry_selector, previous_selector, *link_selectors):
    """Walk through a blog in a Firefox browser, visiting posts and extra links.

    Steps performed:

    1. Open ``blog_url`` and click the first element matching
       ``entry_selector``.
    2. While the current page contains an element matching
       ``previous_selector``, click the first such element.
    3. Go back to ``blog_url``, collect the ``href`` of every element matching
       any of ``link_selectors`` and visit each collected URL.

    Each click/visit is reported on standard output as ``ok`` or ``fail``;
    failures of individual pages do not stop the run.

    Parameters:
        blog_url: address of the blog's front page.
        entry_selector: CSS selector whose first match is clicked to start.
        previous_selector: CSS selector of the link followed repeatedly
            from page to page.
        *link_selectors: CSS selectors of additional links to visit.

    The browser is always closed, including when an error or a
    KeyboardInterrupt interrupts the run.
    """
    browser = Browser('webdriver.firefox')
    try:
        browser.visit(blog_url)
        browser.find_by_css(entry_selector).first.click()

        # Remember every "previous" href already tried: if a click fails and
        # the page does not change (or the links form a cycle), the same href
        # comes up again and the loop would otherwise never terminate.
        clicked = set()
        while browser.is_element_present_by_css(previous_selector):
            element = browser.find_by_css(previous_selector).first
            href = element['href']
            if href in clicked:
                break
            clicked.add(href)
            _attempt('Clicking %s... ' % href, element.click)

        browser.visit(blog_url)

        # Read all hrefs up front, before any navigation happens: the element
        # objects belong to the page that is currently loaded.
        urls = []
        for link_selector in link_selectors:
            for link in browser.find_by_css(link_selector):
                href = link['href']
                if href:  # skip anchors that carry no href at all
                    urls.append(href)

        for url in urls:
            if blog_url not in url:
                # Treat anything that does not mention the blog address as a
                # path relative to the blog root.
                url = '%s/%s' % (blog_url.rstrip('/'), url.lstrip('/'))
            _attempt('Touching %s... ' % url, browser.visit, url)
    finally:
        # Release the browser process even when a step above raised.
        browser.quit()
if __name__ == '__main__':
    # One tuple per blog, laid out in navigate_on_blog's argument order:
    # (blog_url, entry_selector, previous_selector, *link_selectors).
    blogs = (
        (
            'http://www.franciscosouza.com',
            'h2.title a',
            'div.nav-previous a',
            'div#menus ul li a',
            'div.tagcloud a',
        ),
        (
            'http://www.franciscosouza.com.br',
            'div.post h2 a',
            'div.fleft a[rel="prev"]',
            'div.horizmenu ul li a',
            'div.tagcloud a',
        ),
    )
    # Blogs are processed one after another, in the order listed above.
    for blog_args in blogs:
        navigate_on_blog(*blog_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment