@nblackburn87
Created April 17, 2015 23:07
Python Scraper
from bs4 import BeautifulSoup
from urllib3 import PoolManager
from time import sleep

BASE_URL = 'http://www.chicagoreader.com'

# one connection pool, reused for every request
http = PoolManager()


def make_soup(url):
    # fetch a page and hand it to BeautifulSoup's lxml parser
    html = http.request('GET', url).data
    return BeautifulSoup(html, 'lxml')


def get_category_links(section_url):
    # the section page lists every category inside <dl class="boccat">
    soup = make_soup(section_url)
    boccat = soup.find('dl', 'boccat')
    category_links = [BASE_URL + dd.a['href'] for dd in boccat.find_all('dd')]
    return category_links


def get_category_winner(category_url):
    # winners are tagged <h2 class="boc1">, runners-up <h2 class="boc2">
    soup = make_soup(category_url)
    category = soup.find('h1', 'headline').string
    winner = [h2.string for h2 in soup.find_all('h2', 'boc1')]
    runners_up = [h2.string for h2 in soup.find_all('h2', 'boc2')]
    return {
        'category': category,
        'category_url': category_url,
        'winner': winner,
        'runners_up': runners_up,
    }


if __name__ == '__main__':
    food_n_drink = ('http://chicagoreader.com/chicago/'
                    'best-of-chicago-2011-food-drink/BestOf?oid=4106228')
    categories = get_category_links(food_n_drink)
    data = []
    for category in categories:
        winner = get_category_winner(category)
        data.append(winner)
        sleep(1)  # be polite to the server between requests
    print(data)
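Not part of the original gist, but a minimal sketch of what you might do with the scraped results instead of printing them: write each category dict (the keys built in get_category_winner: category, category_url, winner, runners_up) to a CSV. The bocs.csv filename and the '; ' joining of list-valued fields are illustrative choices, not anything from the gist.

import csv

# assumes `data` is the list of dicts built in the __main__ block above
with open('bocs.csv', 'wb') as f:  # use open('bocs.csv', 'w', newline='') on Python 3
    fields = ['category', 'category_url', 'winner', 'runners_up']
    writer = csv.DictWriter(f, fieldnames=fields)
    writer.writeheader()
    for row in data:
        # join the list-valued columns so each record stays on one CSV line
        writer.writerow({
            'category': row['category'],
            'category_url': row['category_url'],
            'winner': '; '.join(w for w in row['winner'] if w),
            'runners_up': '; '.join(r for r in row['runners_up'] if r),
        })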
@nblackburn87 (Author)
Ahh. That makes sense I think. Lemme see what I can get going. Thanks!
