Created
June 3, 2016 16:35
-
-
Save markuskreitzer/3f2b94688ae3fc7a1b9b335677a09a41 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8
# Download the Fortune 500 (2015) listing page and print the text of every
# <span class="company-name"> element found in it.
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen into urllib.request; bind it to the old name so
    # the rest of the script is identical on both interpreters.
    import urllib.request as urllib2

from bs4 import BeautifulSoup

fortune_500 = 'http://fortune.com/fortune500/2015'

# closing() guarantees the HTTP response is released once parsing finishes,
# even if BeautifulSoup raises. It is used instead of a bare `with` because
# the Python 2 response object is not a context manager.
with closing(urllib2.urlopen(fortune_500)) as page:
    soup = BeautifulSoup(page, 'lxml')

# To parse a locally saved copy instead of hitting the network:
# with open('boo.html') as page:
#     soup = BeautifulSoup(page, 'lxml')

# This is how I found it, and a better way to drill down to it that avoids
# relying on a generic class name:
# print(soup.prettify())
# results = soup.find_all("div", {"class": "company-franchise-result-content current"})[0]
# for list_item in results.find_all("li", {"class": "company-list-item icon-new-right-arrow"}):
#     print(list_item.find("span", {"class": "company-name"}).text)

# This is the quick and dirty way: grab every span with the company-name class.
companies = [
    list_item.text
    for list_item in soup.find_all("span", {"class": "company-name"})
]
# Parenthesised single argument prints the same list repr on Python 2 and 3.
print(companies)

# Or pretty, one company per line:
# for company in companies:
#     print(company)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment