Last active
August 29, 2015 14:02
-
-
Save Andygmb/260183b1a05cb66ccc2e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

import requests
from bs4 import BeautifulSoup
# Print the title of every article listed from the "Research Articles"
# section onward in the current issue's table of contents.

# Table-of-contents page that is fetched and scraped.
CURRENT_ISSUE_URL = "http://www.sciencemag.org/content/current"

# Fail fast on a hung connection or an HTTP error status instead of
# silently parsing an error page.
r = requests.get(CURRENT_ISSUE_URL, timeout=30)
r.raise_for_status()

# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(r.content, "html.parser")
data = soup.find_all("div", {"class": "level2"})

# Find the position of the block that holds the "ResearchArticles" heading.
for index, block in enumerate(data):
    if block.find("h3", {"id": "ResearchArticles"}) is not None:
        break
else:
    # No matching block (or no blocks at all): without this guard the slice
    # below would fail with NameError because `index` was never bound.
    sys.exit('No "ResearchArticles" section found at ' + CURRENT_ISSUE_URL)

# Keep the research article block and everything after it.
data = data[index:]

# Write raw UTF-8 bytes: Python 3 exposes the byte stream as
# sys.stdout.buffer, while Python 2's sys.stdout accepts bytes directly.
output = getattr(sys.stdout, "buffer", sys.stdout)

# Loop through the blocks and print the titles, one per line.
for block in data:
    section = block.find("ul", {"class": "cit-list"})
    if section is None:
        # A block without a citation list has no titles to print.
        continue
    for title in section.find_all("h4", {"class": "cit-title-group"}):
        output.write(title.text.encode("utf-8") + b"\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment