Last active
January 4, 2016 09:19
-
-
Save woemler/8601450 to your computer and use it in GitHub Desktop.
Searches Sanger's COSMIC cell line database and retrieves basic sample metadata.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import urllib2

from BeautifulSoup import BeautifulSoup
def fetch_page_soup(url):
    """Fetch a URL and return its body parsed as a BeautifulSoup object.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` instance built from the response body.

    Raises:
        Whatever ``urllib2.urlopen`` or ``response.read`` raises; errors are
        propagated unchanged to the caller.
    """
    # Open the connection *before* entering the try block: if urlopen()
    # fails there is nothing to close, and the original error must reach
    # the caller instead of being masked by an UnboundLocalError raised
    # from the cleanup code.
    response = urllib2.urlopen(url)
    try:
        return BeautifulSoup(response.read())
    finally:
        response.close()
def find_cosmic_cell_line(cosmic_id):
    """Return a COSMIC cell line's annotation, given a COSMIC sample ID.

    Args:
        cosmic_id: COSMIC sample identifier; it is converted with ``str``
            and substituted into the sample overview URL.

    Returns:
        A dict mapping each ``<dt>`` label found in the overview panel to the
        text of the ``<dd>`` at the same position. The ``"Sample name"``
        entry is taken from the link inside its ``<dd>``. An empty dict is
        returned when the page has no ``div#overview`` element or that
        element lacks the expected ``w75`` panel.
    """
    url = 'http://cancer.sanger.ac.uk/cosmic/sample/overview?id=%s' % str(cosmic_id)
    page = fetch_page_soup(url)

    # The sample metadata is stored in the "overview" tab. Look it up once
    # and compare against None explicitly rather than relying on the
    # truthiness of the returned tag.
    overview = page.find("div", id="overview")
    if overview is None:
        return {}

    panel = overview.find("div", {"class": re.compile("w75")})
    if panel is None:
        # Overview tab present but without the expected panel: nothing to
        # parse, so report "no metadata" instead of failing on None.
        return {}

    # Pair every <dt> label with the <dd> value at the same position.
    labels = [dt.string for dt in panel.findAll("dt")]
    values = [dd.string for dd in panel.findAll("dd")]
    metadata = dict(zip(labels, values))

    # The sample name is wrapped in a link, so the pairing above does not
    # capture it properly; pluck it out separately. Each step is checked so
    # that a missing label or link leaves the zipped value in place rather
    # than raising AttributeError.
    name_label = panel.find(text="Sample name")
    name_cell = name_label.findNext("dd") if name_label is not None else None
    name_link = name_cell.find("a") if name_cell is not None else None
    if name_link is not None:
        metadata["Sample name"] = name_link.string

    return metadata
if __name__ == "__main__":
    # Example run: look up one sample and print its metadata as
    # tab-separated "label<TAB>value" lines.
    cosmic_id = 905965
    # Pass the sample ID defined above (not the builtin ``id`` function).
    results = find_cosmic_cell_line(cosmic_id)
    for k, v in results.items():
        # Single-argument parenthesised print behaves the same whether
        # print is a statement or a function.
        print("%s\t%s" % (k, v))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment