Created
June 17, 2015 16:09
-
-
Save ejmurray/d9d8a22f3291e0b286f8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Look up the PubMed IDs (PMIDs) that match one or more article titles,
# using the Entrez interface from Biopython, and print them.
__author__ = 'Ernest'
# See http://goo.gl/uXcUob for retrieving the article details from a PubMed ID.
from Bio import Entrez

# Contact address sent along with Entrez requests.
# NOTE(review): this value looks like an e-mail obfuscation placeholder rather
# than a real address -- replace it with a working one before running.
Entrez.email = "[email protected]"

for single_term in ["The metabolic fate of 14C-ximoprofen in rats, baboons and humans"]:
    handle = Entrez.esearch(db="pubmed", term=single_term)
    try:
        res = Entrez.read(handle)
    finally:
        # Always release the network handle, even if parsing fails.
        handle.close()
    pmids = res["IdList"]
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print('PMIDs for %s: %s' % (single_term, pmids))
#!/usr/bin/env python
##################
# PYTHON SCRIPT
# PERFORM WEBSITE SCRAPE OF PUBMED
# PULL RELEVANT ARTICLE INFO FROM WEBPAGE
# FORMAT CONTENT FOR WIKI TEMPLATE
# REQUIRES 'BeautifulSoup'
# AUTHOR: BRADLEY MONK
# LICENSE: GNU
#################
import re
# re.compile('<title>(.*)</title>')
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: expose urllib.request under the name the script already uses.
    import urllib.request as urllib2

from bs4 import BeautifulSoup

# Article page that is scraped.
# NOTE(review): the CSS class names used below ("auths", "cit", "rprtid",
# "rprt abstract") are whatever this page served when the script was written;
# confirm they still match the current page markup.
PUBMED_URL = 'http://www.ncbi.nlm.nih.gov/pubmed/2336835'


def _fetch_soup(url):
    """Download *url* and return its content parsed as a BeautifulSoup tree."""
    # closing() works on both Python 2 and 3 response objects and guarantees
    # the connection is released even if read() raises.
    with closing(urllib2.urlopen(url)) as response:
        html = response.read()
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    return BeautifulSoup(html, 'html.parser')


def _extract_year(citation):
    """Return the 4 characters starting two positions after the first '.'.

    If *citation* contains no '.', ``find`` yields -1 and the slice is
    ``citation[1:5]``.
    """
    start = citation.find(".") + 2
    return citation[start:start + 4]


def _child_string(element, child_name):
    """Return ``element.<child_name>.string``, or '' if either tag is missing."""
    if element is None:
        return ''
    child = element.find(child_name)
    if child is None:
        return ''
    return child.string


def main():
    """Scrape the PubMed article page and print it as an ``{{Article|...}}`` template.

    Fields are printed one per line, separated by lines containing ``|``, in
    the order: author strings, year, journal, PMID, PMID (again), title.
    A field whose page element cannot be found is printed as an empty line
    instead of aborting the script.
    """
    soup = _fetch_soup(PUBMED_URL)

    print("#################------------------#################")

    #------- pubmed authors ---------#
    print("{{Article|")
    last_string = None
    for author_div in soup.find_all('div', attrs={"class": "auths"}):
        for text in author_div.strings:
            last_string = text
            print(text)
    # The last author-block string is printed once more after the loop; skip
    # that when nothing was found.
    if last_string is not None:
        print(last_string)

    # The citation element carries both the year text and the journal link,
    # so look it up once.
    citation = soup.find(attrs={"class": "cit"})

    #------- pubmed year ------------#
    print("|")
    print(_extract_year(citation.get_text()) if citation is not None else '')

    #------- pubmed journal ---------#
    print("|")
    print(_child_string(citation, 'a'))

    #--------- pubmed PMID -----------#
    pmid = _child_string(soup.find(attrs={"class": "rprtid"}), 'dd')
    print("|")
    print(pmid)

    # NOTE(review): the template slot after the PMID is filled with the PMID
    # again; a "linkoulist" element was looked up here but never used. Confirm
    # whether a link was meant to be printed instead.
    print("|")
    print(pmid)

    #------- pubmed title ---------#
    print("|")
    print(_child_string(soup.find(attrs={"class": "rprt abstract"}), 'h1'))

    print("}}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment