Skip to content

Instantly share code, notes, and snippets.

@ejmurray
Created June 17, 2015 16:09
Show Gist options
  • Save ejmurray/d9d8a22f3291e0b286f8 to your computer and use it in GitHub Desktop.
Save ejmurray/d9d8a22f3291e0b286f8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Look up the PubMed IDs matching each article title through NCBI Entrez
# and print them.
__author__ = 'Ernest'
# See http://goo.gl/uXcUob for retrieving the article information from the PubMed ID.
from Bio import Entrez

# Contact address sent along with the Entrez requests.
Entrez.email = "[email protected]"

for single_term in ["The metabolic fate of 14C-ximoprofen in rats, baboons and humans"]:
    data = Entrez.esearch(db="pubmed", term=single_term)
    try:
        res = Entrez.read(data)
    finally:
        # Release the underlying connection even when parsing fails.
        data.close()
    pmids = res["IdList"]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('PMIDs for %s: %s' % (single_term, pmids))
#!/usr/bin/env python
##################
# PYTHON SCRIPT
# PERFORM WEBSITE SCRAPE OF PUBMED
# PULL RELEVANT ARTICLE INFO FROM WEBPAGE
# FORMAT CONTENT FOR WIKI TEMPLATE
# REQUIRES 'BeautifulSoup'
# AUTHOR: BRADLEY MONK
# LICENSE: GNU
#################
import re
# re.compile('<title>(.*)</title>')
import urllib2
from bs4 import BeautifulSoup
# Download the PubMed record page and parse it into `soup`, which every
# section below queries.
response = urllib2.urlopen('http://www.ncbi.nlm.nih.gov/pubmed/2336835')
try:
    # Name the parser explicitly so the resulting tree does not depend on
    # which optional parsers happen to be installed.
    soup = BeautifulSoup(response.read(), "html.parser")
finally:
    # Close the HTTP response whether or not parsing succeeded.
    response.close()
print("#################------------------#################")
#------- pubmed authors ---------#
# Open the wiki template, then print every text fragment found inside the
# elements with class "auths", one fragment per line.
print("{{Article|")
div_tag = soup.find_all('div', attrs={"class": "auths"})
auts = None  # last fragment seen; stays None when nothing matched
for diva in div_tag:
    for author_text in diva.strings:
        auts = author_text
        print(author_text)
#------- pubmed authors ---------#
# NOTE(review): the last fragment is printed a second time here, exactly as
# the original script did -- confirm whether that repetition is wanted.
# The guard avoids a NameError when the page has no "auths" block.
if auts is not None:
    print(auts)
#------- pubmed year ------------#
# Print the publication year taken from the first element with class "cit".
print("|")
jouryear = soup.find_all(attrs={"class": "cit"})
year = jouryear[0].get_text()
# Presumably the citation reads like "Journal. 1990 Feb;..." -- take the first
# four-digit number that directly follows a full stop.  Unlike slicing at a
# fixed offset after the first ".", this still works for abbreviated journal
# names that contain dots and for missing or extra spaces.
year_match = re.search(r"\.\s*(\d{4})\b", year)
# Print an empty line rather than arbitrary characters when no year is found.
print(year_match.group(1) if year_match else "")
#------- pubmed year ------------#
#------- pubmed journal ---------#
# Print the text of the first link inside the first element with class "cit".
journal = soup.find_all(attrs={"class": "cit"})
print("|")
first_citation = journal[0]
journal_link = first_citation.a
print(journal_link.string)
#------- pubmed journal ---------#
#--------- pubmed PMID -----------#
# Print the text of the first <dd> inside the first element with class "rprtid".
PMID = soup.find_all(attrs={"class": "rprtid"})
print("|")
pmid_text = PMID[0].dd.string
print(pmid_text)
#--------- pubmed PMID -----------#
# NOTE(review): `link` is looked up but never used, and the PMID is printed a
# second time below -- confirm whether a link was meant to be printed instead.
link = soup.find_all(attrs={"class": "linkoulist"})
print("|")
print(pmid_text)
#------- pubmed title ---------#
# Print the article title from the <h1> of the first "rprt abstract" element.
title = soup.find_all(attrs={"class": "rprt abstract"})
print("|")
# Use get_text() rather than .string: .string is None whenever the <h1> holds
# nested markup (e.g. italics or superscripts), which would print "None".
print(title[0].h1.get_text())
#------- pubmed title ---------#
# print("}}")
# print("{{ExpandBox|Expand to view experiment summary|")
#------- pubmed abstract ---------#
# abstract = soup.find_all(attrs={"class": "abstr"})
# print(abstract[0].p.string)
#------- pubmed abstract ---------#
# Close the wiki template opened in the authors section.
print("}}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment