danielecook · May 14, 2013 19:59 · mabar1 · Oct 14, 2022
diff --git a/format_pubs.py b/format_pubs.py
 """
 Daniel E. Cook 2013 
 (danielecook.com)

 This script takes a csv containing authors and associated Pubmed identifiers (PMIDs) of their publications and outputs a formatted html document of their publications.
 The first row of the csv should contain the authors, and each row below it their publications (as PMIDs). If you put something other than a PMID in a cell, it will simply be output as-is -
 so you can add publications that might not be in pubmed or that you want to display in a certain way.

 This script might be useful for individuals who maintain publication lists for researchers at a university, for instance.

 Requires BioPython:
 pip install biopython

 The way the publications are displayed can be customized using CSS. This CSS can be used if desired:

 /* pubs */
 .pub_title {
  font-weight: bold;
 	font-size: 13px;
 	margin: 0px;
 	}
 .pub_authors {
 	color: #929292;
 	font-size: 11px;
 	margin: 0px;
 }
 .pub_info {
 	font-size: 11px;
 }
 .pub_info a {
 	padding-left: 3px;
 	padding-right: 3px;
 }

 """


 from Bio import Entrez
 from Bio import Medline
 import csv
 import os


 # Set your email here.
 email = "[email protected]"

 def f7(seq):
 	""" Returns the items of seq as a list with repeats dropped, keeping first-seen order """
 	already_seen = set()
 	unique_items = []
 	for item in seq:
 		if item in already_seen:
 			continue
 		already_seen.add(item)
 		unique_items.append(item)
 	return unique_items

 def csv_dict_array(f):
 	"""
 	Convert CSV to array for each author.

 	f is the path of a csv file whose header row holds the author names; every
 	non-empty cell below a name is collected for that author.

 	Returns a dict mapping each author to the list of their cell values, in file
 	order with repeated values removed.
 	"""
 	try:
 		handle = open(f,'rU') # U = Universal New Line Dialect
 	except ValueError:
 		# Python 3.11+ rejects the 'U' flag; there the csv module wants newline=''.
 		handle = open(f,'r',newline='')
 	# Generate per author dictionary
 	auth_dict = {}
 	with handle:
 		for row in csv.DictReader(handle,dialect='excel'):
 			for auth,cell in row.items():
 				# DictReader files surplus cells under the key None and fills
 				# cells missing from a short row with None - neither is a publication.
 				if auth is None or not cell:
 					continue
 				# Set Default - initialize array; else append.
 				auth_dict.setdefault(auth,[]).append(cell)
 	# Remove duplicates
 	for auth in auth_dict:
 		auth_dict[auth] = f7(auth_dict[auth])
 	return auth_dict

 def fetch_pub(pmid):
 	"""
 	Fetches pubmed data for each publication using its PMID and formats it as html.

 	pmid is an iterable of strings (the cells collected for one author). Every
 	item is printed as a progress message. An item that converts to an integer is
 	requested from the "pubmed" database through Entrez.efetch and each returned
 	Medline record is rendered as a <div class='pub'> snippet. An item that is not
 	a PMID, or whose lookup or formatting fails, is passed through unchanged.

 	Returns the list of resulting strings.
 	"""
 	Entrez.email = email
 	recs = []
 	for v in pmid:
 		print(v) # single-argument call form behaves the same on Python 2 and 3
 		handle = None
 		# Collected separately so a failure part-way through leaves no partial output.
 		formatted_recs = []
 		try:
 			# int(v) raises for non-PMID entries before any request is sent.
 			handle = Entrez.efetch(db="pubmed",id=int(v),retmode="text",rettype="medline")
 			for p in Medline.parse(handle):
 				pubmed_link = "<a class='pub_link' href='http://www.ncbi.nlm.nih.gov/pubmed/%s'>%s</a>" % (p['PMID'],p['PMID'])
 				if 'PMC' in p:
 					pubmed_link += " ( <a class='pmc_link' href='http://www.ncbi.nlm.nih.gov/pmc/articles/%s/'>Full Text</a> )" % (p['PMC'])

 				formatted = """
 				<div class='pub'>
 					<div class='pub_title'>%s</div>
 					<div class='pub_authors'>%s</div>
 					<div class='pub_date'>%s</div>
 					<div class='pub_journal_pages'>%s</div>
 					%s
 				</div>""" % (p['TI'],', '.join(p['AU']),p['DP'],p['SO'],pubmed_link)

 				# The tabs come from the indentation of the template above.
 				formatted_recs.append(formatted.replace('\t','').strip())
 		except Exception:
 			# Best effort: anything that cannot be fetched or formatted is output as given.
 			# Exception (not a bare except) so Ctrl-C and SystemExit still stop the script.
 			formatted_recs = [v]
 		finally:
 			if handle is not None:
 				handle.close()
 		# If the response held no records, keep the raw entry instead of dropping it.
 		recs.extend(formatted_recs or [v])

 	return recs


 # Script body: read pubs.csv and write one pubs_formatted/<author>.txt per author column.
 pubs = csv_dict_array("pubs.csv")
 # The output directory only has to be checked once, and only if there is something to write.
 if pubs and not os.path.exists("pubs_formatted"):
 	os.makedirs("pubs_formatted")
 for auth,pub_list in pubs.items():
 	# Fetch first so a failed lookup cannot leave a truncated file behind.
 	entries = fetch_pub(pub_list)
 	# 'w' replaces the original 'w!a', which is not a valid open() mode string.
 	with open(os.path.join("pubs_formatted",auth + ".txt"),'w') as f:
 		f.write('\n'.join(entries))
	"""
	Daniel E. Cook 2013
	(danielecook.com)

	This script takes a csv containing authors and associated Pubmed identifiers (PMIDs) of their publications and outputs a formatted html document of their publications.
	The first row of the csv should contain the authors, and each row below it their publications (as PMIDs). If you put something other than a PMID in a cell, it will simply be output as-is -
	so you can add publications that might not be in pubmed or that you want to display in a certain way.

	This script might be useful for individuals who maintain publication lists for researchers at a university, for instance.

	Requires BioPython:
	pip install biopython

	The way the publications are displayed can be customized using CSS. This CSS can be used if desired:

	/* pubs */
	.pub_title {
	font-weight: bold;
	font-size: 13px;
	margin: 0px;
	}
	.pub_authors {
	color: #929292;
	font-size: 11px;
	margin: 0px;
	}
	.pub_info {
	font-size: 11px;
	}
	.pub_info a {
	padding-left: 3px;
	padding-right: 3px;
	}

	"""


	from Bio import Entrez
	from Bio import Medline
	import csv
	import os


	# Set your email here.
	email = "[email protected]"

	def f7(seq):
		""" Removes non-unique items, keeping the first occurrence of each in its original position """
		seen = set()
		kept = []
		for x in seq:
			if x not in seen:
				seen.add(x)
				kept.append(x)
		return kept

	def csv_dict_array(f):
		"""
		Convert CSV to array for each author.

		f is the path of a csv file whose header row holds the author names; every
		non-empty cell below a name is collected for that author.

		Returns a dict mapping each author to the list of their cell values, in file
		order with repeated values removed.
		"""
		try:
			handle = open(f,'rU') # U = Universal New Line Dialect
		except ValueError:
			# Python 3.11+ rejects the 'U' flag; there the csv module wants newline=''.
			handle = open(f,'r',newline='')
		# Generate per author dictionary
		auth_dict = {}
		with handle:
			for row in csv.DictReader(handle,dialect='excel'):
				for auth,cell in row.items():
					# DictReader files surplus cells under the key None and fills
					# cells missing from a short row with None - neither is a publication.
					if auth is None or not cell:
						continue
					# Set Default - initialize array; else append.
					auth_dict.setdefault(auth,[]).append(cell)
		# Remove duplicates
		for auth in auth_dict:
			auth_dict[auth] = f7(auth_dict[auth])
		return auth_dict

	def fetch_pub(pmid):
		"""
		Fetches pubmed data for each publication using its PMID and formats it as html.

		pmid is an iterable of strings (the cells collected for one author). Every
		item is printed as a progress message. An item that converts to an integer is
		requested from the "pubmed" database through Entrez.efetch and each returned
		Medline record is rendered as a <div class='pub'> snippet. An item that is not
		a PMID, or whose lookup or formatting fails, is passed through unchanged.

		Returns the list of resulting strings.
		"""
		Entrez.email = email
		recs = []
		for v in pmid:
			print(v) # single-argument call form behaves the same on Python 2 and 3
			handle = None
			# Collected separately so a failure part-way through leaves no partial output.
			formatted_recs = []
			try:
				# int(v) raises for non-PMID entries before any request is sent.
				handle = Entrez.efetch(db="pubmed",id=int(v),retmode="text",rettype="medline")
				for p in Medline.parse(handle):
					pubmed_link = "<a class='pub_link' href='http://www.ncbi.nlm.nih.gov/pubmed/%s'>%s</a>" % (p['PMID'],p['PMID'])
					if 'PMC' in p:
						pubmed_link += " ( <a class='pmc_link' href='http://www.ncbi.nlm.nih.gov/pmc/articles/%s/'>Full Text</a> )" % (p['PMC'])

					formatted = """
	<div class='pub'>
	<div class='pub_title'>%s</div>
	<div class='pub_authors'>%s</div>
	<div class='pub_date'>%s</div>
	<div class='pub_journal_pages'>%s</div>
	%s
	</div>""" % (p['TI'],', '.join(p['AU']),p['DP'],p['SO'],pubmed_link)

					# The tabs come from the indentation of the template above.
					formatted_recs.append(formatted.replace('\t','').strip())
			except Exception:
				# Best effort: anything that cannot be fetched or formatted is output as given.
				# Exception (not a bare except) so Ctrl-C and SystemExit still stop the script.
				formatted_recs = [v]
			finally:
				if handle is not None:
					handle.close()
			# If the response held no records, keep the raw entry instead of dropping it.
			recs.extend(formatted_recs or [v])

		return recs


	# Script body: read pubs.csv and write one pubs_formatted/<author>.txt per author column.
	pubs = csv_dict_array("pubs.csv")
	# The output directory only has to be checked once, and only if there is something to write.
	if pubs and not os.path.exists("pubs_formatted"):
		os.makedirs("pubs_formatted")
	for auth,pub_list in pubs.items():
		# Fetch first so a failed lookup cannot leave a truncated file behind.
		entries = fetch_pub(pub_list)
		# 'w' replaces the original 'w!a', which is not a valid open() mode string.
		with open(os.path.join("pubs_formatted",auth + ".txt"),'w') as f:
			f.write('\n'.join(entries))
No results found