Skip to content

Instantly share code, notes, and snippets.

@ginixsan
Forked from SemiQuant/ImpactHunter.py
Created June 3, 2023 15:57
Show Gist options
  • Save ginixsan/2514adafce3a4a4a4ec3df60573a1af5 to your computer and use it in GitHub Desktop.
Save ginixsan/2514adafce3a4a4a4ec3df60573a1af5 to your computer and use it in GitHub Desktop.
Summarize a researcher's publications and calculate the journal impact factors (IFs) of their first- and last-author papers within a date range
#!/bin/python
from Bio import Entrez
import subprocess
import ast
import statistics
def _summary_to_publication(summary):
    """Convert one parsed PubMed ESummary record into a plain publication dict."""
    authors = summary.get('AuthorList') or []
    pub_types = summary.get('PubTypeList')
    return {
        'id': summary['Id'],
        'title': summary.get('Title', ''),
        'authors': authors,
        # Some records carry no author list; fall back to an empty string so
        # callers can still apply string operations to these fields.
        'first_author': authors[0] if authors else '',
        'last_author': authors[-1] if authors else '',
        'article_type': pub_types[0] if pub_types else 'N/A',
        'journal_name': summary.get('FullJournalName', ''),
    }


def search_publications(author_name, start_year, end_year, *, max_results=1000):
    """Search PubMed for an author's journal articles within a year range.

    The query keeps records typed "Journal Article" and excludes clinical
    trials, preprints and reviews.

    Args:
        author_name: Name placed in the ``[Author]`` field of the query.
        start_year: First publication year (inclusive) of the ``[PDAT]`` range.
        end_year: Last publication year (inclusive) of the ``[PDAT]`` range.
        max_results: Upper bound on the number of PubMed IDs requested from
            ESearch. Without an explicit ``retmax`` NCBI returns only the
            first 20 IDs, which silently truncates the result set.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``authors``,
        ``first_author``, ``last_author``, ``article_type`` and
        ``journal_name``. The list is empty when nothing matches.

    NOTE(review): ``Entrez.email`` is not set anywhere in the visible code;
    NCBI asks clients to identify themselves - confirm it is configured.
    """
    query = f'({author_name}[Author]) AND (Journal Article[Publication Type]) NOT (Clinical Trial[Publication Type]) NOT preprint[pt] NOT review[pt] AND ({start_year}:{end_year}[PDAT])'
    handle = Entrez.esearch(db='pubmed', term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the connection even when parsing the response fails.
        handle.close()
    id_list = list(record['IdList'])

    publications = []
    # Fetch summaries in batches instead of issuing one HTTP request per ID.
    batch_size = 200
    for start in range(0, len(id_list), batch_size):
        batch = id_list[start:start + batch_size]
        handle = Entrez.esummary(db='pubmed', id=','.join(batch))
        try:
            summaries = Entrez.read(handle)
        finally:
            handle.close()
        publications.extend(_summary_to_publication(s) for s in summaries)
    return publications
def get_if(journal_name, executable="impact_factor"):
    """Look up a journal's impact factor through the ``impact_factor`` CLI.

    Runs ``<executable> search <journal_name>`` and parses its standard
    output as a Python literal. The ``'factor'`` entry of the first element
    of that literal is returned.

    Args:
        journal_name: Journal name passed to the ``search`` sub-command.
        executable: Name or path of the command-line tool to run.

    Returns:
        The ``'factor'`` value of the first search hit, or ``0`` when the
        tool cannot be started, exits with a non-zero status, or prints
        output that cannot be parsed into the expected shape.
    """
    command = [executable, "search", journal_name]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except (OSError, ValueError):
        # OSError: the tool is not installed or not executable.
        # ValueError: invalid argument (e.g. embedded NUL) or undecodable output.
        return 0
    if result.returncode != 0:
        return 0
    try:
        # literal_eval only accepts literals, so the tool's output is parsed
        # without ever being executed as code.
        hits = ast.literal_eval(result.stdout.strip())
        return hits[0]['factor']
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError,
            MemoryError, RecursionError):
        # Empty, malformed or unexpectedly shaped output: best-effort fallback.
        return 0
# Main program
def summarize_impacts(values):
    """Summarize a list of impact factors, treating ``0`` as "not found".

    Args:
        values: Iterable of numeric impact factors; ``0`` marks a journal
            whose impact factor could not be looked up.

    Returns:
        A tuple ``(num_zeros, median, iqr)`` where ``median`` and ``iqr``
        are computed over the non-zero values only. ``median`` is ``None``
        when there are no non-zero values; ``iqr`` (third quartile minus
        first quartile) is ``None`` when there are fewer than two.
    """
    values = list(values)
    num_zeros = values.count(0)
    nonzero = [v for v in values if v != 0]
    median = statistics.median(nonzero) if nonzero else None
    if len(nonzero) >= 2:
        q1, _, q3 = statistics.quantiles(nonzero, n=4)
        iqr = q3 - q1
    else:
        iqr = None
    return num_zeros, median, iqr


def print_impact_summary(values):
    """Print the zero count, median and IQR for a list of impact factors."""
    num_zeros, median, iqr = summarize_impacts(values)
    print("Number of zeros:", num_zeros)
    print("Median:", "N/A" if median is None else median)
    print("IQR:", "N/A" if iqr is None else iqr)


def main():
    """Prompt for an author and year range, then report impact-factor stats.

    Lists every publication on which the author appears first or last, then
    prints publication counts followed by impact-factor statistics for the
    first/last-author subset and for all publications.
    """
    author_name = input('Enter author name: ')
    start_year = int(input('Enter start year: '))
    end_year = int(input('Enter end year: '))

    name_parts = author_name.split()
    if not name_parts:
        raise SystemExit('Author name must not be empty.')
    # The last whitespace-separated token is treated as the surname and is
    # matched as a case-insensitive substring of the author strings.
    surname = name_parts[-1].lower()

    publications = search_publications(author_name, start_year, end_year)
    if not publications:
        print(f"No publications found for '{author_name}' within the specified date range.")
        return

    impacts_all = []
    impacts = []
    first_author_count = 0
    last_author_count = 0
    for pub in publications:
        impact = get_if(pub['journal_name'])
        impacts_all.append(impact)
        is_first = surname in pub['first_author'].lower()
        is_last = surname in pub['last_author'].lower()
        first_author_count += is_first
        last_author_count += is_last
        if is_first or is_last:
            print(f"Publication ID: {pub['id']}")
            print(f"Title: {pub['title']}")
            print(f"Authors: {', '.join(pub['authors'])}")
            print(f"Article Type: {pub['article_type']}")
            print(f"Journal: {pub['journal_name']}")
            print('\n')
            impacts.append(impact)

    print(f"\nSummary for '{author_name}' (Publications from {start_year} to {end_year}):")
    print(f"Total publications: {len(publications)}")
    print(f"Number of publications where '{author_name}' is the first author: {first_author_count}")
    print(f"Number of publications where '{author_name}' is the last author: {last_author_count}")
    print("\nPublications where the author is the first or last author:\n")

    print("First or Last")
    print_impact_summary(impacts)
    print("\nALL")
    print_impact_summary(impacts_all)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment