Last active
June 3, 2023 15:57
-
-
Save SemiQuant/7ceb5456f0a7331eda660ba2f9beb1a5 to your computer and use it in GitHub Desktop.
Summarize the publications of a researcher, and calculate the impact factors (IFs) of their first- and last-author papers in a date range
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python | |
from Bio import Entrez | |
import subprocess | |
import ast | |
import statistics | |
def search_publications(author_name, start_year, end_year, max_results=10000):
    """Search PubMed for an author's journal articles within a date range.

    The query keeps records typed "Journal Article" and excludes clinical
    trials, preprints and reviews.

    Args:
        author_name: Name to match against the PubMed ``[Author]`` field.
        start_year: First publication year (inclusive) of the ``[PDAT]`` range.
        end_year: Last publication year (inclusive) of the ``[PDAT]`` range.
        max_results: Upper bound on the number of PubMed IDs requested from
            ESearch.

    Returns:
        A list of dicts, one per publication, with the keys ``id``, ``title``,
        ``authors``, ``first_author``, ``last_author``, ``article_type`` and
        ``journal_name``. ``first_author`` and ``last_author`` are empty
        strings when the record has no author list.
    """
    # Search PubMed for publications by the author within the specified date range
    query = (
        f'({author_name}[Author]) AND (Journal Article[Publication Type]) '
        'NOT (Clinical Trial[Publication Type]) NOT preprint[pt] NOT review[pt] '
        f'AND ({start_year}:{end_year}[PDAT])'
    )
    # ESearch returns only the first 20 IDs unless retmax is given, which
    # would silently truncate the publication list of a productive author.
    handle = Entrez.esearch(db='pubmed', term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()

    # Retrieve publication IDs
    id_list = [str(pub_id) for pub_id in record['IdList']]

    # Retrieve publication details in batches: one ESummary request per chunk
    # of IDs instead of one HTTP round trip per publication.
    batch_size = 200
    publications = []
    for start in range(0, len(id_list), batch_size):
        batch = id_list[start:start + batch_size]
        handle = Entrez.esummary(db='pubmed', id=','.join(batch))
        try:
            summaries = Entrez.read(handle)
        finally:
            handle.close()

        for summary in summaries:
            authors = list(summary['AuthorList'])
            pub_types = summary['PubTypeList'] if 'PubTypeList' in summary else []
            publications.append({
                'id': summary['Id'],
                'title': summary['Title'],
                'authors': authors,
                # Some records carry no authors; avoid IndexError on those.
                'first_author': authors[0] if authors else '',
                'last_author': authors[-1] if authors else '',
                'article_type': pub_types[0] if pub_types else 'N/A',
                # Retrieve journal name
                'journal_name': summary['FullJournalName'],
            })
    return publications
def get_if(journal_name):
    """Look up a journal's impact factor with the ``impact_factor`` CLI.

    Runs ``impact_factor search <journal_name>``, parses its standard output
    as a Python literal and returns the ``'factor'`` entry of the first match.

    Args:
        journal_name: Journal name passed to the search command.

    Returns:
        The ``'factor'`` value of the first search result, or ``0`` when the
        lookup fails for any reason (executable missing, non-zero exit
        status, unparsable output, or no match).
    """
    command = ["impact_factor", "search", journal_name]
    try:
        # subprocess.run raises OSError (e.g. FileNotFoundError) when the
        # executable is not installed, so it must be inside the try block.
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            return 0
        matches = ast.literal_eval(result.stdout.strip())
        return matches[0]['factor']
    except (OSError, ValueError, SyntaxError, LookupError, TypeError):
        # Best-effort lookup: treat any failure as "impact factor unknown"
        # without swallowing KeyboardInterrupt or SystemExit.
        return 0
# Main program
def _print_impact_summary(heading, impact_factors):
    """Print the zero count, median and IQR of a list of impact factors.

    A value of 0 marks a failed impact-factor lookup; zeros are counted and
    then excluded from the statistics. "N/A" is printed when too few
    non-zero values remain to compute a statistic.
    """
    print(heading)
    known = [value for value in impact_factors if value != 0]
    print("Number of zeros:", len(impact_factors) - len(known))

    median = statistics.median(known) if known else "N/A"
    if len(known) >= 2:
        # Interquartile range = Q3 - Q1 (statistics.quantiles, Python 3.8+).
        q1, _, q3 = statistics.quantiles(known, n=4)
        iqr = q3 - q1
    else:
        iqr = "N/A"
    print("Median:", median)
    print("IQR:", iqr)


def main():
    """Prompt for an author and year range, then report publication statistics.

    Lists the publications where the author's surname appears in the first or
    last author entry, prints the publication counts, and summarises journal
    impact factors for those publications and for all publications found.
    """
    author_name = input('Enter author name: ')
    name_parts = author_name.split()
    if not name_parts:
        raise SystemExit('Author name must not be empty.')
    start_year = int(input('Enter start year: '))
    end_year = int(input('Enter end year: '))

    # Authorship is detected by a case-insensitive surname substring match.
    surname = name_parts[-1].lower()
    publications = search_publications(author_name, start_year, end_year)

    if not publications:
        print(f"No publications found for '{author_name}' within the specified date range.")
        return

    first_author_count = 0
    last_author_count = 0
    impacts_all = []
    impacts = []
    for pub in publications:
        impact = get_if(pub['journal_name'])
        impacts_all.append(impact)

        is_first = surname in pub['first_author'].lower()
        is_last = surname in pub['last_author'].lower()
        first_author_count += is_first
        last_author_count += is_last
        if is_first or is_last:
            print(f"Publication ID: {pub['id']}")
            print(f"Title: {pub['title']}")
            print(f"Authors: {', '.join(pub['authors'])}")
            print(f"Article Type: {pub['article_type']}")
            print(f"Journal: {pub['journal_name']}")
            print('\n')
            impacts.append(impact)

    print(f"\nSummary for '{author_name}' (Publications from {start_year} to {end_year}):")
    print(f"Total publications: {len(publications)}")
    print(f"Number of publications where '{author_name}' is the first author: {first_author_count}")
    print(f"Number of publications where '{author_name}' is the last author: {last_author_count}")
    print("\nPublications where the author is the first or last author:\n")

    _print_impact_summary("First or Last", impacts)
    _print_impact_summary("\nALL", impacts_all)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment