Skip to content

Instantly share code, notes, and snippets.

@computron
Last active April 22, 2025 05:43
Show Gist options
  • Save computron/b073d7e5962f30327f23b966d33c78c2 to your computer and use it in GitHub Desktop.
Save computron/b073d7e5962f30327f23b966d33c78c2 to your computer and use it in GitHub Desktop.
Script to get all your coauthors, last date of collaboration, and number of times collaborated. Author: Anubhav Jain (https://github.com/computron) Video tutorial: https://youtu.be/vgEWvP3M02w
#!/usr/bin/env python3
"""
Script to get all your coauthors, last date of collaboration, and number of times collaborated
Author: Anubhav Jain (https://github.com/computron)
Video tutorial: https://youtu.be/vgEWvP3M02w
Troubleshooting:
1. Make sure you use quotes if there is a space, e.g., --institution "Stanford University" with quotes
2. Make sure you have a Scopus subscription; you may need to VPN into your institution or be on an institutional wifi network. e.g., this should solve "pybliometrics.scopus.exception.Scopus401Error: The requestor is not authorized to access the requested view or fields of the resource"
3. Try using the --authorid flag instead of --lastname, --firstname, and --institution. You can find an author id at: https://www.scopus.com/freelookup/form/author.uri
4. Make sure your API key is configured correctly / switch your API key to a new one. You can adjust settings in: $HOME/.config/pybliometrics.cfg
"""
import argparse
from pybliometrics.scopus import AuthorSearch, ScopusSearch, AbstractRetrieval, AuthorRetrieval
from collections import Counter
import sys
from tqdm import tqdm
import pybliometrics
# Parse and validate the command-line arguments BEFORE touching pybliometrics,
# so that `--help` and usage errors do not depend on a working Scopus
# configuration.
parser = argparse.ArgumentParser(description="Retrieve Scopus co-author data for a given author.")

# User-defined parameters as command-line arguments. Every option is optional
# at the argparse level; the "author id OR last name" rule is enforced below.
parser.add_argument("--lastname", required=False, default=None, help="Target author's last name")
parser.add_argument("--firstname", required=False, default=None, help="Target author's first name")
parser.add_argument("--institution", required=False, default=None, help="Target author's institution")
parser.add_argument("--authorid", required=False, default=None, help="Scopus author ID (optional)")
parser.add_argument("--year_min", required=False, type=int, default=None, help="Minimum publication year (inclusive)")
parser.add_argument("--year_max", required=False, type=int, default=None, help="Maximum publication year (inclusive)")
parser.add_argument("--additional_query", required=False, default="", help="Additional search query parameters")
parser.add_argument(
    "--output_template",
    required=False,
    default="{last_name}\t{first_name}\t{affiliation_name}\t{lastyear}\t{count}",
    help="Output template format, e.g. {last_name}\t{first_name}\t{affiliation_name}\t{lastyear}\t{count}",
)
parser.add_argument(
    "--output_file",
    required=False,
    default=None,
    help="Output file name (if not provided, results are printed to console)",
)
args = parser.parse_args()

# Assign arguments to the module-level names used by the rest of the script
target_author_lastname = args.lastname
target_author_firstname = args.firstname
target_author_institution = args.institution
target_authorid = args.authorid
year_min = args.year_min
year_max = args.year_max
additional_query = args.additional_query
output_template = args.output_template
output_file = args.output_file

# Without an author id the author search needs at least a last name.
if not target_authorid and target_author_lastname is None:
    print("Must provide either a target_authorid OR lastname! Use the --help flag to see usage.")
    sys.exit(1)  # non-zero status so calling shells/scripts can detect the failure

# Initialise pybliometrics only once the arguments are known to be usable.
pybliometrics.scopus.init()
# Find the target author via an author search when no author id was supplied.
# The query always filters on last name and optionally on first name and
# affiliation; exactly one match is required to proceed.
if not target_authorid:
    query = f"AUTHLAST({target_author_lastname})"
    if target_author_firstname:
        query += f" AND AUTHFIRST({target_author_firstname})"
    if target_author_institution:
        query += f" AND AFFIL({target_author_institution})"

    author_search = AuthorSearch(query)
    matching_authors = author_search.authors or []  # treat a falsy result as "no matches"

    if len(matching_authors) != 1:
        # Zero or several candidates: the script cannot pick one on its own.
        if matching_authors:
            print(f"Multiple authors found matching the criteria: {query}")
        else:
            print(f"No authors found matching the criteria: {query}")
        print("Try looking up the author id at: https://www.scopus.com/freelookup/form/author.uri")
        print("Then use the --authorid flag instead to set the author id manually.")
        sys.exit(1)  # signal the failure to the caller instead of exiting with status 0

    # The author id is taken as the last dash-separated component of the EID.
    target_authorid = matching_authors[0].eid.split("-")[-1]
# Initialize dictionaries and counters for co-author data
coauthorids_lastdate = {}  # co-author id -> most recent collaboration date string ("" if unknown)
coauthorids_count = Counter()  # co-author id -> number of shared documents

# Search for all documents from the target author, optionally bounded by year.
# The strict comparisons with +/-1 make both bounds inclusive.
if year_min is not None:
    additional_query += f" AND PUBYEAR > {year_min - 1}"
if year_max is not None:
    additional_query += f" AND PUBYEAR < {year_max + 1}"
query = f"AU-ID({target_authorid}){additional_query}"
search = ScopusSearch(query, view="COMPLETE")

# Compare ids as strings: the ids come out of a ";"-separated string field.
target_authorid_str = str(target_authorid)

# Process each document retrieved from Scopus to track coauthors, lastdate, and count.
# `search.results` may be None when nothing matches, hence the `or []`.
for doc in tqdm(search.results or [], desc=f"Collecting all documents for query: {query}"):
    author_ids_field = getattr(doc, "author_ids", None)
    if not author_ids_field or not author_ids_field.strip():
        continue  # document carries no author ids

    # A missing date is normalised to "" so that every stored value is a str:
    # "" sorts before any real date, and str-vs-None comparisons cannot occur.
    pubdate = getattr(doc, "coverDate", None) or ""

    for raw_id in author_ids_field.split(";"):
        coauthorid = raw_id.strip()
        if not coauthorid or coauthorid == target_authorid_str:
            continue  # skip empty fragments and the target author themself

        coauthorids_count[coauthorid] += 1

        # Keep the most recent date; the dates are compared as plain strings.
        if coauthorid not in coauthorids_lastdate or coauthorids_lastdate[coauthorid] < pubdate:
            coauthorids_lastdate[coauthorid] = pubdate
all_data = []  # one formatted output row per co-author

# Look up every co-author and render one row for each from the output template.
for coauthorid, lastdate in tqdm(coauthorids_lastdate.items(), desc="Getting information on every coauthor"):
    # Only the remote lookup is best-effort: a failure for one co-author is
    # reported (with its cause) and the loop moves on to the next one.
    try:
        author = AuthorRetrieval(coauthorid)  # Retrieve co-author details from Scopus
        last_name = author.surname
        first_name = author.given_name
    except Exception as exc:
        print(f"Unable to find information for coauthorid: {coauthorid} ({type(exc).__name__}: {exc})")
        continue

    # Primary affiliation: prefer the parent organisation's name when present.
    try:
        primary_affiliation = author.affiliation_current[0]
        primary_affiliation_name = (
            primary_affiliation.preferred_name
            if primary_affiliation.parent_preferred_name is None
            else primary_affiliation.parent_preferred_name
        )
    except (AttributeError, LookupError, TypeError):
        # No current affiliation listed, or the record lacks the expected fields
        primary_affiliation_name = "Unknown"

    # The year is the part of the date before the first "-"; a missing date
    # yields empty strings instead of aborting the row.
    lastdate_text = lastdate or ""
    lastyear = lastdate_text.split("-")[0]

    # Formatting happens outside the try blocks on purpose: an invalid
    # placeholder in the user's template is a usage error and should surface
    # as such rather than be reported as a failed lookup for every co-author.
    # Note that some fields may be unused in the template.
    data = output_template.format(
        coauthorid=coauthorid,
        last_name=last_name,
        first_name=first_name,
        affiliation_name=primary_affiliation_name,
        lastdate=lastdate_text,
        lastyear=lastyear,
        count=coauthorids_count[coauthorid],
    )
    all_data.append(data)

all_data.sort()  # alphabetically sort the entries
# Write data to the output file or print it to the console
if output_file:
    # Explicit UTF-8 so author names with non-ASCII characters can be written
    # regardless of the platform's default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        # The raw template is written as the first line and acts as a header row.
        f.write(output_template + "\n")
        # Terminate every row (including the last) with a newline.
        f.writelines(f"{row}\n" for row in all_data)
else:
    print("\n".join(all_data))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment