Instantly share code, notes, and snippets.
Last active
April 22, 2025 05:43
-
Star
1
(1)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
Save computron/b073d7e5962f30327f23b966d33c78c2 to your computer and use it in GitHub Desktop.
Script to get all your coauthors, last date of collaboration, and number of times collaborated. Author: Anubhav Jain (https://github.com/computron) Video tutorial: https://youtu.be/vgEWvP3M02w
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Script to get all your coauthors, the last date of collaboration, and the number of times collaborated.

Author: Anubhav Jain (https://github.com/computron)
Video tutorial: https://youtu.be/vgEWvP3M02w

Troubleshooting:
1. Make sure you use quotes if a value contains a space, e.g., --institution "Stanford University".
2. Make sure you have a Scopus subscription; you may need to VPN into your institution or be on an institutional wifi network. This should resolve errors such as "pybliometrics.scopus.exception.Scopus401Error: The requestor is not authorized to access the requested view or fields of the resource".
3. Try using the --authorid flag instead of --lastname, --firstname, and --institution. You can find an author id at: https://www.scopus.com/freelookup/form/author.uri
4. Make sure your API key is configured correctly, or switch to a new API key. You can adjust settings in: $HOME/.config/pybliometrics.cfg
"""
import argparse
import sys
from collections import Counter

import pybliometrics
from pybliometrics.scopus import AuthorSearch, ScopusSearch, AbstractRetrieval, AuthorRetrieval
from tqdm import tqdm

# Initialise pybliometrics before any Scopus class is used.
pybliometrics.scopus.init()
# Command-line interface.
# Every option is optional as far as argparse is concerned; any requirement
# that spans several options has to be checked after parsing.
parser = argparse.ArgumentParser(description="Retrieve Scopus co-author data for a given author.")

# Ways to identify the target author: by name (optionally narrowed by first
# name and institution) or directly by Scopus author id.
parser.add_argument("--lastname", required=False, default=None, help="Target author's last name")
parser.add_argument("--firstname", required=False, default=None, help="Target author's first name")
parser.add_argument("--institution", required=False, default=None, help="Target author's institution")
parser.add_argument("--authorid", required=False, default=None, help="Scopus author ID (optional)")

# Filters applied to the target author's publication list.
parser.add_argument("--year_min", required=False, type=int, default=None, help="Minimum publication year (inclusive)")
parser.add_argument("--year_max", required=False, type=int, default=None, help="Maximum publication year (inclusive)")
parser.add_argument("--additional_query", required=False, default="", help="Additional search query parameters")

# Output control. The template is a str.format() pattern; the default produces
# tab-separated columns.
parser.add_argument(
    "--output_template",
    required=False,
    default="{last_name}\t{first_name}\t{affiliation_name}\t{lastyear}\t{count}",
    help="Output template format, e.g. {last_name}\t{first_name}\t{affiliation_name}\t{lastyear}\t{count}",
)
parser.add_argument("--output_file", required=False, default=None, help="Output file name (if not provided, results are printed to console)")
# Parse the command line once; the rest of the script reads these
# module-level names rather than touching `args` again.
args = parser.parse_args()

# Assign arguments to variables
target_author_lastname = args.lastname
target_author_firstname = args.firstname
target_author_institution = args.institution
target_authorid = args.authorid
year_min = args.year_min
year_max = args.year_max
additional_query = args.additional_query
output_template = args.output_template
output_file = args.output_file

# The target author must be identifiable either by Scopus id or by (at least)
# a last name. An empty string counts as "not given" for both.
# parser.error() prints the usage line plus this message to stderr and exits
# with status 2, so shell scripts can detect the failure.
if not target_authorid and not target_author_lastname:
    parser.error("Must provide either --authorid OR --lastname! Use the --help flag to see usage.")
# Resolve the target author's Scopus id from the name/institution filters
# when no id was supplied on the command line.
if not target_authorid:
    query = f"AUTHLAST({target_author_lastname})"
    if target_author_firstname:
        query += f" AND AUTHFIRST({target_author_firstname})"
    if target_author_institution:
        query += f" AND AFFIL({target_author_institution})"

    author_search = AuthorSearch(query)
    # `authors` is falsy when nothing matched; normalise to a list so that the
    # "none" and "several" cases can share one code path.
    matches = author_search.authors or []

    if len(matches) != 1:
        # The script needs exactly one author. Passing a string to sys.exit()
        # prints it to stderr and exits with a non-zero status.
        problem = "Multiple" if matches else "No"
        sys.exit(
            f"{problem} authors found matching the criteria: {query}\n"
            "Try looking up the author id at: https://www.scopus.com/freelookup/form/author.uri\n"
            "Then use the --authorid flag instead to set the author id manually."
        )

    # The author id is the last "-"-separated field of the EID.
    target_authorid = matches[0].eid.split("-")[-1]
# Per-co-author aggregates, keyed by Scopus author id (as a string).
coauthorids_lastdate = {}  # most recent collaboration date seen for each co-author
coauthorids_count = Counter()  # number of documents shared with each co-author

# Build the document query for the target author.
# PUBYEAR only offers strict comparisons here, hence the -1 / +1 to make the
# user-facing bounds inclusive.
if year_min is not None:
    additional_query += f" AND PUBYEAR > {year_min - 1}"
if year_max is not None:
    additional_query += f" AND PUBYEAR < {year_max + 1}"
# NOTE: additional_query is appended verbatim, so a user-supplied value must
# start with its own operator (e.g. " AND ...").
query = f"AU-ID({target_authorid}){additional_query}"
search = ScopusSearch(query, view="COMPLETE")

# `results` may be None when the query matches no documents; iterate an empty
# list in that case instead of crashing inside the progress bar.
documents = search.results or []
own_id = str(target_authorid)

# Walk every document and update the count / last date of each co-author.
for doc in tqdm(documents, desc=f"Collecting all documents for query: {query}"):
    author_ids = getattr(doc, "author_ids", None) or ""
    # A missing date is treated as "" so the string comparison below still
    # works (and any real date sorts after it).
    pubdate = getattr(doc, "coverDate", None) or ""

    for coauthorid in author_ids.split(";"):
        coauthorid = coauthorid.strip()
        # Skip blanks (empty author list, stray separators) and the target author.
        if not coauthorid or coauthorid == own_id:
            continue

        coauthorids_count[coauthorid] += 1

        # Keep the latest date; assumes YYYY-MM-DD strings, which order
        # chronologically under plain string comparison.
        previous = coauthorids_lastdate.get(coauthorid)
        if previous is None or previous < pubdate:
            coauthorids_lastdate[coauthorid] = pubdate
# One formatted output row per co-author.
all_data = []

# Look up each co-author in Scopus and render a row with the output template.
# This is best-effort: a co-author whose lookup or formatting fails is
# reported and skipped rather than aborting the whole run.
for coauthorid, lastdate in tqdm(coauthorids_lastdate.items(), desc="Getting information on every coauthor"):
    try:
        author = AuthorRetrieval(coauthorid)  # Retrieve co-author details from Scopus

        try:
            # Use the first current affiliation, preferring the parent
            # organisation's name when one is recorded.
            primary_affiliation = author.affiliation_current[0]
            primary_affiliation_name = (
                primary_affiliation.preferred_name
                if primary_affiliation.parent_preferred_name is None
                else primary_affiliation.parent_preferred_name
            )
        except Exception:
            # No usable affiliation (missing, empty, or malformed record).
            primary_affiliation_name = "Unknown"

        # The year is the leading "-"-separated field of the date; tolerate a
        # missing date instead of raising on None.
        lastyear = (lastdate or "").split("-")[0]

        # Every field is offered to the template; unused ones are ignored by
        # str.format().
        data = output_template.format(
            coauthorid=coauthorid,
            last_name=author.surname,
            first_name=author.given_name,
            affiliation_name=primary_affiliation_name,
            lastdate=lastdate,
            lastyear=lastyear,
            count=coauthorids_count[coauthorid],
        )
    except Exception as exc:
        # Include the reason so that, e.g., an unknown template placeholder is
        # not mistaken for a failed lookup. tqdm.write keeps the progress bar
        # intact, and stderr keeps diagnostics out of the data on stdout.
        tqdm.write(
            f"Unable to find information for coauthorid: {coauthorid} ({type(exc).__name__}: {exc})",
            file=sys.stderr,
        )
    else:
        all_data.append(data)

# Plain string sort of the rendered rows; with the default template this
# orders them by last name.
all_data.sort()
# Emit the sorted rows: to a file when --output_file was given, otherwise to
# the console.
if output_file:
    # Author and affiliation names are frequently non-ASCII; write UTF-8
    # explicitly rather than relying on the platform's default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        # The raw template doubles as the header row: its placeholder names
        # label the columns.
        f.write(output_template + "\n")
        f.write("\n".join(all_data))
else:
    print("\n".join(all_data))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment