Skip to content

Instantly share code, notes, and snippets.

@shyuep
Last active November 1, 2024 18:00
Show Gist options
  • Save shyuep/c588b5eb0d623021ffb58fb4e0453d0f to your computer and use it in GitHub Desktop.
Save shyuep/c588b5eb0d623021ffb58fb4e0453d0f to your computer and use it in GitHub Desktop.
Generates an NSF collaborator list from a BibTeX file.
#!/usr/bin/env python
"""
Script for generating collaborators in a csv file from a bibtex, for easy
insertion into the new NSF format.
"""
from __future__ import division

import collections
import csv
import datetime
import os
import re

from pybtex.database import parse_file, BibliographyData
# Maps the name of a Greek letter (as written in a LaTeX macro, without the
# leading backslash) to the corresponding Unicode character, e.g.
# "alpha" -> U+03B1.  Capitalised names map to the upper-case letters.
#
# LaTeX spells the eleventh letter "lambda", while the Unicode character name
# is "LAMDA".  Both spellings are accepted so that ``$\lambda$`` resolves and
# existing lookups using the older "lamda" key keep working.
greek_alphabet = {
    # Upper case (U+03A2 is unassigned, hence the jump after Rho).
    'Alpha': u'\u0391',
    'Beta': u'\u0392',
    'Gamma': u'\u0393',
    'Delta': u'\u0394',
    'Epsilon': u'\u0395',
    'Zeta': u'\u0396',
    'Eta': u'\u0397',
    'Theta': u'\u0398',
    'Iota': u'\u0399',
    'Kappa': u'\u039A',
    'Lambda': u'\u039B',
    'Lamda': u'\u039B',
    'Mu': u'\u039C',
    'Nu': u'\u039D',
    'Xi': u'\u039E',
    'Omicron': u'\u039F',
    'Pi': u'\u03A0',
    'Rho': u'\u03A1',
    'Sigma': u'\u03A3',
    'Tau': u'\u03A4',
    'Upsilon': u'\u03A5',
    'Phi': u'\u03A6',
    'Chi': u'\u03A7',
    'Psi': u'\u03A8',
    'Omega': u'\u03A9',
    # Lower case (U+03C2, final sigma, is deliberately not mapped).
    'alpha': u'\u03B1',
    'beta': u'\u03B2',
    'gamma': u'\u03B3',
    'delta': u'\u03B4',
    'epsilon': u'\u03B5',
    'zeta': u'\u03B6',
    'eta': u'\u03B7',
    'theta': u'\u03B8',
    'iota': u'\u03B9',
    'kappa': u'\u03BA',
    'lambda': u'\u03BB',
    'lamda': u'\u03BB',
    'mu': u'\u03BC',
    'nu': u'\u03BD',
    'xi': u'\u03BE',
    'omicron': u'\u03BF',
    'pi': u'\u03C0',
    'rho': u'\u03C1',
    'sigma': u'\u03C3',
    'tau': u'\u03C4',
    'upsilon': u'\u03C5',
    'phi': u'\u03C6',
    'chi': u'\u03C7',
    'psi': u'\u03C8',
    'omega': u'\u03C9',
}
def process_unicode(s, table=None):
    r"""Replace inline LaTeX Greek macros such as ``$\alpha$`` with Unicode.

    Every occurrence of ``$\name$`` in *s* is looked up individually, so a
    string containing several different macros gets the right character for
    each one.  Macros whose name is not in the table are left untouched
    instead of raising.

    Args:
        s: Text that may contain ``$\name$`` fragments.
        table: Optional mapping from macro name (without the backslash) to
            its replacement text.  Defaults to the module-level
            ``greek_alphabet`` table.

    Returns:
        The text with all known macros replaced.
    """
    if table is None:
        table = greek_alphabet

    def _substitute(match):
        # Fall back to the original ``$\name$`` text for unknown macros.
        return table.get(match.group(1), match.group(0))

    # A callable replacement is evaluated per match; a fixed replacement
    # string would stamp the first macro's character over every match.
    return re.sub(r"\$\\(\w+)\$", _substitute, s)
def _collect_article_authors(db, min_year):
    """Return the set of (given names, last names) of recent article authors."""
    authors = set()
    for key, entry in db.entries.items():
        if entry.type != "article":
            continue
        try:
            year = int(entry.fields["year"])
        except (KeyError, ValueError):
            # An entry without a usable year cannot be placed in the window;
            # report it instead of aborting the whole run.
            print("Skipping %s: missing or non-numeric year" % key)
            continue
        if year < min_year:
            continue
        for person in entry.persons.get("author", []):
            given = " ".join(person.first_names + person.middle_names)
            authors.add((given, " ".join(person.last_names)))
    return authors


def _merge_previous_csv(collaborators, input_csv):
    """Copy names/affiliations from an older CSV; return rows matching nobody."""
    unmatched = []
    # newline="" is what the csv module expects for files it reads or writes.
    with open(input_csv, "rt", newline="") as f:
        for row in csv.reader(f):
            # Skip blank lines and the header row.
            if not row or row[0] == "First Name":
                continue
            # Tolerate rows that lack trailing columns.
            first_name, last_name, affiliation = (row + ["", ""])[:3]
            first_lower = first_name.lower()
            last_lower = last_name.lower()
            for c in collaborators:
                # Substring match so that e.g. an initial in the bibtex still
                # finds the fully spelled-out name from the old list.
                if (c["first_name"].lower() in first_lower
                        and c["last_name"].lower() in last_lower):
                    c["first_name"] = first_name
                    c["last_name"] = last_name
                    c["affiliation"] = affiliation
                    break
            else:
                unmatched.append(row)
    return unmatched


def _drop_duplicate_names(collaborators):
    """Keep the first collaborator for each case-insensitive full name."""
    seen = set()
    unique = []
    for c in collaborators:
        full_name = "%s %s" % (c["first_name"].lower(), c["last_name"].lower())
        if full_name not in seen:
            seen.add(full_name)
            unique.append(c)
    return unique


def generate_csv(args):
    """Write ``collaborators_<today>.csv`` listing co-authors from a bibtex file.

    Authors of every ``article`` entry published in or after ``args.year``
    are collected, sorted by last then first name, optionally enriched with
    the names and affiliations found in a previously generated CSV, and
    de-duplicated by case-insensitive full name.  Rows of the old CSV that
    match no current collaborator are printed under "Old collaborators".

    Args:
        args: Parsed command-line namespace with the attributes ``bib``
            (path of the bibtex file), ``year`` (earliest publication year
            to include) and ``input_csv`` (path of an older CSV, or a falsy
            value when there is none).
    """
    db = parse_file(args.bib)
    names = sorted(_collect_article_authors(db, args.year),
                   key=lambda c: (c[1], c[0]))
    collaborators = [{"first_name": first, "last_name": last}
                     for first, last in names]

    unfound = []
    if args.input_csv:
        unfound = _merge_previous_csv(collaborators, args.input_csv)

    # Merging can give two bibtex spellings the same name, so de-duplicate
    # only after the old CSV has been applied.
    collaborators = _drop_duplicate_names(collaborators)

    out_name = "collaborators_%s.csv" % datetime.datetime.now().date()
    with open(out_name, "wt", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["First Name", "Last Name", "Affiliation"])
        for c in collaborators:
            writer.writerow(
                [c["first_name"], c["last_name"], c.get("affiliation", "")])

    if unfound:
        print("Old collaborators")
        print("-----------------")
        for row in unfound:
            print(", ".join(row))
def csv2str(args):
    """Print a collaborator CSV as one ``; ``-separated string.

    Each data row becomes ``"First Last, Affiliation"``.  The header row and
    blank lines are skipped, and rows without an affiliation column are
    treated as having an empty affiliation.

    Args:
        args: Parsed command-line namespace with the attributes ``csv``
            (path of the CSV file to read) and ``excluded_institution``
            (substring of an affiliation to leave out; when empty or
            ``None`` nobody is excluded).
    """
    excluded = args.excluded_institution
    entries = []
    # newline="" is what the csv module expects for files it reads.
    with open(args.csv, "rt", newline="") as f:
        for row in csv.reader(f):
            if not row or row[0] == "First Name":
                continue
            first_name, last_name, affiliation = (row + ["", ""])[:3]
            # Filter only when an institution was actually supplied; a
            # placeholder default would wrongly drop affiliations that
            # happen to contain it.
            if excluded and excluded in affiliation:
                continue
            entries.append("%s %s, %s" % (first_name, last_name, affiliation))
    print("; ".join(entries))
def main(argv=None):
    """Parse the command line and run the selected sub-command.

    Two sub-commands are available: ``make_csv`` (handled by
    ``generate_csv``) and ``make_str`` (handled by ``csv2str``).  When no
    sub-command is given, the help text is printed and the process exits
    with status 0.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    import argparse
    import sys

    # argparse re-wraps the description, so the line breaks and indentation
    # here do not affect the help output.
    desc = """
    Generate collaborator list for proposals.
    This script helps automate the generation and updating of collaborator lists from
    bibtex files and/or csv file. Note that the first time this script is used, the csv
    generated does not contain affiliations. These have to be entered by hand.
    Subsequently, supply old processed csv using -i and affiliations will be obtained
    from the old list where possible. After you generate the csv, you can convert it to
    a string for pasting into the biosketch using make_str.
    """
    p = argparse.ArgumentParser(
        description=desc,
        epilog="Author: Shyue Ping Ong")
    sp = p.add_subparsers()

    sp_csv = sp.add_parser(
        "make_csv", help="Make csv file from bibtex.")
    sp_csv.add_argument("bib", metavar="bib", type=str,
                        help="Bibtex file to process")
    sp_csv.add_argument("-y", "--year", dest="year",
                        type=int, default=datetime.datetime.now().year - 4,
                        help="Year from which to update. Defaults to current year - 4, based on the usual NSF guideline of past 48 months.")
    sp_csv.add_argument("-i", "--input_csv", dest="input_csv",
                        type=str,
                        help="An input CSV file. This is used mainly for prior information on institutions.")
    sp_csv.set_defaults(func=generate_csv)

    sp_str = sp.add_parser(
        "make_str", help="Convert data from a csv to a string for pasting into biosketches")
    sp_str.add_argument("csv", metavar="csv", type=str,
                        help="Csv file to process")
    sp_str.add_argument("-e", "--excluded_institution", dest="excluded_institution",
                        type=str,
                        help="Supply a home institution to exclude collaborators within the same institution. A sufficiently unique part of the string should suffice.")
    sp_str.set_defaults(func=csv2str)

    args = p.parse_args(argv)
    # ``func`` is only set by a sub-parser; without one there is nothing to run.
    if not hasattr(args, "func"):
        p.print_help()
        sys.exit(0)
    args.func(args)


if __name__ == "__main__":
    main()
@jayhesselberth
Copy link

need an import sys on line 146.

@shyuep
Copy link
Author

shyuep commented May 4, 2023

Thanks. Just pushed a fix.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment