Last active
November 1, 2024 18:00
-
-
Save shyuep/c588b5eb0d623021ffb58fb4e0453d0f to your computer and use it in GitHub Desktop.
Generates an NSF collaborator list from a BibTeX file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Script for generating collaborators in a csv file from a bibtex, for easy | |
insertion into the new NSF format. | |
""" | |
from __future__ import division

import collections
import csv
import datetime
import os
import re

from pybtex.database import parse_file, BibliographyData
# Maps the spelled-out name of a Greek letter to the corresponding Unicode
# character. Capitalised names map to upper-case letters, lower-case names to
# lower-case letters.
greek_alphabet = {
    'Alpha': u'\u0391',
    'Beta': u'\u0392',
    'Gamma': u'\u0393',
    'Delta': u'\u0394',
    'Epsilon': u'\u0395',
    'Zeta': u'\u0396',
    'Eta': u'\u0397',
    'Theta': u'\u0398',
    'Iota': u'\u0399',
    'Kappa': u'\u039A',
    'Lamda': u'\u039B',
    'Mu': u'\u039C',
    'Nu': u'\u039D',
    'Xi': u'\u039E',
    'Omicron': u'\u039F',
    'Pi': u'\u03A0',
    'Rho': u'\u03A1',
    'Sigma': u'\u03A3',
    'Tau': u'\u03A4',
    'Upsilon': u'\u03A5',
    'Phi': u'\u03A6',
    'Chi': u'\u03A7',
    'Psi': u'\u03A8',
    'Omega': u'\u03A9',
    'alpha': u'\u03B1',
    'beta': u'\u03B2',
    'gamma': u'\u03B3',
    'delta': u'\u03B4',
    'epsilon': u'\u03B5',
    'zeta': u'\u03B6',
    'eta': u'\u03B7',
    'theta': u'\u03B8',
    'iota': u'\u03B9',
    'kappa': u'\u03BA',
    'lamda': u'\u03BB',
    'mu': u'\u03BC',
    'nu': u'\u03BD',
    'xi': u'\u03BE',
    'omicron': u'\u03BF',
    'pi': u'\u03C0',
    'rho': u'\u03C1',
    'sigma': u'\u03C3',
    'tau': u'\u03C4',
    'upsilon': u'\u03C5',
    'phi': u'\u03C6',
    'chi': u'\u03C7',
    'psi': u'\u03C8',
    'omega': u'\u03C9',
}
# "lamda" is the Unicode character-name spelling; LaTeX spells the macro
# "\lambda". Register both so a lookup by LaTeX macro name succeeds while the
# original keys keep working.
greek_alphabet['Lambda'] = greek_alphabet['Lamda']
greek_alphabet['lambda'] = greek_alphabet['lamda']
def process_unicode(s):
    """Replace inline LaTeX Greek-letter macros in *s* with Unicode characters.

    Every ``$\\name$`` occurrence whose ``name`` is a key of ``greek_alphabet``
    is replaced by the mapped character. Each occurrence is resolved
    independently, and occurrences with an unrecognised name are left
    untouched instead of raising ``KeyError``.

    Args:
        s: The text to process.

    Returns:
        The text with all recognised macros substituted.
    """
    def _to_unicode(match):
        # Fall back to the full matched text so unknown macros pass through.
        return greek_alphabet.get(match.group(1), match.group(0))

    return re.sub(r"\$\\(\w+)\$", _to_unicode, s)
def generate_csv(args):
    """Write a dated collaborator CSV built from the authors in a BibTeX file.

    Every author of an ``article`` entry whose year is ``args.year`` or later
    is collected. If ``args.input_csv`` is given, the names and affiliations
    in that file replace those of the first collaborator they match (a
    case-insensitive substring match on both first and last name). Rows of
    that file that match no collaborator are printed under an
    "Old collaborators" heading. The result is written to
    ``collaborators_<today's date>.csv`` in the current working directory.

    Args:
        args: Parsed command-line arguments providing ``bib`` (path of the
            BibTeX file), ``year`` (int, earliest year to include) and
            ``input_csv`` (path of a previously generated CSV, or a falsy
            value to skip the merge).
    """
    db = parse_file(args.bib)

    names = set()
    for key, entry in db.entries.items():
        if entry.type != "article":
            continue
        try:
            year = int(entry.fields["year"])
        except (KeyError, ValueError):
            # Without a usable year the entry cannot be compared against the
            # cut-off; report it rather than aborting the whole run.
            print("Skipping %s: missing or non-numeric year" % key)
            continue
        if year < args.year:
            continue
        for person in entry.persons.get("author", []):
            given = " ".join(person.first_names + person.middle_names)
            names.add((given, " ".join(person.last_names)))

    # Sort by last name, then by first name.
    collaborators = [
        {"first_name": first, "last_name": last}
        for first, last in sorted(names, key=lambda n: (n[1], n[0]))
    ]

    unfound = []
    if args.input_csv:
        with open(args.input_csv, 'rt') as f:
            for row in csv.reader(f):
                # csv.reader yields an empty list for a blank line.
                if not row or row[0] == "First Name":
                    continue
                # Pad short rows and ignore extra columns so that a
                # hand-edited file does not break the unpacking.
                first_name, last_name, affiliation = (row + ["", ""])[:3]
                for c in collaborators:
                    if (c["first_name"].lower() in first_name.lower()
                            and c["last_name"].lower() in last_name.lower()):
                        c["first_name"] = first_name
                        c["last_name"] = last_name
                        c["affiliation"] = affiliation
                        break
                else:
                    # No collaborator matched this row.
                    unfound.append(row)

    # Merging may have given several entries the same name; keep the first.
    unique = []
    seen = set()
    for c in collaborators:
        full_name = "%s %s" % (c["first_name"].lower(), c["last_name"].lower())
        if full_name not in seen:
            seen.add(full_name)
            unique.append(c)
    collaborators = unique

    with open("collaborators_%s.csv" % datetime.datetime.now().date(), "wt") as f:
        writer = csv.writer(f)
        writer.writerow(["First Name", "Last Name", "Affiliation"])
        for c in collaborators:
            writer.writerow([c["first_name"], c["last_name"], c.get("affiliation", "")])

    if unfound:
        print("Old collaborators")
        print("-----------------")
        for c in unfound:
            print(", ".join(c))
def csv2str(args):
    """Print the collaborators in a CSV file as one ``"; "``-separated string.

    Each data row is rendered as ``"<first> <last>, <affiliation>"``. The
    header row (first cell ``"First Name"``) and blank lines are skipped.
    Rows are filtered by affiliation only when an excluded institution is
    actually supplied, so affiliations containing ``-`` are no longer dropped
    by default.

    Args:
        args: Parsed command-line arguments providing ``csv`` (path of the
            CSV file) and ``excluded_institution`` (a substring of the
            affiliation to exclude, or a falsy value for no filtering).
    """
    excluded = args.excluded_institution
    output = []
    with open(args.csv, "rt") as f:
        for row in csv.reader(f):
            # csv.reader yields an empty list for a blank line.
            if not row or row[0] == "First Name":
                continue
            # Pad short rows so a missing affiliation does not raise.
            first_name, last_name, affiliation = (row + ["", ""])[:3]
            if excluded and excluded in affiliation:
                continue
            output.append("%s %s, %s" % (first_name, last_name, affiliation))
    print("; ".join(output))
def main():
    """Parse the command line and run the selected sub-command.

    Two sub-commands are defined: ``make_csv`` dispatches to ``generate_csv``
    and ``make_str`` dispatches to ``csv2str``. When no sub-command is given,
    the help text is printed and the process exits with status 0.
    """
    import argparse
    import sys

    desc = """
    Generate collaborator list for proposals.
    This script helps automate the generation and updating of collaborator lists from
    bibtex files and/or csv file. Note that the first time this script is used, the csv
    generated does not contain affiliations. These have to be entered by hand.
    Subsequently, supply old processed csv using -i and affiliations will be obtained
    from the old list where possible. After you generate the csv, you can convert it to
    a string for pasting into the biosketch using make_str.
    """
    # The usage notes above were previously built but never shown; pass them
    # to the parser so they appear in --help.
    p = argparse.ArgumentParser(
        description=desc,
        epilog="Author: Shyue Ping Ong")

    sp = p.add_subparsers()

    sp_csv = sp.add_parser(
        "make_csv", help="Make csv file from bibtex.")
    sp_csv.add_argument("bib", metavar="bib", type=str,
                        help="Bibtex file to process")
    sp_csv.add_argument("-y", "--year", dest="year",
                        type=int, default=datetime.datetime.now().year - 4,
                        help="Year from which to update. Defaults to current year - 4, based on the usual NSF guideline of past 48 months.")
    sp_csv.add_argument("-i", "--input_csv", dest="input_csv",
                        type=str,
                        help="An input CSV file. This is used mainly for prior information on institutions.")
    sp_csv.set_defaults(func=generate_csv)

    sp_str = sp.add_parser(
        "make_str", help="Convert data from a csv to a string for pasting into biosketches")
    sp_str.add_argument("csv", metavar="csv", type=str,
                        help="Csv file to process")
    sp_str.add_argument("-e", "--excluded_institution", dest="excluded_institution",
                        type=str,
                        help="Supply a home institution to exclude collaborators within the same institution. A sufficiently unique part of the string should suffice.")
    sp_str.set_defaults(func=csv2str)

    args = p.parse_args()

    # "func" is only set by a sub-parser; it is absent when the script is
    # invoked without a sub-command.
    func = getattr(args, "func", None)
    if func is None:
        p.print_help()
        sys.exit(0)
    func(args)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
need an
import sys
on line 146.