Created
March 19, 2015 01:15
-
-
Save jdahm/97246bd39cfbe4ace492 to your computer and use it in GitHub Desktop.
Convert a BibTeX library to use cite keys
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import os.path as op | |
import re | |
import glob | |
import shutil | |
# One pattern per supported BibTeX entry type. Each pattern captures the cite
# key (everything between "@Type{" and the first comma) in its own named group,
# so `match.lastgroup` identifies which entry type matched.
# Stored as a tuple rather than a generator so the name is still usable after
# CITERE has been built from it.
CITETYPES = tuple(
    '@{}{{(?P<{}>[^,]+)'.format(entry_type, group)
    for entry_type, group in (
        ('Article', 'A'),
        ('InCollection', 'I'),
        ('PhdThesis', 'P'),
        ('Unpublished', 'U'),
        ('InProceedings', 'IP'),
    )
)
CITERE = '|'.join(CITETYPES)

# One pattern per field read from an entry. Each captures the field text from
# the opening brace up to the first comma, so a trailing "}" may be included.
# Raw string: "\s" is a regex escape, not a Python string escape.
KEYTYPES = tuple(
    r'\s*{}\s*=\s*{{(?P<{}>[^,]+)'.format(field, group)
    for field, group in (
        ('Title', 'T'),
        ('Author', 'A'),
        ('Year', 'Y'),
    )
)
KEYRE = '|'.join(KEYTYPES)
def _author_to_glob(content):
    """Replace brace groups in an author fragment with ``*`` glob wildcards."""
    while '{' in content:
        print(content)
        replaced = re.sub(r'\{[^}]+\}', '*', content)
        if replaced == content:
            # Only unmatched or empty braces are left; looping again would
            # never make progress, so fall through to the cleanup below.
            break
        content = replaced
        print(content)
    # Any stray brace that could not be paired up also becomes a wildcard.
    return content.replace('{', '*').replace('}', '*')


def _choose_match(matches, titlewords):
    """Return the path in *matches* to rename, or None if none can be chosen."""
    if len(matches) == 1:
        return matches[0]
    print('Multiple matches found')
    # First title word (in title order) that occurs in any candidate path wins.
    for word in titlewords:
        for candidate in matches:
            if word in candidate:
                return candidate
    return None


def convert_bibfiles(directory, bibfile):
    """Rename files under *directory* to ``<citekey>.pdf`` using a BibTeX file.

    The file ``directory/bibfile`` is scanned line by line. For every entry
    whose type is covered by ``CITERE``, the Title, Author and Year fields are
    read (via ``KEYRE``). When the entry's closing ``}`` line is reached, paths
    matching the glob ``directory/<author>/<year>*`` are looked up and one of
    them is moved to ``directory/<citekey>.pdf``. If several paths match, the
    first one containing a word of the title is used; if none qualifies, the
    entry is skipped with a message and nothing is moved.

    Progress and debugging information is printed to standard output.

    Args:
        directory: Directory containing the BibTeX file; also the base of the
            glob and the destination of the renamed files.
        bibfile: Name of the BibTeX file inside *directory*.

    Raises:
        ValueError: If a line starting with ``}`` is seen outside a recognised
            entry, if an entry ends without Title, Author and Year all having
            been found, or if the Year field is not an integer.
    """
    # Initialised up front so the diagnostics below can always print them,
    # even when the very first closing brace is unexpected.
    key = titlewords = authorname = year = None
    indef = False
    with open(op.join(directory, bibfile), mode='r') as f:
        for line in f:
            print(indef, line, end='')

            m = re.match(CITERE, line)
            if m:
                # Start of a supported entry: remember its cite key and
                # forget the fields of the previous entry.
                key = m.group(m.lastgroup)
                indef = True
                titlewords = authorname = year = None

            if re.match('}', line):
                if not indef:
                    print(titlewords, authorname, year)
                    raise ValueError('End of key found when not in key')
                indef = False
                if titlewords is None or authorname is None or year is None:
                    print(titlewords, authorname, year)
                    raise ValueError('Did not find all required fields')

                matches = glob.glob(
                    op.join(directory, authorname, str(year)) + '*')
                print(matches)
                if not matches:
                    print('Could not find matches for {} {}'.format(
                        authorname, year))
                    continue

                filename = _choose_match(matches, titlewords)
                if filename is None:
                    # Never fall back to a path left over from an earlier
                    # entry: moving the wrong file is worse than skipping.
                    print('Could not choose between matches for {} {}'.format(
                        authorname, year))
                    continue
                shutil.move(filename, op.join(directory, key) + '.pdf')

            if indef:
                m = re.match(KEYRE, line)
                if m:
                    kind = m.lastgroup
                    # The capture stops at the first comma, so it may still
                    # carry the field's closing brace(s).
                    content = m.group(kind).rstrip('}')
                    if kind == 'T':
                        titlewords = content.split()
                    elif kind == 'A':
                        authorname = _author_to_glob(content)
                    elif kind == 'Y':
                        year = int(content)
if __name__ == '__main__':
    # The only argument is the directory that contains main.bib.
    if len(sys.argv) < 2:
        sys.exit('usage: {} DIRECTORY'.format(sys.argv[0]))
    convert_bibfiles(sys.argv[1], 'main.bib')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment