Last active
August 29, 2015 14:13
-
-
Save EoinTravers/33dfcd76714405a4e9c1 to your computer and use it in GitHub Desktop.
Manually sorts bracketed BibTex citations in Pandoc Markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Manually sorts bracketed BibTex citations in Pandoc Markdown. | |
# pandoc-citeproc will do this automatically for most csl styles,
# but has no option to override the sorting order for specific | |
# groups of references - for example if you wish to say | |
# "Smith's (1997; 2001; Aardvark & Smith, 1990) work with ant colonies ..." | |
# References are sorted by author, then by year, then by title, | |
# as per the APA system. | |
# To use, modify your csl file (e.g. apa.csl) to remove the
# command that sorts in-text citations automatically
# (the bits between the "<sort>...</sort>" tags).
# Within any citation group you wish to leave unsorted, | |
# include exactly the following comment: | |
# <!--NOSORT--> | |
# Usage: | |
# cat unsortedIntroduction.md | python cite.py > sortedIntroduction.md | |
# or | |
# python cite.py unsortedIntroduction.md > sortedIntroduction.md | |
# Note - there may be strange behaviour if your Markdown file | |
# includes links with @ signs - [Link to profile @ work](www.example.com). | |
import functools
import re
import sys
def bibtex_to_dict(cite):
    """Split an underscore-delimited citation key into its sortable fields.

    The key is split on ``_``.  The last piece is always taken as the year.
    With exactly three pieces the first is the author and the second the
    title.  With more than three pieces the surname is assumed to span two
    words (e.g. "de Neys"), so the second piece is used as the author and
    the third as the title.  Any ``@`` or ``-`` characters are removed from
    the author piece.

    Returns a dict with the keys ``'author'``, ``'year'`` and ``'title'``
    (all strings).  When the key has fewer than three pieces it is not
    treated as a BibTeX key and all three values are ``None``.
    """
    parts = cite.split('_')
    if len(parts) > 3:
        # Multi-word surname (e.g. "de Neys"): sort on the second word.
        author_part, title = parts[1], parts[2]
    elif len(parts) == 3:
        author_part, title = parts[0], parts[1]
    else:
        # Not a BibTeX key; return the null item.
        return {'author': None, 'year': None, 'title': None}
    # Raw-string character class: strips the "@" marker and any "-".
    author = re.sub(r'[@-]', '', author_part)
    return {'author': author, 'year': parts[-1], 'title': title}
# Crude sort - compare by author name first, then use year (and title) to differentiate
def cmp_alpha(x, y):
    """Three-way comparison of two values, old ``cmp`` style.

    Returns -1, 0 or 1 when ``x`` is respectively less than, equal to, or
    greater than ``y``.  ``None`` is ordered before any other value (and
    equal to itself); this is how Python 2 ordered it, and handling it
    explicitly keeps the function usable on Python 3, where comparing
    ``None`` with a string raises ``TypeError``.
    """
    if x is None or y is None:
        # bool arithmetic: None-vs-value -> -1, value-vs-None -> 1, both None -> 0
        return (x is not None) - (y is not None)
    # Always yields an int, even when neither operand compares greater/less.
    return (x > y) - (x < y)
def _year_number(year):
    """Return the leading integer of *year*, or -1 when there is none."""
    found = re.match(r'\d+', year or '')
    return int(found.group()) if found else -1


def cmp_dicts(x, y):
    """Compare two citation dicts by author, then year, then title.

    ``x`` and ``y`` are dicts with ``'author'``, ``'year'`` and ``'title'``
    keys.  Returns a negative number, zero, or a positive number, in the
    style of an old ``cmp`` function.

    Years are compared on their leading digits only, so a missing year
    (``None``) or a year carrying a suffix or locator (e.g. ``"2001a"``,
    ``"2001,p.3"``) no longer makes ``int()`` raise; entries without any
    leading digits sort before dated ones.
    """
    by_author = cmp_alpha(x['author'], y['author'])
    if by_author != 0:
        return by_author
    by_year = _year_number(x['year']) - _year_number(y['year'])
    if by_year != 0:
        return by_year
    return cmp_alpha(x['title'], y['title'])
def cmp_enum_dicts(x, y):
    """Compare two (index, citation-dict) pairs by their dicts only.

    The element at position 1 of each argument is handed to ``cmp_dicts``;
    position 0 (the index) plays no part in the ordering.
    """
    left_dict = x[1]
    right_dict = y[1]
    return cmp_dicts(left_dict, right_dict)
NOSORT_MARKER = '<!--NOSORT-->'


def _read_input(argv):
    """Return the text to process: the file named in argv[1], else stdin."""
    if len(argv) > 1:
        # Only a missing argument falls back to stdin; a bad path now fails
        # loudly instead of silently waiting on standard input.
        with open(argv[1]) as handle:
            return handle.read()
    return sys.stdin.read()


def _sort_group(group):
    """Return the bracketed *group* with its citations sorted.

    Returns None when the group holds a single citation and so needs no
    rewriting.
    """
    pieces = group[1:-1].split(';')
    if len(pieces) < 2:
        return None
    entries = []
    for piece in pieces:
        # The sort key drops brackets and all whitespace; the text written
        # back keeps its inner spacing so prefixes/locators stay readable.
        key = re.sub(r'[\[\]\s]', '', piece)
        if key != '':
            entries.append((piece.strip(), bibtex_to_dict(key)))
    # Sort (index, dict) pairs, then read the citations back in that order.
    ranked = sorted(enumerate(entry[1] for entry in entries),
                    key=functools.cmp_to_key(cmp_enum_dicts))
    ordered = [entries[index][0] for index, _ in ranked]
    return "[" + "; ".join(ordered) + "]"


def sort_citations(text):
    """Return *text* with every multi-citation bracket group sorted.

    Only bracketed spans containing an ``@`` are considered.  Groups that
    contain the ``<!--NOSORT-->`` marker are left exactly as written.
    """
    for group in re.findall(r'\[.+?\]', text, re.DOTALL):
        if '@' not in group:
            continue
        if NOSORT_MARKER in group:
            # Skip just this group; later groups must still be sorted.
            continue
        replacement = _sort_group(group)
        if replacement is not None:
            text = text.replace(group, replacement)
    return text


def main():
    """Read the Markdown input, sort its citation groups, print the result."""
    print(sort_citations(_read_input(sys.argv)))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment