Skip to content

Instantly share code, notes, and snippets.

@EoinTravers
Last active August 29, 2015 14:13
Show Gist options
  • Save EoinTravers/33dfcd76714405a4e9c1 to your computer and use it in GitHub Desktop.
Save EoinTravers/33dfcd76714405a4e9c1 to your computer and use it in GitHub Desktop.
Manually sorts bracketed BibTex citations in Pandoc Markdown
#!/usr/bin/env python
# Manually sorts bracketed BibTex citations in Pandoc Markdown.
# pandoc-citeproc will do this automatically for most csl styles,
# but has no option to override the sorting order for specific
# groups of references - for example if you wish to say
# "Smith's (1997; 2001; Aardvark & Smith, 1990) work with ant colonies ..."
# References are sorted by author, then by year, then by title,
# as per the APA system.
# To use, modify your csl file (i.e. apa.csl) to remove the
# command to sort in-text citations automatically
# (the bits between the "<sort>...</sort>" tags).
# Within any citation group you wish to leave unsorted,
# include exactly the following comment:
# <!--NOSORT-->
# Usage:
# cat unsortedIntroduction.md | python cite.py > sortedIntroduction.md
# or
# python cite.py unsortedIntroduction.md > sortedIntroduction.md
# Note - there may be strange behaviour if your Markdown file
# includes links with @ signs - [Link to profile @ work](www.example.com).
import re
import sys
from functools import cmp_to_key
def bibtex_to_dict(cite):
    """Split a citation key into the fields used for sorting.

    The key is split on underscores.  With exactly three parts it is read
    as ``author_title_year``.  With more than three parts it is treated as
    a multi-word surname (i.e. "de Neys"): the second part is used as the
    author and the third as the title.  The year is always the last part.

    Parameters:
        cite: citation text, e.g. ``"@smith_ants_1997"``.

    Returns:
        A dict with the keys ``'author'``, ``'year'`` and ``'title'``.
        All three values are ``None`` when *cite* has fewer than three
        underscore-separated parts (not a recognised key).
    """
    split = cite.split('_')
    if len(split) > 3:
        # Multi-word surname: sort on the second part of the name.
        author_part, title = split[1], split[2]
    elif len(split) == 3:
        author_part, title = split[0], split[1]
    else:
        # Not bibtex, return null item
        return {'author': None, 'year': None, 'title': None}
    # Strip every "@" and "-" from the author part.  A raw-string character
    # class avoids the invalid "\@" / "\-" escapes of a plain string literal.
    author = re.sub(r'[@-]', '', author_part)
    return {'author': author, 'year': split[-1], 'title': title}
# Crude sort - compare author names first, then use year (and title) to differentiate
def cmp_alpha(x, y):
    """Three-way comparison of two sortable values.

    Parameters:
        x, y: values to compare (strings in practice); either may be
            ``None`` to represent a missing field.

    Returns:
        ``0`` if the values are equal, ``1`` if *x* orders after *y*,
        ``-1`` if *x* orders before *y*.  ``None`` orders before any
        other value.
    """
    if x == y:
        return 0
    # Order None first explicitly: Python 2 did this implicitly, while
    # Python 3 raises TypeError when None is compared with a string.
    if x is None:
        return -1
    if y is None:
        return 1
    # Always return an int; never fall off the end and return None.
    return 1 if x > y else -1
def _split_year(year):
    """Split a year field into ``(leading integer, trailing suffix)``.

    ``"2010a"`` gives ``(2010, "a")``.  ``None`` or text with no leading
    digits gives ``-1`` as the number, so it orders before real years.
    """
    text = year or ''
    digits = re.match(r'\d*', text).group()
    number = int(digits) if digits else -1
    return number, text[len(digits):]


def cmp_dicts(x, y):
    """Compare two citation dicts by author, then year, then title.

    Parameters:
        x, y: dicts with the keys ``'author'``, ``'year'`` and ``'title'``.

    Returns:
        A negative int, zero, or a positive int when *x* orders before,
        equal to, or after *y*.
    """
    by_author = cmp_alpha(x['author'], y['author'])
    if by_author != 0:
        return by_author
    # A plain int() on the year crashes for null items (year is None) and
    # for years with a suffix such as "2010a", so parse them tolerantly.
    x_number, x_suffix = _split_year(x['year'])
    y_number, y_suffix = _split_year(y['year'])
    by_year = x_number - y_number
    if by_year == 0:
        # Same numeric year: let any suffix ("a" before "b") break the tie.
        by_year = cmp_alpha(x_suffix, y_suffix)
    if by_year != 0:
        return by_year
    return cmp_alpha(x['title'], y['title'])
def cmp_enum_dicts(x, y):
    """Compare two ``(index, citation dict)`` pairs by their dicts.

    The element at position 0 of each pair is ignored; the elements at
    position 1 are handed to ``cmp_dicts`` and its result is returned.
    """
    left_dict = x[1]
    right_dict = y[1]
    return cmp_dicts(left_dict, right_dict)
def _sort_citation_groups(text):
    """Return *text* with each bracketed multi-citation group sorted.

    A group is any ``[...]`` span that contains an ``@``.  Groups that
    contain the ``<!--NOSORT-->`` marker are left exactly as written.
    Every other group with more than one ``;``-separated entry is
    rewritten as ``[key1; key2; ...]``, with brackets and all whitespace
    inside the group removed before sorting.
    """
    groups = re.findall(r'\[.+?\]', text, re.DOTALL)
    for group in groups:
        if '@' not in group:
            continue
        if '<!--NOSORT-->' in group:
            # Skip only this group; later groups must still be sorted.
            continue
        citations = re.sub(r'[\[\]\s]', '', group).split(';')
        if len(citations) > 1:
            citations = [c for c in citations if c != '']
            citation_dicts = [bibtex_to_dict(c) for c in citations]
            # Sort (index, dict) pairs, then use the indices to pick the
            # original citation strings in the sorted order.
            ranked = sorted(enumerate(citation_dicts),
                            key=cmp_to_key(cmp_enum_dicts))
            ordered_citations = [citations[i] for i, _ in ranked]
            text = text.replace(
                group, "[" + "; ".join(ordered_citations) + "]")
    return text


# Read the Markdown from the file named on the command line.  With no
# argument, or if that file cannot be opened, read standard input instead.
try:
    f = open(sys.argv[1])
except (IndexError, IOError):
    f = sys.stdin
try:
    target = f.read()
finally:
    # Close a file we opened ourselves, but never close stdin.
    if f is not sys.stdin:
        f.close()
target = _sort_citation_groups(target)
# A single parenthesised argument prints identically on Python 2 and 3.
print(target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment