EoinTravers · August 29, 2015 14:13
diff --git a/bibtexSort.py b/bibtexSort.py
 #!/usr/bin/env python

 # Manually sorts bracketed BibTex citations in Pandoc Markdown.
 # pandoc-citeproc will do this automatically for most csl styles,
 # but has no option to override the sorting order for specific
 # groups of references - for example if you wish to say
 # "Smith's (1997; 2001; Aardvark & Smith, 1990) work with ant colonies ..."

 # References are sorted by author, then by year, then by title,
 # as per the APA system.

 # To use, modify your csl file (i.e. apa.csl) to remove the
 # command to sort in text citations automatically
 # (the bits between the "<sort>...</sort>" tags).
 # Within any citation group you wish to leave unsorted,
 # include exactly the following comment:

 # <!--NOSORT-->

 # Usage:
 # cat unsortedIntroduction.md | python bibtexSort.py > sortedIntroduction.md
 # or
 # python bibtexSort.py unsortedIntroduction.md > sortedIntroduction.md

 # Note - there may be strange behaviour if your Markdown file
 # includes links with @ signs - [Link to profile @ work](www.example.com).

 import re
 import sys

 def bibtex_to_dict(cite):
    """Split a citation key into the fields used for sorting.

    The key is split on underscores.  A key with exactly three parts is
    read as ``author_title_year``.  A key with more than three parts is
    treated as having a multi-word surname (e.g. "de Neys"), and its
    second part is used as the author.

    Args:
        cite: One citation key, e.g. ``@smith_ants_1997`` or
            ``-@smith_ants_1997``.

    Returns:
        A dict with the keys 'author', 'year' and 'title'.  The year is
        the last underscore-separated part, kept as a string.  All three
        values are None when the key has fewer than three parts.
    """
    parts = cite.split('_')
    if len(parts) < 3:
        # Not bibtex, return null item
        return {'author': None, 'year': None, 'title': None}
    # With a multi-word surname the second part is the one to sort on.
    name_index = 1 if len(parts) > 3 else 0
    # Raw-string pattern: strip the "@" marker and any "-" characters.
    author = re.sub(r'[@-]', '', parts[name_index])
    return {'author': author, 'year': parts[-1], 'title': parts[name_index + 1]}

 # Comparison helpers for sorting: each returns a negative number, zero or a positive number.
 def cmp_alpha(x, y):
    """Three-way compare two values, treating None as the smallest value.

    Args:
        x, y: The values to compare (strings or None in this script).

    Returns:
        0 when x == y, 1 when x sorts after y, -1 otherwise.
    """
    if x == y:
        return 0
    # None marks a non-citation item.  Ordering it first explicitly keeps
    # Python 2's behaviour and avoids the TypeError Python 3 raises when
    # None is compared with a string.
    if x is None:
        return -1
    if y is None:
        return 1
    return 1 if x > y else -1

 def _year_as_int(year):
    """Return year converted to int, or None when it is not a plain number."""
    try:
        return int(year)
    except (TypeError, ValueError):
        return None

 def cmp_dicts(x, y):
    """Three-way compare two citation dicts by author, then year, then title.

    Args:
        x, y: Dicts with 'author', 'year' and 'title' keys.

    Returns:
        A negative number, zero or a positive number when x sorts before,
        equal to, or after y.
    """
    by_author = cmp_alpha(x['author'], y['author'])
    if by_author != 0:
        return by_author
    year_x = _year_as_int(x['year'])
    year_y = _year_as_int(y['year'])
    # Years are compared numerically only when both parse.  A missing or
    # non-numeric year (e.g. None for a non-citation item) no longer
    # raises; the comparison falls through to the title instead.
    if year_x is not None and year_y is not None and year_x != year_y:
        return year_x - year_y
    return cmp_alpha(x['title'], y['title'])

 def cmp_enum_dicts(x, y):
    """Compare two (index, citation_dict) pairs by their dicts only.

    Args:
        x, y: Sequences whose second element is a citation dict.

    Returns:
        Whatever cmp_dicts returns for the two dicts.
    """
    left, right = x[1], y[1]
    return cmp_dicts(left, right)

 import functools

 # Read the Markdown from the file named on the command line, or from
 # stdin when no file argument is given.  A file that cannot be opened
 # now raises instead of silently falling back to stdin.
 if len(sys.argv) > 1:
    with open(sys.argv[1]) as f:
        target = f.read()
 else:
    target = sys.stdin.read()

 # Every bracketed span that contains an "@" is treated as a citation group.
 all_matches = re.findall(r'\[.+?\]', target, re.DOTALL)
 bibtex_matches = [m for m in all_matches if '@' in m]

 for match in bibtex_matches:
    if '<!--NOSORT-->' in match:
        # Skip only this group; later groups must still be sorted.
        continue
    # Drop the brackets and all whitespace, then split into single keys.
    citations = re.sub(r'\[|\]|\s', '', match).split(';')
    if len(citations) > 1:
        citations = [c for c in citations if c != '']
        citation_dicts = [bibtex_to_dict(c) for c in citations]
        # Sort the citation dicts, and get the indices of them in the correct order.
        # cmp_to_key adapts the cmp-style function for sorted(key=...).
        ordered_pairs = sorted(enumerate(citation_dicts),
                               key=functools.cmp_to_key(cmp_enum_dicts))
        ordered_citations = [citations[index] for index, _ in ordered_pairs]
        # str.replace rewrites every occurrence of this exact group text.
        target = target.replace(match, "[" + "; ".join(ordered_citations) + "]")

 # Same output as the Python 2 statement "print target".
 sys.stdout.write(target + "\n")
	#!/usr/bin/env python

	# Manually sorts bracketed BibTex citations in Pandoc Markdown.
	# pandoc-citeproc will do this automatically for most csl styles,
	# but has no option to override the sorting order for specific
	# groups of references - for example if you wish to say
	# "Smith's (1997; 2001; Aardvark & Smith, 1990) work with ant colonies ..."

	# References are sorted by author, then by year, then by title,
	# as per the APA system.

	# To use, modify your csl file (i.e. apa.csl) to remove the
	# command to sort in text citations automatically
	# (the bits between the "<sort>...</sort>" tags).
	# Within any citation group you wish to leave unsorted,
	# include exactly the following comment:

	# <!--NOSORT-->

	# Usage:
	# cat unsortedIntroduction.md | python bibtexSort.py > sortedIntroduction.md
	# or
	# python bibtexSort.py unsortedIntroduction.md > sortedIntroduction.md

	# Note - there may be strange behaviour if your Markdown file
	# includes links with @ signs - [Link to profile @ work](www.example.com).

	import re
	import sys

	def bibtex_to_dict(cite):
	    """Split a citation key into the fields used for sorting.

	    The key is split on underscores.  A key with exactly three parts is
	    read as ``author_title_year``.  A key with more than three parts is
	    treated as having a multi-word surname (e.g. "de Neys"), and its
	    second part is used as the author.

	    Args:
	        cite: One citation key, e.g. ``@smith_ants_1997`` or
	            ``-@smith_ants_1997``.

	    Returns:
	        A dict with the keys 'author', 'year' and 'title'.  The year is
	        the last underscore-separated part, kept as a string.  All three
	        values are None when the key has fewer than three parts.
	    """
	    parts = cite.split('_')
	    if len(parts) < 3:
	        # Not bibtex, return null item
	        return {'author': None, 'year': None, 'title': None}
	    # With a multi-word surname the second part is the one to sort on.
	    name_index = 1 if len(parts) > 3 else 0
	    # Strip the "@" marker and any "-" characters.  The escaped-pipe
	    # pattern here previously matched only the literal text "@|-".
	    author = re.sub(r'[@-]', '', parts[name_index])
	    return {'author': author, 'year': parts[-1], 'title': parts[name_index + 1]}

	# Comparison helpers for sorting: each returns a negative number, zero or a positive number.
	def cmp_alpha(x, y):
	    """Three-way compare two values, treating None as the smallest value.

	    Args:
	        x, y: The values to compare (strings or None in this script).

	    Returns:
	        0 when x == y, 1 when x sorts after y, -1 otherwise.
	    """
	    if x == y:
	        return 0
	    # None marks a non-citation item.  Ordering it first explicitly keeps
	    # Python 2's behaviour and avoids the TypeError Python 3 raises when
	    # None is compared with a string.
	    if x is None:
	        return -1
	    if y is None:
	        return 1
	    return 1 if x > y else -1

	def _year_as_int(year):
	    """Return year converted to int, or None when it is not a plain number."""
	    try:
	        return int(year)
	    except (TypeError, ValueError):
	        return None

	def cmp_dicts(x, y):
	    """Three-way compare two citation dicts by author, then year, then title.

	    Args:
	        x, y: Dicts with 'author', 'year' and 'title' keys.

	    Returns:
	        A negative number, zero or a positive number when x sorts before,
	        equal to, or after y.
	    """
	    by_author = cmp_alpha(x['author'], y['author'])
	    if by_author != 0:
	        return by_author
	    year_x = _year_as_int(x['year'])
	    year_y = _year_as_int(y['year'])
	    # Years are compared numerically only when both parse.  A missing or
	    # non-numeric year (e.g. None for a non-citation item) no longer
	    # raises; the comparison falls through to the title instead.
	    if year_x is not None and year_y is not None and year_x != year_y:
	        return year_x - year_y
	    return cmp_alpha(x['title'], y['title'])

	def cmp_enum_dicts(x, y):
	    """Compare two (index, citation_dict) pairs by their dicts only.

	    Args:
	        x, y: Sequences whose second element is a citation dict.

	    Returns:
	        Whatever cmp_dicts returns for the two dicts.
	    """
	    return cmp_dicts(x[1], y[1])

	import functools

	# Read the Markdown from the file named on the command line, or from
	# stdin when no file argument is given.  A file that cannot be opened
	# now raises instead of silently falling back to stdin.
	if len(sys.argv) > 1:
	    with open(sys.argv[1]) as f:
	        target = f.read()
	else:
	    target = sys.stdin.read()

	# Every bracketed span that contains an "@" is treated as a citation group.
	all_matches = re.findall(r'\[.+?\]', target, re.DOTALL)
	bibtex_matches = [m for m in all_matches if '@' in m]

	for match in bibtex_matches:
	    if '<!--NOSORT-->' in match:
	        # Skip only this group; later groups must still be sorted.
	        continue
	    # Drop the brackets and all whitespace, then split into single keys.
	    # The pipes are real alternations, not the escaped "\|" seen before.
	    citations = re.sub(r'\[|\]|\s', '', match).split(';')
	    if len(citations) > 1:
	        citations = [c for c in citations if c != '']
	        citation_dicts = [bibtex_to_dict(c) for c in citations]
	        # Sort the citation dicts, and get the indices of them in the correct order.
	        # cmp_to_key adapts the cmp-style function for sorted(key=...).
	        ordered_pairs = sorted(enumerate(citation_dicts),
	                               key=functools.cmp_to_key(cmp_enum_dicts))
	        ordered_citations = [citations[index] for index, _ in ordered_pairs]
	        # str.replace rewrites every occurrence of this exact group text.
	        target = target.replace(match, "[" + "; ".join(ordered_citations) + "]")

	# Same output as the Python 2 statement "print target".
	sys.stdout.write(target + "\n")