Skip to content

Instantly share code, notes, and snippets.

@ernstki
Last active July 22, 2019 19:03
Show Gist options
  • Save ernstki/79faf30fe8a1600cc05f388c13b4b67d to your computer and use it in GitHub Desktop.
Save ernstki/79faf30fe8a1600cc05f388c13b4b67d to your computer and use it in GitHub Desktop.
titlecase - a Python AP-style title case capitalizer (works great with doclip.sh!)
#!/usr/bin/env python
# vim: fileencoding=utf-8
"""
Tests for titlecase.py
Invocation: python -m unittest test_titlecase.py
"""
import unittest
from titlecase import titlecase
class TestTitleCase(unittest.TestCase):
    """Exercise titlecase() with its default (AP) and 'chicago' styles."""

    def test_ap_style(self):
        """
        The AP stylebook says

        - principal words are capitalized, and so are prepositions and
          conjunctions that are four letters or longer.
        - an article (the, a, an) or any word shorter than four letters is
          capitalized when it is the first or last word of the title.

        (source: https://writers.stackexchange.com/a/4622)
        """
        # Each case pairs the raw input with the title-cased result we expect.
        cases = [
            # British post-punk band
            ('the the', 'The The'),
            # simple book title
            ('the cat in the hat', 'The Cat in the Hat'),
            # ≥ 4 char preposition
            ('over the hills and through the woods',
             'Over the Hills and Through the Woods'),
            # preposition at end
            ('you and the horse you rode in on',
             'You and the Horse You Rode in On'),
            # punctuation at end
            ('you and the horse you rode in on!',
             'You and the Horse You Rode in On!'),
            # preposition with comma
            ('to boldly go, but, then again...',
             'To Boldly Go, but, Then Again...'),
        ]
        for raw, wanted in cases:
            self.assertEqual(titlecase(raw), wanted)

    def test_chicago_style(self):
        """
        But the Chicago Manual of Style says:

        Articles (a, an, the), coordinating conjunctions (and, but, or, for,
        nor) and prepositions of any length stay lowercase, except when they
        are the first or last word of the title.
        """
        cases = [
            ('over the hills and through the woods',
             'Over the Hills and through the Woods'),
        ]
        for raw, wanted in cases:
            self.assertEqual(titlecase(raw, style='chicago'), wanted)
#!/usr/bin/env python
# vim: fileencoding=utf-8
"""
Title-case the input on stdin according to AP or Chicago styleguide rules
Author: Kevin Ernst <ernstki -at- mail.uc.edu>
Source: https://gist.github.com/ernstki/79faf30fe8a1600cc05f388c13b4b67d
"""
from __future__ import print_function
import re
ARTS = ['a', 'an', 'the']

# source: https://en.wikibooks.org/wiki/English_in_Use/Prepositions,_Conjunctions,_and_Interjections
PREPS = ['about', 'above', 'across', 'after', 'against', 'along', 'amid',
         'amidst', 'among', 'around', 'at', 'before', 'behind', 'below',
         'beneath', 'beside', 'besides', 'between', 'beyond', 'during',
         'except', 'for', 'from', 'in', 'into', 'of', 'off', 'on', 'outside',
         'over', 'past', 'through', 'throughout', 'to', 'toward', 'towards',
         'under', 'underneath', 'until', 'with', 'within', 'without']

CONJS = ['although', 'and', 'as', 'because', 'both', 'but', 'either', 'even',
         'except', 'for', 'however', 'if', 'lest', 'neither', 'nevertheless',
         'nor', 'notwithstanding', 'or', 'provided', 'save', 'seeing', 'since',
         'so', 'than', 'that', 'then', 'though', 'unless', 'whereas',
         'whether', 'yet']

# see https://writers.stackexchange.com/a/4622
AP_CAP_IF_THIS_LONG = 4  # AP stylebook says capitalize if >=4 characters

# Matches every character that is not a "word" character; used to strip
# punctuation from a token before looking it up in the small-word lists.
_NON_WORD = re.compile(r'[^\w]')


def _capitalize(word):
    """Capitalize `word`, skipping leading punctuation such as quotes.

    Plain ``str.capitalize()`` only ever upper-cases the very first
    character, so a token like ``'"the'`` would come back unchanged.
    """
    for idx, char in enumerate(word):
        if char.isalnum():
            return word[:idx] + word[idx:].capitalize()
    # No letters or digits at all (e.g. '--'): nothing to capitalize.
    return word


def titlecase(string, style=None):
    """
    Return properly title-cased version of input string

    If style is 'chicago', use Chicago Manual of Style rules (articles,
    prepositions and conjunctions stay lowercase regardless of length);
    any other value, including None, selects AP rules (only such words
    shorter than AP_CAP_IF_THIS_LONG characters stay lowercase).

    The input is split on whitespace and re-joined with single spaces, so
    runs of whitespace (including newlines) collapse to one space. The first
    and last words are always capitalized. Small words are recognized
    regardless of the case they were typed in. Empty or whitespace-only
    input yields an empty string.
    """
    if style == 'chicago':
        small_words = ARTS + PREPS + CONJS
    else:
        small_words = ARTS + [w for w in PREPS + CONJS
                              if len(w) < AP_CAP_IF_THIS_LONG]
    excludes = frozenset(small_words)

    words = []
    for word in string.split():
        # compare without punctuation and without regard to case
        bareword = _NON_WORD.sub('', word).lower()
        if bareword in excludes:
            words.append(word.lower())
        else:
            words.append(_capitalize(word))

    if not words:
        # nothing but whitespace: avoid indexing into an empty list
        return ''

    # always capitalize the first and last words, regardless:
    words[-1] = _capitalize(words[-1])
    words[0] = _capitalize(words[0])
    return ' '.join(words)
if __name__ == '__main__':
    # Script entry point: read all of stdin, title-case it, print the result.
    import argparse
    import sys

    cli = argparse.ArgumentParser(
        description=__doc__,
        epilog='https://gist.github.com/ernstki/79faf30fe8a1600cc05f388c13b4b67d',
    )
    # The flag stores the style name itself, so the parsed value can be
    # handed to titlecase() directly; without the flag it is 'ap'.
    cli.add_argument(
        '-c', '-chicago', '--chicago',
        action='store_const',
        const='chicago',
        default='ap',
        help='use Chicago Manual of Style (default: AP)',
    )
    options = cli.parse_args()

    text = sys.stdin.read()
    print(titlecase(text, options.chicago))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment