Skip to content

Instantly share code, notes, and snippets.

@soldni
Created November 23, 2015 13:34
Show Gist options
  • Select an option

  • Save soldni/63bcd0c6a9d5aff8563b to your computer and use it in GitHub Desktop.

Select an option

Save soldni/63bcd0c6a9d5aff8563b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import unicode_literals
import sys
import codecs
import re
import codecs
# (full lowercase name, lowercase two-letter postal code) for the 50 US states
# plus the District of Columbia.
#
# The order is deliberate: wherever one name contains another, the longer name
# is listed first ("arkansas" before "kansas", "west virginia" before
# "virginia"), so consumers that apply the replacements one after another do
# not clobber the longer name with the shorter one.
statecodes = (
    ("alabama", "al"),
    ("alaska", "ak"),
    ("arizona", "az"),
    ("arkansas", "ar"),
    ("california", "ca"),
    ("colorado", "co"),
    ("connecticut", "ct"),
    ("delaware", "de"),
    ("district of columbia", "dc"),
    ("florida", "fl"),
    ("georgia", "ga"),
    ("hawaii", "hi"),
    ("idaho", "id"),
    ("illinois", "il"),
    ("indiana", "in"),
    ("kansas", "ks"),
    ("oklahoma", "ok"),
    ("kentucky", "ky"),
    ("louisiana", "la"),
    ("maine", "me"),
    ("maryland", "md"),
    ("wisconsin", "wi"),
    ("massachusetts", "ma"),
    ("michigan", "mi"),
    ("minnesota", "mn"),
    ("mississippi", "ms"),
    ("missouri", "mo"),
    ("montana", "mt"),
    ("nebraska", "ne"),
    ("nevada", "nv"),
    ("new hampshire", "nh"),
    ("new jersey", "nj"),
    ("new mexico", "nm"),
    ("new york", "ny"),
    ("north carolina", "nc"),
    ("north dakota", "nd"),
    ("ohio", "oh"),
    ("iowa", "ia"),
    ("oregon", "or"),
    ("pennsylvania", "pa"),
    ("wyoming", "wy"),
    ("rhode island", "ri"),
    ("south carolina", "sc"),
    ("south dakota", "sd"),
    ("tennessee", "tn"),
    ("texas", "tx"),
    ("utah", "ut"),
    ("vermont", "vt"),
    ("west virginia", "wv"),
    ("virginia", "va"),
    ("washington", "wa"),
)
def abbreviate_states(text, mapping=None):
    """Replace every whole-word state name in *text* with its code.

    Matching is case-insensitive and anchored on word boundaries, so a name
    embedded in a longer word ("maine" in "remained", "indiana" in
    "Indianapolis") is left alone. All names are matched in a single pass
    with the longest names tried first, so the result does not depend on the
    order of *mapping* ("west virginia" always wins over "virginia").

    Args:
        text: The text to rewrite.
        mapping: Iterable of ``(name, code)`` pairs. Defaults to the
            module-level ``statecodes`` table.

    Returns:
        A copy of *text* with each matched name replaced by its code.
    """
    if mapping is None:
        mapping = statecodes
    # Drop empty names: an empty alternative would match at every boundary.
    pairs = [(name, code) for name, code in mapping if name]
    if not pairs:
        return text
    # sorted() is stable, so equally long names keep their relative order.
    pairs = sorted(pairs, key=lambda pair: len(pair[0]), reverse=True)
    # One capture group per name lets the replacement be looked up by group
    # index, which stays correct whatever case folding the regex engine
    # applied to the matched text.
    alternatives = "|".join("(" + re.escape(name) + ")" for name, _ in pairs)
    pattern = re.compile(r"\b(?:" + alternatives + r")\b",
                         re.IGNORECASE | re.UNICODE)
    codes = [code for _, code in pairs]
    return pattern.sub(lambda match: codes[match.lastindex - 1], text)


def main(argv=None):
    """Rewrite the UTF-8 file named by ``argv[1]`` in place.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when no file was given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else "statecodes"
        sys.stderr.write("usage: %s FILE\n" % prog)
        return 2
    path = argv[1]
    # Read the whole file before reopening it for writing: opening in write
    # mode truncates it.
    with codecs.open(path, encoding='utf-8') as inf:
        text = inf.read()
    text = abbreviate_states(text)
    with codecs.open(path, mode='w', encoding='utf-8') as outf:
        outf.write(text)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment