-
-
Save matthew-macgregor/0dcc1cfd40398dfa74f5 to your computer and use it in GitHub Desktop.
Convert dumb quotes to smart quotes in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Inspired by: https://gist.github.com/davidtheclark/5521432
Converts dumb quotes to smart quotes, "--" to an em dash, and "..." to an ellipsis.
""" | |
import sys | |
import re | |
import os | |
class bcolors:
    """ANSI SGR escape sequences for styling terminal output.

    Emit one of the style codes before a piece of text and ``ENDC`` after
    it to switch the styling back off.
    """

    # Bright foreground colours.
    HEADER = "\x1b[95m"     # bright magenta
    OKBLUE = "\x1b[94m"     # bright blue
    OKGREEN = "\x1b[92m"    # bright green
    WARNING = "\x1b[93m"    # bright yellow
    FAIL = "\x1b[91m"       # bright red

    # Text attributes.
    BOLD = "\x1b[1m"
    UNDERLINE = "\x1b[4m"

    # Resets every colour and attribute set by the codes above.
    ENDC = "\x1b[0m"
# Typographic characters used as replacements for their ASCII stand-ins.
EN_DASH = "\u2013"
EM_DASH = "\u2014"
# Previously missing: dumb_to_smart_quotes() referenced ELLIPSIS without it
# being defined anywhere, so every call failed with NameError.
ELLIPSIS = "\u2026"
L_SQUOTE = "\u2018"
R_SQUOTE = "\u2019"
L_DQUOTE = "\u201C"
R_DQUOTE = "\u201D"

# Characters after which a straight quote is treated as a *closing* quote.
# ELLIPSIS is included because "..." has already been converted by the time
# quotes are examined; without it the quote in `Wait..."` would be opened
# instead of closed.
_CLOSING_CONTEXT = "a-zA-Z0-9.,?!;:'\"" + ELLIPSIS
_CLOSING_DQUOTE_RE = re.compile('([' + _CLOSING_CONTEXT + '])"')
_CLOSING_SQUOTE_RE = re.compile("([" + _CLOSING_CONTEXT + "])'")

# Smart-quoted spans that directly follow "=", i.e. HTML attribute values
# that were converted by mistake and must be turned back into dumb quotes.
_DQUOTED_ATTR_RE = re.compile('=' + L_DQUOTE + '(.*?)' + R_DQUOTE)
_SQUOTED_ATTR_RE = re.compile('=' + L_SQUOTE + '(.*?)' + R_SQUOTE)


def error(msg):
    """Print ``ERROR: <msg>`` to stderr in the FAIL colour and exit with status 1."""
    print(bcolors.FAIL + "ERROR: {}".format(msg) + bcolors.ENDC, file=sys.stderr)
    sys.exit(1)


def exists(filen):
    """Return the result of ``os.path.exists`` for the path *filen*."""
    return os.path.exists(filen)


def dumb_to_smart_quotes(string):
    """Return *string* with ASCII punctuation replaced by typographic forms.

    The following substitutions are applied, in this order:

    * ``--`` becomes an em dash and ``...`` becomes an ellipsis character.
    * A straight double quote that directly follows a letter, digit,
      common punctuation mark, another quote or an ellipsis becomes a
      right double quote; every other straight double quote becomes a
      left double quote.
    * Straight single quotes (including apostrophes) are handled the same
      way with the single smart quotes.

    HTML attribute values are accounted for heuristically: a smart-quoted
    span that immediately follows ``=`` on the same line is converted back
    to dumb quotes.

    Args:
        string: The text to convert.

    Returns:
        The converted text.
    """
    string = string.replace('--', EM_DASH)
    string = string.replace('...', ELLIPSIS)

    # Double quotes: closing quotes first (they are identified by the
    # character in front of them), then whatever is left must be opening.
    string = _CLOSING_DQUOTE_RE.sub(r'\1' + R_DQUOTE, string)
    string = string.replace('"', L_DQUOTE)
    # Undo the conversion for attribute values such as href="...".
    string = _DQUOTED_ATTR_RE.sub(r'="\1"', string)

    # Single quotes follow exactly the same three steps.
    string = _CLOSING_SQUOTE_RE.sub(r'\1' + R_SQUOTE, string)
    string = string.replace("'", L_SQUOTE)
    string = _SQUOTED_ATTR_RE.sub(r"='\1'", string)

    return string
if __name__ == '__main__':
    # Command-line entry point: <script> INPUT_FILE OUTPUT_FILE
    if len(sys.argv) < 3:
        print("Usage: python3 smartquote.py filename.txt smart.txt")
        sys.exit(1)

    # Only the first two arguments are used; any extras are ignored.
    filein, fileout = sys.argv[1:3]

    # Validate the paths before touching the filesystem for writing.
    # error() prints the message and terminates the process.
    if fileout == filein:
        error( "Input file cannot be the same as output file" )
    if not exists(filein):
        error("File {} does not exist.".format(filein))
    if exists(fileout):
        # Refuse to overwrite an existing file.
        error("File {} already exists.".format(fileout))

    # Read the whole input as UTF-8 and convert it in one pass.
    with open(filein, 'r', encoding='utf-8') as source:
        converted = dumb_to_smart_quotes(source.read())

    # Write the converted text to the new output file, also as UTF-8.
    with open(fileout, 'w', encoding='utf-8') as target:
        target.write(converted)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment