Created
February 16, 2022 02:32
-
-
Save dmdeluca/9cffec2edad3d9282dea534692f5b702 to your computer and use it in GitHub Desktop.
Convert dumb quotes to smart quotes in HTML string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convert_quotes(html: str) -> str:
    """Replace dumb quotes with smart quotes in HTML.

    Straight double quotes that appear in text (outside ``<...>`` tags) are
    rewritten as alternating opening (“) and closing (”) curly quotes.
    Double quotes inside tags, such as attribute delimiters, are left as
    straight quotes so the markup stays valid.

    Any curly double quotes already present anywhere in the input are first
    normalized to straight quotes, so existing smart quotes are re-paired
    from scratch rather than trusted.

    Args:
        html: The HTML string to process. Scanning starts outside any tag
            and outside any quotation.

    Returns:
        A new string with the text-level double quotes curled.

    Note:
        Tag detection is purely character based: ``<`` enters tag mode and
        the next ``>`` leaves it. A ``>`` inside an attribute value therefore
        ends tag mode early, and comment/script contents get no special
        treatment.
    """
    # Normalize existing curly quotes so that mismatched or partially
    # converted input is re-paired consistently below.
    html = html.replace('“', '"').replace('”', '"')

    in_html_tag = False
    in_quote = False
    pieces = []  # collected output characters, joined once at the end

    for c in html:
        if c == '<':
            # Always enter tag mode; a '<' seen while already inside a tag
            # must not flip us back out of it.
            in_html_tag = True
        elif c == '>':
            # Always leave tag mode; a stray '>' in plain text must not
            # flip us into tag mode and suppress later quote conversion.
            in_html_tag = False
        elif c == '"' and not in_html_tag:
            # Close the quotation if one is open, otherwise open one.
            c = "”" if in_quote else "“"
            in_quote = not in_quote
        pieces.append(c)

    return "".join(pieces)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment