Created
December 20, 2013 21:13
-
-
Save benknight/8061636 to your computer and use it in GitHub Desktop.
EducateQuotes from John Gruber's "smartypants"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sub EducateQuotes {
#
#   Converts straight ASCII quotes (' and ") into "educated" (curly) quotes,
#   emitted as numeric HTML entities so the result is independent of the
#   encoding of the surrounding document.
#
#   Parameter:  String. The argument is copied into a localized $_, so the
#               caller's variable is not modified.
#
#   Returns:    The string, with "educated" curly quote HTML entities:
#                   &#8216;  left single quote     &#8217;  right single quote
#                   &#8220;  left double quote     &#8221;  right double quote
#
#   Example input:  "Isn't this fun?"
#   Example output: &#8220;Isn&#8217;t this fun?&#8221;
#
#   The substitutions below are order-dependent: each pass only sees the
#   straight quotes that the earlier passes left behind, and the final
#   "remaining quotes" passes rely on everything else having been handled.
#

    local $_ = shift;

    # Tell perl not to gripe when we use $1 in substitutions,
    # even when it's undefined. Use $^W instead of "no warnings"
    # for compatibility with Perl 5.005:
    local $^W = 0;

    # Make our own "punctuation" character class, because the POSIX-style
    # [:PUNCT:] is only available in Perl 5.6 or later:
    my $punct_class = qr/[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]/;

    # Special case if the very first character is a quote
    # followed by punctuation at a non-word-break. Close the quotes by brute force:
    s/^'(?=$punct_class\B)/&#8217;/;
    s/^"(?=$punct_class\B)/&#8221;/;

    # Special case for double sets of quotes, e.g.:
    #   <p>He said, "'Quoted' words in a larger quote."</p>
    s/"'(?=\w)/&#8220;&#8216;/g;
    s/'"(?=\w)/&#8216;&#8220;/g;

    # Special case for decade abbreviations (the '80s):
    s/'(?=\d{2}s)/&#8217;/g;

    # Characters after which a quote is treated as a closing quote: anything
    # except whitespace, an opening bracket/brace/paren, or a hyphen.
    my $close_class = qr![^\ \t\r\n\[\{\(\-]!;

    # En and em dashes written as decimal numeric entities.
    my $dec_dashes = qr/&#8211;|&#8212;/;

    # Get most opening single quotes:
    s {
        (
            \s          |   # a whitespace char, or
            &nbsp;      |   # a non-breaking space entity, or
            --          |   # dashes, or
            &[mn]dash;  |   # named dash entities
            $dec_dashes |   # or decimal entities
            &\#x201[34];    # or hex
        )
        '                   # the quote
        (?=\w)              # followed by a word character
    } {$1&#8216;}xg;

    # Single closing quotes:
    s {
        ($close_class)?
        '
        (?(1)|          # If $1 captured, then do nothing;
          (?=\s | s\b)  # otherwise, positive lookahead for a whitespace
        )               # char or an 's' at a word ending position. This
                        # is a special case to handle something like:
                        # "<i>Custer</i>'s Last Stand."
    } {$1&#8217;}xgi;

    # Any remaining single quotes should be opening ones:
    s/'/&#8216;/g;

    # Get most opening double quotes:
    s {
        (
            \s          |   # a whitespace char, or
            &nbsp;      |   # a non-breaking space entity, or
            --          |   # dashes, or
            &[mn]dash;  |   # named dash entities
            $dec_dashes |   # or decimal entities
            &\#x201[34];    # or hex
        )
        "                   # the quote
        (?=\w)              # followed by a word character
    } {$1&#8220;}xg;

    # Double closing quotes:
    s {
        ($close_class)?
        "
        (?(1)|(?=\s))   # If $1 captured, then do nothing;
                        # if not, then make sure the next char is whitespace.
    } {$1&#8221;}xg;

    # Any remaining quotes should be opening ones.
    s/"/&#8220;/g;

    return $_;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment