Created
March 10, 2012 23:11
-
-
Save lisamelton/2013845 to your computer and use it in GitHub Desktop.
Monkey patch kramdown to behave more like `wptexturize` from WordPress instead of SmartyPants when parsing dashes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hack by Don Melton, based on portions of kramdown by Thomas Leitner.
#
# Monkey patch kramdown to behave more like `wptexturize` from WordPress
# instead of SmartyPants when parsing dashes.
#
# Now double and single hyphen-minus character sequences surrounded by spaces,
# i.e. `' -- '` and `' - '`, are converted to em and en dashes, respectively,
# still surrounded by spaces. Old and new dash conversions are:
#
# `'---'` => em dash
# `' -- '` => em dash surrounded by spaces
# `'--'` => en dash
# `' - '` => en dash surrounded by spaces
#
# Note: kramdown conversion to LaTeX and HTML conversion back to kramdown
# formats are not patched.
require 'kramdown'

module Kramdown
  module Parser
    class Kramdown < Base
      # Stores a parser record under +name+, replacing any existing entry.
      #
      # Cloned from `define_parser` in `parser/kramdown.rb` to *not* check if
      # the named parser already exists, so it can be used to override a
      # parser that kramdown has already registered.
      #
      # name       - key under which the record is stored in `@@parsers`.
      # start_re   - start regular expression kept in the record.
      # span_start - span-start pattern kept in the record (default: nil).
      # meth_name  - name of the parsing method (default: "parse_#{name}").
      #
      # Returns the newly stored `Data` record.
      def self.redefine_parser(name, start_re, span_start = nil, meth_name = "parse_#{name}")
        @@parsers[name] = Data.new(name, start_re, span_start, meth_name)
      end
    end

    class Kramdown
      # Disable warnings while redefining constants. The `ensure` clause
      # guarantees the previous verbosity is restored even if one of the
      # redefinitions raises, so warnings are never left switched off.
      original_verbosity = $VERBOSE
      begin
        $VERBOSE = nil

        # Add `' -- '` and `' - '` to list of typographic symbols. They are
        # listed before their shorter counterparts so that they come first in
        # the alternation built for `TYPOGRAPHIC_SYMS_RE`.
        # Note: Any non-Symbol values, e.g. `' — '`, are not strictly
        # necessary in this array but are included for clarity.
        TYPOGRAPHIC_SYMS = [['---', :mdash], [' -- ', ' — '], ['--', :ndash], [' - ', ' – '],
                            ['...', :hellip],
                            ['\\<<', '<<'], ['\\>>', '>>'],
                            ['<< ', :laquo_space], [' >>', :raquo_space],
                            ['<<', :laquo], ['>>', :raquo]]

        # Reset other constants (copied from `typographic_symbol.rb`)
        # because `TYPOGRAPHIC_SYMS` has been redefined.
        TYPOGRAPHIC_SYMS_SUBST = Hash[*TYPOGRAPHIC_SYMS.flatten]
        TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map { |sym, _| Regexp.escape(sym) }.join('|')}/
      ensure
        # Re-enable warnings.
        $VERBOSE = original_verbosity
      end

      # Consumes the typographic symbol that was just matched and appends the
      # corresponding element(s) to the current tree node.
      #
      # Augmented to handle the new non-Symbol typographic symbols, i.e.
      # `' -- '` and `' - '`:
      #
      # * Symbol substitutions become a single `:typographic_sym` element.
      # * `' -- '` / `' - '` become an mdash / ndash entity wrapped in two
      #   `:text` elements holding a single space each.
      # * `'\\<<'` becomes two `lt` entities.
      # * Anything else (per the table above, `'\\>>'`) becomes two `gt`
      #   entities.
      def parse_typographic_syms
        @src.pos += @src.matched_size
        matched = @src.matched
        substitute = TYPOGRAPHIC_SYMS_SUBST[matched]
        children = @tree.children

        if substitute.kind_of?(Symbol)
          children << Element.new(:typographic_sym, substitute)
        else
          case matched
          when ' -- '
            children << Element.new(:text, ' ')
            children << Element.new(:entity, ::Kramdown::Utils::Entities.entity('mdash'))
            children << Element.new(:text, ' ')
          when ' - '
            children << Element.new(:text, ' ')
            children << Element.new(:entity, ::Kramdown::Utils::Entities.entity('ndash'))
            children << Element.new(:text, ' ')
          when '\\<<'
            children << Element.new(:entity, ::Kramdown::Utils::Entities.entity('lt'))
            children << Element.new(:entity, ::Kramdown::Utils::Entities.entity('lt'))
          else
            children << Element.new(:entity, ::Kramdown::Utils::Entities.entity('gt'))
            children << Element.new(:entity, ::Kramdown::Utils::Entities.entity('gt'))
          end
        end
      end

      # Replace old `:typographic_syms` parser to use augmented map and new
      # regular expression which checks for an optional space before
      # hyphen-minus character input.
      redefine_parser(:typographic_syms, TYPOGRAPHIC_SYMS_RE, '[ \-]-|\\.\\.\\.|(?:\\\\| )?(?:<<|>>)')
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment