Skip to content

Instantly share code, notes, and snippets.

@bensaufley
Forked from goofmint/wpautop for ruby
Last active October 16, 2015 10:24
Show Gist options
  • Save bensaufley/2f87629b9c67f1bfb607 to your computer and use it in GitHub Desktop.
Save bensaufley/2f87629b9c67f1bfb607 to your computer and use it in GitHub Desktop.
wpautop for Ruby
###
# Replaces double line-breaks with paragraph elements.
#
# A group of regex replaces used to identify text formatted with newlines and
# replace double line-breaks with HTML paragraph tags. The remaining
# line-breaks after conversion become <br /> tags, unless br is false or nil.
#
# Contents of <pre>...</pre> elements are swapped for placeholders before any
# rewriting and restored verbatim at the end. Before paragraphing, "--" and
# " - " style dashes are replaced with em/en dashes and some text runs are
# entity-encoded through the htmlentities gem (loaded lazily on first call).
#
# @since 0.71
#
# @param pee [String, nil] The text which has to be formatted.
# @param br [Boolean] Optional. If truthy, converts all remaining line-breaks
#   after paragraphing into <br /> tags. Default true.
# @return [String, nil] Text which has been converted into correct paragraph
#   tags, or nil when the input is nil or contains only whitespace.
###
def wpautop(pee, br = true)
  return if pee.nil? || pee.strip.empty?

  # Pad the end to make the patterns below simpler. Interpolation also yields a
  # fresh string, so the in-place gsub! calls never touch the caller's object.
  pee = "#{pee}\n"

  # Swap every <pre>...</pre> for a placeholder so its contents survive intact.
  pre_tags = {}
  if pee.include?('<pre')
    pee_parts = pee.split('</pre>', -1)
    last_pee = pee_parts.pop
    protected_parts = pee_parts.each_with_index.map do |pee_part, i|
      start = pee_part.index('<pre')
      next pee_part if start.nil? # malformed HTML: </pre> without an opening tag

      name = "<pre wp-pre-tag-#{i}></pre>"
      # split consumed the closing tag, so put it back on the stored fragment.
      pre_tags[name] = "#{pee_part[start..-1]}</pre>"
      # Exclusive range: keep only what precedes "<pre".
      "#{pee_part[0...start]}#{name}"
    end
    pee = protected_parts.join + last_pee
  end

  # Extra text formatting (third-party gem, deliberately required lazily).
  require 'htmlentities'
  encoder = HTMLEntities.new(:expanded)
  # NOTE(review): the `(?<=\s)---?(?=\s)` alternative can never match, because
  # the leading lookbehind demands a non-whitespace character at that same
  # position; only a bare "--" between two non-space characters is replaced.
  pee.gsub!(/(?<=[^\s\n])((?<=\s)---?(?=\s)|--)(?=[^\s\n])/, '—')
  pee.gsub!(/(?<=[^\s\n]\s)--?(?=\s[^\s\n])/, '–')
  # Encode text that sits on one line between a tag (or the start of input) and
  # the next tag. "<" is excluded so an adjacent tag is never swallowed and
  # encoded; "\n" is excluded so line structure is left for the steps below.
  pee.gsub!(/(?<=\A|>)[^<\n]+?(?=\z|<)/) { |text| encoder.encode(text, :decimal) }

  pee.gsub!(%r{<br />\s*<br />}, "\n\n")

  # Space things out a little
  all_blocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'
  pee.gsub!(/(<#{all_blocks}[^>]*>)/, "\n\\1")
  pee.gsub!(%r{(</#{all_blocks}>)}, "\\1\n\n")

  if pee.include?('<option')
    # no P/BR around option
    pee.gsub!(/\s*<option/, '<option')
    pee.gsub!(%r{</option>\s*}, '</option>')
  end

  if pee.include?('</object>')
    # no P/BR around param and embed
    pee.gsub!(/(<object[^>]*>)\s*/, '\1') # ">" must be inside the group or nothing is stripped
    pee.gsub!(%r{\s*</object>}, '</object>')
    pee.gsub!(%r{\s*(</?(?:param|embed)[^>]*>)\s*}, '\1')
  end

  if pee.include?('<source') || pee.include?('<track')
    # no P/BR around source and track
    pee.gsub!(/([<\[](?:audio|video)[^>\]]*[>\]])\s*/, '\1')
    pee.gsub!(%r{\s*([<\[]/(?:audio|video)[>\]])}, '\1')
    pee.gsub!(/\s*(<(?:source|track)[^>]*>)\s*/, '\1')
  end

  pee.gsub!(/\n\n+/, "\n\n") # take care of duplicates

  # make paragraphs, including one at the end
  pee = pee.split(/\n\s*\n/).map do |tinkle|
    "<p>#{tinkle.gsub(/\A\n*(.+?)\n*\z/m, '\1')}</p>\n"
  end.join

  pee.gsub!(%r{<p>\s*</p>}, '') # under certain strange conditions it could create a P of entirely whitespace
  pee.gsub!(%r{<p>([^<]+)</(div|address|form)>}, '<p>\1</p></\2>')
  # Regex literals (not double-quoted strings) so that \s means "whitespace"
  # rather than collapsing to a single literal space.
  pee.gsub!(%r{<p>\s*(</?#{all_blocks}[^>]*>)\s*</p>}, '\1') # don't pee all over a tag
  pee.gsub!(%r{<p>(<li.+?)</p>}, '\1') # problem with nested lists
  pee.gsub!(/<p><blockquote([^>]*)>/i, '<blockquote\1><p>')
  pee.gsub!('</blockquote></p>', '</p></blockquote>')
  pee.gsub!(%r{<p>\s*(</?#{all_blocks}[^>]*>)}, '\1')
  pee.gsub!(%r{(</?#{all_blocks}[^>]*>)\s*</p>}, '\1')

  if br
    # Shield newlines inside <script>/<style> from the <br /> conversion below.
    pee.gsub!(%r{<(script|style).*?</\1>}m) do |match|
      match.gsub("\n", '<WPPreserveNewline />')
    end
    pee.gsub!(%r{(?<!<br />)\s*\n}, "<br />\n")
    pee.gsub!('<WPPreserveNewline />', "\n")
  end

  pee.gsub!(%r{(</?#{all_blocks}[^>]*>)\s*<br />}, '\1')
  pee.gsub!(%r{<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)}, '\1')
  pee.gsub!(%r{\n</p>$}, '</p>')

  # Block form: the stored HTML is inserted verbatim, so backslash sequences in
  # preformatted content are not interpreted as replacement back-references.
  pre_tags.each { |name, html| pee.gsub!(name) { html } }

  pee
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment