Created
October 29, 2008 15:11
-
-
Save qrush/20712 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'htmlentities' | |
# Convenience extensions to String: slugs, HTML-entity decoding,
# word-boundary truncation and whitelist-based HTML tag sanitizing.
class String
  # Collapses every run of characters other than ASCII letters and digits
  # into a single '-' and drops one trailing '-'. A leading separator is
  # kept (e.g. "!a" becomes "-a").
  #
  # Returns a new String.
  def slugify
    gsub(/[^a-z0-9]+/i, '-').chomp('-')
  end

  # Decodes HTML entities via the htmlentities gem (HTMLEntities#decode).
  #
  # Returns the decoded String.
  def decode
    HTMLEntities.new.decode(self)
  end

  # Truncates on whitespace boundaries so the kept words, joined by single
  # spaces, are at most +desired+ characters long, then appends an ellipsis.
  #
  # desired - maximum length of the kept text (default 50). The appended
  #           "…" is not counted, so the result may be desired + 1 long.
  #
  # Returns self untouched when it already fits, otherwise a new String.
  # If even the first word is too long the result is just "…".
  def cutoff(desired = 50)
    return self if length <= desired

    words = split
    words.pop while words.join(' ').size > desired
    words.join(' ') << '…'
  end

  # In-place variant of #sanitize.
  #
  # Uses String#replace rather than gsub!: with gsub! the sanitized text is
  # treated as a substitution template, so backslash sequences such as "\1"
  # or "\\" in the content would be interpreted and lost.
  #
  # Returns self.
  def sanitize!(okTags = '')
    replace(sanitize(okTags))
  end

  # http://ideoplex.com/id/1138/sanitize-html-in-ruby
  #
  # Strips every <...> element that is not whitelisted and rebuilds the
  # whitelisted ones, keeping only whitelisted attributes.
  #
  # okTags - comma-separated list of allowed tags; each tag may be followed
  #          by space-separated allowed attribute names, for example
  #          "a href, b, br, i, p". Matching is case-insensitive and emitted
  #          tag/attribute names are lowercased.
  #
  # Behaviour worth knowing:
  # * Only double-quoted, non-empty attribute values are recognised.
  # * Self-closing tags (<tag />) are emitted without attributes.
  # * Allowed tags left open are closed at the end of the result; a closing
  #   tag also closes any allowed tags opened inside it.
  # * '>' characters before the first '<', and a trailing '<' that is never
  #   closed, are removed from the result.
  #
  # NOTE(review): attribute values are copied through verbatim, so allowing
  # e.g. "a href" still lets href="javascript:..." through. Validate values
  # separately before rendering untrusted input.
  #
  # Returns a new sanitized String.
  def sanitize(okTags = '')
    # no closing tag necessary for these
    solo_tags = %w[br hr]

    # Build hash of allowed tags => allowed attribute names
    allowed = {}
    okTags.downcase.split(',').each do |spec|
      tag_name, *attributes = spec.split(' ')
      next unless tag_name # empty entry such as "b,,i"

      allowed[tag_name] = attributes
    end

    # Analyze all <> elements; a nil block result removes the element
    stack = []
    result = gsub(/(<.*?>)/m) do |element|
      if (closing = element.match(%r{\A</(\w+)}))
        # </tag>
        tag = closing[1].downcase
        if allowed.include?(tag) && stack.include?(tag)
          # Pop down the stack, closing everything opened inside this tag
          top = stack.pop
          out = "</#{top}>"
          until top == tag
            top = stack.pop
            out << "</#{top}>"
          end
          out
        end
      elsif (self_closing = element.match(%r{\A<(\w+)\s*/>}))
        # <tag />
        tag = self_closing[1].downcase
        "<#{tag} />" if allowed.include?(tag)
      elsif (opening = element.match(/\A<(\w+)/))
        # <tag ...>
        tag = opening[1].downcase
        if allowed.include?(tag)
          stack.push(tag) unless solo_tags.include?(tag)

          out = "<#{tag}"
          # Walk the attributes after the tag name explicitly instead of
          # relying on the implicit $' global surviving across statements.
          opening.post_match.scan(/(\w+)=("[^"]+")/) do |attr_name, value|
            attr_name = attr_name.downcase
            out << " #{attr_name}=#{value}" if allowed[tag].include?(attr_name)
          end
          out << '>'
        end
      end
    end

    # eat up unmatched leading >
    nil while result.sub!(/\A([^<]*)>/m) { Regexp.last_match(1) }
    # eat up unmatched trailing <
    nil while result.sub!(/<([^>]*)\Z/m) { Regexp.last_match(1) }

    # clean up the stack: close whatever is still open, innermost first
    result << "</#{stack.reverse.join('></')}>" unless stack.empty?
    result
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment