qrush · October 29, 2008 15:11
diff --git a/gistfile1.rbx b/gistfile1.rbx
 require 'htmlentities'

 class String
  # Builds a URL-friendly slug: every run of characters other than ASCII
  # letters and digits collapses into a single '-', and a dash left at either
  # end of the result is dropped. Letter case is preserved.
  def slugify
    gsub(/[^a-z0-9]+/i, '-').gsub(/\A-|-\z/, '')
  end

  # Hands the string to HTMLEntities#decode (htmlentities gem) and returns
  # whatever that call returns.
  def decode
    HTMLEntities.new.decode(self)
  end

  # Shortens the string to at most +desired+ characters on a word boundary and
  # appends an ellipsis. Strings already within the limit are returned as-is
  # (the receiver itself, not a copy). Words are re-joined with single spaces,
  # so runs of whitespace are collapsed in the shortened result.
  def cutoff(desired = 50)
    return self if length <= desired

    words = split
    # Stop when nothing is left so a non-positive limit cannot loop forever.
    words.pop until words.empty? || words.join(' ').size <= desired

    # When not even the first word fits, cut inside it rather than returning
    # a bare ellipsis.
    kept = words.empty? ? lstrip[0, desired] : words.join(' ')
    "#{kept}…"
  end

  # In-place variant of #sanitize: overwrites the receiver with the sanitized
  # text and returns the receiver.
  def sanitize!(okTags = '')
    # String#replace copies the text verbatim; gsub! would interpret
    # backslash sequences in the sanitized text as replacement escapes.
    replace(sanitize(okTags))
  end

  # http://ideoplex.com/id/1138/sanitize-html-in-ruby
  #
  # Returns a copy of the string with every <...> element removed unless its
  # tag is whitelisted in +okTags+.
  #
  # +okTags+ is a comma-separated list; each entry is a tag name optionally
  # followed by space-separated attribute names permitted on that tag, e.g.
  #   'a href, b, br, i, p'
  # Matching is case-insensitive and emitted tag/attribute names are
  # lower-cased. Only double-quoted, non-empty attribute values are kept.
  # Allowed tags left open are closed (innermost first), either when an outer
  # allowed tag closes or at the end of the string.
  #
  # NOTE(review): values of allowed attributes are copied through unchecked,
  # so an allowed href can still carry a "javascript:" URL.
  def sanitize(okTags = '')
    # no closing tag necessary for these
    solo_tags = %w[br hr]

    # Build hash of allowed tags with allowed attributes
    allowed = {}
    okTags.downcase.split(',').each do |spec|
      name, *attrs = spec.split(' ')
      next unless name # blank entry such as "a,,b"
      allowed[name] = attrs
    end

    # Analyze all <> elements; a nil block result removes the element.
    stack = []
    result = gsub(/(<.*?>)/m) do |element|
      if element =~ /\A<\/(\w+)/
        # </tag>
        tag = $1.downcase
        if allowed.include?(tag) && stack.include?(tag)
          # Allowed and currently open: close everything opened after it,
          # then the tag itself.
          top = stack.pop
          out = "</#{top}>"
          until top == tag
            top = stack.pop
            out << "</#{top}>"
          end
          out
        end
      elsif element =~ /\A<(\w+)\s*\/>/
        # <tag />
        tag = $1.downcase
        "<#{tag} />" if allowed.include?(tag)
      elsif element =~ /\A<(\w+)/
        # <tag ...>
        tag = $1.downcase
        rest = $' # everything after the tag name, i.e. the attribute text
        if allowed.include?(tag)
          stack.push(tag) unless solo_tags.include?(tag)
          if allowed[tag].empty?
            # no allowed attributes
            "<#{tag}>"
          else
            # keep only whitelisted name="value" pairs, in source order
            out = "<#{tag}"
            rest.scan(/(\w+)=("[^"]+")/) do |attr, valu|
              attr = attr.downcase
              out << " #{attr}=#{valu}" if allowed[tag].include?(attr)
            end
            out << ">"
          end
        end
      end
    end

    # eat up unmatched leading >
    while result.sub!(/\A([^<]*)>/m) { $1 } do end

    # eat up unmatched trailing <
    while result.sub!(/<([^>]*)\Z/m) { $1 } do end

    # clean up the stack: close whatever is still open, innermost first
    result << "</#{stack.reverse.join('></')}>" unless stack.empty?

    result
  end
 end
	require 'htmlentities'

	class String
	  # Builds a URL-friendly slug: every run of characters other than ASCII
	  # letters and digits collapses into a single '-', and a dash left at either
	  # end of the result is dropped. Letter case is preserved.
	  def slugify
	    gsub(/[^a-z0-9]+/i, '-').gsub(/\A-|-\z/, '')
	  end

	  # Hands the string to HTMLEntities#decode (htmlentities gem) and returns
	  # whatever that call returns.
	  def decode
	    HTMLEntities.new.decode(self)
	  end

	  # Shortens the string to at most +desired+ characters on a word boundary and
	  # appends an ellipsis. Strings already within the limit are returned as-is
	  # (the receiver itself, not a copy). Words are re-joined with single spaces,
	  # so runs of whitespace are collapsed in the shortened result.
	  def cutoff(desired = 50)
	    return self if length <= desired

	    words = split
	    # Stop when nothing is left so a non-positive limit cannot loop forever.
	    words.pop until words.empty? || words.join(' ').size <= desired

	    # When not even the first word fits, cut inside it rather than returning
	    # a bare ellipsis.
	    kept = words.empty? ? lstrip[0, desired] : words.join(' ')
	    "#{kept}…"
	  end

	  # In-place variant of #sanitize: overwrites the receiver with the sanitized
	  # text and returns the receiver.
	  def sanitize!(okTags = '')
	    # String#replace copies the text verbatim; gsub! would interpret
	    # backslash sequences in the sanitized text as replacement escapes.
	    replace(sanitize(okTags))
	  end

	  # http://ideoplex.com/id/1138/sanitize-html-in-ruby
	  #
	  # Returns a copy of the string with every <...> element removed unless its
	  # tag is whitelisted in +okTags+.
	  #
	  # +okTags+ is a comma-separated list; each entry is a tag name optionally
	  # followed by space-separated attribute names permitted on that tag, e.g.
	  #   'a href, b, br, i, p'
	  # Matching is case-insensitive and emitted tag/attribute names are
	  # lower-cased. Only double-quoted, non-empty attribute values are kept.
	  # Allowed tags left open are closed (innermost first), either when an outer
	  # allowed tag closes or at the end of the string.
	  #
	  # NOTE(review): values of allowed attributes are copied through unchecked,
	  # so an allowed href can still carry a "javascript:" URL.
	  def sanitize(okTags = '')
	    # no closing tag necessary for these
	    solo_tags = %w[br hr]

	    # Build hash of allowed tags with allowed attributes
	    allowed = {}
	    okTags.downcase.split(',').each do |spec|
	      name, *attrs = spec.split(' ')
	      next unless name # blank entry such as "a,,b"
	      allowed[name] = attrs
	    end

	    # Analyze all <> elements; a nil block result removes the element.
	    stack = []
	    result = gsub(/(<.*?>)/m) do |element|
	      if element =~ /\A<\/(\w+)/
	        # </tag>
	        tag = $1.downcase
	        if allowed.include?(tag) && stack.include?(tag)
	          # Allowed and currently open: close everything opened after it,
	          # then the tag itself.
	          top = stack.pop
	          out = "</#{top}>"
	          until top == tag
	            top = stack.pop
	            out << "</#{top}>"
	          end
	          out
	        end
	      elsif element =~ /\A<(\w+)\s*\/>/
	        # <tag />
	        tag = $1.downcase
	        "<#{tag} />" if allowed.include?(tag)
	      elsif element =~ /\A<(\w+)/
	        # <tag ...>
	        tag = $1.downcase
	        rest = $' # everything after the tag name, i.e. the attribute text
	        if allowed.include?(tag)
	          stack.push(tag) unless solo_tags.include?(tag)
	          if allowed[tag].empty?
	            # no allowed attributes
	            "<#{tag}>"
	          else
	            # keep only whitelisted name="value" pairs, in source order
	            out = "<#{tag}"
	            rest.scan(/(\w+)=("[^"]+")/) do |attr, valu|
	              attr = attr.downcase
	              out << " #{attr}=#{valu}" if allowed[tag].include?(attr)
	            end
	            out << ">"
	          end
	        end
	      end
	    end

	    # eat up unmatched leading >
	    while result.sub!(/\A([^<]*)>/m) { $1 } do end

	    # eat up unmatched trailing <
	    while result.sub!(/<([^>]*)\Z/m) { $1 } do end

	    # clean up the stack: close whatever is still open, innermost first
	    result << "</#{stack.reverse.join('></')}>" unless stack.empty?

	    result
	  end
	end