# Created
# June 20, 2011 19:24
# -
# -
# Save itochan/1036353 to your computer and use it in GitHub Desktop.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
class CGI
  # Default encoding used by CGI::unescape. Only assigned when nothing else
  # has defined it yet, so an earlier definition wins.
  @@accept_charset = "UTF-8" unless defined?(@@accept_charset)

  # URL-encode a string.
  #   url_encoded_string = CGI::escape("'Stop!' said Fred")
  #      # => "%27Stop%21%27+said+Fred"
  #
  # The string is processed byte-wise, so input containing invalid byte
  # sequences is percent-encoded instead of raising. The result carries the
  # encoding of +string+.
  def CGI::escape(string)
    encoding = string.encoding
    bytes = string.dup.force_encoding(Encoding::ASCII_8BIT)
    escaped = bytes.gsub(/[^ a-zA-Z0-9_.-]+/) do |run|
      '%' + run.unpack('H2' * run.bytesize).join('%').upcase
    end
    escaped.tr(' ', '+').force_encoding(encoding)
  end

  # URL-decode a string with encoding(optional).
  #   string = CGI::unescape("%27Stop%21%27+said+Fred")
  #      # => "'Stop!' said Fred"
  #
  # The decoded bytes are tagged with +encoding+; when they are not valid in
  # that encoding, the encoding of the original +string+ is used instead.
  def CGI::unescape(string, encoding = @@accept_charset)
    binary = string.tr('+', ' ').force_encoding(Encoding::ASCII_8BIT)
    decoded = binary.gsub(/(?:%[0-9a-fA-F]{2})+/) do |run|
      [run.delete('%')].pack('H*')
    end
    decoded.force_encoding(encoding)
    decoded.valid_encoding? ? decoded : decoded.force_encoding(string.encoding)
  end

  # The set of special characters and their escaped values.
  # CGI::escapeHTML only ever emits the first five entries; the remaining
  # entries exist so that TABLE_FOR_UNESCAPE_HTML__ can decode named entities.
  TABLE_FOR_ESCAPE_HTML__ = {
    '"' => '&quot;',
    "'" => '&#39;',
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    "\u00A0" => '&nbsp;', # no-break space, written as an escape so it stays visible
    '¡' => '&iexcl;',
    '¢' => '&cent;',
    '£' => '&pound;',
    '¤' => '&curren;',
    '¥' => '&yen;',
    '¦' => '&brvbar;',
    '§' => '&sect;',
    '¨' => '&uml;',
    '©' => '&copy;',
    'ª' => '&ordf;',
    '«' => '&laquo;',
    '¬' => '&not;',
    '®' => '&reg;',
    '¯' => '&macr;',
    '°' => '&deg;',
    '±' => '&plusmn;',
    '²' => '&sup2;',
    '³' => '&sup3;',
    '´' => '&acute;',
    'µ' => '&micro;',
    '¶' => '&para;',
    '·' => '&middot;',
    '¸' => '&cedil;',
    '¹' => '&sup1;',
    'º' => '&ordm;',
    '»' => '&raquo;',
    '¼' => '&frac14;',
    '½' => '&frac12;',
    '¾' => '&frac34;',
    '¿' => '&iquest;',
    '×' => '&times;',
    '÷' => '&divide;',
    'À' => '&Agrave;',
    'Á' => '&Aacute;',
    'Â' => '&Acirc;',
    'Ã' => '&Atilde;',
    'Ä' => '&Auml;',
    'Å' => '&Aring;',
    'Æ' => '&AElig;',
    'Ç' => '&Ccedil;',
    'È' => '&Egrave;',
    'É' => '&Eacute;',
    'Ê' => '&Ecirc;',
    'Ë' => '&Euml;',
    'Ì' => '&Igrave;',
    'Í' => '&Iacute;',
    'Î' => '&Icirc;',
    'Ï' => '&Iuml;',
    'Ð' => '&ETH;',
    'Ñ' => '&Ntilde;',
    'Ò' => '&Ograve;',
    'Ó' => '&Oacute;',
    'Ô' => '&Ocirc;',
    'Õ' => '&Otilde;',
    'Ö' => '&Ouml;',
    'Ø' => '&Oslash;',
    'Ù' => '&Ugrave;',
    'Ú' => '&Uacute;',
    'Û' => '&Ucirc;',
    'Ü' => '&Uuml;',
    'Ý' => '&Yacute;',
    'Þ' => '&THORN;',
    'ß' => '&szlig;',
    'à' => '&agrave;',
    'á' => '&aacute;',
    'â' => '&acirc;',
    'ã' => '&atilde;',
    'ä' => '&auml;',
    'å' => '&aring;',
    'æ' => '&aelig;',
    'ç' => '&ccedil;',
    'è' => '&egrave;',
    'é' => '&eacute;',
    'ê' => '&ecirc;',
    'ë' => '&euml;',
    'ì' => '&igrave;',
    'í' => '&iacute;',
    'î' => '&icirc;',
    'ï' => '&iuml;',
    'ð' => '&eth;',
    'ñ' => '&ntilde;',
    'ò' => '&ograve;',
    'ó' => '&oacute;',
    'ô' => '&ocirc;',
    'õ' => '&otilde;',
    'ö' => '&ouml;',
    'ø' => '&oslash;',
    'ù' => '&ugrave;',
    'ú' => '&uacute;',
    'û' => '&ucirc;',
    'ü' => '&uuml;',
    'ý' => '&yacute;',
    'þ' => '&thorn;',
    'ÿ' => '&yuml;',
    '∀' => '&forall;',
    '∂' => '&part;',
    '∃' => '&exist;',
    '∅' => '&empty;',
    '∇' => '&nabla;',
    '∈' => '&isin;',
    '∉' => '&notin;',
    '∋' => '&ni;',
    '∏' => '&prod;',
    '∑' => '&sum;',
    '−' => '&minus;',
    '∗' => '&lowast;',
    '√' => '&radic;',
    '∝' => '&prop;',
    '∞' => '&infin;',
    '∠' => '&ang;',
    '∧' => '&and;',
    '∨' => '&or;',
    '∩' => '&cap;',
    '∪' => '&cup;',
    '∫' => '&int;',
    '∴' => '&there4;',
    '∼' => '&sim;',
    '≅' => '&cong;',
    '≈' => '&asymp;',
    '≠' => '&ne;',
    '≡' => '&equiv;',
    '≤' => '&le;',
    '≥' => '&ge;',
    '⊂' => '&sub;',
    '⊃' => '&sup;',
    '⊄' => '&nsub;',
    '⊆' => '&sube;',
    '⊇' => '&supe;',
    '⊕' => '&oplus;',
    '⊗' => '&otimes;',
    '⊥' => '&perp;',
    '⋅' => '&sdot;',
    'Α' => '&Alpha;',
    'Β' => '&Beta;',
    'Γ' => '&Gamma;',
    'Δ' => '&Delta;',
    'Ε' => '&Epsilon;',
    'Ζ' => '&Zeta;',
    'Η' => '&Eta;',
    'Θ' => '&Theta;',
    'Ι' => '&Iota;',
    'Κ' => '&Kappa;',
    'Λ' => '&Lambda;',
    'Μ' => '&Mu;',
    'Ν' => '&Nu;',
    'Ξ' => '&Xi;',
    'Ο' => '&Omicron;',
    'Π' => '&Pi;',
    'Ρ' => '&Rho;',
    'Σ' => '&Sigma;',
    'Τ' => '&Tau;',
    'Υ' => '&Upsilon;',
    'Φ' => '&Phi;',
    'Χ' => '&Chi;',
    'Ψ' => '&Psi;',
    'Ω' => '&Omega;',
    'α' => '&alpha;',
    'β' => '&beta;',
    'γ' => '&gamma;',
    'δ' => '&delta;',
    'ε' => '&epsilon;',
    'ζ' => '&zeta;',
    'η' => '&eta;',
    'θ' => '&theta;',
    'ι' => '&iota;',
    'κ' => '&kappa;',
    'λ' => '&lambda;',
    'μ' => '&mu;',
    'ν' => '&nu;',
    'ξ' => '&xi;',
    'ο' => '&omicron;',
    'π' => '&pi;',
    'ρ' => '&rho;',
    'ς' => '&sigmaf;',
    'σ' => '&sigma;',
    'τ' => '&tau;',
    'υ' => '&upsilon;',
    'φ' => '&phi;',
    'χ' => '&chi;',
    'ψ' => '&psi;',
    'ω' => '&omega;',
    'ϑ' => '&thetasym;',
    'ϒ' => '&upsih;',
    'ϖ' => '&piv;',
    'Œ' => '&OElig;',
    'œ' => '&oelig;',
    'Š' => '&Scaron;',
    'š' => '&scaron;',
    'Ÿ' => '&Yuml;',
    'ƒ' => '&fnof;',
    'ˆ' => '&circ;',
    '˜' => '&tilde;',
    '–' => '&ndash;',
    '—' => '&mdash;',
    '‘' => '&lsquo;',
    '’' => '&rsquo;',
    '‚' => '&sbquo;',
    '“' => '&ldquo;',
    '”' => '&rdquo;',
    '„' => '&bdquo;',
    '†' => '&dagger;',
    '‡' => '&Dagger;',
    '•' => '&bull;',
    '…' => '&hellip;',
    '‰' => '&permil;',
    '′' => '&prime;',
    '″' => '&Prime;',
    '‹' => '&lsaquo;',
    '›' => '&rsaquo;',
    '‾' => '&oline;',
    '€' => '&euro;',
    '™' => '&trade;',
    '←' => '&larr;',
    '↑' => '&uarr;',
    '→' => '&rarr;',
    '↓' => '&darr;',
    '↔' => '&harr;',
    '↵' => '&crarr;',
    '⌈' => '&lceil;',
    '⌉' => '&rceil;',
    '⌊' => '&lfloor;',
    '⌋' => '&rfloor;',
    '◊' => '&loz;',
    '♠' => '&spades;',
    '♣' => '&clubs;',
    '♥' => '&hearts;',
    '♦' => '&diams;',
  }.freeze

  # Reverse lookup: entity text (including the leading "&" and trailing ";")
  # to the character it stands for.
  TABLE_FOR_UNESCAPE_HTML__ = TABLE_FOR_ESCAPE_HTML__.invert.freeze

  # Escape special characters in HTML, namely '&\"<>
  #   CGI::escapeHTML('Usage: foo "bar" <baz>')
  #      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
  #
  # The apostrophe is escaped as well (as &#39;) so the result is also safe
  # inside single-quoted attribute values.
  def CGI::escapeHTML(string)
    string.gsub(/['&"<>]/, TABLE_FOR_ESCAPE_HTML__)
  end

  # Unescape a string that has been HTML-escaped
  #   CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
  #      # => "Usage: foo \"bar\" <baz>"
  #
  # Handles named entities from TABLE_FOR_UNESCAPE_HTML__ as well as decimal
  # (&#65;) and hexadecimal (&#x41;) character references. A reference that
  # is unknown, or whose character cannot be represented in the encoding of
  # +string+, is left in the output untouched. The result keeps the encoding
  # of +string+.
  def CGI::unescapeHTML(string)
    enc = string.encoding
    wide = [Encoding::UTF_16BE, Encoding::UTF_16LE,
            Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
    if wide
      # A regexp literal cannot be matched against UTF-16/32 text, so the
      # pattern itself is transcoded to the string's encoding.
      pattern = Regexp.new('&([0-9A-Za-z]+|\#[0-9]+|\#x[0-9A-Fa-f]+);'.encode(enc))
      asciicompat = false
    else
      pattern = /&([0-9A-Za-z]+|\#[0-9]+|\#x[0-9A-Fa-f]+);/
      asciicompat = Encoding.compatible?(string, "a")
    end

    string.gsub(pattern) do
      # Capture both values now: the +when+ clauses below run their own
      # matches and overwrite $~ / $1.
      reference = $&
      body = wide ? $1.encode("US-ASCII") : $1
      decoded =
        case body
        when /\A#0*(\d+)\z/
          html_codepoint_char__($1.to_i, enc, wide, asciicompat)
        when /\A#x([0-9a-f]+)\z/i
          html_codepoint_char__($1.hex, enc, wide, asciicompat)
        else
          html_entity_char__(body, enc)
        end
      decoded || reference
    end
  end

  # Return the character for +codepoint+ as a string in +enc+, or nil when
  # +enc+ cannot hold it or +codepoint+ is not a valid code point.
  def CGI::html_codepoint_char__(codepoint, enc, wide, asciicompat)
    representable = wide ||
                    enc == Encoding::UTF_8 ||
                    (enc == Encoding::ISO_8859_1 && codepoint < 256) ||
                    (asciicompat && codepoint < 128)
    representable ? codepoint.chr(enc) : nil
  rescue RangeError
    nil
  end
  private_class_method :html_codepoint_char__

  # Return the character for the named entity +name+ (without "&" and ";")
  # as a string usable in +enc+, or nil when the name is unknown or the
  # character has no representation in +enc+.
  def CGI::html_entity_char__(name, enc)
    char = TABLE_FOR_UNESCAPE_HTML__["&#{name};"]
    return nil unless char
    return char if enc == Encoding::UTF_8
    char.encode(enc)
  rescue EncodingError
    # No converter/mapping: plain ASCII is still safe to splice into any
    # ASCII-compatible string.
    char.ascii_only? ? char : nil
  end
  private_class_method :html_entity_char__

  # Synonym for CGI::escapeHTML(str)
  def CGI::escape_html(str)
    escapeHTML(str)
  end

  # Synonym for CGI::unescapeHTML(str)
  def CGI::unescape_html(str)
    unescapeHTML(str)
  end

  # Escape only the tags of certain HTML elements in +string+.
  #
  # Takes an element or elements or array of elements.  Each element
  # is specified by the name of the element, without angle brackets.
  # This matches both the start and the end tag of that element.
  # The attribute list of the open tag will also be escaped (for
  # instance, the double-quotes surrounding attribute values).
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  #
  # The element names are interpolated into a regular expression as given.
  # With no elements, +string+ is returned unchanged.
  def CGI::escapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    return string if elements.empty?

    string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do |tag|
      CGI::escapeHTML(tag)
    end
  end

  # Undo escaping such as that done by CGI::escapeElement()
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  #
  # With no elements, +string+ is returned unchanged.
  def CGI::unescapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    return string if elements.empty?

    # The tags to restore are escaped in the input, so the pattern looks for
    # "&lt;" ... "&gt;" rather than literal angle brackets.
    string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) do |tag|
      CGI::unescapeHTML(tag)
    end
  end

  # Synonym for CGI::escapeElement(str, *elements)
  def CGI::escape_element(str, *elements)
    escapeElement(str, *elements)
  end

  # Synonym for CGI::unescapeElement(str, *elements)
  def CGI::unescape_element(str, *elements)
    unescapeElement(str, *elements)
  end

  # Abbreviated day-of-week names specified by RFC 822
  RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ].freeze

  # Abbreviated month names specified by RFC 822
  RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ].freeze

  # Format a +Time+ object as a String using the format specified by RFC 1123.
  #
  #   CGI::rfc1123_date(Time.now)
  #     # Sat, 01 Jan 2000 00:00:00 GMT
  def CGI::rfc1123_date(time)
    # getgm returns a fresh UTC copy, so +time+ is never modified and frozen
    # Time objects are accepted.
    t = time.getgm
    format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
           RFC822_DAYS[t.wday], t.day, RFC822_MONTHS[t.month - 1], t.year,
           t.hour, t.min, t.sec)
  end

  # Prettify (indent) an HTML string.
  #
  # +string+ is the HTML string to indent.  +shift+ is the indentation
  # unit to use; it defaults to two spaces.
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>")
  #     # <HTML>
  #     #   <BODY>
  #     #   </BODY>
  #     # </HTML>
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>", "\t")
  #     # <HTML>
  #     #         <BODY>
  #     #         </BODY>
  #     # </HTML>
  #
  # A closing tag that has no matching opening tag is left at its current
  # indentation.
  def CGI::pretty(string, shift = "  ")
    # Put every tag on a line of its own.
    lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
    end_pos = 0
    while (end_pos = lines.index(/^<\/(\w+)/, end_pos))
      element = $1.dup
      # (?!\w) keeps </B> from pairing with a longer name such as <BR>.
      start_pos = lines.rindex(/^\s*<#{element}(?!\w)/i, end_pos)
      if start_pos.nil?
        # No opener for this closing tag: step past it and keep scanning.
        end_pos += 1
        next
      end
      # Indent everything between the pair and prefix both tags with "__" so
      # that neither is matched again by the searches above.
      lines[start_pos...end_pos] =
        "__" + lines[start_pos...end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
    end
    # Drop the "__" markers, keeping the indentation in front of them.
    lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
  end
end
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment