mwaterfall · April 17, 2025 13:04 · bdkjones · May 27, 2024
diff --git a/StringExtensionHTML.swift b/StringExtensionHTML.swift
 // Very slightly adapted from http://stackoverflow.com/a/30141700/106244
 // 99.99% Credit to Martin R!

 // Mapping from XML/HTML character entity reference to character
 // From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
 /// Lookup table from a complete named character reference to the single
 /// character it denotes. Keys include the leading `&` and the trailing `;`
 /// and are case-sensitive (e.g. `&Alpha;` and `&alpha;` are distinct keys).
 private let characterEntities : [String: Character] = [
    
    // XML predefined entities:
    "&quot;"     : "\"",
    "&amp;"      : "&",
    "&apos;"     : "'",
    "&lt;"       : "<",
    "&gt;"       : ">",
    
    // HTML character entity references:
    // Latin-1 range (U+00A0 ... U+00FF):
    "&nbsp;"     : "\u{00A0}",
    "&iexcl;"    : "\u{00A1}",
    "&cent;"     : "\u{00A2}",
    "&pound;"    : "\u{00A3}",
    "&curren;"   : "\u{00A4}",
    "&yen;"      : "\u{00A5}",
    "&brvbar;"   : "\u{00A6}",
    "&sect;"     : "\u{00A7}",
    "&uml;"      : "\u{00A8}",
    "&copy;"     : "\u{00A9}",
    "&ordf;"     : "\u{00AA}",
    "&laquo;"    : "\u{00AB}",
    "&not;"      : "\u{00AC}",
    "&shy;"      : "\u{00AD}",
    "&reg;"      : "\u{00AE}",
    "&macr;"     : "\u{00AF}",
    "&deg;"      : "\u{00B0}",
    "&plusmn;"   : "\u{00B1}",
    "&sup2;"     : "\u{00B2}",
    "&sup3;"     : "\u{00B3}",
    "&acute;"    : "\u{00B4}",
    "&micro;"    : "\u{00B5}",
    "&para;"     : "\u{00B6}",
    "&middot;"   : "\u{00B7}",
    "&cedil;"    : "\u{00B8}",
    "&sup1;"     : "\u{00B9}",
    "&ordm;"     : "\u{00BA}",
    "&raquo;"    : "\u{00BB}",
    "&frac14;"   : "\u{00BC}",
    "&frac12;"   : "\u{00BD}",
    "&frac34;"   : "\u{00BE}",
    "&iquest;"   : "\u{00BF}",
    "&Agrave;"   : "\u{00C0}",
    "&Aacute;"   : "\u{00C1}",
    "&Acirc;"    : "\u{00C2}",
    "&Atilde;"   : "\u{00C3}",
    "&Auml;"     : "\u{00C4}",
    "&Aring;"    : "\u{00C5}",
    "&AElig;"    : "\u{00C6}",
    "&Ccedil;"   : "\u{00C7}",
    "&Egrave;"   : "\u{00C8}",
    "&Eacute;"   : "\u{00C9}",
    "&Ecirc;"    : "\u{00CA}",
    "&Euml;"     : "\u{00CB}",
    "&Igrave;"   : "\u{00CC}",
    "&Iacute;"   : "\u{00CD}",
    "&Icirc;"    : "\u{00CE}",
    "&Iuml;"     : "\u{00CF}",
    "&ETH;"      : "\u{00D0}",
    "&Ntilde;"   : "\u{00D1}",
    "&Ograve;"   : "\u{00D2}",
    "&Oacute;"   : "\u{00D3}",
    "&Ocirc;"    : "\u{00D4}",
    "&Otilde;"   : "\u{00D5}",
    "&Ouml;"     : "\u{00D6}",
    "&times;"    : "\u{00D7}",
    "&Oslash;"   : "\u{00D8}",
    "&Ugrave;"   : "\u{00D9}",
    "&Uacute;"   : "\u{00DA}",
    "&Ucirc;"    : "\u{00DB}",
    "&Uuml;"     : "\u{00DC}",
    "&Yacute;"   : "\u{00DD}",
    "&THORN;"    : "\u{00DE}",
    "&szlig;"    : "\u{00DF}",
    "&agrave;"   : "\u{00E0}",
    "&aacute;"   : "\u{00E1}",
    "&acirc;"    : "\u{00E2}",
    "&atilde;"   : "\u{00E3}",
    "&auml;"     : "\u{00E4}",
    "&aring;"    : "\u{00E5}",
    "&aelig;"    : "\u{00E6}",
    "&ccedil;"   : "\u{00E7}",
    "&egrave;"   : "\u{00E8}",
    "&eacute;"   : "\u{00E9}",
    "&ecirc;"    : "\u{00EA}",
    "&euml;"     : "\u{00EB}",
    "&igrave;"   : "\u{00EC}",
    "&iacute;"   : "\u{00ED}",
    "&icirc;"    : "\u{00EE}",
    "&iuml;"     : "\u{00EF}",
    "&eth;"      : "\u{00F0}",
    "&ntilde;"   : "\u{00F1}",
    "&ograve;"   : "\u{00F2}",
    "&oacute;"   : "\u{00F3}",
    "&ocirc;"    : "\u{00F4}",
    "&otilde;"   : "\u{00F5}",
    "&ouml;"     : "\u{00F6}",
    "&divide;"   : "\u{00F7}",
    "&oslash;"   : "\u{00F8}",
    "&ugrave;"   : "\u{00F9}",
    "&uacute;"   : "\u{00FA}",
    "&ucirc;"    : "\u{00FB}",
    "&uuml;"     : "\u{00FC}",
    "&yacute;"   : "\u{00FD}",
    "&thorn;"    : "\u{00FE}",
    "&yuml;"     : "\u{00FF}",
    // Extended Latin letters and spacing modifier marks (U+0152 ... U+02DC):
    "&OElig;"    : "\u{0152}",
    "&oelig;"    : "\u{0153}",
    "&Scaron;"   : "\u{0160}",
    "&scaron;"   : "\u{0161}",
    "&Yuml;"     : "\u{0178}",
    "&fnof;"     : "\u{0192}",
    "&circ;"     : "\u{02C6}",
    "&tilde;"    : "\u{02DC}",
    // Greek letters and symbols (U+0391 ... U+03D6):
    "&Alpha;"    : "\u{0391}",
    "&Beta;"     : "\u{0392}",
    "&Gamma;"    : "\u{0393}",
    "&Delta;"    : "\u{0394}",
    "&Epsilon;"  : "\u{0395}",
    "&Zeta;"     : "\u{0396}",
    "&Eta;"      : "\u{0397}",
    "&Theta;"    : "\u{0398}",
    "&Iota;"     : "\u{0399}",
    "&Kappa;"    : "\u{039A}",
    "&Lambda;"   : "\u{039B}",
    "&Mu;"       : "\u{039C}",
    "&Nu;"       : "\u{039D}",
    "&Xi;"       : "\u{039E}",
    "&Omicron;"  : "\u{039F}",
    "&Pi;"       : "\u{03A0}",
    "&Rho;"      : "\u{03A1}",
    "&Sigma;"    : "\u{03A3}",
    "&Tau;"      : "\u{03A4}",
    "&Upsilon;"  : "\u{03A5}",
    "&Phi;"      : "\u{03A6}",
    "&Chi;"      : "\u{03A7}",
    "&Psi;"      : "\u{03A8}",
    "&Omega;"    : "\u{03A9}",
    "&alpha;"    : "\u{03B1}",
    "&beta;"     : "\u{03B2}",
    "&gamma;"    : "\u{03B3}",
    "&delta;"    : "\u{03B4}",
    "&epsilon;"  : "\u{03B5}",
    "&zeta;"     : "\u{03B6}",
    "&eta;"      : "\u{03B7}",
    "&theta;"    : "\u{03B8}",
    "&iota;"     : "\u{03B9}",
    "&kappa;"    : "\u{03BA}",
    "&lambda;"   : "\u{03BB}",
    "&mu;"       : "\u{03BC}",
    "&nu;"       : "\u{03BD}",
    "&xi;"       : "\u{03BE}",
    "&omicron;"  : "\u{03BF}",
    "&pi;"       : "\u{03C0}",
    "&rho;"      : "\u{03C1}",
    "&sigmaf;"   : "\u{03C2}",
    "&sigma;"    : "\u{03C3}",
    "&tau;"      : "\u{03C4}",
    "&upsilon;"  : "\u{03C5}",
    "&phi;"      : "\u{03C6}",
    "&chi;"      : "\u{03C7}",
    "&psi;"      : "\u{03C8}",
    "&omega;"    : "\u{03C9}",
    "&thetasym;" : "\u{03D1}",
    "&upsih;"    : "\u{03D2}",
    "&piv;"      : "\u{03D6}",
    // Spaces, joiners, directional marks, dashes, quotes and other
    // punctuation (U+2002 ... U+2044):
    "&ensp;"     : "\u{2002}",
    "&emsp;"     : "\u{2003}",
    "&thinsp;"   : "\u{2009}",
    "&zwnj;"     : "\u{200C}",
    "&zwj;"      : "\u{200D}",
    "&lrm;"      : "\u{200E}",
    "&rlm;"      : "\u{200F}",
    "&ndash;"    : "\u{2013}",
    "&mdash;"    : "\u{2014}",
    "&lsquo;"    : "\u{2018}",
    "&rsquo;"    : "\u{2019}",
    "&sbquo;"    : "\u{201A}",
    "&ldquo;"    : "\u{201C}",
    "&rdquo;"    : "\u{201D}",
    "&bdquo;"    : "\u{201E}",
    "&dagger;"   : "\u{2020}",
    "&Dagger;"   : "\u{2021}",
    "&bull;"     : "\u{2022}",
    "&hellip;"   : "\u{2026}",
    "&permil;"   : "\u{2030}",
    "&prime;"    : "\u{2032}",
    "&Prime;"    : "\u{2033}",
    "&lsaquo;"   : "\u{2039}",
    "&rsaquo;"   : "\u{203A}",
    "&oline;"    : "\u{203E}",
    "&frasl;"    : "\u{2044}",
    // Currency and letter-like symbols (U+20AC ... U+2135):
    "&euro;"     : "\u{20AC}",
    "&image;"    : "\u{2111}",
    "&weierp;"   : "\u{2118}",
    "&real;"     : "\u{211C}",
    "&trade;"    : "\u{2122}",
    "&alefsym;"  : "\u{2135}",
    // Arrows (U+2190 ... U+21D4):
    "&larr;"     : "\u{2190}",
    "&uarr;"     : "\u{2191}",
    "&rarr;"     : "\u{2192}",
    "&darr;"     : "\u{2193}",
    "&harr;"     : "\u{2194}",
    "&crarr;"    : "\u{21B5}",
    "&lArr;"     : "\u{21D0}",
    "&uArr;"     : "\u{21D1}",
    "&rArr;"     : "\u{21D2}",
    "&dArr;"     : "\u{21D3}",
    "&hArr;"     : "\u{21D4}",
    // Mathematical operators (U+2200 ... U+22C5):
    "&forall;"   : "\u{2200}",
    "&part;"     : "\u{2202}",
    "&exist;"    : "\u{2203}",
    "&empty;"    : "\u{2205}",
    "&nabla;"    : "\u{2207}",
    "&isin;"     : "\u{2208}",
    "&notin;"    : "\u{2209}",
    "&ni;"       : "\u{220B}",
    "&prod;"     : "\u{220F}",
    "&sum;"      : "\u{2211}",
    "&minus;"    : "\u{2212}",
    "&lowast;"   : "\u{2217}",
    "&radic;"    : "\u{221A}",
    "&prop;"     : "\u{221D}",
    "&infin;"    : "\u{221E}",
    "&ang;"      : "\u{2220}",
    "&and;"      : "\u{2227}",
    "&or;"       : "\u{2228}",
    "&cap;"      : "\u{2229}",
    "&cup;"      : "\u{222A}",
    "&int;"      : "\u{222B}",
    "&there4;"   : "\u{2234}",
    "&sim;"      : "\u{223C}",
    "&cong;"     : "\u{2245}",
    "&asymp;"    : "\u{2248}",
    "&ne;"       : "\u{2260}",
    "&equiv;"    : "\u{2261}",
    "&le;"       : "\u{2264}",
    "&ge;"       : "\u{2265}",
    "&sub;"      : "\u{2282}",
    "&sup;"      : "\u{2283}",
    "&nsub;"     : "\u{2284}",
    "&sube;"     : "\u{2286}",
    "&supe;"     : "\u{2287}",
    "&oplus;"    : "\u{2295}",
    "&otimes;"   : "\u{2297}",
    "&perp;"     : "\u{22A5}",
    "&sdot;"     : "\u{22C5}",
    // Brackets, lozenge and card suits (U+2308 ... U+2666):
    "&lceil;"    : "\u{2308}",
    "&rceil;"    : "\u{2309}",
    "&lfloor;"   : "\u{230A}",
    "&rfloor;"   : "\u{230B}",
    "&lang;"     : "\u{2329}",
    "&rang;"     : "\u{232A}",
    "&loz;"      : "\u{25CA}",
    "&spades;"   : "\u{2660}",
    "&clubs;"    : "\u{2663}",
    "&hearts;"   : "\u{2665}",
    "&diams;"    : "\u{2666}",
    
 ]

 extension String {
    
    /// Returns a new string made by replacing every HTML character entity
    /// reference in the receiver with the character it denotes.
    /// Unrecognised or malformed references are left untouched.
    var stringByDecodingHTMLEntities: String {
        return decodeHTMLEntities().decodedString
    }

    /// Decodes the HTML character entity references in the receiver.
    ///
    /// Named references are looked up in `characterEntities`; numeric
    /// references (`&#64;`, `&#x20ac;`) are parsed as decimal or hexadecimal.
    /// A reference that cannot be decoded is copied to the output verbatim.
    ///
    /// :returns: A tuple of
    ///   - `decodedString`: the receiver with every decodable reference
    ///     replaced by its character, and
    ///   - `replacementOffsets`: one element per replacement, in order of
    ///     occurrence. `index` is the position in the *original* string just
    ///     past the reference's `;`, and `offset` is the change in length
    ///     caused by the replacement (`1 - length of the reference`, so it is
    ///     never positive). Callers can use this to shift attributes that
    ///     are attached to ranges of the original string.
    func decodeHTMLEntities() -> (decodedString: String, replacementOffsets: [(index: String.Index, offset: String.Index.Distance)]) {
        
        // ===== Utility functions =====
        
        // Index/length adjustments, one entry per successful replacement.
        var replacementOffsets: [(index: String.Index, offset: String.Index.Distance)] = []
        
        // Convert the number in the string to the corresponding
        // Unicode character, e.g.
        //    decodeNumeric("64", 10)   --> "@"
        //    decodeNumeric("20ac", 16) --> "€"
        // A trailing ';' is accepted and ignored. Returns `nil` when there
        // are no digits, when a character is not a digit of `base`, or when
        // the value is not a Unicode scalar value (a surrogate, or above
        // U+10FFFF). The digits are parsed by hand rather than with
        // `strtoul` so that signs, whitespace, "0x" prefixes and overflowing
        // values are rejected instead of trapping or silently yielding NUL.
        func decodeNumeric(string : String, base : Int32) -> Character? {
            let radix = UInt32(base)
            var code : UInt32 = 0
            var digitCount = 0
            
            for scalar in string.unicodeScalars {
                let value = scalar.value
                if value == 0x3B {
                    // ';' terminates the reference.
                    break
                }
                
                let digit : UInt32
                switch value {
                case 0x30...0x39:   // '0'...'9'
                    digit = value - 0x30
                case 0x41...0x46:   // 'A'...'F'
                    digit = value - 0x41 + 10
                case 0x61...0x66:   // 'a'...'f'
                    digit = value - 0x61 + 10
                default:
                    return nil
                }
                if digit >= radix {
                    return nil
                }
                
                // `code` never exceeds 0x10FFFF here, so this cannot overflow.
                code = code * radix + digit
                if code > 0x10FFFF {
                    return nil
                }
                digitCount += 1
            }
            
            if digitCount == 0 {
                return nil
            }
            if code >= 0xD800 && code <= 0xDFFF {
                // Surrogate code points are not Unicode scalar values.
                return nil
            }
            return Character(UnicodeScalar(code))
        }
        
        // Decode the HTML character entity to the corresponding
        // Unicode character, return `nil` for invalid input.
        //     decode("&#64;")    --> "@"
        //     decode("&#x20ac;") --> "€"
        //     decode("&lt;")     --> "<"
        //     decode("&foo;")    --> nil
        func decode(entity : String) -> Character? {
            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X"){
                return decodeNumeric(entity.substringFromIndex(advance(entity.startIndex, 3)), 16)
            } else if entity.hasPrefix("&#") {
                return decodeNumeric(entity.substringFromIndex(advance(entity.startIndex, 2)), 10)
            } else {
                return characterEntities[entity]
            }
        }
        
        // ===== Method starts here =====
        
        var result = ""
        var position = startIndex
        
        // Find the next '&' and copy the characters preceding it to `result`:
        while let ampRange = self.rangeOfString("&", range: position ..< endIndex) {
            result.extend(self[position ..< ampRange.startIndex])
            position = ampRange.startIndex
            
            // Find the next ';' after the '&':
            if let semiRange = self.rangeOfString(";", range: position ..< endIndex) {
                
                // A reference cannot contain '&', so the candidate starts at
                // the LAST '&' before the ';'. Any earlier '&' (as in
                // "AT&T &amp; Co") is literal text and is copied verbatim;
                // otherwise it would swallow the real reference behind it.
                if let lastAmpRange = self.rangeOfString("&", options: NSStringCompareOptions.BackwardsSearch, range: position ..< semiRange.startIndex) {
                    result.extend(self[position ..< lastAmpRange.startIndex])
                    position = lastAmpRange.startIndex
                }
                
                let entity = self[position ..< semiRange.endIndex]
                if let decoded = decode(entity) {
                    
                    // Replace by decoded character:
                    result.append(decoded)
                    
                    // Record offset
                    let offset = (index: semiRange.endIndex, offset: 1 - distance(position, semiRange.endIndex))
                    replacementOffsets.append(offset)
                    
                } else {
                    
                    // Invalid entity, copy verbatim:
                    result.extend(entity)
                    
                }
                position = semiRange.endIndex
            } else {
                // No matching ';'.
                break
            }
        }
        
        // Copy remaining characters to `result`:
        result.extend(self[position ..< endIndex])
        
        // Return results
        return (decodedString: result, replacementOffsets: replacementOffsets)
        
    }
    
 }
	// Very slightly adapted from http://stackoverflow.com/a/30141700/106244
	// 99.99% Credit to Martin R!

	// Mapping from XML/HTML character entity reference to character
	// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
	/// Lookup table from a complete named character reference to the single
	/// character it denotes. Keys include the leading `&` and the trailing `;`
	/// and are case-sensitive (e.g. `&Alpha;` and `&alpha;` are distinct keys).
	private let characterEntities : [String: Character] = [

	    // XML predefined entities:
	    "&quot;"     : "\"",
	    "&amp;"      : "&",
	    "&apos;"     : "'",
	    "&lt;"       : "<",
	    "&gt;"       : ">",

	    // HTML character entity references:
	    "&nbsp;"     : "\u{00A0}",
	    "&iexcl;"    : "\u{00A1}",
	    "&cent;"     : "\u{00A2}",
	    "&pound;"    : "\u{00A3}",
	    "&curren;"   : "\u{00A4}",
	    "&yen;"      : "\u{00A5}",
	    "&brvbar;"   : "\u{00A6}",
	    "&sect;"     : "\u{00A7}",
	    "&uml;"      : "\u{00A8}",
	    "&copy;"     : "\u{00A9}",
	    "&ordf;"     : "\u{00AA}",
	    "&laquo;"    : "\u{00AB}",
	    "&not;"      : "\u{00AC}",
	    "&shy;"      : "\u{00AD}",
	    "&reg;"      : "\u{00AE}",
	    "&macr;"     : "\u{00AF}",
	    "&deg;"      : "\u{00B0}",
	    "&plusmn;"   : "\u{00B1}",
	    "&sup2;"     : "\u{00B2}",
	    "&sup3;"     : "\u{00B3}",
	    "&acute;"    : "\u{00B4}",
	    "&micro;"    : "\u{00B5}",
	    "&para;"     : "\u{00B6}",
	    "&middot;"   : "\u{00B7}",
	    "&cedil;"    : "\u{00B8}",
	    "&sup1;"     : "\u{00B9}",
	    "&ordm;"     : "\u{00BA}",
	    "&raquo;"    : "\u{00BB}",
	    "&frac14;"   : "\u{00BC}",
	    "&frac12;"   : "\u{00BD}",
	    "&frac34;"   : "\u{00BE}",
	    "&iquest;"   : "\u{00BF}",
	    "&Agrave;"   : "\u{00C0}",
	    "&Aacute;"   : "\u{00C1}",
	    "&Acirc;"    : "\u{00C2}",
	    "&Atilde;"   : "\u{00C3}",
	    "&Auml;"     : "\u{00C4}",
	    "&Aring;"    : "\u{00C5}",
	    "&AElig;"    : "\u{00C6}",
	    "&Ccedil;"   : "\u{00C7}",
	    "&Egrave;"   : "\u{00C8}",
	    "&Eacute;"   : "\u{00C9}",
	    "&Ecirc;"    : "\u{00CA}",
	    "&Euml;"     : "\u{00CB}",
	    "&Igrave;"   : "\u{00CC}",
	    "&Iacute;"   : "\u{00CD}",
	    "&Icirc;"    : "\u{00CE}",
	    "&Iuml;"     : "\u{00CF}",
	    "&ETH;"      : "\u{00D0}",
	    "&Ntilde;"   : "\u{00D1}",
	    "&Ograve;"   : "\u{00D2}",
	    "&Oacute;"   : "\u{00D3}",
	    "&Ocirc;"    : "\u{00D4}",
	    "&Otilde;"   : "\u{00D5}",
	    "&Ouml;"     : "\u{00D6}",
	    "&times;"    : "\u{00D7}",
	    "&Oslash;"   : "\u{00D8}",
	    "&Ugrave;"   : "\u{00D9}",
	    "&Uacute;"   : "\u{00DA}",
	    "&Ucirc;"    : "\u{00DB}",
	    "&Uuml;"     : "\u{00DC}",
	    "&Yacute;"   : "\u{00DD}",
	    "&THORN;"    : "\u{00DE}",
	    "&szlig;"    : "\u{00DF}",
	    "&agrave;"   : "\u{00E0}",
	    "&aacute;"   : "\u{00E1}",
	    "&acirc;"    : "\u{00E2}",
	    "&atilde;"   : "\u{00E3}",
	    "&auml;"     : "\u{00E4}",
	    "&aring;"    : "\u{00E5}",
	    "&aelig;"    : "\u{00E6}",
	    "&ccedil;"   : "\u{00E7}",
	    "&egrave;"   : "\u{00E8}",
	    "&eacute;"   : "\u{00E9}",
	    "&ecirc;"    : "\u{00EA}",
	    "&euml;"     : "\u{00EB}",
	    "&igrave;"   : "\u{00EC}",
	    "&iacute;"   : "\u{00ED}",
	    "&icirc;"    : "\u{00EE}",
	    "&iuml;"     : "\u{00EF}",
	    "&eth;"      : "\u{00F0}",
	    "&ntilde;"   : "\u{00F1}",
	    "&ograve;"   : "\u{00F2}",
	    "&oacute;"   : "\u{00F3}",
	    "&ocirc;"    : "\u{00F4}",
	    "&otilde;"   : "\u{00F5}",
	    "&ouml;"     : "\u{00F6}",
	    "&divide;"   : "\u{00F7}",
	    "&oslash;"   : "\u{00F8}",
	    "&ugrave;"   : "\u{00F9}",
	    "&uacute;"   : "\u{00FA}",
	    "&ucirc;"    : "\u{00FB}",
	    "&uuml;"     : "\u{00FC}",
	    "&yacute;"   : "\u{00FD}",
	    "&thorn;"    : "\u{00FE}",
	    "&yuml;"     : "\u{00FF}",
	    "&OElig;"    : "\u{0152}",
	    "&oelig;"    : "\u{0153}",
	    "&Scaron;"   : "\u{0160}",
	    "&scaron;"   : "\u{0161}",
	    "&Yuml;"     : "\u{0178}",
	    "&fnof;"     : "\u{0192}",
	    "&circ;"     : "\u{02C6}",
	    "&tilde;"    : "\u{02DC}",
	    "&Alpha;"    : "\u{0391}",
	    "&Beta;"     : "\u{0392}",
	    "&Gamma;"    : "\u{0393}",
	    "&Delta;"    : "\u{0394}",
	    "&Epsilon;"  : "\u{0395}",
	    "&Zeta;"     : "\u{0396}",
	    "&Eta;"      : "\u{0397}",
	    "&Theta;"    : "\u{0398}",
	    "&Iota;"     : "\u{0399}",
	    "&Kappa;"    : "\u{039A}",
	    "&Lambda;"   : "\u{039B}",
	    "&Mu;"       : "\u{039C}",
	    "&Nu;"       : "\u{039D}",
	    "&Xi;"       : "\u{039E}",
	    "&Omicron;"  : "\u{039F}",
	    "&Pi;"       : "\u{03A0}",
	    "&Rho;"      : "\u{03A1}",
	    "&Sigma;"    : "\u{03A3}",
	    "&Tau;"      : "\u{03A4}",
	    "&Upsilon;"  : "\u{03A5}",
	    "&Phi;"      : "\u{03A6}",
	    "&Chi;"      : "\u{03A7}",
	    "&Psi;"      : "\u{03A8}",
	    "&Omega;"    : "\u{03A9}",
	    "&alpha;"    : "\u{03B1}",
	    "&beta;"     : "\u{03B2}",
	    "&gamma;"    : "\u{03B3}",
	    "&delta;"    : "\u{03B4}",
	    "&epsilon;"  : "\u{03B5}",
	    "&zeta;"     : "\u{03B6}",
	    "&eta;"      : "\u{03B7}",
	    "&theta;"    : "\u{03B8}",
	    "&iota;"     : "\u{03B9}",
	    "&kappa;"    : "\u{03BA}",
	    "&lambda;"   : "\u{03BB}",
	    "&mu;"       : "\u{03BC}",
	    "&nu;"       : "\u{03BD}",
	    "&xi;"       : "\u{03BE}",
	    "&omicron;"  : "\u{03BF}",
	    "&pi;"       : "\u{03C0}",
	    "&rho;"      : "\u{03C1}",
	    "&sigmaf;"   : "\u{03C2}",
	    "&sigma;"    : "\u{03C3}",
	    "&tau;"      : "\u{03C4}",
	    "&upsilon;"  : "\u{03C5}",
	    "&phi;"      : "\u{03C6}",
	    "&chi;"      : "\u{03C7}",
	    "&psi;"      : "\u{03C8}",
	    "&omega;"    : "\u{03C9}",
	    "&thetasym;" : "\u{03D1}",
	    "&upsih;"    : "\u{03D2}",
	    "&piv;"      : "\u{03D6}",
	    "&ensp;"     : "\u{2002}",
	    "&emsp;"     : "\u{2003}",
	    "&thinsp;"   : "\u{2009}",
	    "&zwnj;"     : "\u{200C}",
	    "&zwj;"      : "\u{200D}",
	    "&lrm;"      : "\u{200E}",
	    "&rlm;"      : "\u{200F}",
	    "&ndash;"    : "\u{2013}",
	    "&mdash;"    : "\u{2014}",
	    "&lsquo;"    : "\u{2018}",
	    "&rsquo;"    : "\u{2019}",
	    "&sbquo;"    : "\u{201A}",
	    "&ldquo;"    : "\u{201C}",
	    "&rdquo;"    : "\u{201D}",
	    "&bdquo;"    : "\u{201E}",
	    "&dagger;"   : "\u{2020}",
	    "&Dagger;"   : "\u{2021}",
	    "&bull;"     : "\u{2022}",
	    "&hellip;"   : "\u{2026}",
	    "&permil;"   : "\u{2030}",
	    "&prime;"    : "\u{2032}",
	    "&Prime;"    : "\u{2033}",
	    "&lsaquo;"   : "\u{2039}",
	    "&rsaquo;"   : "\u{203A}",
	    "&oline;"    : "\u{203E}",
	    "&frasl;"    : "\u{2044}",
	    "&euro;"     : "\u{20AC}",
	    "&image;"    : "\u{2111}",
	    "&weierp;"   : "\u{2118}",
	    "&real;"     : "\u{211C}",
	    "&trade;"    : "\u{2122}",
	    "&alefsym;"  : "\u{2135}",
	    "&larr;"     : "\u{2190}",
	    "&uarr;"     : "\u{2191}",
	    "&rarr;"     : "\u{2192}",
	    "&darr;"     : "\u{2193}",
	    "&harr;"     : "\u{2194}",
	    "&crarr;"    : "\u{21B5}",
	    "&lArr;"     : "\u{21D0}",
	    "&uArr;"     : "\u{21D1}",
	    "&rArr;"     : "\u{21D2}",
	    "&dArr;"     : "\u{21D3}",
	    "&hArr;"     : "\u{21D4}",
	    "&forall;"   : "\u{2200}",
	    "&part;"     : "\u{2202}",
	    "&exist;"    : "\u{2203}",
	    "&empty;"    : "\u{2205}",
	    "&nabla;"    : "\u{2207}",
	    "&isin;"     : "\u{2208}",
	    "&notin;"    : "\u{2209}",
	    "&ni;"       : "\u{220B}",
	    "&prod;"     : "\u{220F}",
	    "&sum;"      : "\u{2211}",
	    "&minus;"    : "\u{2212}",
	    "&lowast;"   : "\u{2217}",
	    "&radic;"    : "\u{221A}",
	    "&prop;"     : "\u{221D}",
	    "&infin;"    : "\u{221E}",
	    "&ang;"      : "\u{2220}",
	    "&and;"      : "\u{2227}",
	    "&or;"       : "\u{2228}",
	    "&cap;"      : "\u{2229}",
	    "&cup;"      : "\u{222A}",
	    "&int;"      : "\u{222B}",
	    "&there4;"   : "\u{2234}",
	    "&sim;"      : "\u{223C}",
	    "&cong;"     : "\u{2245}",
	    "&asymp;"    : "\u{2248}",
	    "&ne;"       : "\u{2260}",
	    "&equiv;"    : "\u{2261}",
	    "&le;"       : "\u{2264}",
	    "&ge;"       : "\u{2265}",
	    "&sub;"      : "\u{2282}",
	    "&sup;"      : "\u{2283}",
	    "&nsub;"     : "\u{2284}",
	    "&sube;"     : "\u{2286}",
	    "&supe;"     : "\u{2287}",
	    "&oplus;"    : "\u{2295}",
	    "&otimes;"   : "\u{2297}",
	    "&perp;"     : "\u{22A5}",
	    "&sdot;"     : "\u{22C5}",
	    "&lceil;"    : "\u{2308}",
	    "&rceil;"    : "\u{2309}",
	    "&lfloor;"   : "\u{230A}",
	    "&rfloor;"   : "\u{230B}",
	    "&lang;"     : "\u{2329}",
	    "&rang;"     : "\u{232A}",
	    "&loz;"      : "\u{25CA}",
	    "&spades;"   : "\u{2660}",
	    "&clubs;"    : "\u{2663}",
	    "&hearts;"   : "\u{2665}",
	    "&diams;"    : "\u{2666}",

	]

	extension String {

	    /// Returns a new string made by replacing every HTML character entity
	    /// reference in the receiver with the character it denotes.
	    /// Unrecognised or malformed references are left untouched.
	    var stringByDecodingHTMLEntities: String {
	        return decodeHTMLEntities().decodedString
	    }

	    /// Decodes the HTML character entity references in the receiver.
	    ///
	    /// Named references are looked up in `characterEntities`; numeric
	    /// references (`&#64;`, `&#x20ac;`) are parsed as decimal or hexadecimal.
	    /// A reference that cannot be decoded is copied to the output verbatim.
	    ///
	    /// :returns: A tuple of
	    ///   - `decodedString`: the receiver with every decodable reference
	    ///     replaced by its character, and
	    ///   - `replacementOffsets`: one element per replacement, in order of
	    ///     occurrence. `index` is the position in the *original* string just
	    ///     past the reference's `;`, and `offset` is the change in length
	    ///     caused by the replacement (`1 - length of the reference`, so it is
	    ///     never positive). Callers can use this to shift attributes that
	    ///     are attached to ranges of the original string.
	    func decodeHTMLEntities() -> (decodedString: String, replacementOffsets: [(index: String.Index, offset: String.Index.Distance)]) {

	        // ===== Utility functions =====

	        // Index/length adjustments, one entry per successful replacement.
	        var replacementOffsets: [(index: String.Index, offset: String.Index.Distance)] = []

	        // Convert the number in the string to the corresponding
	        // Unicode character, e.g.
	        //    decodeNumeric("64", 10)   --> "@"
	        //    decodeNumeric("20ac", 16) --> "€"
	        // A trailing ';' is accepted and ignored. Returns `nil` when there
	        // are no digits, when a character is not a digit of `base`, or when
	        // the value is not a Unicode scalar value (a surrogate, or above
	        // U+10FFFF). The digits are parsed by hand rather than with
	        // `strtoul` so that signs, whitespace, "0x" prefixes and overflowing
	        // values are rejected instead of trapping or silently yielding NUL.
	        func decodeNumeric(string : String, base : Int32) -> Character? {
	            let radix = UInt32(base)
	            var code : UInt32 = 0
	            var digitCount = 0

	            for scalar in string.unicodeScalars {
	                let value = scalar.value
	                if value == 0x3B {
	                    // ';' terminates the reference.
	                    break
	                }

	                let digit : UInt32
	                switch value {
	                case 0x30...0x39:   // '0'...'9'
	                    digit = value - 0x30
	                case 0x41...0x46:   // 'A'...'F'
	                    digit = value - 0x41 + 10
	                case 0x61...0x66:   // 'a'...'f'
	                    digit = value - 0x61 + 10
	                default:
	                    return nil
	                }
	                if digit >= radix {
	                    return nil
	                }

	                // `code` never exceeds 0x10FFFF here, so this cannot overflow.
	                code = code * radix + digit
	                if code > 0x10FFFF {
	                    return nil
	                }
	                digitCount += 1
	            }

	            if digitCount == 0 {
	                return nil
	            }
	            if code >= 0xD800 && code <= 0xDFFF {
	                // Surrogate code points are not Unicode scalar values.
	                return nil
	            }
	            return Character(UnicodeScalar(code))
	        }

	        // Decode the HTML character entity to the corresponding
	        // Unicode character, return `nil` for invalid input.
	        //     decode("&#64;")    --> "@"
	        //     decode("&#x20ac;") --> "€"
	        //     decode("&lt;")     --> "<"
	        //     decode("&foo;")    --> nil
	        func decode(entity : String) -> Character? {
	            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X") {
	                return decodeNumeric(entity.substringFromIndex(advance(entity.startIndex, 3)), 16)
	            } else if entity.hasPrefix("&#") {
	                return decodeNumeric(entity.substringFromIndex(advance(entity.startIndex, 2)), 10)
	            } else {
	                return characterEntities[entity]
	            }
	        }

	        // ===== Method starts here =====

	        var result = ""
	        var position = startIndex

	        // Find the next '&' and copy the characters preceding it to `result`:
	        while let ampRange = self.rangeOfString("&", range: position ..< endIndex) {
	            result.extend(self[position ..< ampRange.startIndex])
	            position = ampRange.startIndex

	            // Find the next ';' after the '&':
	            if let semiRange = self.rangeOfString(";", range: position ..< endIndex) {

	                // A reference cannot contain '&', so the candidate starts at
	                // the LAST '&' before the ';'. Any earlier '&' (as in
	                // "AT&T &amp; Co") is literal text and is copied verbatim;
	                // otherwise it would swallow the real reference behind it.
	                if let lastAmpRange = self.rangeOfString("&", options: NSStringCompareOptions.BackwardsSearch, range: position ..< semiRange.startIndex) {
	                    result.extend(self[position ..< lastAmpRange.startIndex])
	                    position = lastAmpRange.startIndex
	                }

	                let entity = self[position ..< semiRange.endIndex]
	                if let decoded = decode(entity) {

	                    // Replace by decoded character:
	                    result.append(decoded)

	                    // Record offset
	                    let offset = (index: semiRange.endIndex, offset: 1 - distance(position, semiRange.endIndex))
	                    replacementOffsets.append(offset)

	                } else {

	                    // Invalid entity, copy verbatim:
	                    result.extend(entity)

	                }
	                position = semiRange.endIndex
	            } else {
	                // No matching ';'.
	                break
	            }
	        }

	        // Copy remaining characters to `result`:
	        result.extend(self[position ..< endIndex])

	        // Return results
	        return (decodedString: result, replacementOffsets: replacementOffsets)

	    }

	}