Last active
February 10, 2017 00:30
-
-
Save norio-nomura/2a79822004e7c89228300cf19595ca99 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extension String {
    /// A copy of the string with HTML escape sequences replaced by the
    /// characters they denote.
    ///
    /// The string is processed as an array of UTF-16 code units, scanning
    /// backwards from the end. Three forms are recognised:
    ///
    /// - hexadecimal references such as `&#xA3;` / `&#XA3;`,
    /// - decimal references such as `&#123;`,
    /// - named references looked up in `tableMap` (keyed by name length).
    ///
    /// Numeric references above U+FFFF (up to U+10FFFF) are emitted as a
    /// UTF-16 surrogate pair. Sequences that cannot be resolved are left
    /// untouched.
    public var unescapeHTMLUsingArrayOfUnichar: String {
        // Copy the UTF-16 view directly instead of going through the
        // range-less `NSString.getCharacters(_:)`.
        var buffer: [unichar] = Array(utf16)

        let ampersand = unichar(UInt8(ascii: "&"))
        let semicolon = unichar(UInt8(ascii: ";"))
        let sharp = unichar(UInt8(ascii: "#"))
        let hexPrefixes = [unichar(UInt8(ascii: "X")), unichar(UInt8(ascii: "x"))]

        // A sequence, counted from "&" through ";", is at least 4 code units
        // long ("&lt;") and at most 10 ("&thetasym;", "&#x10FFFF;").
        let minLength = 4
        let maxLength = 10

        // Converts a code point into the UTF-16 code units that replace the
        // escape sequence, or `nil` when the value is beyond U+10FFFF.
        func utf16Units(forCodePoint value: UInt32) -> [unichar]? {
            if value <= 0xFFFF {
                return [unichar(value)]
            }
            guard value <= 0x10FFFF else { return nil }
            let offset = value - 0x10000
            return [unichar(0xD800 + (offset >> 10)), unichar(0xDC00 + (offset & 0x3FF))]
        }

        // `end` is the exclusive upper bound of the region in which the
        // current sequence may live: initially the end of the buffer, then
        // the position of the ampersand handled in the previous step.
        var end = buffer.endIndex
        var cursor = buffer.endIndex
        while cursor > buffer.startIndex {
            cursor -= 1
            guard buffer[cursor] == ampersand else { continue }
            let begin = cursor
            defer { end = begin }

            // Look for the first semicolon after the ampersand. The search
            // never needs to go past `maxLength` code units, nor past `end`.
            let searchEnd = min(end, begin + maxLength)
            var semicolonIndex = begin + 1
            while semicolonIndex < searchEnd && buffer[semicolonIndex] != semicolon {
                semicolonIndex += 1
            }
            // No semicolon in range, or the sequence is too short: not a sequence.
            guard semicolonIndex < searchEnd, semicolonIndex - begin + 1 >= minLength else { continue }

            let replacement: [unichar]?
            if buffer[begin + 1] == sharp {
                let digits: ArraySlice<unichar>
                let radix: Int
                if hexPrefixes.contains(buffer[begin + 2]) {
                    // Hex escape sequences, e.g. "&#xA3;"
                    digits = buffer[(begin + 3)..<semicolonIndex]
                    radix = 16
                } else {
                    // Decimal sequences, e.g. "&#123;"
                    digits = buffer[(begin + 2)..<semicolonIndex]
                    radix = 10
                }
                let codePoint = UInt32(String(utf16Storage: digits), radix: radix)
                replacement = codePoint.flatMap { utf16Units(forCodePoint: $0) }
            } else {
                // "Standard" (named) sequences
                let nameRange = (begin + 1)..<semicolonIndex
                let name = String(utf16Storage: buffer[nameRange])
                let character = tableMap[nameRange.count]?[name]
                replacement = character.map { [$0] }
            }

            if let units = replacement {
                // Everything at or after `begin` has already been handled, so
                // shrinking the buffer here does not disturb the backward scan.
                buffer.replaceSubrange(begin...semicolonIndex, with: units)
            }
        }
        return String(utf16Storage: buffer)
    }

    /// Creates a string from contiguous storage holding UTF-16 code units.
    ///
    /// Empty storage may expose a `nil` base address; that case yields an
    /// empty string instead of trapping on a force unwrap.
    private init<T>(utf16Storage: T) where T: ContiguousStorage, T.Iterator.Element == unichar {
        self = utf16Storage.withUnsafeBufferPointer { units -> String in
            guard let base = units.baseAddress else { return "" }
            return String(utf16CodeUnits: base, count: units.count)
        }
    }
}
/// A sequence that can hand out a temporary `UnsafeBufferPointer` over its
/// elements.
///
/// The requirement mirrors the `withUnsafeBufferPointer` method the standard
/// array types already provide, so generic code can be written once for
/// `Array`, `ArraySlice` and `ContiguousArray`.
private protocol ContiguousStorage: Sequence {
    /// Calls `work` with a buffer pointer over the elements and returns its
    /// result, rethrowing any error `work` throws.
    func withUnsafeBufferPointer<Output>(
        _ work: (UnsafeBufferPointer<Iterator.Element>) throws -> Output
    ) rethrows -> Output
}

// The standard array types satisfy the requirement with their own
// `withUnsafeBufferPointer`, so the conformances are empty.
extension ContiguousArray: ContiguousStorage {}
extension ArraySlice: ContiguousStorage {}
extension Array: ContiguousStorage {}
/// Builds a lookup from escape name to UTF-16 code unit out of `array`.
///
/// - Parameter array: The escape entries to index; each entry's `name`
///   becomes a key. If a name occurs more than once, the last entry wins.
/// - Returns: A dictionary mapping each name to the first UTF-16 code unit
///   of the entry's `character`.
private func escapeMap(from array: [HTMLEscapeMap]) -> [String: unichar] {
    var lookup = [String: unichar](minimumCapacity: array.count)
    for entry in array {
        // Only the first UTF-16 code unit is kept; the force unwrap traps
        // if `character` has no UTF-16 code units at all.
        lookup[entry.name] = entry.character.utf16.first!
    }
    return lookup
}
/// Escape-name lookups grouped by the length of the escape name (2 through 8),
/// so a candidate name is only compared against names of the same length.
private let tableMap: [Int: [String: unichar]] = {
    var byNameLength = [Int: [String: unichar]]()
    byNameLength[2] = escapeMap(from: unicodeHTMLEscapeMapNameLength_2)
    byNameLength[3] = escapeMap(from: unicodeHTMLEscapeMapNameLength_3)
    byNameLength[4] = escapeMap(from: unicodeHTMLEscapeMapNameLength_4)
    byNameLength[5] = escapeMap(from: unicodeHTMLEscapeMapNameLength_5)
    byNameLength[6] = escapeMap(from: unicodeHTMLEscapeMapNameLength_6)
    byNameLength[7] = escapeMap(from: unicodeHTMLEscapeMapNameLength_7)
    byNameLength[8] = escapeMap(from: unicodeHTMLEscapeMapNameLength_8)
    return byNameLength
}()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment