Skip to content

Instantly share code, notes, and snippets.

@zonble
Last active November 27, 2019 15:56
Show Gist options
  • Save zonble/c6810d3a625d34360a22ba6eb0e9311b to your computer and use it in GitHub Desktop.
Save zonble/c6810d3a625d34360a22ba6eb0e9311b to your computer and use it in GitHub Desktop.
import Foundation
/// Failures that can be reported while converting between text encodings.
enum ConverterError: Error {
    /// A multi-byte sequence would need bytes past the end of the input.
    case outOfBounds
}
/// Decodes UTF-8 bytes into UTF-16 code units.
///
/// Well-formed input produces the same code units as `String.utf16`, including
/// surrogate pairs for scalars above U+FFFF. Any byte that does not start a
/// well-formed sequence (stray continuation byte, invalid lead byte, truncated,
/// overlong, surrogate or out-of-range encoding) is replaced by U+FFFD and
/// decoding resumes at the following byte, so no input is silently dropped.
///
/// - Parameter data: The UTF-8 encoded bytes. Slices of a larger `Data` are accepted.
/// - Returns: The decoded UTF-16 code units, in input order.
func UTF8ToUTF16(_ data: Data) -> [unichar] {
    // A `Data` slice keeps its parent's indices, so copy into a zero-based
    // array before doing any index arithmetic.
    let bytes = [UInt8](data)
    let replacementCharacter: unichar = 0xFFFD

    /// Returns the total sequence length announced by a multi-byte lead byte,
    /// or `nil` when `lead` cannot start a well-formed multi-byte sequence.
    func sequenceLength(forLeadByte lead: UInt8) -> Int? {
        switch lead {
        case 0xC2...0xDF: return 2
        case 0xE0...0xEF: return 3
        case 0xF0...0xF4: return 4
        // Continuation bytes, the always-overlong leads 0xC0/0xC1, and
        // 0xF5...0xFF (beyond U+10FFFF or the obsolete 5/6-byte forms).
        default: return nil
        }
    }

    /// Decodes the `length`-byte sequence starting at `start` into a Unicode
    /// scalar value, or returns `nil` when the sequence is malformed.
    func decodeScalar(at start: Int, length: Int) -> UInt32? {
        guard start + length <= bytes.count else { return nil }

        // A lead byte of an n-byte sequence carries 7 - n payload bits:
        // 110xxxxx, 1110xxxx, 11110xxx.
        let leadMask = UInt8(0xFF) >> (length + 1)
        var scalar = UInt32(bytes[start] & leadMask)

        for offset in 1..<length {
            let byte = bytes[start + offset]
            // Every trailing byte must look like 10xxxxxx.
            guard byte & 0xC0 == 0x80 else { return nil }
            scalar = (scalar << 6) | UInt32(byte & 0x3F)
        }

        // Reject overlong encodings, UTF-16 surrogates and values past U+10FFFF.
        let smallestScalar: UInt32 = length == 2 ? 0x80 : (length == 3 ? 0x800 : 0x1_0000)
        let isSurrogate = (0xD800...0xDFFF).contains(scalar)
        guard scalar >= smallestScalar, scalar <= 0x10_FFFF, !isSurrogate else { return nil }
        return scalar
    }

    var units: [unichar] = []
    units.reserveCapacity(bytes.count)

    var index = 0
    while index < bytes.count {
        let lead = bytes[index]

        // ASCII maps straight to a single code unit.
        if lead < 0x80 {
            units.append(unichar(lead))
            index += 1
            continue
        }

        guard let length = sequenceLength(forLeadByte: lead),
              let scalar = decodeScalar(at: index, length: length) else {
            units.append(replacementCharacter)
            index += 1
            continue
        }

        if scalar > 0xFFFF {
            // Supplementary planes need a high/low surrogate pair in UTF-16.
            let offset = scalar - 0x1_0000
            units.append(unichar(0xD800 + (offset >> 10)))
            units.append(unichar(0xDC00 + (offset & 0x3FF)))
        } else {
            units.append(unichar(scalar))
        }
        index += length
    }
    return units
}
// Demo: print the standard library's UTF-16 code units for a sample string,
// then the code units UTF8ToUTF16 produces from the same string's UTF-8 bytes.
let str = "中文 Héllö World ⇾ ⇾"
let x = str.utf16.map { Int($0) }
print(x)
// `Data(str.utf8)` cannot fail, so no force-unwrap is needed (unlike `data(using:)`).
let result = UTF8ToUTF16(Data(str.utf8))
print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment