Last active
November 27, 2019 15:56
-
-
Save zonble/c6810d3a625d34360a22ba6eb0e9311b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
/// Errors raised while decoding a UTF-8 byte sequence.
enum ConverterError: Error {
    /// A lead byte announces more bytes than remain in the input.
    case outOfBounds
}

/// Converts UTF-8 encoded bytes into UTF-16 code units.
///
/// Decoding is lenient rather than validating:
/// - Bytes below `0xC0` (ASCII and stray continuation bytes) are copied to the
///   output unchanged.
/// - A sequence that is cut off by the end of the input stops the conversion;
///   everything decoded up to that point is returned.
/// - Values above U+FFFF are emitted as a UTF-16 surrogate pair.
/// - Legacy 5- and 6-byte forms, and any value above U+10FFFF, are consumed
///   and replaced with U+FFFD because UTF-16 cannot represent them.
///
/// - Parameter data: The UTF-8 bytes to convert. Slices of a larger `Data`
///   value are accepted.
/// - Returns: The decoded UTF-16 code units.
func UTF8ToUTF16(_ data: Data) -> [unichar] {
    // Copy into a zero-based array: a `Data` slice keeps the indices of its
    // parent, so subscripting it from 0 would trap.
    let bytes = [UInt8](data)

    /// Returns the total length of the sequence introduced by `lead`, or `nil`
    /// when `lead` does not start a multi-byte sequence.
    func sequenceLength(forLead lead: UInt8) -> Int? {
        switch lead {
        case 0xC0...0xDF: return 2
        case 0xE0...0xEF: return 3
        case 0xF0...0xF7: return 4
        case 0xF8...0xFB: return 5
        case 0xFC...0xFF: return 6
        default: return nil
        }
    }

    /// Decodes the `length`-byte sequence starting at `index` into a scalar value.
    /// - Throws: `ConverterError.outOfBounds` if the sequence runs past the input.
    func decode(index: Int, length: Int) throws -> UInt32 {
        guard index + length <= bytes.count else {
            throw ConverterError.outOfBounds
        }
        // A lead byte of an n-byte sequence has n one-bits and a zero-bit on
        // top, leaving 7 - n payload bits at the bottom.
        let leadMask: UInt8 = 0xFF >> (length + 1)
        var value = UInt32(bytes[index] & leadMask)
        for offset in 1..<length {
            // Every continuation byte contributes its low six bits.
            value = (value << 6) | UInt32(bytes[index + offset] & 0x3F)
        }
        return value
    }

    var units: [unichar] = []
    // UTF-16 never needs more code units than UTF-8 needs bytes.
    units.reserveCapacity(bytes.count)

    var index = 0
    decoding: while index < bytes.count {
        let byte = bytes[index]
        guard let length = sequenceLength(forLead: byte) else {
            units.append(unichar(byte))
            index += 1
            continue
        }

        let value: UInt32
        do {
            value = try decode(index: index, length: length)
        } catch {
            // Truncated trailing sequence: keep what has been decoded so far.
            break decoding
        }

        switch value {
        case 0...0xFFFF:
            units.append(unichar(value))
        case 0x10000...0x10FFFF:
            // Split the 20-bit offset into a high and a low surrogate.
            let offset = value - 0x10000
            units.append(unichar(0xD800 + (offset >> 10)))
            units.append(unichar(0xDC00 + (offset & 0x3FF)))
        default:
            units.append(0xFFFD)
        }
        index += length
    }
    return units
}
// Demo: print the UTF-16 code units reported by the standard library, then
// the ones produced by UTF8ToUTF16 for the same text, so they can be compared.
let str = "中文 Héllö World ⇾ ⇾"

// Reference values from String's UTF-16 view, widened to Int for printing.
let x = str.utf16.map { Int($0) }
print(x)

// `Data(str.utf8)` always succeeds, so no force unwrap is needed to obtain
// the UTF-8 bytes.
let result = UTF8ToUTF16(Data(str.utf8))
print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment