Last active
August 29, 2015 14:05
-
-
Save ilyannn/76934ffc80738685c22c to your computer and use it in GitHub Desktop.
UTF8 conversion routines test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env xcrun swift -O3 | |
// | |
// utf.swift | |
// | |
// | |
// Created by Ilya Nikokoshev on 8/22/14. | |
// | |
// | |
import Foundation | |
// Code point fed to the encoders in this file; they read this global
// directly instead of taking the value as a parameter.
var value: UInt32 = 0x1FFFF
// The original.
// Appends the UTF-8 bytes for the global `value` to `buffer`. All arithmetic
// is done in UInt32 and each finished byte is converted to UInt8 on append.
//
// Anything not matched by the first three cases -- which includes the
// surrogate range 0xD800...0xDFFF as well as everything above 0xFFFF --
// is written using the four-byte layout.
func utf8_1(inout buffer: [UInt8]) {
    switch value {
    case 0...0x007F:
        // One byte, stored unchanged.
        buffer.append(UInt8(value))

    case 0x0080...0x07FF:
        // Two bytes: 110xxxxx 10xxxxxx
        let lead = value / 64 + 192
        let last = value % 64 + 128
        buffer.append(UInt8(lead))
        buffer.append(UInt8(last))

    case 0x0800...0xD7FF, 0xE000...0xFFFF:
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        let lead = value / 4096 + 224
        let middle = (value % 4096) / 64 + 128
        let last = value % 64 + 128
        buffer.append(UInt8(lead))
        buffer.append(UInt8(middle))
        buffer.append(UInt8(last))

    default:
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        let lead = value / 262144 + 240
        let second = (value % 262144) / 4_096 + 128
        let third = (value % 4096) / 64 + 128
        let last = value % 64 + 128
        buffer.append(UInt8(lead))
        buffer.append(UInt8(second))
        buffer.append(UInt8(third))
        buffer.append(UInt8(last))
    }
}
// Universal for any 32-bit number
// Appends the encoding of the global `value` to `buffer`. Values below 0x80
// become a single byte; larger values are split into 6-bit groups, each
// stored in a 10xxxxxx byte, preceded by one leading byte.
func encode8(inout buffer:[UInt8]) {
    var remaining = value
    // Bytes are inserted at the front, so `encoded` is always in output order.
    var encoded: [UInt8] = []

    if remaining < 0x80 {
        // The ASCII set takes one byte.
        encoded.append(UInt8(remaining))
    } else {
        // `threshold` is the first value that no longer fits next to the
        // leading byte's marker bits; it halves for every continuation byte.
        var threshold: UInt32 = 0x40

        while remaining >= threshold {
            // Low six bits go into a continuation byte of the form 10xxxxxx.
            encoded.insert(UInt8((remaining % 0x40) | 0x80), atIndex: 0)
            remaining >>= 6
            threshold >>= 1
        }

        // Leading byte: the marker carries one more high 1-bit per
        // continuation byte (0xC0 after one, 0xE0 after two, 0xF0 after three).
        let marker: UInt32 = 0x100 - 2 * threshold
        encoded.insert(UInt8((remaining % 0x40) | marker), atIndex: 0)
    }

    buffer.extend(encoded)
}
// Same encoding as the arithmetic in `utf8_1`, but every byte goes through a
// small closure that narrows a UInt32 to UInt8 and appends it to `buffer`.
// Reads the global `value`.
func utf8_$(inout buffer: [UInt8]) {
    // Adds a number that fits into UInt8 into buffer.
    let emit: UInt32 -> () = { buffer.append(UInt8($0)) }

    // One byte.
    if value < 0x0080 {
        emit(value)
        return
    }

    // Two bytes.
    if value < 0x0800 {
        emit(value / 64 + 192)
        emit(value % 64 + 128)
        return
    }

    // Three bytes: 0x0800...0xFFFF with the 0xD800...0xDFFF gap left out.
    let belowGap = value < 0xD800
    let aboveGap = 0xE000 <= value && value < 0x10000
    if belowGap || aboveGap {
        emit(value / 4096 + 224)
        emit((value % 4096) / 64 + 128)
        emit(value % 64 + 128)
        return
    }

    // Everything else (including the gap itself) uses four bytes.
    emit(value / 262144 + 240)
    emit((value % 262144) / 4_096 + 128)
    emit((value % 4096) / 64 + 128)
    emit(value % 64 + 128)
}
// "Small cast" variant: each quotient/remainder of the global `value` is
// converted to UInt8 first, and the marker bits are then added in UInt8
// with the overflow operator `&+`.
func utf8_2(inout buffer: [UInt8]) {
    switch value {
    case 0...0x007F:
        // One byte, stored unchanged.
        buffer.append(UInt8(value))

    case 0x0080...0x07FF:
        // Two bytes.
        let lead = UInt8(value &/ 64) &+ 192
        let last = UInt8(value &% 64) &+ 128
        buffer.append(lead)
        buffer.append(last)

    case 0x0800...0xD7FF, 0xE000...0xFFFF:
        // Three bytes.
        let lead = UInt8(value &/ 4096) &+ 224
        let middle = UInt8((value &% 4096) &/ 64) &+ 128
        let last = UInt8(value &% 64) &+ 128
        buffer.append(lead)
        buffer.append(middle)
        buffer.append(last)

    default:
        // Four bytes; also reached for 0xD800...0xDFFF.
        let lead = UInt8(value &/ 262144) &+ 240
        let second = UInt8((value &% 262144) &/ 4096) &+ 128
        let third = UInt8((value &% 4096) &/ 64) &+ 128
        let last = UInt8(value &% 64) &+ 128
        buffer.append(lead)
        buffer.append(second)
        buffer.append(third)
        buffer.append(last)
    }
}
// The benchmark input as a UnicodeScalar, built once from the global `value`.
var scalar = UnicodeScalar(value)

// Standard-library path: turns `scalar` into a String and appends the bytes
// of its UTF-8 view to `buffer`.
func utf8std(inout buffer: [UInt8]) {
    for byte in String(scalar).utf8 {
        buffer.append(byte)
    }
}
// Encoding of `scalar` produced once through the standard-library routine.
var buffer_std = [UInt8]()
utf8std(&buffer_std)

// Additional byte buffers; not referenced by any other code visible here.
var buffer_test1 = [UInt8]()
var buffer_test2 = [UInt8]()
// Calls `f` 1000 times in a row and returns the wall-clock time, in seconds,
// that elapsed since just before the first call.
func time(f:()->()) -> NSTimeInterval {
    let began = NSDate()
    for _ in 0..<1000 {
        f()
    }
    // `timeIntervalSinceNow` is negative for a date in the past, hence the sign flip.
    return -began.timeIntervalSinceNow
}
// Times `f` via `time` against a single buffer that starts empty and is
// reused (and therefore keeps growing) across all repetitions, then prints
// `prefix` followed by the elapsed seconds.
func test(prefix:String, f:(inout [UInt8]) -> ()) {
    var scratch = [UInt8]()
    let elapsed = time { f(&scratch) }
    println("\(prefix) \(elapsed)")
}
// Label / encoder pairs, timed and reported in this order.
let benchmarks: [(String, (inout [UInt8]) -> ())] = [
    ("original ", utf8_1),
    ("small cast", utf8_2),
    ("with $ ", utf8_$),
    ("universal ", encode8),
    ("standard ", utf8std),
]
for (label, encoder) in benchmarks {
    test(label, encoder)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment