Last active
December 5, 2016 12:15
-
-
Save cevek/9842a8cd440af7641c5d2f4b20b061ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// UTF-8 parse performance: 1000 * 140kb ~ 140mb
//   UTF8ToUTF16:     520ms
//   Buffer.toString: 510ms
// Convert UTF-16 to UTF-8 performance: 1000 * 125k length
//   UTF16ToUTF8:     420ms
//   Buffer.toString: 1000ms
/**
 * Decodes UTF-8 bytes into UTF-16 code units.
 *
 * Well-formed input is decoded exactly; code points above U+FFFF are emitted
 * as a surrogate pair. Every byte that cannot start or complete a well-formed
 * sequence (stray continuation byte, overlong form, encoded surrogate, value
 * above U+10FFFF, sequence cut off by the end of the input) is replaced by a
 * single U+FFFD and decoding resumes at the next byte.
 *
 * @param {Uint8Array|Buffer|number[]} arr - UTF-8 encoded bytes.
 * @returns {Uint16Array} View over a freshly allocated buffer holding the
 *     decoded UTF-16 code units (the underlying buffer may be larger than
 *     the view).
 */
function UTF8ToUTF16(arr) {
    const len = arr.length;
    // One input byte never produces more than one code unit (a 4-byte
    // sequence produces two), so `len` code units is always enough.
    const arrBuff = new ArrayBuffer(len * 2);
    const out = new Uint16Array(arrBuff);
    let pos = 0;
    let c = 0;
    while (pos < len) {
        const c1 = arr[pos];
        if (c1 < 0x80) {
            // Fast path: ASCII.
            out[c++] = c1;
            pos++;
            continue;
        }

        let codePoint = -1; // stays -1 when the sequence is malformed
        let size = 1;
        if (c1 >= 0xC2 && c1 <= 0xDF) {
            // 2-byte sequence; 0xC0 and 0xC1 would only encode overlong ASCII.
            if (pos + 1 < len && (arr[pos + 1] & 0xC0) === 0x80) {
                codePoint = (c1 & 31) << 6 | arr[pos + 1] & 63;
                size = 2;
            }
        } else if (c1 >= 0xE0 && c1 <= 0xEF) {
            // 3-byte sequence.
            if (pos + 2 < len &&
                (arr[pos + 1] & 0xC0) === 0x80 &&
                (arr[pos + 2] & 0xC0) === 0x80) {
                const cp = (c1 & 15) << 12 | (arr[pos + 1] & 63) << 6 | arr[pos + 2] & 63;
                // Reject overlong forms and encoded surrogates.
                if (cp >= 0x800 && (cp < 0xD800 || cp > 0xDFFF)) {
                    codePoint = cp;
                    size = 3;
                }
            }
        } else if (c1 >= 0xF0 && c1 <= 0xF4) {
            // 4-byte sequence; lead bytes above 0xF4 always exceed U+10FFFF.
            if (pos + 3 < len &&
                (arr[pos + 1] & 0xC0) === 0x80 &&
                (arr[pos + 2] & 0xC0) === 0x80 &&
                (arr[pos + 3] & 0xC0) === 0x80) {
                const cp = (c1 & 7) << 18 | (arr[pos + 1] & 63) << 12 |
                    (arr[pos + 2] & 63) << 6 | arr[pos + 3] & 63;
                // Reject overlong forms and values beyond the Unicode range.
                if (cp >= 0x10000 && cp <= 0x10FFFF) {
                    codePoint = cp;
                    size = 4;
                }
            }
        }

        if (codePoint < 0) {
            // Malformed: replace just this byte and re-synchronise on the next.
            out[c++] = 0xFFFD;
            pos++;
        } else if (codePoint < 0x10000) {
            out[c++] = codePoint;
            pos += size;
        } else {
            // Supplementary plane: split into a surrogate pair.
            const u = codePoint - 0x10000;
            out[c++] = 0xD800 + (u >> 10);
            out[c++] = 0xDC00 + (u & 1023);
            pos += size;
        }
    }
    return new Uint16Array(arrBuff, 0, c);
}
/**
 * Encodes UTF-16 code units as UTF-8 bytes.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one 4-byte sequence. A surrogate that is not part of such a pair cannot be
 * represented in UTF-8 and is encoded as U+FFFD (EF BF BD).
 *
 * @param {Uint16Array|number[]} arr - UTF-16 code units.
 * @returns {Uint8Array} View over a freshly allocated buffer holding the
 *     UTF-8 bytes (the underlying buffer may be larger than the view).
 */
function UTF16ToUTF8(arr) {
    const len = arr.length;
    // Worst case is three bytes per code unit (BMP values >= 0x800); a
    // surrogate pair uses four bytes for two units, which is less per unit.
    const arrBuff = new ArrayBuffer(len * 3);
    const out = new Uint8Array(arrBuff);
    let p = 0;
    for (let i = 0; i < len; i++) {
        let c = arr[i];
        if (c < 0x80) {
            out[p++] = c;
            continue;
        }
        if (c < 0x800) {
            out[p++] = (c >> 6) | 192;
            out[p++] = (c & 63) | 128;
            continue;
        }
        if ((c & 0xF800) === 0xD800) {
            // Surrogate range: valid only as a high unit followed by a low unit.
            if ((c & 0xFC00) === 0xD800 && i + 1 < len && (arr[i + 1] & 0xFC00) === 0xDC00) {
                c = 0x10000 + ((c & 0x03FF) << 10) + (arr[++i] & 0x03FF);
                out[p++] = (c >> 18) | 240;
                out[p++] = ((c >> 12) & 63) | 128;
                out[p++] = ((c >> 6) & 63) | 128;
                out[p++] = (c & 63) | 128;
                continue;
            }
            // Unpaired surrogate: substitute the replacement character.
            c = 0xFFFD;
        }
        out[p++] = (c >> 12) | 224;
        out[p++] = ((c >> 6) & 63) | 128;
        out[p++] = (c & 63) | 128;
    }
    return new Uint8Array(arrBuff, 0, p);
}
var fs = require('fs'); | |
// Benchmark driver: warms both converters up, loads ./test.html and times
// 1000 runs of the conversion under test.

// Warm-up ("make hot"): call both converters repeatedly on a small sample
// that mixes ASCII, Cyrillic, supplementary-plane digits and punctuation,
// presumably so the engine has optimised them before timing starts.
// Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
const hotBuff = Buffer.from('abcdабсд𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡”№•—', 'utf-8');
for (let i = 0; i < 500; i++) {
    const warmUnits = UTF8ToUTF16(hotBuff);
    UTF16ToUTF8(warmUnits);
}

// Fixture: the file's raw bytes, its decoded string, and both conversions.
const buff = fs.readFileSync('./test.html');
const buffStr = buff.toString();
const utf16Buff = UTF8ToUTF16(buff);
const utf8Buff = UTF16ToUTF8(utf16Buff);
// Optional sanity check that the round trip preserves lengths:
// console.log(buff.length == utf8Buff.length, buffStr.length == utf16Buff.length);

console.time('perf');
for (let i = 0; i < 1000; i++) {
    // Alternative measurements; enable exactly one per run:
    // UTF8ToUTF16(buff);
    // buff.toString();
    // Buffer.from(buffStr);
    UTF16ToUTF8(utf16Buff);
}
console.timeEnd('perf');
// To turn a decoded Uint16Array `res` into a string:
// String.fromCharCode.apply(null, res);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment