Skip to content

Instantly share code, notes, and snippets.

@cevek
Last active December 5, 2016 12:15
Show Gist options
  • Save cevek/9842a8cd440af7641c5d2f4b20b061ca to your computer and use it in GitHub Desktop.
Save cevek/9842a8cd440af7641c5d2f4b20b061ca to your computer and use it in GitHub Desktop.
// UTF-8 -> UTF-16 decoding benchmark: 1000 iterations x 140 KB input (~140 MB total)
//   UTF8ToUTF16:     520 ms
//   Buffer.toString: 510 ms
// UTF-16 -> UTF-8 encoding benchmark: 1000 iterations x input of length 125k
//   UTF16ToUTF8:     420 ms
//   Buffer.toString: 1000 ms
/**
 * Decodes UTF-8 bytes into UTF-16 code units.
 *
 * Well-formed input decodes exactly as before. Malformed input no longer
 * produces arbitrary code units: a stray continuation byte (0x80-0xBF), an
 * invalid lead byte (0xF8-0xFF), a sequence cut off by the end of the input,
 * and a four-byte sequence whose value falls outside U+10000..U+10FFFF each
 * decode to a single U+FFFD replacement character.
 *
 * NOTE: continuation bytes inside a sequence are not validated (only their
 * low six bits are used), and overlong two/three-byte forms are accepted.
 *
 * @param {Uint8Array|Buffer|number[]} arr - Indexable sequence of byte values.
 * @returns {Uint16Array} View over a freshly allocated buffer holding the
 *   decoded code units; the view's length is the number of units written.
 */
function UTF8ToUTF16(arr) {
  var REPLACEMENT = 0xFFFD;
  var len = arr.length;
  // Every UTF-8 sequence yields at most as many UTF-16 units as it has bytes
  // (4 bytes -> 2 units), so `len` units is always enough room.
  var arrBuff = new ArrayBuffer(len * 2);
  var out = new Uint16Array(arrBuff);
  var pos = 0;
  var c = 0;
  while (pos < len) {
    var c1 = arr[pos++];
    if (c1 < 128) {
      // Single-byte (ASCII) fast path.
      out[c++] = c1;
      continue;
    }

    // Number of continuation bytes announced by the lead byte.
    var extra;
    if (c1 > 191 && c1 < 224) {
      extra = 1;
    } else if (c1 > 223 && c1 < 240) {
      extra = 2;
    } else if (c1 > 239 && c1 < 248) {
      extra = 3;
    } else {
      // Stray continuation byte (0x80-0xBF) or invalid lead (0xF8-0xFF):
      // consume just this byte so the decoder resynchronises on the next one.
      out[c++] = REPLACEMENT;
      continue;
    }

    if (pos + extra > len) {
      // Sequence is truncated by the end of the input; everything left
      // belongs to the incomplete sequence, so stop here.
      out[c++] = REPLACEMENT;
      break;
    }

    var c2 = arr[pos++];
    if (extra === 1) {
      out[c++] = (c1 & 31) << 6 | c2 & 63;
      continue;
    }

    var c3 = arr[pos++];
    if (extra === 2) {
      out[c++] = (c1 & 15) << 12 | (c2 & 63) << 6 | c3 & 63;
      continue;
    }

    var c4 = arr[pos++];
    var cp = (c1 & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63;
    if (cp < 0x10000 || cp > 0x10FFFF) {
      // Overlong form or beyond the Unicode range: not representable as a
      // surrogate pair.
      out[c++] = REPLACEMENT;
      continue;
    }
    // Supplementary-plane code point -> surrogate pair.
    var u = cp - 0x10000;
    out[c++] = 0xD800 + (u >> 10);
    out[c++] = 0xDC00 + (u & 1023);
  }
  return new Uint16Array(arrBuff, 0, c);
}
/**
 * Encodes UTF-16 code units as UTF-8 bytes.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one four-byte sequence. An unpaired surrogate is written as a three-byte
 * sequence of its own value, like any other code unit >= 0x800.
 *
 * @param {Uint16Array|number[]} arr - Indexable sequence of UTF-16 code units.
 * @returns {Uint8Array} View over a freshly allocated buffer holding the
 *   encoded bytes; the view's length is the number of bytes written.
 */
function UTF16ToUTF8(arr) {
  // Worst case is 3 bytes per code unit (BMP values >= 0x800); a surrogate
  // pair needs only 4 bytes for 2 units. Allocating 2 bytes per unit would
  // overflow on text dominated by three-byte characters.
  var arrBuff = new ArrayBuffer(arr.length * 3);
  var out = new Uint8Array(arrBuff);
  var p = 0;
  for (var i = 0; i < arr.length; i++) {
    var c = arr[i];
    if (c < 128) {
      out[p++] = c;
    } else if (c < 2048) {
      out[p++] = (c >> 6) | 192;
      out[p++] = (c & 63) | 128;
    } else if ((c & 0xFC00) === 0xD800 && (arr[i + 1] & 0xFC00) === 0xDC00) {
      // Surrogate pair: rebuild the supplementary code point and consume the
      // low surrogate. At the last index `arr[i + 1]` is undefined, which
      // masks to 0 and fails the check, so there is no out-of-range read.
      c = 0x10000 + ((c & 0x03FF) << 10) + (arr[++i] & 0x03FF);
      out[p++] = (c >> 18) | 240;
      out[p++] = ((c >> 12) & 63) | 128;
      out[p++] = ((c >> 6) & 63) | 128;
      out[p++] = (c & 63) | 128;
    } else {
      out[p++] = (c >> 12) | 224;
      out[p++] = ((c >> 6) & 63) | 128;
      out[p++] = (c & 63) | 128;
    }
  }
  return new Uint8Array(arrBuff, 0, p);
}
var fs = require('fs');
// Warm-up: call both converters repeatedly on a small sample that mixes
// 1-, 2-, 3- and 4-byte characters before the timed run (presumably so the
// engine has optimised them by the time measurement starts).
// Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
var hotBuff = Buffer.from('abcdабсд𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡”№•—', 'utf-8');
for (var i = 0; i < 500; i++) {
  var res = UTF8ToUTF16(hotBuff);
  var res2 = UTF16ToUTF8(res);
}
// Benchmark fixture: the file is read as raw bytes and converted once in each
// direction so every variant in the timed loop has its input ready.
var buff = fs.readFileSync('./test.html');
var buffStr = buff.toString();
var utf16Buff = UTF8ToUTF16(buff);
var utf8Buff = UTF16ToUTF8(utf16Buff);
// Round-trip sanity check (disabled):
// console.log(buff.length == utf8Buff.length, buffStr.length == utf16Buff.length);
console.time('perf');
for (var i = 0; i < 1000; i++) {
  // Exactly one variant is meant to be active per run; the others are
  // alternatives to uncomment when measuring them.
  // var res = utf8ToUTF16(buff);
  // var res12 = UTF8ToUTF16(buff);
  // var res23 = buff.toString();
  // new Buffer(buffStr);
  var res10 = UTF16ToUTF8(utf16Buff);
  // new Buffer(res10.buffer);
}
console.timeEnd('perf');
// String.fromCharCode.apply(null, res);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment