Skip to content

Instantly share code, notes, and snippets.

@nuintun
Last active April 22, 2020 15:11
Show Gist options
  • Select an option

  • Save nuintun/874c13a9713660390aef1bc71b445ef9 to your computer and use it in GitHub Desktop.

Select an option

Save nuintun/874c13a9713660390aef1bc71b445ef9 to your computer and use it in GitHub Desktop.
UTF8 编码速度测试
// https://jsperf.com/utf8-encoder-decoder
// Shared platform codec instances used by the `native` wrapper below.
var encoder = new TextEncoder();
var decoder = new TextDecoder();
// https://developer.mozilla.org/zh-CN/docs/Web/API/TextEncoder
/**
 * Baseline implementation backed by the built-in TextEncoder/TextDecoder.
 * Both members forward every argument to the shared instances above, so they
 * can be called as plain functions without caring about `this`.
 */
var native = {
  /** Forwards to `encoder.encode`. */
  encode: (...args) => encoder.encode(...args),
  /** Forwards to `decoder.decode`. */
  decode: (...args) => decoder.decode(...args)
};
// https://github.com/google/closure-library/blob/master/closure/goog/crypt/crypt.js
/**
 * UTF-8 codec modelled on the Closure Library routines referenced above.
 * Neither direction validates its input; the decoder in particular trusts the
 * lead byte and reads however many following bytes that lead implies.
 */
var google = {
  /**
   * Converts a string to UTF-8 bytes.
   *
   * A high surrogate immediately followed by a low surrogate is combined into
   * one 4-byte sequence. Any other surrogate code unit is written as a 3-byte
   * sequence of its own.
   *
   * @param {string} text - String to encode.
   * @returns {Uint8Array} The encoded bytes.
   */
  encode: function (text) {
    const out = [];
    const total = text.length;
    let index = 0;

    while (index < total) {
      const unit = text.charCodeAt(index);
      index += 1;

      if (unit < 0x80) {
        out.push(unit);
        continue;
      }

      if (unit < 0x800) {
        out.push((unit >> 6) | 0xc0, (unit & 0x3f) | 0x80);
        continue;
      }

      // `index` already points at the code unit after `unit`.
      const startsPair =
        (unit & 0xfc00) === 0xd800 && index < total && (text.charCodeAt(index) & 0xfc00) === 0xdc00;

      if (startsPair) {
        const trail = text.charCodeAt(index);
        index += 1;
        const scalar = 0x10000 + ((unit & 0x03ff) << 10) + (trail & 0x03ff);
        out.push(
          (scalar >> 18) | 0xf0,
          ((scalar >> 12) & 0x3f) | 0x80,
          ((scalar >> 6) & 0x3f) | 0x80,
          (scalar & 0x3f) | 0x80
        );
        continue;
      }

      out.push((unit >> 12) | 0xe0, ((unit >> 6) & 0x3f) | 0x80, (unit & 0x3f) | 0x80);
    }

    return new Uint8Array(out);
  },

  /**
   * Converts UTF-8 bytes to a string.
   *
   * Any lead value that is not below 128, not in 192..223 and not above 239 is
   * handled by the 3-byte branch. Bytes missing at the end of the input
   * contribute zero bits rather than raising an error.
   *
   * @param {ArrayLike<number>} bytes - Encoded bytes.
   * @returns {string} The decoded text.
   */
  decode: function (bytes) {
    const total = bytes.length;
    let text = '';
    let cursor = 0;

    while (cursor < total) {
      const lead = bytes[cursor++];

      if (lead < 128) {
        text += String.fromCharCode(lead);
      } else if (lead > 191 && lead < 224) {
        const second = bytes[cursor++];
        text += String.fromCharCode(((lead & 31) << 6) | (second & 63));
      } else if (lead > 239 && lead < 365) {
        // The upper bound 365 is preserved as-is; every value in 240..255
        // satisfies it, so for byte-sized input only the lower bound matters.
        const second = bytes[cursor++];
        const third = bytes[cursor++];
        const fourth = bytes[cursor++];
        const offset =
          (((lead & 7) << 18) | ((second & 63) << 12) | ((third & 63) << 6) | (fourth & 63)) - 0x10000;
        // Emit the surrogate pair for the supplementary code point.
        text += String.fromCharCode(0xd800 + (offset >> 10), 0xdc00 + (offset & 1023));
      } else {
        const second = bytes[cursor++];
        const third = bytes[cursor++];
        text += String.fromCharCode(((lead & 15) << 12) | ((second & 63) << 6) | (third & 63));
      }
    }

    return text;
  }
};
// https://github.com/google/flatbuffers/blob/master/js/flatbuffers.js
/**
 * UTF-8 codec modelled on the FlatBuffers JavaScript runtime referenced above.
 */
var fatbuffer = {
  /**
   * Converts a string to UTF-8 bytes.
   *
   * A high surrogate is merged with the following code unit only when that
   * unit really is a low surrogate. Unpaired surrogates (high or low) are
   * encoded as their own 3-byte sequence, so no neighbouring character is
   * consumed and no NaN-derived bytes are produced at the end of the string.
   *
   * @param {string} s - String to encode.
   * @returns {Uint8Array} The encoded bytes.
   */
  encode: function (s) {
    const utf8 = [];
    const length = s.length;
    let i = 0;

    while (i < length) {
      let codePoint = s.charCodeAt(i++);

      // Only a high surrogate that is followed by a low surrogate forms a pair.
      if (codePoint >= 0xd800 && codePoint < 0xdc00 && i < length) {
        const trail = s.charCodeAt(i);
        if (trail >= 0xdc00 && trail < 0xe000) {
          codePoint = (codePoint << 10) + trail + (0x10000 - (0xd800 << 10) - 0xdc00);
          i++;
        }
      }

      if (codePoint < 0x80) {
        utf8.push(codePoint);
      } else if (codePoint < 0x800) {
        utf8.push(((codePoint >> 6) & 0x1f) | 0xc0, (codePoint & 0x3f) | 0x80);
      } else if (codePoint < 0x10000) {
        utf8.push(
          ((codePoint >> 12) & 0x0f) | 0xe0,
          ((codePoint >> 6) & 0x3f) | 0x80,
          (codePoint & 0x3f) | 0x80
        );
      } else {
        utf8.push(
          ((codePoint >> 18) & 0x07) | 0xf0,
          ((codePoint >> 12) & 0x3f) | 0x80,
          ((codePoint >> 6) & 0x3f) | 0x80,
          (codePoint & 0x3f) | 0x80
        );
      }
    }

    return new Uint8Array(utf8);
  },

  /**
   * Converts UTF-8 bytes to a string.
   *
   * The input is not validated: a first byte below 0xC0 is emitted as a single
   * character, and the sequence length for anything else is taken from the
   * first byte alone.
   *
   * @param {ArrayLike<number>} bytes - Encoded bytes.
   * @returns {string} The decoded text.
   */
  decode: function (bytes) {
    const length = bytes.length;
    let result = '';
    let i = 0;

    while (i < length) {
      const a = bytes[i++];
      let codePoint;

      if (a < 0xc0) {
        codePoint = a;
      } else if (a < 0xe0) {
        const b = bytes[i++];
        codePoint = ((a & 0x1f) << 6) | (b & 0x3f);
      } else if (a < 0xf0) {
        const b = bytes[i++];
        const c = bytes[i++];
        codePoint = ((a & 0x0f) << 12) | ((b & 0x3f) << 6) | (c & 0x3f);
      } else {
        const b = bytes[i++];
        const c = bytes[i++];
        const d = bytes[i++];
        codePoint = ((a & 0x07) << 18) | ((b & 0x3f) << 12) | ((c & 0x3f) << 6) | (d & 0x3f);
      }

      if (codePoint < 0x10000) {
        result += String.fromCharCode(codePoint);
      } else {
        // Supplementary code points are written as a surrogate pair.
        codePoint -= 0x10000;
        result += String.fromCharCode((codePoint >> 10) + 0xd800, (codePoint & 0x3ff) + 0xdc00);
      }
    }

    return result;
  }
};
// https://github.com/eidosam/TextEncoder-TextDecoder-Polyfill/blob/master/index.js
/**
 * UTF-8 codec modelled on the TextEncoder/TextDecoder polyfill referenced above.
 */
var polyfill = {
  /**
   * Converts a string to UTF-8 bytes.
   *
   * Code points are read with `codePointAt`, so a surrogate pair becomes one
   * 4-byte sequence; an unpaired surrogate is encoded as-is in 3 bytes.
   *
   * @param {string} string - String to encode.
   * @returns {Uint8Array} The encoded bytes; empty for a falsy input.
   */
  encode: function (string) {
    // Falsy input still yields a Uint8Array so the return type never varies.
    if (!string) return new Uint8Array(0);

    const octets = [];
    const length = string.length;
    let i = 0;

    while (i < length) {
      const codePoint = string.codePointAt(i);
      // `shift` is the bit offset of the payload carried by the lead byte,
      // `prefix` is the marker OR-ed into that lead byte.
      let shift = 0;
      let prefix = 0x00;

      if (codePoint <= 0x0000007f) {
        shift = 0;
        prefix = 0x00;
      } else if (codePoint <= 0x000007ff) {
        shift = 6;
        prefix = 0xc0;
      } else if (codePoint <= 0x0000ffff) {
        shift = 12;
        prefix = 0xe0;
      } else if (codePoint <= 0x001fffff) {
        shift = 18;
        prefix = 0xf0;
      }

      octets.push(prefix | (codePoint >> shift));
      // Remaining payload goes out six bits at a time as continuation bytes.
      for (shift -= 6; shift >= 0; shift -= 6) {
        octets.push(0x80 | ((codePoint >> shift) & 0x3f));
      }

      // Supplementary code points occupy two UTF-16 code units.
      i += codePoint >= 0x10000 ? 2 : 1;
    }

    return new Uint8Array(octets);
  },

  /**
   * Converts UTF-8 bytes to a string.
   *
   * U+FFFD is emitted in place of:
   *  - a first byte that cannot start a sequence (0x80-0xBF or above 0xF4),
   *    consuming just that byte;
   *  - a sequence cut short by the end of the input, consuming the rest;
   *  - a sequence whose value exceeds U+10FFFF, which `String.fromCodePoint`
   *    would otherwise reject with a RangeError.
   * Continuation bytes themselves are not validated.
   *
   * @param {ArrayLike<number>} octets - Encoded bytes.
   * @returns {string} The decoded text; empty for a falsy input.
   */
  decode: function (octets) {
    if (!octets) return '';

    const length = octets.length;
    let string = '';
    let i = 0;

    while (i < length) {
      const lead = octets[i];
      let bytesNeeded = 0;
      let codePoint = 0;
      let isValidLead = true;

      if (lead <= 0x7f) {
        codePoint = lead & 0xff;
      } else if (lead <= 0xbf) {
        // Continuation byte with no lead byte in front of it.
        isValidLead = false;
      } else if (lead <= 0xdf) {
        bytesNeeded = 1;
        codePoint = lead & 0x1f;
      } else if (lead <= 0xef) {
        bytesNeeded = 2;
        codePoint = lead & 0x0f;
      } else if (lead <= 0xf4) {
        bytesNeeded = 3;
        codePoint = lead & 0x07;
      } else {
        isValidLead = false;
      }

      if (!isValidLead) {
        string += '\ufffd';
        i += 1;
        continue;
      }

      if (length - i - bytesNeeded > 0) {
        for (let k = 1; k <= bytesNeeded; k += 1) {
          codePoint = (codePoint << 6) | (octets[i + k] & 0x3f);
        }
        // Guard the fromCodePoint call below against out-of-range values.
        if (codePoint > 0x10ffff) {
          codePoint = 0xfffd;
        }
        i += bytesNeeded + 1;
      } else {
        // Truncated sequence: replace it and stop at the end of the input.
        codePoint = 0xfffd;
        i = length;
      }

      string += String.fromCodePoint(codePoint);
    }

    return string;
  }
};
// Sample string handed to every encoder.
var text = '武汉加油,中国加油。';
// Pre-encoded input handed to every decoder: ten 3-byte UTF-8 sequences.
// NOTE(review): the fifth sequence (239, 188, 140) is U+FF0C, the fullwidth
// comma — confirm `text` above uses the same character if the two are meant
// to round-trip.
var bytes = new Uint8Array([
  230, 173, 166, 230, 177, 137, 229, 138, 160, 230, 178, 185, 239, 188, 140,
  228, 184, 173, 229, 155, 189, 229, 138, 160, 230, 178, 185, 227, 128, 130
]);
// Print each implementation's encode result followed by its decode result,
// in the order native, google, fatbuffer, polyfill.
for (const codec of [native, google, fatbuffer, polyfill]) {
  console.log(codec.encode(text));
  console.log(codec.decode(bytes));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment