Last active
April 22, 2020 15:11
-
-
Save nuintun/874c13a9713660390aef1bc71b445ef9 to your computer and use it in GitHub Desktop.
UTF8 编码速度测试
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // https://jsperf.com/utf8-encoder-decoder | |
// Shared platform codec instances; both are constructed with default options.
var encoder = new TextEncoder();
var decoder = new TextDecoder();

// https://developer.mozilla.org/zh-CN/docs/Web/API/TextEncoder
/**
 * Baseline implementation: forwards straight to the built-in
 * TextEncoder / TextDecoder instances created above.
 */
var native = {
  /** Forwards every argument to `encoder.encode` and returns its result. */
  encode: (...args) => encoder.encode(...args),
  /** Forwards every argument to `decoder.decode` and returns its result. */
  decode: (...args) => decoder.decode(...args)
};
| // https://github.com/google/closure-library/blob/master/closure/goog/crypt/crypt.js | |
/**
 * UTF-8 codec adapted from Closure Library's `goog.crypt` helpers.
 */
var google = {
  /**
   * Encodes a JavaScript (UTF-16) string as UTF-8.
   *
   * A valid surrogate pair becomes one 4-byte sequence. An unpaired
   * surrogate falls through to the 3-byte branch and is encoded as-is;
   * `decode` below reverses that, so such strings still round-trip.
   *
   * @param {string} text - String to encode.
   * @returns {Uint8Array} The encoded bytes.
   */
  encode: function (text) {
    var pos = 0;
    var bytes = [];
    var length = text.length;

    for (var i = 0; i < length; i++) {
      var code = text.charCodeAt(i);

      if (code < 128) {
        bytes[pos++] = code;
      } else if (code < 2048) {
        bytes[pos++] = (code >> 6) | 192;
        bytes[pos++] = (code & 63) | 128;
      } else if ((code & 0xfc00) === 0xd800 && i + 1 < length && (text.charCodeAt(i + 1) & 0xfc00) === 0xdc00) {
        // High surrogate followed by a low surrogate: combine into one code point.
        code = 0x10000 + ((code & 0x03ff) << 10) + (text.charCodeAt(++i) & 0x03ff);
        bytes[pos++] = (code >> 18) | 240;
        bytes[pos++] = ((code >> 12) & 63) | 128;
        bytes[pos++] = ((code >> 6) & 63) | 128;
        bytes[pos++] = (code & 63) | 128;
      } else {
        bytes[pos++] = (code >> 12) | 224;
        bytes[pos++] = ((code >> 6) & 63) | 128;
        bytes[pos++] = (code & 63) | 128;
      }
    }

    return new Uint8Array(bytes);
  },

  /**
   * Decodes UTF-8 bytes into a JavaScript string.
   *
   * Structurally broken input no longer produces silently wrong characters:
   * an invalid lead byte (0x80-0xBF, 0xF5-0xFF), a sequence cut short by the
   * end of input or by a non-continuation byte, and a 4-byte sequence above
   * U+10FFFF each yield a single U+FFFD. A byte that interrupts a sequence is
   * not consumed and is decoded on its own. Overlong forms and encoded
   * surrogates are deliberately still accepted so that the output of
   * `encode` always round-trips.
   *
   * @param {Uint8Array|number[]} bytes - Byte values in the range 0-255.
   * @returns {string} The decoded string.
   */
  decode: function (bytes) {
    var pos = 0;
    var output = '';
    var length = bytes.length;

    while (pos < length) {
      var c1 = bytes[pos++];
      var needed;
      var code;

      if (c1 < 0x80) {
        output += String.fromCharCode(c1);
        continue;
      } else if (c1 >= 0xc0 && c1 < 0xe0) {
        needed = 1;
        code = c1 & 0x1f;
      } else if (c1 >= 0xe0 && c1 < 0xf0) {
        needed = 2;
        code = c1 & 0x0f;
      } else if (c1 >= 0xf0 && c1 < 0xf5) {
        needed = 3;
        code = c1 & 0x07;
      } else {
        // Stray continuation byte or a lead byte that can never be valid.
        output += '\ufffd';
        continue;
      }

      var complete = true;

      while (needed > 0) {
        // Stop without consuming when input ends or the byte is not 10xxxxxx.
        if (pos >= length || (bytes[pos] & 0xc0) !== 0x80) {
          complete = false;
          break;
        }

        code = (code << 6) | (bytes[pos++] & 0x3f);
        needed--;
      }

      if (!complete || code > 0x10ffff) {
        output += '\ufffd';
      } else if (code < 0x10000) {
        output += String.fromCharCode(code);
      } else {
        // Split a supplementary code point into a UTF-16 surrogate pair.
        var u = code - 0x10000;

        output += String.fromCharCode(0xd800 + (u >> 10), 0xdc00 + (u & 1023));
      }
    }

    return output;
  }
};
| // https://github.com/google/flatbuffers/blob/master/js/flatbuffers.js | |
/**
 * UTF-8 codec adapted from the FlatBuffers JavaScript runtime.
 */
var fatbuffer = {
  /**
   * Encodes a JavaScript (UTF-16) string as UTF-8.
   *
   * A high surrogate is combined with the next code unit only when that unit
   * really is a low surrogate. Any unpaired surrogate (high or low) is
   * replaced with U+FFFD, so the output is always well-formed UTF-8 and a
   * trailing lone high surrogate no longer reads past the end of the string.
   *
   * @param {string} s - String to encode.
   * @returns {Uint8Array} The encoded bytes.
   */
  encode: function (s) {
    var i = 0;
    var utf8 = [];
    var length = s.length;

    while (i < length) {
      var codePoint = s.charCodeAt(i++);

      if (codePoint >= 0xd800 && codePoint < 0xe000) {
        var low = i < length ? s.charCodeAt(i) : 0;

        if (codePoint < 0xdc00 && low >= 0xdc00 && low < 0xe000) {
          // Valid pair: fold both code units into one supplementary code point.
          codePoint = (codePoint << 10) + low + (0x10000 - (0xd800 << 10) - 0xdc00);
          i++;
        } else {
          // Unpaired surrogate: leave the next unit for the following iteration.
          codePoint = 0xfffd;
        }
      }

      if (codePoint < 0x80) {
        utf8.push(codePoint);
      } else {
        if (codePoint < 0x800) {
          utf8.push(((codePoint >> 6) & 0x1f) | 0xc0);
        } else {
          if (codePoint < 0x10000) {
            utf8.push(((codePoint >> 12) & 0x0f) | 0xe0);
          } else {
            utf8.push(((codePoint >> 18) & 0x07) | 0xf0, ((codePoint >> 12) & 0x3f) | 0x80);
          }

          utf8.push(((codePoint >> 6) & 0x3f) | 0x80);
        }

        // Every multi-byte form ends with the low six bits.
        utf8.push((codePoint & 0x3f) | 0x80);
      }
    }

    return new Uint8Array(utf8);
  },

  /**
   * Decodes UTF-8 bytes into a JavaScript string.
   *
   * An invalid lead byte (0x80-0xBF, 0xF8-0xFF), a sequence cut short by the
   * end of input or by a non-continuation byte, and any value above U+10FFFF
   * each produce a single U+FFFD. A byte that interrupts a sequence is not
   * consumed and is decoded on its own. Overlong forms are not rejected.
   *
   * @param {Uint8Array|number[]} bytes - Byte values in the range 0-255.
   * @returns {string} The decoded string.
   */
  decode: function (bytes) {
    var i = 0;
    var result = '';
    var length = bytes.length;

    while (i < length) {
      var codePoint;
      var extra; // continuation bytes still expected; -1 marks an invalid lead
      var a = bytes[i++];

      if (a < 0x80) {
        extra = 0;
        codePoint = a;
      } else if (a < 0xc0) {
        extra = -1;
      } else if (a < 0xe0) {
        extra = 1;
        codePoint = a & 0x1f;
      } else if (a < 0xf0) {
        extra = 2;
        codePoint = a & 0x0f;
      } else if (a < 0xf8) {
        extra = 3;
        codePoint = a & 0x07;
      } else {
        extra = -1;
      }

      for (; extra > 0; extra--) {
        // Leaving the loop early keeps `extra` non-zero, flagging the error.
        if (i >= length || (bytes[i] & 0xc0) !== 0x80) {
          break;
        }

        codePoint = (codePoint << 6) | (bytes[i++] & 0x3f);
      }

      if (extra !== 0 || codePoint > 0x10ffff) {
        codePoint = 0xfffd;
      }

      if (codePoint < 0x10000) {
        result += String.fromCharCode(codePoint);
      } else {
        codePoint -= 0x10000;
        result += String.fromCharCode((codePoint >> 10) + 0xd800, (codePoint & ((1 << 10) - 1)) + 0xdc00);
      }
    }

    return result;
  }
};
| // https://github.com/eidosam/TextEncoder-TextDecoder-Polyfill/blob/master/index.js | |
/**
 * UTF-8 codec adapted from a TextEncoder / TextDecoder polyfill.
 */
var polyfill = {
  /**
   * Encodes a JavaScript string as UTF-8.
   *
   * Always returns a `Uint8Array`, including for empty or missing input
   * (previously a plain `[]`). Unpaired surrogates are replaced with U+FFFD
   * instead of being written out as invalid 3-byte sequences.
   *
   * @param {string} string - String to encode.
   * @returns {Uint8Array} The encoded bytes.
   */
  encode: function (string) {
    if (!string) return new Uint8Array(0);

    var i = 0;
    var octets = [];
    var length = string.length;

    while (i < length) {
      var c = 0;
      var bits = 0;
      var codePoint = string.codePointAt(i);

      // Supplementary code points occupy two UTF-16 code units.
      i += codePoint >= 0x10000 ? 2 : 1;

      // codePointAt returns the bare surrogate when it is unpaired.
      if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
        codePoint = 0xfffd;
      }

      // `c` is the shift for the lead byte's payload, `bits` its marker prefix.
      if (codePoint <= 0x0000007f) {
        c = 0;
        bits = 0x00;
      } else if (codePoint <= 0x000007ff) {
        c = 6;
        bits = 0xc0;
      } else if (codePoint <= 0x0000ffff) {
        c = 12;
        bits = 0xe0;
      } else {
        c = 18;
        bits = 0xf0;
      }

      octets.push(bits | (codePoint >> c));
      c -= 6;

      // Emit the remaining payload six bits at a time.
      while (c >= 0) {
        octets.push(0x80 | ((codePoint >> c) & 0x3f));
        c -= 6;
      }
    }

    return new Uint8Array(octets);
  },

  /**
   * Decodes UTF-8 bytes into a JavaScript string.
   *
   * Follows the lead-byte and continuation-range rules of the WHATWG UTF-8
   * decoder in replacement mode: invalid lead bytes, out-of-range
   * continuation bytes (which also rules out overlong forms, surrogates and
   * values above U+10FFFF) and truncated sequences each yield U+FFFD, and the
   * offending byte is re-examined as the start of a new sequence. A leading
   * byte-order mark is not stripped.
   *
   * @param {Uint8Array|number[]} octets - Byte values in the range 0-255.
   * @returns {string} The decoded string ('' when `octets` is falsy).
   */
  decode: function (octets) {
    if (!octets) return '';

    var i = 0;
    var string = '';
    var length = octets.length;

    while (i < length) {
      var codePoint = 0xfffd;
      var bytesNeeded = 0;
      // Allowed range for the next continuation byte; narrowed for some leads.
      var lower = 0x80;
      var upper = 0xbf;
      var octet = octets[i];

      i += 1;

      if (octet <= 0x7f) {
        codePoint = octet;
      } else if (octet >= 0xc2 && octet <= 0xdf) {
        bytesNeeded = 1;
        codePoint = octet & 0x1f;
      } else if (octet >= 0xe0 && octet <= 0xef) {
        if (octet === 0xe0) lower = 0xa0; // excludes overlong 3-byte forms
        if (octet === 0xed) upper = 0x9f; // excludes U+D800-U+DFFF
        bytesNeeded = 2;
        codePoint = octet & 0x0f;
      } else if (octet >= 0xf0 && octet <= 0xf4) {
        if (octet === 0xf0) lower = 0x90; // excludes overlong 4-byte forms
        if (octet === 0xf4) upper = 0x8f; // excludes values above U+10FFFF
        bytesNeeded = 3;
        codePoint = octet & 0x07;
      }
      // Any other lead byte keeps the U+FFFD default.

      while (bytesNeeded > 0) {
        if (i >= length || octets[i] < lower || octets[i] > upper) {
          // Broken sequence: report it and leave the current byte unread.
          codePoint = 0xfffd;
          break;
        }

        codePoint = (codePoint << 6) | (octets[i] & 0x3f);
        lower = 0x80;
        upper = 0xbf;
        i += 1;
        bytesNeeded -= 1;
      }

      string += String.fromCodePoint(codePoint);
    }

    return string;
  }
};
// Demo input: a sample string and a 30-byte buffer that appears to be its
// UTF-8 encoding (three bytes for each of the ten characters).
var text = '武汉加油,中国加油。';
var bytes = new Uint8Array([
  230, 173, 166, 230, 177, 137, 229, 138, 160, 230,
  178, 185, 239, 188, 140, 228, 184, 173, 229, 155,
  189, 229, 138, 160, 230, 178, 185, 227, 128, 130
]);

// Log each implementation's encode result followed by its decode result,
// in the order native, google, fatbuffer, polyfill.
[native, google, fatbuffer, polyfill].forEach(function (codec) {
  console.log(codec.encode(text));
  console.log(codec.decode(bytes));
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment