Last active
January 14, 2023 20:38
-
-
Save frangio/f5b3f86e4150e9d2bab49804bc03fb5b to your computer and use it in GitHub Desktop.
Fastest way I found to compute the UTF-8 length of a UTF-16 string.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// larger buffer => faster
const BUFFER_SIZE = 1024;

// Scratch buffer shared by every call; allocated lazily on first use.
let buffer;

/**
 * Computes how many bytes `text` occupies when encoded as UTF-8, without
 * keeping the encoded output around.
 *
 * The string is encoded chunk by chunk into a fixed-size scratch buffer and
 * only the number of bytes written per chunk is accumulated.
 *
 * @param {string} text
 * @returns {number}
 * @throws {Error} if the scratch buffer cannot hold even one encoded character
 */
function utf8Length(text) {
  let length = 0;
  let read = 0;
  const enc = new TextEncoder();
  buffer ??= new Uint8Array(BUFFER_SIZE); // TODO: use a WeakRef so this can be gc'd
  while (read < text.length) {
    // `p.read` is reported in UTF-16 code units, so `read` indexes `text` directly.
    const p = enc.encodeInto(text.slice(read), buffer);
    if (p.read === 0) {
      // No progress means the buffer is smaller than one encoded character;
      // bail out rather than spin forever.
      throw new Error('utf8Length: BUFFER_SIZE is too small to encode a single character');
    }
    read += p.read;
    length += p.written;
  }
  return length;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment