Last active
May 24, 2024 03:30
-
-
Save rhom6us/9a8d972c88be85f05c51b56869f34149 to your computer and use it in GitHub Desktop.
Pack Uint8Array bytes into a string carrying 7 payload bits per character (and unpack them again)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Packs arbitrary bytes into a JavaScript string that carries 7 payload bits
 * per UTF-16 code unit, and unpacks such strings back into bytes.
 *
 * Installs three entry points as side effects:
 *   - globalThis.setChar(char, excludeCharacters)
 *   - Uint8Array.prototype.toUtf8(excludeCharacters)
 *   - String.prototype.unpackUint8Array()
 *
 * Despite the method name, the output is not UTF-8 encoded data: each code
 * unit holds 7 bits of payload in its low bits, and bit 15 is set on units
 * that would otherwise collide with a caller-excluded character.
 */
(function () {
  'use strict';

  const UTF8_SIGNIFICANT_BITS_SIZE = 7;
  const BITS_IN_BYTE = 8;
  // Low 7 bits of a code unit carry the payload.
  const PAYLOAD_MASK = 0x7f;
  // Flag OR-ed into a code unit whose plain value is an excluded character.
  const EXCLUDED_FLAG = 0x8000;
  // Upper bound on arguments per String.fromCharCode call, so large inputs
  // do not hit the engine's argument-count / stack limit.
  const FROM_CHAR_CODE_CHUNK = 0x2000;

  /**
   * Normalises an exclusion list to a Set of char codes.
   * Strings contribute one code per UTF-16 code unit, arrays are copied into
   * a Set, and anything else is returned untouched for the caller to validate.
   */
  function toExcludedSet(excludeCharacters) {
    if (typeof excludeCharacters === 'string') {
      const codes = new Set();
      for (let i = 0; i < excludeCharacters.length; i += 1) {
        codes.add(excludeCharacters.charCodeAt(i));
      }
      return codes;
    }
    if (Array.isArray(excludeCharacters)) {
      return new Set(excludeCharacters);
    }
    return excludeCharacters;
  }

  /** Returns `code` with EXCLUDED_FLAG set when `excluded` contains it. */
  function markIfExcluded(code, excluded) {
    return excluded.has(code) ? EXCLUDED_FLAG | code : code;
  }

  /**
   * Maps a character to the code unit that should be emitted for it.
   *
   * @param {string|number} char - A one-code-unit string or a char code.
   * @param {string|number[]|Set<number>} excludeCharacters - Characters that
   *   must not appear verbatim; a string is read code unit by code unit.
   * @returns {number} `char`'s code, with bit 15 set if it is excluded.
   * @throws {RangeError} If `char` is a string whose length is not 1.
   * @throws {TypeError} If `char` is neither string nor number, or if
   *   `excludeCharacters` is not a string, array or Set.
   */
  function setChar(char, excludeCharacters) {
    const excluded = toExcludedSet(excludeCharacters);
    let code = char;
    if (typeof code === 'string') {
      if (code.length !== 1) {
        throw new RangeError('setChar: char must be a single UTF-16 code unit');
      }
      code = code.charCodeAt(0);
    }
    if (typeof code !== 'number') {
      throw new TypeError('setChar: char must be a string or a number');
    }
    if (!(excluded instanceof Set)) {
      throw new TypeError('setChar: excludeCharacters must be a string, an array or a Set');
    }
    return markIfExcluded(code, excluded);
  }
  globalThis.setChar = setChar;

  /**
   * Packs the bytes of this array into a string, 7 bits per code unit.
   *
   * The result always has `length + trunc(length / 7) + 1` code units; the
   * last one holds the left-over bits (zero when nothing is left over).
   *
   * @param {string|number[]|Set<number>} [excludeCharacters] - Characters that
   *   must not appear in the output; colliding units get bit 15 set instead.
   * @returns {string} The packed string.
   * @throws {TypeError} If `excludeCharacters` is not a string, array or Set.
   */
  Uint8Array.prototype.toUtf8 = function (excludeCharacters = new Set()) {
    // Normalise once per call rather than once per byte.
    const excluded = toExcludedSet(excludeCharacters);
    if (!(excluded instanceof Set)) {
      throw new TypeError('toUtf8: excludeCharacters must be a string, an array or a Set');
    }

    const codeUnits = new Uint16Array(
      this.length + Math.trunc(this.length / UTF8_SIGNIFICANT_BITS_SIZE) + 1,
    );
    let residue = 0;
    let residueBits = 0;
    let writeIndex = 0;
    for (const byte of this) {
      residueBits += 1;
      // Emit the carried bits plus the top (8 - residueBits) bits of this byte.
      codeUnits[writeIndex++] = markIfExcluded(residue | (byte >>> residueBits), excluded);
      // Carry the low residueBits bits, left-aligned inside a 7-bit unit.
      residue = (byte << (UTF8_SIGNIFICANT_BITS_SIZE - residueBits)) & PAYLOAD_MASK;
      if (residueBits >= UTF8_SIGNIFICANT_BITS_SIZE) {
        // Seven carried bits fill a whole unit on their own.
        codeUnits[writeIndex++] = markIfExcluded(residue, excluded);
        residueBits = 0;
        residue = 0;
      }
    }
    // Final unit: the left-over bits, or a zero pad when there are none. It is
    // marked like every other unit so an excluded NUL never leaks through.
    codeUnits[writeIndex] = markIfExcluded(residue, excluded);

    let text = '';
    for (let start = 0; start < codeUnits.length; start += FROM_CHAR_CODE_CHUNK) {
      text += String.fromCharCode(...codeUnits.subarray(start, start + FROM_CHAR_CODE_CHUNK));
    }
    return text;
  };

  /**
   * Reverses `Uint8Array.prototype.toUtf8`: reads the low 7 bits of every code
   * unit of this string and regroups them into bytes.
   *
   * @returns {Uint8Array} `floor(length * 7 / 8)` bytes; trailing bits that do
   *   not fill a whole byte are discarded. An empty string yields no bytes.
   */
  String.prototype.unpackUint8Array = function () {
    const text = String(this);
    // Equals length - trunc(length / 8) - 1 for every length the packer emits,
    // but stays non-negative for '' and keeps the last byte when length % 8 === 0.
    const bytes = new Uint8Array(
      Math.floor((text.length * UTF8_SIGNIFICANT_BITS_SIZE) / BITS_IN_BYTE),
    );
    let bits = 0;
    let bitCount = 0;
    let byteIndex = 0;
    for (let i = 0; i < text.length; i += 1) {
      bits = (bits << UTF8_SIGNIFICANT_BITS_SIZE) | (text.charCodeAt(i) & PAYLOAD_MASK);
      bitCount += UTF8_SIGNIFICANT_BITS_SIZE;
      if (bitCount >= BITS_IN_BYTE) {
        bitCount -= BITS_IN_BYTE;
        bytes[byteIndex++] = bits >>> bitCount;
        // Keep only the bits that have not been written out yet.
        bits &= (1 << bitCount) - 1;
      }
    }
    return bytes;
  };

  // Load-time self-check: a round trip must reproduce the original text.
  return (function tests() {
    function getBytes(str) {
      return new Uint8Array(Array.prototype.map.call(str, (p) => p.charCodeAt(0)));
    }
    const str = "asdf\nqwerty";
    console.assert(str === String.fromCharCode(...getBytes(str).toUtf8('\n').unpackUint8Array()));
  }());
}());
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment