Skip to content

Instantly share code, notes, and snippets.

@rhom6us
Last active May 24, 2024 03:30
Show Gist options
  • Save rhom6us/9a8d972c88be85f05c51b56869f34149 to your computer and use it in GitHub Desktop.
Save rhom6us/9a8d972c88be85f05c51b56869f34149 to your computer and use it in GitHub Desktop.
pack uint8 to utf8
(function () {
  'use strict';

  /** Number of payload bits stored in each character of the packed string. */
  const UTF8_SIGNIFICANT_BITS_SIZE = 7;
  /** Number of bits in each source/destination byte. */
  const BITS_IN_BYTE = 8;
  /** Mask selecting the payload bits of a packed character. */
  const PAYLOAD_MASK = (1 << UTF8_SIGNIFICANT_BITS_SIZE) - 1;
  /** Bit OR-ed onto a code unit that must not appear verbatim in the output. */
  const EXCLUDED_FLAG = 0x8000;
  /**
   * Maximum number of code units handed to a single String.fromCharCode call.
   * Spreading an arbitrarily large buffer as call arguments overflows the
   * engine's argument/stack limit, so the conversion is done in chunks.
   */
  const FROM_CHAR_CODE_CHUNK = 0x2000;

  /**
   * Normalizes the accepted "excluded characters" shapes into a Set of char codes.
   *
   * @param {string|number[]|Set<number>} excludeCharacters - a string (every
   *   UTF-16 code unit is excluded), an array of char codes, or a Set of char codes.
   * @returns {Set<number>} the excluded char codes.
   * @throws {TypeError} when the argument is none of the accepted shapes.
   */
  function toExcludeSet(excludeCharacters) {
    if (excludeCharacters instanceof Set) {
      return excludeCharacters;
    }
    if (typeof excludeCharacters === 'string') {
      const codes = new Set();
      for (let i = 0; i < excludeCharacters.length; i += 1) {
        codes.add(excludeCharacters.charCodeAt(i));
      }
      return codes;
    }
    if (Array.isArray(excludeCharacters)) {
      return new Set(excludeCharacters);
    }
    throw new TypeError('excludeCharacters must be a string, an array of char codes, or a Set of char codes');
  }

  /** Returns `code` with EXCLUDED_FLAG set when `code` is in `excluded`, else `code` unchanged. */
  function flagIfExcluded(code, excluded) {
    return excluded.has(code) ? EXCLUDED_FLAG | code : code;
  }

  /** Converts a Uint16Array of UTF-16 code units to a string without hitting argument limits. */
  function codeUnitsToString(codeUnits) {
    let text = '';
    for (let start = 0; start < codeUnits.length; start += FROM_CHAR_CODE_CHUNK) {
      text += String.fromCharCode(...codeUnits.subarray(start, start + FROM_CHAR_CODE_CHUNK));
    }
    return text;
  }

  /**
   * Adds a method to a built-in prototype as a non-enumerable property so it
   * does not show up in `for...in` loops over strings or typed arrays.
   */
  function defineMethod(prototype, name, method) {
    Object.defineProperty(prototype, name, {
      value: method,
      writable: true,
      configurable: true,
      enumerable: false,
    });
  }

  /**
   * Maps a char code to the code unit that should be emitted for it: the code
   * itself, or the code with bit 0x8000 set when it is one of the excluded
   * characters.
   *
   * @param {number|string} char - a char code, or a string of exactly one UTF-16 code unit.
   * @param {string|number[]|Set<number>} excludeCharacters - characters that must be flagged.
   * @returns {number} the code unit to emit.
   * @throws {RangeError} when `char` is a string whose length is not 1.
   * @throws {TypeError} when `char` is neither a number nor a string, or when
   *   `excludeCharacters` is not a string, array or Set.
   */
  globalThis.setChar = function (char, excludeCharacters) {
    let code = char;
    if (typeof code === 'string') {
      if (code.length !== 1) {
        throw new RangeError('char must be exactly one UTF-16 code unit');
      }
      code = code.charCodeAt(0);
    }
    if (typeof code !== 'number') {
      throw new TypeError('char must be a number or a one-character string');
    }
    return flagIfExcluded(code, toExcludeSet(excludeCharacters));
  };

  /**
   * Packs the bytes of this Uint8Array into a string that carries 7 payload
   * bits per character (most significant bits first). Every emitted code unit
   * is in 0x00-0x7F, except code units listed in `excludeCharacters`, which are
   * emitted with bit 0x8000 set so the excluded character never appears
   * verbatim. The result has exactly ceil(8 * length / 7) characters; unused
   * low bits of the last character are zero.
   *
   * @param {string|number[]|Set<number>} [excludeCharacters] - characters that
   *   must not appear in the output; defaults to none.
   * @returns {string} the packed string; '' for an empty array.
   * @throws {TypeError} when `excludeCharacters` is not a string, array or Set.
   */
  defineMethod(Uint8Array.prototype, 'toUtf8', function (excludeCharacters = new Set()) {
    // Normalize once instead of rebuilding the Set for every emitted character.
    const excluded = toExcludeSet(excludeCharacters);
    const packedLength = Math.ceil((this.length * BITS_IN_BYTE) / UTF8_SIGNIFICANT_BITS_SIZE);
    const packed = new Uint16Array(packedLength);
    let residue = 0; // bits left over from the previous byte, already shifted into place
    let residueBits = 0; // how many bits `residue` currently holds
    let charIndex = 0;
    for (const byte of this) {
      residueBits += 1;
      packed[charIndex++] = flagIfExcluded(residue | (byte >>> residueBits), excluded);
      residue = (byte << (UTF8_SIGNIFICANT_BITS_SIZE - residueBits)) & PAYLOAD_MASK;
      if (residueBits >= UTF8_SIGNIFICANT_BITS_SIZE) {
        // Seven leftover bits form a complete character of their own.
        packed[charIndex++] = flagIfExcluded(residue, excluded);
        residueBits = 0;
        residue = 0;
      }
    }
    if (residueBits > 0) {
      packed[charIndex] = flagIfExcluded(residue, excluded);
    }
    return codeUnitsToString(packed);
  });

  /**
   * Reverses `Uint8Array.prototype.toUtf8`: reads the low 7 bits of every
   * character of this string and reassembles them into bytes. Bits above the
   * low 7 (such as the 0x8000 exclusion flag) are ignored, and trailing bits
   * that do not fill a whole byte are dropped, so strings carrying an extra
   * trailing padding character decode to the same bytes.
   *
   * @returns {Uint8Array} floor(7 * length / 8) decoded bytes; empty for ''.
   */
  defineMethod(String.prototype, 'unpackUint8Array', function () {
    const packed = String(this);
    const bytes = new Uint8Array(
      Math.floor((packed.length * UTF8_SIGNIFICANT_BITS_SIZE) / BITS_IN_BYTE),
    );
    let bits = 0; // accumulator holding the not-yet-emitted bits
    let pendingBits = 0; // number of valid bits in `bits`
    let byteIndex = 0;
    for (let i = 0; i < packed.length; i += 1) {
      bits = (bits << UTF8_SIGNIFICANT_BITS_SIZE) | (packed.charCodeAt(i) & PAYLOAD_MASK);
      pendingBits += UTF8_SIGNIFICANT_BITS_SIZE;
      if (pendingBits >= BITS_IN_BYTE) {
        pendingBits -= BITS_IN_BYTE;
        bytes[byteIndex++] = bits >>> pendingBits;
        // Keep only the bits that have not been written out yet.
        bits &= (1 << pendingBits) - 1;
      }
    }
    return bytes;
  });

  // Load-time smoke test: console.assert only logs on failure, it never throws.
  return (function tests() {
    function getBytes(str) {
      return new Uint8Array(Array.prototype.map.call(str, p => p.charCodeAt(0)));
    }
    // Covers the empty input, a length that is a multiple of 7, and the original sample.
    for (const str of ["", "sevench", "asdf\nqwerty"]) {
      const packed = getBytes(str).toUtf8('\n');
      console.assert(!packed.includes('\n'));
      console.assert(str === String.fromCharCode(...packed.unpackUint8Array()));
    }
  }());
}());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment