Last active
February 12, 2025 21:01
-
-
Save felipecsl/f0c01de20a147ac91d149bdb77b31fa3 to your computer and use it in GitHub Desktop.
Smuggling/fingerprinting text with Unicode variation selectors
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Source: https://paulbutler.org/2025/smuggling-arbitrary-data-through-an-emoji/
/**
 * Maps a single byte to the Unicode variation selector that represents it.
 *
 * Bytes 0-15 map onto U+FE00..U+FE0F; bytes 16-255 map onto U+E0100..U+E01EF.
 *
 * @param {number} byte - Integer in the range 0-255.
 * @returns {string} A string holding exactly one variation-selector code point.
 * @throws {RangeError} If `byte` is not an integer in the range 0-255.
 */
function byteToVariationSelector(byte) {
  // Anything outside 0..255 would land on a code point that is not a
  // variation selector, so reject it instead of emitting garbage.
  if (!Number.isInteger(byte) || byte < 0 || byte > 255) {
    throw new RangeError(`byte must be an integer in 0..255, got ${String(byte)}`);
  }
  if (byte < 16) {
    return String.fromCodePoint(0xFE00 + byte);
  }
  return String.fromCodePoint(0xE0100 + (byte - 16));
}
/**
 * Appends `str` to `base` as a run of variation selectors, one selector per
 * UTF-8 byte of `str`.
 *
 * @param {string} base - Text the selectors are appended to (e.g. an emoji).
 * @param {string} str - Payload; it is UTF-8 encoded before being mapped.
 * @returns {string} `base` followed by one variation selector per payload byte.
 */
function encode(base, str) {
  const bytes = new TextEncoder().encode(str);
  let result = base;
  for (const byte of bytes) {
    result += byteToVariationSelector(byte);
  }
  return result;
}
/**
 * Maps a variation selector back to the byte it represents.
 *
 * Only the first code point of the argument is examined.
 *
 * @param {string} variationSelector - String whose first code point is tested.
 * @returns {number|null} 0-15 for U+FE00..U+FE0F, 16-255 for U+E0100..U+E01EF,
 *   or `null` when the first code point is in neither range.
 */
function variationSelectorToByte(variationSelector) {
  const codePoint = variationSelector.codePointAt(0);
  if (codePoint >= 0xFE00 && codePoint <= 0xFE0F) {
    return codePoint - 0xFE00;
  }
  if (codePoint >= 0xE0100 && codePoint <= 0xE01EF) {
    return codePoint - 0xE0100 + 16;
  }
  // An empty string yields `undefined` here, which fails both range checks.
  return null;
}
/**
 * Recovers the text hidden in the first run of variation selectors.
 *
 * Code points before the first variation selector are skipped. Once at least
 * one selector has been read, the first non-selector ends the run. The
 * collected bytes are decoded as UTF-8, mirroring the UTF-8 encoding applied
 * by `encode`.
 *
 * @param {string} variationSelectors - Text that may contain variation selectors.
 * @returns {string} The decoded payload, or an empty string if no selector is found.
 */
function decode(variationSelectors) {
  const bytes = [];
  for (const char of variationSelectors) {
    const byte = variationSelectorToByte(char);
    if (byte !== null) {
      bytes.push(byte);
    } else if (bytes.length > 0) {
      // The selector run has ended; anything after it is not payload.
      break;
    }
    // Ignore non-variation selectors until we have encountered the first one.
  }
  // The bytes are UTF-8, so they must go through a real decoder rather than
  // being treated as UTF-16 code units. `ignoreBOM: true` keeps a leading
  // U+FEFF in the payload instead of silently dropping it.
  return new TextDecoder('utf-8', { ignoreBOM: true }).decode(Uint8Array.from(bytes));
}
// Example: hide the UTF-8 bytes of 'hello world' after the emoji.
// The returned string is discarded here.
encode('😊', 'hello world');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment