Created
March 17, 2025 22:08
-
-
Save Wind010/cda9b416bac81e60dc2c88dd96547923 to your computer and use it in GitHub Desktop.
Encoding and decoding text using Unicode variation selectors that can be used for smuggling messages.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on: https://paulbutler.org/2025/smuggling-arbitrary-data-through-an-emoji/
import argparse
from typing import Optional
# Text encoding used to turn the hidden message into bytes (and back).
ENCODING = 'utf-8'

# Number of selectors in the basic block (VS1..VS16); byte values below this
# threshold map into the basic block, the rest into the supplement block.
HEX_RANGE = 16
# Number of distinct byte values (0..255); each one has its own selector.
CODE_POINT_RANGE = 256

# Variation selectors block https://unicode.org/charts/nameslist/n_FE00.html
# VS1..=VS16
VARIATION_SELECTOR_START = 0xFE00
VARIATION_SELECTOR_END = 0xFE0F

# Variation selectors supplement https://unicode.org/charts/nameslist/n_E0100.html
# VS17..=VS256
VARIATION_SELECTOR_SUPPLEMENT_START = 0xE0100
VARIATION_SELECTOR_SUPPLEMENT_END = 0xE01EF
def to_variation_selector(byte: int) -> Optional[str]:
    """Map a byte value to the variation selector character that represents it.

    Values 0..15 map onto the basic block starting at
    ``VARIATION_SELECTOR_START``; values 16..255 map onto the supplement block
    starting at ``VARIATION_SELECTOR_SUPPLEMENT_START``.

    Args:
        byte: Integer byte value to convert.

    Returns:
        The single-character selector string, or ``None`` when ``byte`` is
        outside ``0 <= byte < CODE_POINT_RANGE``.
    """
    if 0 <= byte < HEX_RANGE:
        return chr(VARIATION_SELECTOR_START + byte)
    if HEX_RANGE <= byte < CODE_POINT_RANGE:
        # The supplement block starts at byte value HEX_RANGE, so rebase to 0.
        return chr(VARIATION_SELECTOR_SUPPLEMENT_START + byte - HEX_RANGE)
    return None
def from_variation_selector(code_point: int) -> Optional[int]:
    """Map a variation selector code point back to the byte value it carries.

    Inverse of ``to_variation_selector``: code points in the basic block yield
    0..15, code points in the supplement block yield 16..255.

    Args:
        code_point: Unicode code point (e.g. the result of ``ord(char)``).

    Returns:
        The byte value, or ``None`` when ``code_point`` lies in neither
        variation selector block.
    """
    if VARIATION_SELECTOR_START <= code_point <= VARIATION_SELECTOR_END:
        return code_point - VARIATION_SELECTOR_START
    if VARIATION_SELECTOR_SUPPLEMENT_START <= code_point <= VARIATION_SELECTOR_SUPPLEMENT_END:
        # Supplement selectors continue where the basic block stopped.
        return code_point - VARIATION_SELECTOR_SUPPLEMENT_START + HEX_RANGE
    return None
def encode(base_char: str, text: str) -> str:
    """Hide ``text`` behind ``base_char`` as a run of variation selectors.

    ``text`` is encoded to bytes with ``ENCODING`` and every byte is replaced
    by its variation selector; the selectors are appended to ``base_char``.

    Args:
        base_char: Visible string the selectors are attached to.
        text: Message to hide.

    Returns:
        ``base_char`` followed by one selector character per encoded byte.

    Raises:
        ValueError: If a byte has no corresponding variation selector.
    """
    selectors = []
    for byte in text.encode(ENCODING):
        selector = to_variation_selector(byte)
        if selector is None:
            raise ValueError(f"Byte value {byte} out of range for variation selector")
        selectors.append(selector)
    # Join once instead of growing the string with += inside the loop.
    return base_char + "".join(selectors)
def decode(encoded_text: str) -> str:
    """Recover the hidden message from a string carrying variation selectors.

    Every character of ``encoded_text`` that is a variation selector
    contributes one byte; all other characters are skipped. Note that a
    selector that is part of the visible text itself (rather than of the
    hidden payload) is also picked up as a byte.

    Args:
        encoded_text: String produced by ``encode`` (or any text containing
            variation selectors).

    Returns:
        The collected bytes decoded with ``ENCODING``; an empty string when
        no selectors are present.

    Raises:
        UnicodeDecodeError: If the collected bytes are not valid ``ENCODING``
            data.
    """
    candidates = (from_variation_selector(ord(char)) for char in encoded_text)
    payload = bytes(byte for byte in candidates if byte is not None)
    # Use the same constant as encode() so both directions always agree.
    return payload.decode(ENCODING)
def main():
    """Command-line entry point: encode or decode the string given by --input.

    Exactly one of ``--encode`` / ``--decode`` must be supplied. With
    ``--encode`` the input is hidden behind ``--base`` and the result is
    printed; with ``--decode`` the hidden message is extracted and printed.
    """
    parser = argparse.ArgumentParser(description="Encode or decode variation selectors.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--encode", action="store_true", help="Encode the input text")
    group.add_argument("--decode", action="store_true", help="Decode the input variation selectors")
    # Required: both actions operate on this string, and handing None to
    # encode()/decode() would crash with an AttributeError/TypeError instead
    # of a usage message.
    parser.add_argument("--input", required=True, help="Input string or encoded string for decoding")
    # --base always has a value thanks to its default, so no None check is needed.
    parser.add_argument("--base", help="Base character for encoding (required for --encode action)", default='😏')
    args = parser.parse_args()

    if args.encode:
        print(encode(args.base, text=args.input))
    else:
        # The action group is required and mutually exclusive, so this is --decode.
        print(decode(args.input))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment