Skip to content

Instantly share code, notes, and snippets.

@Wind010
Created March 17, 2025 22:08
Show Gist options
  • Save Wind010/cda9b416bac81e60dc2c88dd96547923 to your computer and use it in GitHub Desktop.
Save Wind010/cda9b416bac81e60dc2c88dd96547923 to your computer and use it in GitHub Desktop.
Encoding and decoding text using Unicode variation selectors that can be used for smuggling messages.
# Based on: https://paulbutler.org/2025/smuggling-arbitrary-data-through-an-emoji/
import argparse
from typing import Optional
# Text codec used to turn the message into bytes before mapping to selectors.
ENCODING = "utf-8"

# Number of selectors in the basic block (VS1..=VS16); byte values below this
# map into that block, the rest map into the supplement block.
HEX_RANGE = 0x10
# Total number of variation selectors (VS1..=VS256), i.e. one per byte value.
CODE_POINT_RANGE = 0x100

# Variation Selectors block, VS1..=VS16:
# https://unicode.org/charts/nameslist/n_FE00.html
VARIATION_SELECTOR_START = 0xFE00
VARIATION_SELECTOR_END = VARIATION_SELECTOR_START + HEX_RANGE - 1  # 0xFE0F

# Variation Selectors Supplement block, VS17..=VS256:
# https://unicode.org/charts/nameslist/n_E0100.html
VARIATION_SELECTOR_SUPPLEMENT_START = 0xE0100
VARIATION_SELECTOR_SUPPLEMENT_END = (
    VARIATION_SELECTOR_SUPPLEMENT_START + (CODE_POINT_RANGE - HEX_RANGE) - 1
)  # 0xE01EF
def to_variation_selector(byte: int) -> Optional[str]:
    """Map a byte value to the variation selector character representing it.

    Values below ``HEX_RANGE`` map into the basic Variation Selectors block
    starting at ``VARIATION_SELECTOR_START``; values from ``HEX_RANGE`` up to
    (but excluding) ``CODE_POINT_RANGE`` map into the supplement block
    starting at ``VARIATION_SELECTOR_SUPPLEMENT_START``.

    Args:
        byte: Integer value to convert.

    Returns:
        A one-character string holding the selector, or ``None`` when *byte*
        is negative or not below ``CODE_POINT_RANGE``.
    """
    if 0 <= byte < HEX_RANGE:
        return chr(VARIATION_SELECTOR_START + byte)
    if HEX_RANGE <= byte < CODE_POINT_RANGE:
        # The supplement block begins at byte value HEX_RANGE, so rebase to 0.
        return chr(VARIATION_SELECTOR_SUPPLEMENT_START + byte - HEX_RANGE)
    return None
def from_variation_selector(code_point: int) -> Optional[int]:
    """Map a variation selector code point back to the byte value it encodes.

    This is the inverse of ``to_variation_selector``: code points in the
    basic block yield ``0 .. HEX_RANGE - 1`` and code points in the
    supplement block yield values starting at ``HEX_RANGE``.

    Args:
        code_point: Unicode code point (e.g. the result of ``ord(char)``).

    Returns:
        The decoded byte value, or ``None`` when *code_point* lies in neither
        variation selector block.
    """
    if VARIATION_SELECTOR_START <= code_point <= VARIATION_SELECTOR_END:
        return code_point - VARIATION_SELECTOR_START
    if VARIATION_SELECTOR_SUPPLEMENT_START <= code_point <= VARIATION_SELECTOR_SUPPLEMENT_END:
        # Supplement selectors continue numbering where the basic block ends.
        return code_point - VARIATION_SELECTOR_SUPPLEMENT_START + HEX_RANGE
    return None
def encode(base_char: str, text: str) -> str:
    """Hide *text* behind *base_char* as a run of variation selectors.

    The text is encoded to bytes with ``ENCODING`` and each byte is replaced
    by the selector returned from ``to_variation_selector``.

    Args:
        base_char: Visible string the selectors are appended to.
        text: Message to hide.

    Returns:
        *base_char* followed by one variation selector per encoded byte.

    Raises:
        ValueError: If a byte cannot be mapped to a variation selector.
    """
    # Named ``payload`` rather than ``bytes`` so the builtin is not shadowed.
    payload = text.encode(ENCODING)
    selectors = []
    for byte in payload:
        selector = to_variation_selector(byte)
        if selector is None:
            raise ValueError(f"Byte value {byte} out of range for variation selector")
        selectors.append(selector)
    # Join once instead of growing the string with repeated ``+=``.
    return base_char + "".join(selectors)
def decode(encoded_text: str) -> str:
    """Recover the hidden message from a string carrying variation selectors.

    Every character that ``from_variation_selector`` recognises contributes
    one byte; all other characters are skipped. The collected bytes are then
    decoded with ``ENCODING``, the same codec ``encode`` uses.

    NOTE: selectors are collected from the whole string regardless of
    position, so a selector that is part of the visible text itself (for
    example U+FE0F inside an emoji sequence) is treated as payload too.

    Args:
        encoded_text: String that may contain variation selectors.

    Returns:
        The decoded message; an empty string when no selectors are present.

    Raises:
        UnicodeDecodeError: If the collected bytes are not valid ``ENCODING``.
    """
    decoded_bytes = []
    # A str is already iterable per code point; no intermediate list needed.
    for char in encoded_text:
        byte = from_variation_selector(ord(char))
        if byte is not None:
            decoded_bytes.append(byte)
    return bytes(decoded_bytes).decode(ENCODING)
def main():
    """Command-line entry point: encode or decode the text given by ``--input``.

    Exactly one of ``--encode`` / ``--decode`` must be supplied. The result
    of the chosen action is printed to standard output.
    """
    parser = argparse.ArgumentParser(description="Encode or decode variation selectors.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--encode", action="store_true", help="Encode the input text")
    group.add_argument("--decode", action="store_true", help="Decode the input variation selectors")
    # Both actions need text to work on. Without required=True a missing
    # --input reached encode()/decode() as None and crashed with a traceback
    # instead of a usage error.
    parser.add_argument("--input", required=True, help="Input string or encoded string for decoding")
    # --base always has a value because of its default, so no None check is
    # needed before encoding.
    parser.add_argument("--base", help="Base character for encoding (required for --encode action)", default='😏')
    args = parser.parse_args()

    if args.encode:
        print(encode(args.base, text=args.input))
    elif args.decode:
        print(decode(args.input))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment