Created
May 11, 2022 21:03
-
-
Save wjandrea/2ad5710bb2fd657739fcd585e0287d46 to your computer and use it in GitHub Desktop.
The code for https://puzzling.stackexchange.com/q/116097/63368
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Use Unicode flag emoji homoglyphs to re-encode UTF-8.
To encode, a string is first encoded as UTF-8, then each byte is
broken down into bits, and each bit is encoded as a flag:
The more well-known country flag in each pair represents 0, and the
less well-known one represents 1. Pairs are cycled through.
There are 8 pairs total, which is the most I could find without repeats.
(Norway has two homoglyphs, so I only included one.)
Note that if any of these countries change their flags in the future,
this'll stop working, because the glyphs aren't specified in Unicode.
It's implementation-dependent what each REGIONAL INDICATOR SYMBOL
sequence represents and what it looks like.
Created for Puzzling Stack Exchange and posted there:
https://puzzling.stackexchange.com/q/116097/63368
"""
from itertools import cycle as _cycle | |
from typing import ( | |
Dict as _Dict, | |
Iterable as _Iterable, | |
List as _List, | |
Tuple as _Tuple, | |
) | |
def _convert_country_code_to_ris(country_code: str) -> str:
    r"""
    Convert ASCII uppercase letters to REGIONAL INDICATOR SYMBOLs.

    Every letter 'A'..'Z' is shifted by a fixed offset onto the matching
    REGIONAL INDICATOR SYMBOL LETTER. An empty string is returned
    unchanged.

    Raise ValueError if any character is not an ASCII uppercase letter.

    >>> _convert_country_code_to_ris('CA') == '\U0001f1e8\U0001f1e6'
    True
    """
    # Validate with a real exception rather than `assert`: assertions are
    # removed under `python -O`, which would let bad input through and
    # produce unrelated code points.
    if not all('A' <= c <= 'Z' for c in country_code):
        raise ValueError(
            "Expected only ASCII uppercase letters A-Z, "
            f"got {country_code!r}")
    # Distance between 'A' and its regional-indicator counterpart; the
    # same distance applies to every letter up to 'Z'.
    shift: int = ord('\N{REGIONAL INDICATOR SYMBOL LETTER A}') - ord('A')
    return ''.join(chr(ord(c) + shift) for c in country_code)
# Two-letter codes of look-alike flags, as (bit 0, bit 1) pairs: the
# better-known flag comes first and its homoglyph second.
_COUNTRY_CODES: _List[_Tuple[str, str]] = [
    ('RO', 'TD'),  # Romania / Chad
    ('ID', 'MC'),  # Indonesia / Monaco (not identical, but close enough)
    ('US', 'UM'),  # USA / USA Minor Outlying Islands
    ('NO', 'SJ'),  # Norway / Svalbard & Jan Mayen
    ('FR', 'MF'),  # France / St. Martin
    ('AU', 'HM'),  # Australia / Heard & McDonald Islands
    ('ES', 'EA'),  # Spain / Ceuta & Melilla
    ('FR', 'CP'),  # France / Clipperton Island
    # Left out: ('NO', 'BV'), Norway / Bouvet Island - a second Norway
    # homoglyph, so only one of the two is used.
]
# The code pairs above as REGIONAL INDICATOR SYMBOL strings, in the same
# order: element 0 of each tuple stands for bit 0, element 1 for bit 1.
FLAG_PAIRS: _List[_Tuple[str, str]] = [
    (
        _convert_country_code_to_ris(zero_code),
        _convert_country_code_to_ris(one_code),
    )
    for zero_code, one_code in _COUNTRY_CODES
]
def encode(message: str) -> _Iterable[str]:
    """
    Turn a message into flag homoglyphs, one string per UTF-8 byte.

    The message is encoded as UTF-8. Each byte is written as eight bits,
    most significant first, and every bit becomes a flag: the first flag
    of the current pair for 0, the second for 1. Successive bytes use
    successive entries of FLAG_PAIRS, starting over after the last one.

    This is a generator; each yielded string is one encoded byte.

    >>> list(encode('ba'))
    ['🇷🇴🇹🇩🇹🇩🇷🇴🇷🇴🇷🇴🇹🇩🇷🇴', '🇮🇩🇲🇨🇲🇨🇮🇩🇮🇩🇮🇩🇮🇩🇲🇨']
    """
    utf8: bytes = message.encode('utf-8')
    for octet, (zero_flag, one_flag) in zip(utf8, _cycle(FLAG_PAIRS)):
        # Zero-padded to 8 digits so every byte yields exactly 8 flags.
        bits: str = format(octet, '08b')
        yield ''.join(
            one_flag if bit == '1' else zero_flag
            for bit in bits)
def _flag_byte_to_int(flag_byte: str, flag0: str, flag1: str) -> int:
    """Parse one flag-encoded byte strictly; raise ValueError if malformed."""
    bits: _List[str] = []
    pos = 0
    # Scan left to right so a flag is only recognised at a flag boundary,
    # and so that any character that is not part of a flag is rejected
    # instead of being handed to int().
    while pos < len(flag_byte):
        for flag, bit in ((flag0, '0'), (flag1, '1')):
            # `flag and` guards against an empty flag, which would match
            # everywhere without consuming any input.
            if flag and flag_byte.startswith(flag, pos):
                bits.append(bit)
                pos += len(flag)
                break
        else:
            raise ValueError(f"Could not decode: {flag_byte!r}")
    if not bits:
        # Empty input: there is nothing to turn into a byte.
        raise ValueError(f"Could not decode: {flag_byte!r}")
    value = int(''.join(bits), 2)
    if value > 0xFF:
        # Too many significant flags to fit in a single byte.
        raise ValueError(f"Could not decode: {flag_byte!r}")
    return value


def decode(flag_encoded: _Iterable[str]) -> str:
    """
    Decode message from flag homoglyphs then UTF-8.

    I.e. do the opposite of `encode()`.

    `flag_encoded` yields one string per encoded byte. The n-th string
    is read with the n-th entry of FLAG_PAIRS (wrapping around): the
    pair's first flag is bit 0, its second flag is bit 1. The resulting
    bytes are decoded as UTF-8.

    Raise ValueError if a string is empty, contains anything other than
    the two flags expected at its position, or encodes a value above
    255. Invalid UTF-8 raises UnicodeDecodeError, a ValueError subclass.

    >>> decode(encode('ba'))
    'ba'
    """
    message_ints: _List[int] = [
        _flag_byte_to_int(flag_byte, flag0, flag1)
        for flag_byte, (flag0, flag1) in zip(flag_encoded, _cycle(FLAG_PAIRS))
    ]
    return bytes(message_ints).decode('utf-8')
def main() -> None:
    """Build the puzzle's body and title, print them, and self-check."""
    # Body: the whole message, one encoded byte per output line.
    body_message = 'You have solved this puzzle!'
    body = [*encode(body_message)]
    print('\n'.join(body))
    assert decode(body) == body_message

    print()

    # Title - all Romania/Chad. Each character is encoded (and decoded)
    # on its own so the pair cycle restarts every time and only the first
    # pair is ever used.
    title_message = 'TD'
    title = [next(iter(encode(letter))) for letter in title_message]
    print(' '.join(title))
    assert ''.join(decode([flags]) for flags in title) == title_message
# Print the puzzle only when run as a script, not when imported.
if __name__ == "__main__":
    main()
BTW, if this seems over-engineered, it's because I'm practicing writing rigorous Python (typing, assertions, documentation, etc).
Please don't justify yourself, thank you for this funny encoder ;)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
BTW, if this seems over-engineered, it's because I'm practicing writing rigorous Python (typing, assertions, documentation, etc).