Last active
September 5, 2025 21:23
-
-
Save mgaitan/53c3b2988b7e6e7a7c2215e0bee8138b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Nothing to see here | |
But, damn, it sure is a catchy rhythm. | |
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# zw_secret.py | |
# Hide/reveal messages using zero-width characters (ZWSP/ZWNJ). | |
# Encoding: bit 0 -> U+200B (ZWSP), bit 1 -> U+200C (ZWNJ) | |
import argparse | |
import concurrent.futures as cf | |
import glob | |
import re | |
from dataclasses import dataclass | |
from pathlib import Path | |
from typing import Iterable, Iterator, Sequence | |
# Two-symbol alphabet of the hidden channel: every payload bit is written as
# one invisible character.
ZW0 = "\u200b"  # Zero Width Space -> bit 0
ZW1 = "\u200c"  # Zero Width Non-Joiner -> bit 1
# Matches one maximal run of consecutive ZWSP/ZWNJ characters, compiled once at
# import time so file scans can reuse it.
ZW_RUN_RE = re.compile(f"[{re.escape(ZW0 + ZW1)}]+")
# ---------------- core encode/decode ---------------- | |
def encode_text_to_zw(s: str) -> str:
    """Return *s* as an invisible bit stream.

    The text is UTF-8 encoded, every byte is expanded to eight bits (most
    significant bit first), and each bit is emitted as ZW0 (for 0) or ZW1
    (for 1).
    """
    raw = s.encode("utf-8")
    bit_to_symbol = str.maketrans({"0": ZW0, "1": ZW1})
    visible_bits = "".join(format(octet, "08b") for octet in raw)
    # One C-level pass swaps the '0'/'1' digits for their zero-width symbols.
    return visible_bits.translate(bit_to_symbol)
def decode_bits(bits: str) -> str:
    """Turn a string of '0'/'1' digits back into UTF-8 text.

    Trailing digits that do not fill a whole byte are discarded, and byte
    sequences that are not valid UTF-8 are silently dropped from the result.
    Returns an empty string when fewer than eight digits are supplied.
    """
    usable = (len(bits) // 8) * 8
    if usable <= 0:
        return ""
    octets = bytearray()
    for start in range(0, usable, 8):
        octets.append(int(bits[start:start + 8], 2))
    return bytes(octets).decode("utf-8", errors="ignore")
def decode_zw_to_text(s: str) -> str:
    """Recover the text hidden in the ZWSP/ZWNJ characters of *s*.

    Every character other than ZW0/ZW1 is skipped; the remaining symbols are
    mapped to bits (ZW0 -> 0, ZW1 -> 1) and handed to decode_bits(). Returns
    an empty string when *s* is empty or carries no zero-width symbols.
    """
    if not s:
        return ""
    symbol_to_bit = {ZW0: "0", ZW1: "1"}
    bits = "".join(symbol_to_bit[ch] for ch in s if ch in symbol_to_bit)
    return decode_bits(bits) if bits else ""
# ---------------- file IO helpers ---------------- | |
def append_payload_to_file(path: Path, payload: str) -> None:
    """Append *payload* (invisible chars) to the end of the file at *path*.

    Only the UTF-8 encoded payload is written; the existing content is never
    decoded or rewritten, so bytes that are not valid UTF-8 and the file's
    line endings are left exactly as they were.

    Raises:
        FileNotFoundError: if *path* does not exist (the file is not created).
        OSError: for any other failure to open or write the file.
    """
    import os

    # "r+b" (rather than "ab") keeps the failure on a missing file instead of
    # silently creating a new one that holds nothing but the payload.
    with path.open("r+b") as fh:
        fh.seek(0, os.SEEK_END)
        fh.write(payload.encode("utf-8"))
def inject_into_marker(path: Path, payload: str, marker: str) -> bool:
    """Append *payload* to the first line equal to *marker*.

    Lines are compared after stripping surrounding whitespace from both the
    line and the marker. The payload is inserted directly before the matched
    line's terminator. Every other byte of the file is written back as read:
    line terminators are kept, no trailing newline is added, and bytes that
    are not valid UTF-8 are round-tripped via ``surrogateescape`` (a line
    containing such bytes therefore never equals the marker).

    Returns:
        True if a marker line was found and the file was rewritten, else
        False (the file is left untouched).
    """
    wanted = marker.strip()
    # newline="" turns off newline translation so CRLF/CR survive the rewrite.
    with path.open("r", encoding="utf-8", errors="surrogateescape", newline="") as fh:
        pieces = fh.read().splitlines(keepends=True)
    for index, piece in enumerate(pieces):
        # Line terminators are whitespace, so strip() also removes them here.
        if piece.strip() != wanted:
            continue
        # Split the piece into its text and its (possibly empty) terminator.
        body = piece.splitlines()[0]
        pieces[index] = body + payload + piece[len(body):]
        with path.open("w", encoding="utf-8", errors="surrogateescape", newline="") as fh:
            fh.write("".join(pieces))
        return True
    return False
# ---------------- fast decode per file ---------------- | |
@dataclass
class Found:
    """One hidden message recovered from a file."""

    # File the message was found in.
    path: Path
    # Line number reported for the zero-width run that carried the message.
    line: int
    # Decoded text of that run.
    message: str
def decode_file_fast(path: Path) -> list[Found]:
    """Return every hidden message found in the file at *path*.

    The file is read once as UTF-8 (undecodable bytes are ignored) and scanned
    for runs of zero-width characters. Each run that decodes to non-empty text
    yields a Found carrying the 1-based line number where the run starts.
    Files that cannot be read produce an empty list.
    """
    try:
        content = path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        return []

    hits: list[Found] = []
    line_no = 1
    cursor = 0
    for run in ZW_RUN_RE.finditer(content):
        # Matches arrive in file order, so only the newlines between the
        # previous run and this one need counting.
        line_no += content.count("\n", cursor, run.start())
        cursor = run.start()
        secret = decode_zw_to_text(run.group(0))
        if secret:
            hits.append(Found(path=path, line=line_no, message=secret))
    return hits
# ---------------- glob expansion ---------------- | |
def expand_globs( | |
patterns: Sequence[str], | |
recursive: bool, | |
excludes: Sequence[str] | None = None, | |
) -> list[Path]: | |
""" | |
Expand glob patterns into a unique, ordered list of files. | |
- If a pattern points to a directory: | |
- recursive=False -> dir/* (non-recursive) | |
- recursive=True -> dir/**/* (recursive) | |
- Otherwise, use glob() as provided by the pattern. | |
""" | |
results: list[Path] = [] | |
def add_matches(pat: str) -> None: | |
for s in glob.glob(pat, recursive=recursive): | |
p = Path(s) | |
if p.is_file(): | |
results.append(p) | |
for pat in patterns: | |
p = Path(pat) | |
if p.exists() and p.is_dir() and ("*" not in pat and "?" not in pat and "[" not in pat): | |
# Directory without explicit glob: expand to files inside | |
if recursive: | |
add_matches(str(p / "**/*")) | |
else: | |
add_matches(str(p / "*")) | |
else: | |
add_matches(pat) | |
# Apply excludes (patterns evaluated recursively for simplicity) | |
if excludes: | |
excluded: set[str] = set() | |
for ex in excludes: | |
for s in glob.glob(ex, recursive=True): | |
excluded.add(str(Path(s).resolve())) | |
uniq: list[Path] = [] | |
seen: set[str] = set() | |
for f in results: | |
rp = str(f.resolve()) | |
if rp in excluded: | |
continue | |
if rp not in seen: | |
uniq.append(f) | |
seen.add(rp) | |
return uniq | |
# De-dup while preserving order | |
uniq: list[Path] = [] | |
seen: set[str] = set() | |
for f in results: | |
rp = str(f.resolve()) | |
if rp in seen: | |
continue | |
uniq.append(f) | |
seen.add(rp) | |
return uniq | |
# ---------------- CLI commands ---------------- | |
def cmd_inject(args: argparse.Namespace) -> int:
    """Handle the ``inject`` sub-command and return its exit status (always 0).

    The message is encoded once. With no target files the invisible payload is
    printed to stdout without a trailing newline. Otherwise, for each file,
    the payload is injected at the marker line when a marker was given and
    found, and appended to the end of the file in every other case.
    """
    hidden = encode_text_to_zw(args.message)
    if not args.files:
        # No files → print payload to stdout (invisible)
        print(hidden, end="")
        return 0

    for name in args.files:
        target = Path(name)
        placed = bool(args.marker) and inject_into_marker(target, hidden, args.marker)
        if not placed:
            # No marker requested, or the marker line was not present.
            append_payload_to_file(target, hidden)
        if args.verbose:
            print(f"[injected] {target}")
    return 0
def cmd_decode(args: argparse.Namespace) -> int:
    """Handle the ``decode`` sub-command.

    Expands the input patterns, scans every resulting file and prints one
    ``path:line: message`` line per hidden message. Files are scanned in a
    process pool when more than one job is requested, sequentially otherwise.

    Returns 0 if at least one message was printed, 1 if nothing was found or
    no file matched the inputs.
    """
    files = expand_globs(args.inputs, recursive=args.recursive, excludes=args.exclude or [])
    if not files:
        return 1

    printed = 0
    run_parallel = bool(args.jobs) and args.jobs > 1
    if not run_parallel:
        for file in files:
            for hit in decode_file_fast(file):
                print(f"{file}:{hit.line}: {hit.message}")
                printed += 1
    else:
        with cf.ProcessPoolExecutor(max_workers=args.jobs) as pool:
            for batch in pool.map(decode_file_fast, files, chunksize=16):
                for hit in batch:
                    print(f"{hit.path}:{hit.line}: {hit.message}")
                    printed += 1
    return 0 if printed else 1
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with its ``inject`` and ``decode`` sub-commands.

    Each sub-parser stores its handler in ``func`` so the caller can dispatch
    with ``args.func(args)``. A sub-command is required.
    """
    p = argparse.ArgumentParser(
        prog="zw_secret",
        description="Hide/reveal messages using zero-width characters (ZWSP/ZWNJ).",
    )
    sub = p.add_subparsers(dest="cmd", required=True)

    # inject
    pi = sub.add_parser("inject", help="Inject a message into files or stdout.")
    pi.add_argument("-m", "--message", required=True, help="Message to hide (UTF-8).")
    pi.add_argument("files", nargs="*", help="Target files (optional).")
    pi.add_argument("--marker", help="Inject at a line equal to this marker if present.")
    pi.add_argument("-v", "--verbose", action="store_true", help="Verbose output.")
    pi.set_defaults(func=cmd_inject)

    # decode
    pd = sub.add_parser(
        "decode",
        help="Reveal hidden messages from glob patterns and/or directories.",
        # The help for "inputs" is laid out by hand (newlines, aligned
        # examples); the default formatter would reflow it into one paragraph.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    pd.add_argument(
        "inputs",
        nargs="+",
        help=(
            "Glob patterns and/or directories. Examples:\n"
            "  '*.py'              # current dir, not recursive\n"
            "  src/*.py            # only src/ top-level .py\n"
            "  'src/**/*.py'       # recursive with ** (also pass --recursive)\n"
            "  src docs/*.md       # mix patterns and dirs"
        ),
    )
    pd.add_argument(
        "--recursive",
        action="store_true",
        help="Enable recursive globbing (needed for patterns with **).",
    )
    pd.add_argument(
        "--exclude",
        nargs="*",
        # "extend" accumulates values across repeated --exclude flags, as the
        # help text promises; the default "store" action kept only the last.
        action="extend",
        help="Exclude patterns (can be given multiple); evaluated recursively.",
    )
    pd.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=1,
        help="Parallel jobs (processes) for decoding; default: 1",
    )
    pd.set_defaults(func=cmd_decode)
    return p
def main() -> int:
    """Parse the command line and run the selected sub-command.

    Returns the integer produced by the sub-command handler.
    """
    args = build_parser().parse_args()
    handler = args.func
    return handler(args)
# Script entry point: run the CLI and hand main()'s return value to
# SystemExit so it becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Author
mgaitan
commented
Sep 5, 2025

Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment