Skip to content

Instantly share code, notes, and snippets.

@mgaitan
Last active September 5, 2025 21:23
Show Gist options
  • Save mgaitan/53c3b2988b7e6e7a7c2215e0bee8138b to your computer and use it in GitHub Desktop.
Save mgaitan/53c3b2988b7e6e7a7c2215e0bee8138b to your computer and use it in GitHub Desktop.
# Nothing to see here ​‌​​​​​‌​‌‌​‌‌​​​‌‌‌​‌​​​‌‌​​​​‌​​‌​​​​​​‌‌​​​‌‌​‌‌​‌‌‌‌​‌‌​‌​​‌​‌‌​‌‌​‌​‌‌​​‌​‌​‌‌‌​​‌​​‌‌​​​​‌​​‌​​​​​‌‌‌‌​​​​‌​​‌‌‌‌‌‌​​​‌‌‌​‌​‌‌​‌​‌​​‌​‌‌‌​​​‌​‌‌‌​​​‌​‌‌‌​
But, damn, it sure is a catchy rhythm.
​‌​​‌​‌‌​‌‌​​​​‌​‌‌‌​​‌​​‌‌​‌​​‌​‌‌​‌‌‌​​‌‌​​​​‌​​‌​​​​​​‌‌​​‌​‌​‌‌‌​​‌‌​​‌​​​​​​‌‌​​​​‌​‌‌​‌‌​​​‌‌‌​‌​​​‌‌​​​​‌​​‌​​​​​​‌‌​​​‌‌​‌‌​‌‌‌‌​‌‌​‌​​‌​‌‌​‌‌​‌​‌‌​​‌​‌​‌‌‌​​‌​​‌‌​​​​‌
#!/usr/bin/env python3
# zw_secret.py
# Hide/reveal messages using zero-width characters (ZWSP/ZWNJ).
# Encoding: bit 0 -> U+200B (ZWSP), bit 1 -> U+200C (ZWNJ)
import argparse
import concurrent.futures as cf
import glob
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Iterator, Sequence
# The two-symbol alphabet used to carry hidden bits.
ZW0 = "\u200b"  # bit 0: ZERO WIDTH SPACE
ZW1 = "\u200c"  # bit 1: ZERO WIDTH NON-JOINER
# Matches one maximal run of consecutive ZW0/ZW1 characters.
ZW_RUN_RE = re.compile("[" + re.escape(ZW0 + ZW1) + "]+")
# ---------------- core encode/decode ----------------
def encode_text_to_zw(s: str) -> str:
    """Return *s* as a string of zero-width characters.

    The text is UTF-8 encoded and every byte is emitted most-significant
    bit first, using ZW0 for a 0 bit and ZW1 for a 1 bit.
    """
    symbols = (ZW0, ZW1)  # indexable by the bit value itself
    pieces = []
    for octet in s.encode("utf-8"):
        for shift in range(7, -1, -1):
            pieces.append(symbols[(octet >> shift) & 1])
    return "".join(pieces)
def decode_bits(bits: str) -> str:
    """Turn a string of '0'/'1' characters back into UTF-8 text.

    Trailing bits that do not fill a whole byte are dropped, and byte
    sequences that are not valid UTF-8 are skipped rather than raising.
    """
    whole_bytes, _leftover = divmod(len(bits), 8)
    if whole_bytes == 0:
        return ""
    raw = bytearray()
    for index in range(whole_bytes):
        start = index * 8
        raw.append(int(bits[start:start + 8], 2))
    return raw.decode("utf-8", errors="ignore")
def decode_zw_to_text(s: str) -> str:
    """Recover the hidden text carried by the ZW0/ZW1 characters in *s*.

    Every other character is skipped; the result is "" when *s* is empty
    or contains no zero-width symbols.
    """
    if not s:
        return ""
    bit_for = {ZW0: "0", ZW1: "1"}
    bits = "".join(bit_for.get(ch, "") for ch in s)
    return decode_bits(bits) if bits else ""
# ---------------- file IO helpers ----------------
def append_payload_to_file(path: Path, payload: str) -> None:
    """Append *payload* (UTF-8 encoded) to the end of the file at *path*.

    The existing content is never decoded or rewritten, so bytes that are
    not valid UTF-8 and CRLF line endings are left exactly as they were.

    Raises:
        FileNotFoundError: if *path* does not exist (no file is created).
    """
    # "r+b" rather than "ab": keeps the failure on a missing/mistyped path
    # instead of silently creating a file that holds only invisible text.
    with path.open("r+b") as fh:
        fh.seek(0, 2)  # whence=2 -> seek relative to end of file
        fh.write(payload.encode("utf-8"))
def inject_into_marker(path: Path, payload: str, marker: str) -> bool:
    """Append *payload* to the first line whose stripped text equals *marker*.

    Both the line and *marker* are compared after stripping surrounding
    whitespace. The payload is placed at the end of the matching line,
    before its line terminator. All other content is written back
    unchanged: original line endings are kept, no trailing newline is
    added, and bytes that are not valid UTF-8 are round-tripped.

    Returns:
        True if a matching line was found and the file was rewritten,
        False otherwise (the file is not touched).
    """
    # newline="" turns off newline translation; surrogateescape lets
    # undecodable bytes survive the read/write round trip.
    with path.open("r", encoding="utf-8", errors="surrogateescape", newline="") as fh:
        lines = fh.read().splitlines(keepends=True)

    wanted = marker.strip()
    for i, line in enumerate(lines):
        if line.strip() != wanted:
            continue
        # Split the line into its text and its terminator so the payload
        # lands before the line break, not after it.
        content = line.splitlines()[0]
        ending = line[len(content):]
        lines[i] = content + payload + ending
        with path.open("w", encoding="utf-8", errors="surrogateescape", newline="") as fh:
            fh.write("".join(lines))
        return True
    return False
# ---------------- fast decode per file ----------------
@dataclass
class Found:
    """One hidden message recovered while scanning a file."""

    # File the message was read from.
    path: Path
    # 1-based line number at which the zero-width run starts.
    line: int
    # Text decoded from the zero-width run.
    message: str
def decode_file_fast(path: Path) -> list[Found]:
    """Return every hidden message found in the file at *path*.

    The file is read once (undecodable bytes are ignored) and scanned for
    runs of zero-width characters. Each run that decodes to non-empty text
    yields a Found carrying the 1-based line on which the run starts.
    An unreadable or empty file produces an empty list.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        return []
    if not text:
        return []

    hits: list[Found] = []
    # Matches arrive in increasing offset order, so the line number can be
    # maintained by counting only the newlines since the previous match.
    line_no = 1
    cursor = 0
    for run in ZW_RUN_RE.finditer(text):
        offset = run.start()
        line_no += text.count("\n", cursor, offset)
        cursor = offset
        message = decode_zw_to_text(run.group(0))
        if not message:
            continue
        hits.append(Found(path=path, line=line_no, message=message))
    return hits
# ---------------- glob expansion ----------------
def expand_globs(
patterns: Sequence[str],
recursive: bool,
excludes: Sequence[str] | None = None,
) -> list[Path]:
"""
Expand glob patterns into a unique, ordered list of files.
- If a pattern points to a directory:
- recursive=False -> dir/* (non-recursive)
- recursive=True -> dir/**/* (recursive)
- Otherwise, use glob() as provided by the pattern.
"""
results: list[Path] = []
def add_matches(pat: str) -> None:
for s in glob.glob(pat, recursive=recursive):
p = Path(s)
if p.is_file():
results.append(p)
for pat in patterns:
p = Path(pat)
if p.exists() and p.is_dir() and ("*" not in pat and "?" not in pat and "[" not in pat):
# Directory without explicit glob: expand to files inside
if recursive:
add_matches(str(p / "**/*"))
else:
add_matches(str(p / "*"))
else:
add_matches(pat)
# Apply excludes (patterns evaluated recursively for simplicity)
if excludes:
excluded: set[str] = set()
for ex in excludes:
for s in glob.glob(ex, recursive=True):
excluded.add(str(Path(s).resolve()))
uniq: list[Path] = []
seen: set[str] = set()
for f in results:
rp = str(f.resolve())
if rp in excluded:
continue
if rp not in seen:
uniq.append(f)
seen.add(rp)
return uniq
# De-dup while preserving order
uniq: list[Path] = []
seen: set[str] = set()
for f in results:
rp = str(f.resolve())
if rp in seen:
continue
uniq.append(f)
seen.add(rp)
return uniq
# ---------------- CLI commands ----------------
def cmd_inject(args: argparse.Namespace) -> int:
    """Handle the ``inject`` sub-command; always returns exit status 0.

    With no target files the encoded message is written to stdout without
    a trailing newline. Otherwise each file gets the payload on its marker
    line when ``--marker`` is given and found, or at end of file when it is
    not.
    """
    hidden = encode_text_to_zw(args.message)

    if not args.files:
        # No files → print payload to stdout (invisible)
        print(hidden, end="")
        return 0

    for name in args.files:
        target = Path(name)
        placed = bool(args.marker) and inject_into_marker(target, hidden, args.marker)
        if not placed:
            # No marker requested, or none matched: fall back to appending.
            append_payload_to_file(target, hidden)
        if args.verbose:
            print(f"[injected] {target}")
    return 0
def cmd_decode(args: argparse.Namespace) -> int:
    """Handle the ``decode`` sub-command.

    Prints one ``path:line: message`` row per hidden message found in the
    expanded inputs. Returns 0 when at least one message was printed and 1
    when nothing matched the inputs or no message was found.
    """
    targets = expand_globs(args.inputs, recursive=args.recursive, excludes=args.exclude or [])
    if not targets:
        return 1

    def report(items) -> int:
        """Print each finding and return how many were printed."""
        printed = 0
        for item in items:
            print(f"{item.path}:{item.line}: {item.message}")
            printed += 1
        return printed

    hits = 0
    use_pool = bool(args.jobs) and args.jobs > 1
    if use_pool:
        with cf.ProcessPoolExecutor(max_workers=args.jobs) as pool:
            for per_file in pool.map(decode_file_fast, targets, chunksize=16):
                hits += report(per_file)
    else:
        for target in targets:
            hits += report(decode_file_fast(target))
    return 0 if hits else 1
def build_parser() -> argparse.ArgumentParser:
    """Build the ``zw_secret`` command-line parser.

    Two sub-commands are registered, and each stores its handler in the
    ``func`` default so the caller can dispatch with ``args.func(args)``:

    - ``inject``: hide ``--message`` in the given files, or print the
      payload to stdout when no files are given.
    - ``decode``: reveal hidden messages from glob patterns/directories.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    p = argparse.ArgumentParser(
        prog="zw_secret",
        description="Hide/reveal messages using zero-width characters (ZWSP/ZWNJ).",
    )
    sub = p.add_subparsers(dest="cmd", required=True)

    # inject
    pi = sub.add_parser("inject", help="Inject a message into files or stdout.")
    pi.add_argument("-m", "--message", required=True, help="Message to hide (UTF-8).")
    pi.add_argument("files", nargs="*", help="Target files (optional).")
    pi.add_argument("--marker", help="Inject at a line equal to this marker if present.")
    pi.add_argument("-v", "--verbose", action="store_true", help="Verbose output.")
    pi.set_defaults(func=cmd_inject)

    # decode
    pd = sub.add_parser(
        "decode",
        help="Reveal hidden messages from glob patterns and/or directories.",
    )
    pd.add_argument(
        "inputs",
        nargs="+",
        help=(
            "Glob patterns and/or directories. Examples:\n"
            " '*.py' # current dir, not recursive\n"
            " src/*.py # only src/ top-level .py\n"
            " 'src/**/*.py' # recursive with ** (also pass --recursive)\n"
            " src docs/*.md # mix patterns and dirs"
        ),
    )
    pd.add_argument(
        "--recursive",
        action="store_true",
        help="Enable recursive globbing (needed for patterns with **).",
    )
    pd.add_argument(
        "--exclude",
        nargs="*",
        # "extend" accumulates across repeated flags; the default store
        # action would keep only the patterns of the last --exclude.
        action="extend",
        help="Exclude patterns (can be given multiple); evaluated recursively.",
    )
    pd.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=1,
        help="Parallel jobs (processes) for decoding; default: 1",
    )
    pd.set_defaults(func=cmd_decode)
    return p
def main() -> int:
    """Parse the command line and run the selected sub-command.

    Returns the integer exit status produced by the sub-command handler.
    """
    namespace = build_parser().parse_args()
    handler = namespace.func
    return handler(namespace)
# Script entry point: main()'s return value is passed to SystemExit.
if __name__ == "__main__":
    raise SystemExit(main())
@mgaitan
Copy link
Author

mgaitan commented Sep 5, 2025

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment