mgaitan · September 5, 2025 21:23 · mgaitan · Sep 5, 2025
diff --git a/example.txt b/example.txt
 # Nothing to see here ‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌

 But, damn, it sure is a catchy rhythm.
 ‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌
diff --git a/zw_secret.py b/zw_secret.py
 #!/usr/bin/env python3
 # zw_secret.py
 # Hide/reveal messages using zero-width characters (ZWSP/ZWNJ).
 # Encoding: bit 0 -> U+200B (ZWSP), bit 1 -> U+200C (ZWNJ)

 import argparse
 import concurrent.futures as cf
 import glob
 import re
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable, Iterator, Sequence

 # Alphabet of the hidden channel: each payload bit is carried by one
 # invisible code point.
 ZW0 = "\u200b"  # Zero Width Space -> bit 0
 ZW1 = "\u200c"  # Zero Width Non-Joiner -> bit 1

 # Regex to find runs of zero-width chars quickly:
 # one or more consecutive ZW0/ZW1 characters form a single match.
 ZW_RUN_RE = re.compile(f"[{re.escape(ZW0 + ZW1)}]+")


 # ---------------- core encode/decode ----------------

 def encode_text_to_zw(s: str) -> str:
    """Return *s* as an invisible string of zero-width characters.

    The text is UTF-8 encoded and every byte is emitted most-significant
    bit first, writing ZW0 for a 0 bit and ZW1 for a 1 bit.
    """
    carriers = (ZW0, ZW1)
    out: list[str] = []
    for octet in s.encode("utf-8"):
        # Walk the byte from bit 7 down to bit 0.
        for shift in range(7, -1, -1):
            out.append(carriers[(octet >> shift) & 1])
    return "".join(out)


 def decode_bits(bits: str) -> str:
    """Turn a string of '0'/'1' characters into UTF-8 text.

    Trailing bits that do not fill a whole byte are dropped, and byte
    sequences that are not valid UTF-8 are skipped silently.
    """
    whole_bytes = len(bits) // 8
    if whole_bytes == 0:
        return ""
    octets = bytearray()
    for index in range(whole_bytes):
        start = index * 8
        octets.append(int(bits[start:start + 8], 2))
    return bytes(octets).decode("utf-8", errors="ignore")


 def decode_zw_to_text(s: str) -> str:
    """Recover the hidden text from the ZW0/ZW1 characters found in *s*.

    Every other character is skipped; ZW0 reads as bit 0 and ZW1 as bit 1.
    An empty string is returned when *s* carries no zero-width bits.
    """
    bit_for = {ZW0: "0", ZW1: "1"}
    collected = [bit_for[ch] for ch in s if ch in bit_for]
    if not collected:
        return ""
    return decode_bits("".join(collected))


 # ---------------- file IO helpers ----------------

 def append_payload_to_file(path: Path, payload: str) -> None:
    """Append payload (invisible chars) to the end of an existing file.

    The payload is written as UTF-8 bytes directly after the current last
    byte. The existing content is never decoded or rewritten, so bytes that
    are not valid UTF-8 and the file's line endings are left untouched.

    Raises:
        OSError: if the file cannot be opened for update; this includes
            FileNotFoundError when *path* does not exist.
    """
    # "r+b" (not "ab") so that a missing file is still an error instead of
    # being silently created.
    with path.open("r+b") as fh:
        fh.seek(0, 2)  # whence=2: position at end of file
        fh.write(payload.encode("utf-8"))


 def inject_into_marker(path: Path, payload: str, marker: str) -> bool:
    """
    Append payload to the first line equal to 'marker' (whitespace-insensitive).

    Only the matching line is modified: the payload is placed after the
    line's text and before its original terminator. All other lines keep
    their original line endings (LF, CRLF, ...), and no trailing newline is
    added to a file that did not end with one. The file is not written at
    all when the marker is absent.

    Bytes that are not valid UTF-8 are dropped when the file is read
    (errors="ignore"), so they are lost if the file is rewritten.

    Return True if marker found, else False.
    """
    wanted = marker.strip()
    # newline="" turns off newline translation so terminators round-trip.
    with path.open("r", encoding="utf-8", errors="ignore", newline="") as fh:
        text = fh.read()

    # Same line boundaries twice: once without and once with terminators.
    bodies = text.splitlines()
    raw_lines = text.splitlines(keepends=True)
    for i, (body, raw) in enumerate(zip(bodies, raw_lines)):
        if body.strip() != wanted:
            continue
        terminator = raw[len(body):]
        raw_lines[i] = body + payload + terminator
        with path.open("w", encoding="utf-8", newline="") as fh:
            fh.write("".join(raw_lines))
        return True
    return False


 # ---------------- fast decode per file ----------------

 @dataclass
 class Found:
    """One hidden message recovered from a file."""
    path: Path  # file the zero-width run was found in
    line: int  # 1-based line number on which the run starts
    message: str  # text decoded from the run


 def decode_file_fast(path: Path) -> list[Found]:
    """
    Return every decodable zero-width run found in the file at *path*.

    The file is read once as UTF-8 (undecodable bytes ignored) and scanned
    with ZW_RUN_RE. Each run that decodes to non-empty text becomes a Found
    carrying the 1-based line number on which the run starts. A file that
    cannot be read, or that is empty, yields an empty list.
    """
    import bisect

    try:
        content = path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        # Best-effort scan: an unreadable file simply contributes nothing.
        return []

    if not content:
        return []

    # Sorted offsets of every newline; the number of newlines before a
    # position gives its zero-based line index.
    newline_offsets = [nl.start() for nl in re.finditer("\n", content)]

    hits: list[Found] = []
    for run in ZW_RUN_RE.finditer(content):
        decoded = decode_zw_to_text(run.group(0))
        if not decoded:
            continue
        line_number = bisect.bisect_right(newline_offsets, run.start()) + 1
        hits.append(Found(path=path, line=line_number, message=decoded))
    return hits


 # ---------------- glob expansion ----------------

 def expand_globs(
    patterns: Sequence[str],
    recursive: bool,
    excludes: Sequence[str] | None = None,
 ) -> list[Path]:
    """
    Expand glob patterns into a unique, ordered list of files.
    - If a pattern points to a directory:
        - recursive=False -> dir/* (non-recursive)
        - recursive=True  -> dir/**/* (recursive)
    - Otherwise, use glob() as provided by the pattern.
    """
    results: list[Path] = []

    def add_matches(pat: str) -> None:
        for s in glob.glob(pat, recursive=recursive):
            p = Path(s)
            if p.is_file():
                results.append(p)

    for pat in patterns:
        p = Path(pat)
        if p.exists() and p.is_dir() and ("*" not in pat and "?" not in pat and "[" not in pat):
            # Directory without explicit glob: expand to files inside
            if recursive:
                add_matches(str(p / "**/*"))
            else:
                add_matches(str(p / "*"))
        else:
            add_matches(pat)

    # Apply excludes (patterns evaluated recursively for simplicity)
    if excludes:
        excluded: set[str] = set()
        for ex in excludes:
            for s in glob.glob(ex, recursive=True):
                excluded.add(str(Path(s).resolve()))
        uniq: list[Path] = []
        seen: set[str] = set()
        for f in results:
            rp = str(f.resolve())
            if rp in excluded:
                continue
            if rp not in seen:
                uniq.append(f)
                seen.add(rp)
        return uniq

    # De-dup while preserving order
    uniq: list[Path] = []
    seen: set[str] = set()
    for f in results:
        rp = str(f.resolve())
        if rp in seen:
            continue
        uniq.append(f)
        seen.add(rp)
    return uniq


 # ---------------- CLI commands ----------------

 def cmd_inject(args: argparse.Namespace) -> int:
    """Handle the ``inject`` sub-command; always returns 0.

    The message is encoded once. With no target files the encoded payload
    is printed to stdout without a trailing newline. Otherwise each file
    gets the payload on its marker line when a marker was given and is
    found, or at the end of the file in every other case.
    """
    payload = encode_text_to_zw(args.message)

    if not args.files:
        # No files → print payload to stdout (invisible)
        print(payload, end="")
        return 0

    for name in args.files:
        target = Path(name)
        placed = bool(args.marker) and inject_into_marker(target, payload, args.marker)
        if not placed:
            # No marker requested, or marker not present: fall back to EOF.
            append_payload_to_file(target, payload)
        if args.verbose:
            print(f"[injected] {target}")
    return 0


 def cmd_decode(args: argparse.Namespace) -> int:
    """Handle the ``decode`` sub-command.

    Expands the input patterns, scans every resulting file and prints one
    ``path:line: message`` line per hidden message. Files are scanned in a
    process pool when more than one job is requested, otherwise in-process.

    Returns 0 if at least one message was printed, and 1 otherwise
    (including when the patterns matched no files).
    """
    files = expand_globs(args.inputs, recursive=args.recursive, excludes=args.exclude or [])
    if not files:
        return 1

    printed_any = False

    use_pool = bool(args.jobs) and args.jobs > 1
    if use_pool:
        with cf.ProcessPoolExecutor(max_workers=args.jobs) as executor:
            for batch in executor.map(decode_file_fast, files, chunksize=16):
                for hit in batch:
                    print(f"{hit.path}:{hit.line}: {hit.message}")
                    printed_any = True
    else:
        for target in files:
            for hit in decode_file_fast(target):
                print(f"{target}:{hit.line}: {hit.message}")
                printed_any = True

    return 0 if printed_any else 1


 def build_parser() -> argparse.ArgumentParser:
    """Build the ``zw_secret`` command-line parser.

    Two sub-commands are registered, one of which is required:
    ``inject`` (dispatches to cmd_inject) and ``decode`` (dispatches to
    cmd_decode). The chosen handler is stored on the namespace as ``func``.
    """
    root = argparse.ArgumentParser(
        prog="zw_secret",
        description="Hide/reveal messages using zero-width characters (ZWSP/ZWNJ).",
    )
    commands = root.add_subparsers(dest="cmd", required=True)

    # inject
    inject_cmd = commands.add_parser(
        "inject",
        help="Inject a message into files or stdout.",
    )
    inject_cmd.add_argument(
        "-m",
        "--message",
        required=True,
        help="Message to hide (UTF-8).",
    )
    inject_cmd.add_argument("files", nargs="*", help="Target files (optional).")
    inject_cmd.add_argument(
        "--marker",
        help="Inject at a line equal to this marker if present.",
    )
    inject_cmd.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose output.",
    )
    inject_cmd.set_defaults(func=cmd_inject)

    # decode
    inputs_help = "\n".join(
        [
            "Glob patterns and/or directories. Examples:",
            "  '*.py'            # current dir, not recursive",
            "  src/*.py          # only src/ top-level .py",
            "  'src/**/*.py'     # recursive with ** (also pass --recursive)",
            "  src docs/*.md     # mix patterns and dirs",
        ]
    )
    decode_cmd = commands.add_parser(
        "decode",
        help="Reveal hidden messages from glob patterns and/or directories.",
    )
    decode_cmd.add_argument("inputs", nargs="+", help=inputs_help)
    decode_cmd.add_argument(
        "--recursive",
        action="store_true",
        help="Enable recursive globbing (needed for patterns with **).",
    )
    decode_cmd.add_argument(
        "--exclude",
        nargs="*",
        help="Exclude patterns (can be given multiple); evaluated recursively.",
    )
    decode_cmd.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=1,
        help="Parallel jobs (processes) for decoding; default: 1",
    )
    decode_cmd.set_defaults(func=cmd_decode)

    return root


 def main() -> int:
    """Parse the command line and run the selected sub-command.

    Returns whatever the sub-command handler returns.
    """
    namespace = build_parser().parse_args()
    handler = namespace.func
    return handler(namespace)


 # Script entry point: run the CLI and use main()'s return value as the
 # process exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
	# Nothing to see here ‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌

	But, damn, it sure is a catchy rhythm.
	‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌
	#!/usr/bin/env python3
	# zw_secret.py
	# Hide/reveal messages using zero-width characters (ZWSP/ZWNJ).
	# Encoding: bit 0 -> U+200B (ZWSP), bit 1 -> U+200C (ZWNJ)

	import argparse
	import concurrent.futures as cf
	import glob
	import re
	from dataclasses import dataclass
	from pathlib import Path
	from typing import Iterable, Iterator, Sequence

	# Alphabet of the hidden channel: each payload bit is carried by one
	# invisible code point.
	ZW0 = "\u200b" # Zero Width Space -> bit 0
	ZW1 = "\u200c" # Zero Width Non-Joiner -> bit 1

	# Regex to find runs of zero-width chars quickly:
	# one or more consecutive ZW0/ZW1 characters form a single match.
	ZW_RUN_RE = re.compile(f"[{re.escape(ZW0 + ZW1)}]+")


	# ---------------- core encode/decode ----------------

	def encode_text_to_zw(s: str) -> str:
	    """Encode UTF-8 string to zero-width bitstream (ZWSP=0, ZWNJ=1).

	    Every byte of the UTF-8 encoding is emitted most-significant bit
	    first, writing ZW0 for a 0 bit and ZW1 for a 1 bit.
	    """
	    b = s.encode("utf-8")
	    bits = "".join(f"{byte:08b}" for byte in b)
	    return "".join(ZW0 if bit == "0" else ZW1 for bit in bits)


	def decode_bits(bits: str) -> str:
	    """Decode '0'/'1' bits to UTF-8 text (truncate to whole bytes).

	    Trailing bits that do not fill a whole byte are dropped, and byte
	    sequences that are not valid UTF-8 are skipped silently.
	    """
	    n = len(bits) - (len(bits) % 8)
	    if n <= 0:
	        return ""
	    data = bytes(int(bits[i:i + 8], 2) for i in range(0, n, 8))
	    return data.decode("utf-8", errors="ignore")


	def decode_zw_to_text(s: str) -> str:
	    """Decode any ZWSP/ZWNJ in s back into text.

	    Every other character is skipped; ZW0 reads as bit 0 and ZW1 as
	    bit 1. Returns "" when s carries no zero-width bits.
	    """
	    if not s:
	        return ""
	    bits = "".join("0" if ch == ZW0 else "1" if ch == ZW1 else "" for ch in s)
	    if not bits:
	        return ""
	    return decode_bits(bits)


	# ---------------- file IO helpers ----------------

	def append_payload_to_file(path: Path, payload: str) -> None:
	    """Append payload (invisible chars) to the end of an existing file.

	    The payload is written as UTF-8 bytes directly after the current
	    last byte. The existing content is never decoded or rewritten, so
	    bytes that are not valid UTF-8 and the file's line endings are left
	    untouched.

	    Raises:
	        OSError: if the file cannot be opened for update; this includes
	            FileNotFoundError when *path* does not exist.
	    """
	    # "r+b" (not "ab") so that a missing file is still an error instead
	    # of being silently created.
	    with path.open("r+b") as fh:
	        fh.seek(0, 2)  # whence=2: position at end of file
	        fh.write(payload.encode("utf-8"))


	def inject_into_marker(path: Path, payload: str, marker: str) -> bool:
	    """
	    Append payload to the first line equal to 'marker' (whitespace-insensitive).

	    Only the matching line is modified: the payload is placed after the
	    line's text and before its original terminator. All other lines keep
	    their original line endings (LF, CRLF, ...), and no trailing newline
	    is added to a file that did not end with one. The file is not
	    written at all when the marker is absent.

	    Bytes that are not valid UTF-8 are dropped when the file is read
	    (errors="ignore"), so they are lost if the file is rewritten.

	    Return True if marker found, else False.
	    """
	    wanted = marker.strip()
	    # newline="" turns off newline translation so terminators round-trip.
	    with path.open("r", encoding="utf-8", errors="ignore", newline="") as fh:
	        text = fh.read()

	    # Same line boundaries twice: once without and once with terminators.
	    bodies = text.splitlines()
	    raw_lines = text.splitlines(keepends=True)
	    for i, (body, raw) in enumerate(zip(bodies, raw_lines)):
	        if body.strip() != wanted:
	            continue
	        terminator = raw[len(body):]
	        raw_lines[i] = body + payload + terminator
	        with path.open("w", encoding="utf-8", newline="") as fh:
	            fh.write("".join(raw_lines))
	        return True
	    return False


	# ---------------- fast decode per file ----------------

	@dataclass
	class Found:
	    """One hidden message recovered from a file."""

	    path: Path  # file the zero-width run was found in
	    line: int  # 1-based line number on which the run starts
	    message: str  # text decoded from the run


	def decode_file_fast(path: Path) -> list[Found]:
	    """
	    Scan entire file once with regex to find ZW runs.
	    Compute line number for each match efficiently.

	    Each run that decodes to non-empty text becomes a Found carrying the
	    1-based line number on which the run starts. A file that cannot be
	    read, or that is empty, yields an empty list.
	    """
	    import bisect

	    try:
	        text = path.read_text(encoding="utf-8", errors="ignore")
	    except Exception:
	        # Best-effort scan: an unreadable file simply contributes nothing.
	        return []

	    if not text:
	        return []

	    found: list[Found] = []

	    # Precompute newline indices for quick line lookup
	    nl_positions = [m.start() for m in re.finditer("\n", text)]

	    def pos_to_line(pos: int) -> int:
	        # Newlines before pos == zero-based line index.
	        return bisect.bisect_right(nl_positions, pos) + 1  # 1-based

	    for m in ZW_RUN_RE.finditer(text):
	        payload = m.group(0)
	        msg = decode_zw_to_text(payload)
	        if msg:
	            line_no = pos_to_line(m.start())
	            found.append(Found(path=path, line=line_no, message=msg))

	    return found


	# ---------------- glob expansion ----------------

	def expand_globs(
	patterns: Sequence[str],
	recursive: bool,
	excludes: Sequence[str] \| None = None,
	) -> list[Path]:
	"""
	Expand glob patterns into a unique, ordered list of files.
	- If a pattern points to a directory:
	- recursive=False -> dir/* (non-recursive)
	- recursive=True -> dir/*/ (recursive)
	- Otherwise, use glob() as provided by the pattern.
	"""
	results: list[Path] = []

	def add_matches(pat: str) -> None:
	for s in glob.glob(pat, recursive=recursive):
	p = Path(s)
	if p.is_file():
	results.append(p)

	for pat in patterns:
	p = Path(pat)
	if p.exists() and p.is_dir() and ("*" not in pat and "?" not in pat and "[" not in pat):
	# Directory without explicit glob: expand to files inside
	if recursive:
	add_matches(str(p / "*/"))
	else:
	add_matches(str(p / "*"))
	else:
	add_matches(pat)

	# Apply excludes (patterns evaluated recursively for simplicity)
	if excludes:
	excluded: set[str] = set()
	for ex in excludes:
	for s in glob.glob(ex, recursive=True):
	excluded.add(str(Path(s).resolve()))
	uniq: list[Path] = []
	seen: set[str] = set()
	for f in results:
	rp = str(f.resolve())
	if rp in excluded:
	continue
	if rp not in seen:
	uniq.append(f)
	seen.add(rp)
	return uniq

	# De-dup while preserving order
	uniq: list[Path] = []
	seen: set[str] = set()
	for f in results:
	rp = str(f.resolve())
	if rp in seen:
	continue
	uniq.append(f)
	seen.add(rp)
	return uniq


	# ---------------- CLI commands ----------------

	def cmd_inject(args: argparse.Namespace) -> int:
	    """Handle the ``inject`` sub-command; always returns 0.

	    The message is encoded once. With no target files the encoded
	    payload is printed to stdout without a trailing newline. Otherwise
	    each file gets the payload on its marker line when a marker was
	    given and is found, or at the end of the file in every other case.
	    """
	    payload = encode_text_to_zw(args.message)
	    if args.files:
	        for f in args.files:
	            p = Path(f)
	            if args.marker:
	                ok = inject_into_marker(p, payload, args.marker)
	                if not ok:
	                    # Marker not present: fall back to end of file.
	                    append_payload_to_file(p, payload)
	            else:
	                append_payload_to_file(p, payload)
	            if args.verbose:
	                print(f"[injected] {p}")
	    else:
	        # No files → print payload to stdout (invisible)
	        print(payload, end="")
	    return 0


	def cmd_decode(args: argparse.Namespace) -> int:
	    """Handle the ``decode`` sub-command.

	    Expands the input patterns, scans every resulting file and prints
	    one ``path:line: message`` line per hidden message. Files are
	    scanned in a process pool when more than one job is requested,
	    otherwise in-process.

	    Returns 0 if at least one message was printed, and 1 otherwise
	    (including when the patterns matched no files).
	    """
	    targets = expand_globs(args.inputs, recursive=args.recursive, excludes=args.exclude or [])
	    if not targets:
	        return 1

	    exit_code = 1

	    if args.jobs and args.jobs > 1:
	        with cf.ProcessPoolExecutor(max_workers=args.jobs) as pool:
	            for results in pool.map(decode_file_fast, targets, chunksize=16):
	                for item in results:
	                    print(f"{item.path}:{item.line}: {item.message}")
	                    exit_code = 0
	    else:
	        for p in targets:
	            for item in decode_file_fast(p):
	                print(f"{p}:{item.line}: {item.message}")
	                exit_code = 0

	    return exit_code


	def build_parser() -> argparse.ArgumentParser:
	    """Build the ``zw_secret`` command-line parser.

	    Two sub-commands are registered, one of which is required:
	    ``inject`` (dispatches to cmd_inject) and ``decode`` (dispatches to
	    cmd_decode). The chosen handler is stored on the namespace as
	    ``func``.
	    """
	    p = argparse.ArgumentParser(
	        prog="zw_secret",
	        description="Hide/reveal messages using zero-width characters (ZWSP/ZWNJ).",
	    )
	    sub = p.add_subparsers(dest="cmd", required=True)

	    # inject
	    pi = sub.add_parser("inject", help="Inject a message into files or stdout.")
	    pi.add_argument("-m", "--message", required=True, help="Message to hide (UTF-8).")
	    pi.add_argument("files", nargs="*", help="Target files (optional).")
	    pi.add_argument("--marker", help="Inject at a line equal to this marker if present.")
	    pi.add_argument("-v", "--verbose", action="store_true", help="Verbose output.")
	    pi.set_defaults(func=cmd_inject)

	    # decode
	    pd = sub.add_parser(
	        "decode",
	        help="Reveal hidden messages from glob patterns and/or directories.",
	    )
	    pd.add_argument(
	        "inputs",
	        nargs="+",
	        help=(
	            "Glob patterns and/or directories. Examples:\n"
	            "  '*.py'            # current dir, not recursive\n"
	            "  src/*.py          # only src/ top-level .py\n"
	            "  'src/**/*.py'     # recursive with ** (also pass --recursive)\n"
	            "  src docs/*.md     # mix patterns and dirs"
	        ),
	    )
	    pd.add_argument(
	        "--recursive",
	        action="store_true",
	        help="Enable recursive globbing (needed for patterns with **).",
	    )
	    pd.add_argument(
	        "--exclude",
	        nargs="*",
	        help="Exclude patterns (can be given multiple); evaluated recursively.",
	    )
	    pd.add_argument(
	        "-j",
	        "--jobs",
	        type=int,
	        default=1,
	        help="Parallel jobs (processes) for decoding; default: 1",
	    )
	    pd.set_defaults(func=cmd_decode)

	    return p


	def main() -> int:
	    """Parse the command line and run the selected sub-command.

	    Returns whatever the sub-command handler returns.
	    """
	    parser = build_parser()
	    args = parser.parse_args()
	    return args.func(args)


	# Script entry point: run the CLI and use main()'s return value as the
	# process exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())