@aculich
Last active April 7, 2026 17:19
SpecStory 1.0.4 + Cursor: CPU, memory, FD/state.vscdb evidence — companion to getspecstory#208

Incident report: runaway file descriptors on Cursor globalStorage/state.vscdb (SpecStory extension 1.0.4)

Tracking issue: specstoryai/getspecstory#208 — [EXTENSION/CLI] CPU spikes issue (Cursor). Reports in that thread match what we see: extreme memory and CPU, many specstory_darwin_arm64 processes, and heavy system load. This document adds independent evidence on extension 1.0.4: hundreds of duplicate FDs on Cursor globalStorage/state.vscdb and -wal, FD counts that grow during short polls, and a multi-process breakdown (extension-host lineage vs orphans). Discussion belongs in #208; this gist is a companion attachment for that issue.

TL;DR

  • What’s going wrong: Under Cursor, specstory_darwin_arm64 watch can hold hundreds of duplicate open file descriptors on ~/Library/Application Support/Cursor/User/globalStorage/state.vscdb and state.vscdb-wal, and those counts creep upward over short polls — strong signal of sqlite / handle churn, not a stable long-lived DB handle.
  • Where to look first: The native CLI watch path (bundled bin/specstory_darwin_arm64). The 1.0.4 extension spawns that binary but the minified extension bundle does not reference state.vscdb, so the pattern does not look like accidental sqlite access from the JS layer.
  • Why CPU/RAM explode: Each leaked or churning handle costs kernel + userspace work; many concurrent watch processes (extension-host children + orphans) multiply the effect — consistent with pending child processes when reloading or toggling providers too quickly.
  • Version / scope: Evidence captured on SpecStory 1.0.4, Cursor 3.1.0-pre.12.patch.0 (arm64; commit 157ffe1c6dac02720a7c5d875229565305625f10, same as Help → About / cursor --version), macOS arm64. VSIX and binary SHA256s are listed below for bit-for-bit matching.

Audience: SpecStory / getspecstory maintainers
Extension: specstory.specstory-vscode 1.0.4
Platform observed: macOS darwin arm64 (Apple Silicon)
Severity: High — hundreds of duplicate FDs per specstory_darwin_arm64 watch process, sustained CPU, large RSS, multi-process proliferation


Summary

While running the SpecStory VS Code extension 1.0.4 under Cursor, we observed specstory_darwin_arm64 watch processes holding ~150+ open file descriptors each on the same two paths:

  • ~/Library/Application Support/Cursor/User/globalStorage/state.vscdb
  • ~/Library/Application Support/Cursor/User/globalStorage/state.vscdb-wal

FD counts increased over short polling windows, indicating handle churn rather than a stable small set of DB handles. Multiple specstory_darwin_arm64 processes were present (including orphans), each with similar FD profiles.

Static review of the 1.0.4 extension bundle shows the extension spawns the bundled CLI via child_process.spawn and manages a watch child process through a dedicated service (kill on stop / dispose). The extension bundle does not reference state.vscdb by string, suggesting the duplicate sqlite-related FDs originate in the native CLI (bin/specstory_darwin_arm64) during watch, not in the TypeScript layer directly.


Environment

  • Host OS: Darwin (macOS), arm64
  • Editor: Cursor (VS Code–compatible)
  • Cursor IDE: 3.1.0-pre.12.patch.0 (arm64); commit 157ffe1c6dac02720a7c5d875229565305625f10 — from cursor --version / Cursor → About (matches Cursor.app on the machine used for this report)
  • Extension ID / version: SpecStory 1.0.4 (VSIX SHA256 below)
  • CLI: specstory_darwin_arm64 from the extension bin/ directory, watch mode with JSON output and cloud sync flags (exact argv captured in logs; tokens redacted in shared artifacts)

Reproducibility (for maintainers)

1) Verify VSIX integrity

Download VSIX 1.0.4 from the Marketplace API (response is gzip; decompress to obtain the ZIP VSIX).

Expected SHA256 (decompressed VSIX):
e72b6b707d429b8fed72ab470a073506ccfa16ee09cf590c47f39cf8f2eb71c6

See VSIX-1.0.4-PROVENANCE.md (companion file in the same gist as this report) for URLs and commands.

2) Runtime instrumentation (host)

With SpecStory enabled and watch running (typical: auto-save / cloud sync paths enabled):

  1. List SpecStory processes:
    ps aux | rg specstory_darwin
  2. Pick a watch PID (child of Cursor extension host).
  3. FD snapshot:
    lsof -p <PID> 2>/dev/null | rg 'state\.vscdb' | sort | uniq -c | sort -nr
  4. Repeat step 3 after 10–30 seconds of normal IDE activity.
    Expected failure signal: counts of state.vscdb / state.vscdb-wal increase without a corresponding user action that would justify new DB connections.

Optional: sample <PID> 5 -file /tmp/specstory.sample.txt — stacks often show pread, kevent, pthread traffic (consistent with sqlite + event loop in the native binary).
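The poll in steps 3–4 can be scripted. The sketch below is illustrative (not part of SpecStory's tooling); it separates the lsof-output counting from the subprocess call so the logic is testable in isolation:

```python
import subprocess


def count_vscdb_fds(lsof_output: str) -> dict:
    """Count lsof rows whose NAME ends in state.vscdb / state.vscdb-wal."""
    counts = {"db": 0, "wal": 0}
    for line in lsof_output.splitlines():
        if line.endswith("state.vscdb"):
            counts["db"] += 1
        elif line.endswith("state.vscdb-wal"):
            counts["wal"] += 1
    return counts


def snapshot(pid: int) -> dict:
    """One FD snapshot for a PID; compare two snapshots taken 10-30 s apart."""
    out = subprocess.run(
        ["lsof", "-nP", "-p", str(pid)], capture_output=True, text=True
    ).stdout
    return count_vscdb_fds(out)
```

Growth in the db / wal counts between two snapshots, with no user action that would justify new DB connections, is the failure signal described above.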

3) Sanitized bundle (third parties)

A redacted collector and narrative report are available alongside this document (local workspace paths; the sanitized repro script is also 02-specstory_sanitized_repro.py in the gist with this report):

  • specstory-memory-fd-report.md
  • specstory-evidence-20260406-164600/
  • specstory-investigation/specstory_sanitized_repro.py

Evidence highlights (from our capture)

Observation Detail
Duplicate FDs ~150+ each on state.vscdb and state.vscdb-wal for a single watch PID
Growth FD totals rose between consecutive samples during steady-state use
Multi-process Dozens of specstory_darwin_arm64 processes visible; mix of current extension-host lineage and orphans (suggesting lifecycle / restart issues across windows or crashes)
Memory Individual watch processes in the hundreds of MB RSS range in our sample

Exact tables and command output are in specstory-memory-fd-report.md (tokens redacted).


Probable root-cause zone

  1. Native CLI (specstory_darwin_arm64) watch command — sqlite access to Cursor’s globalStorage/state.vscdb with repeated opens, missing connection reuse, or a retry loop that allocates new DB handles without closing old ones.
  2. Amplification — multiple concurrent watch processes (extension restarts, multiple windows/workspaces, or processes not exiting on extension deactivate) multiply FD and CPU cost.

Extension-side note (1.0.4 static review): The extension explicitly spawns watch in background mode and stores the ChildProcess; stop / dispose call kill on that handle. If FD growth persists with exactly one stable watch PID over time, that points away from pure extension duplication and toward native sqlite handle lifecycle in the CLI.


Suggested fix directions

A. Native CLI (preferred)

  • Ensure at most one sqlite connection (or a small pooled set) to state.vscdb, reused for the lifetime of watch.
  • Audit any periodic reconnect, per-session open, or per-provider open patterns.
  • Confirm WAL mode file handles are not duplicated on each query or poll.
  • Add a debug metric (behind --debug): count of open FDs on state.vscdb* over time.

B. Extension orchestration

  • Guard against overlapping start() calls (debounce / mutex) so full-sync + watch cannot stack if configuration events fire rapidly.
  • On restartCliWatch, ensure stop() fully terminates the prior CLI before starting a new one (currently structured as await stop(); await start() — verify no race with async initialize() / remote service).
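One way to implement that guard (illustrative only; start / stop / restart are paraphrased from the static review, not the extension's actual API) is to serialize lifecycle transitions behind a single lock with a debounce in start():

```python
import asyncio


class WatchLifecycle:
    """Serialize start/stop so restarts can never stack watch processes."""

    def __init__(self) -> None:
        self._lock = asyncio.Lock()
        self._proc = None  # stand-in for the CLI ChildProcess handle

    async def start(self) -> None:
        async with self._lock:
            if self._proc is not None:
                return  # debounce: never spawn a second watch
            self._proc = object()  # placeholder for spawning the CLI

    async def stop(self) -> None:
        async with self._lock:
            self._proc = None  # placeholder for kill + wait-for-exit

    async def restart(self) -> None:
        # stop() fully terminates before start(); the per-transition lock
        # plus the debounce in start() keeps the process count at <= 1
        # even if two restarts race on rapid configuration events.
        await self.stop()
        await self.start()
```

The key property is that every transition holds the lock, so `await stop(); await start()` cannot interleave with a concurrent start in a way that leaves two children alive.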

C. Operational

  • Document expected maximum specstory_darwin_arm64 watch count per workspace (ideally 1).

Attachments checklist (for a public issue)

  • This report (sanitized)
  • VSIX-1.0.4-PROVENANCE.md (hashes)
  • Redacted specstory-memory-fd-report.md or excerpt
  • Optional: lsof + sample excerpts with no tokens

VSIX + binary integrity reference

Artifact SHA256
VSIX specstory.specstory-vscode-1.0.4.vsix e72b6b707d429b8fed72ab470a073506ccfa16ee09cf590c47f39cf8f2eb71c6
bin/specstory_darwin_arm64 c14ef0fd618678e71292d784add1496cdd56bf539e1003f0551788fd502581b9

This document is an independent technical incident report prepared from runtime observation and review of the published 1.0.4 extension bundle. It is not an official statement from SpecStory.

#!/usr/bin/env python3
"""
Collect and sanitize SpecStory runaway process evidence on macOS.
Outputs:
- specstory-memory-fd-report.md
- specstory-evidence-<timestamp>/ (sanitized raw command outputs)
"""
from __future__ import annotations

import argparse
import datetime as dt
import json
import os
import re
import subprocess
import sys
import time
from collections import Counter
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

JWT_RE = re.compile(r"\b[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b")
CLOUD_TOKEN_ARG_RE = re.compile(r"(--cloud-token\s+)(\S+)")
CLOUD_TOKEN_URL_RE = re.compile(r"([?&]cloud[-_]?token=)([^&\s]+)", re.IGNORECASE)


def sanitize(text: str) -> str:
    """Redact cloud tokens (argv and URL forms) and JWT-like strings."""
    redacted = CLOUD_TOKEN_ARG_RE.sub(r"\1[REDACTED]", text)
    redacted = CLOUD_TOKEN_URL_RE.sub(r"\1[REDACTED]", redacted)
    redacted = JWT_RE.sub("[REDACTED_JWT]", redacted)
    return redacted
def cursor_version_report_line() -> str:
    """Best-effort Cursor IDE version for the markdown header (macOS; Help → About / cursor --version)."""
    try:
        r = subprocess.run(
            ["cursor", "--version"],
            capture_output=True,
            text=True,
            timeout=8,
        )
        if r.returncode == 0 and (r.stdout or "").strip():
            return sanitize((r.stdout or "").strip().replace("\n", " | "))
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        pass
    pkg = Path("/Applications/Cursor.app/Contents/Resources/app/package.json")
    if pkg.is_file():
        try:
            data = json.loads(pkg.read_text(encoding="utf-8"))
            ver = data.get("version")
            if ver:
                return sanitize(f"Cursor.app package.json version: {ver}")
        except (OSError, json.JSONDecodeError, TypeError):
            pass
    return "unknown (no `cursor` on PATH and no /Applications/Cursor.app package.json)"


def run(cmd: List[str], timeout: int = 60) -> Tuple[int, str, str]:
    """Run a command, sanitizing stdout/stderr before returning them."""
    p = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    out = sanitize(p.stdout or "")
    err = sanitize(p.stderr or "")
    return p.returncode, out, err


def write_text(path: Path, text: str) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(text, encoding="utf-8")
def etime_to_seconds(etime: str) -> int:
    """Parse ps etime format: [[dd-]hh:]mm:ss."""
    days = 0
    rest = etime
    if "-" in etime:
        d, rest = etime.split("-", 1)
        days = int(d)
    parts = [int(p) for p in rest.split(":")]
    if len(parts) == 3:
        hours, minutes, seconds = parts
    elif len(parts) == 2:
        hours = 0
        minutes, seconds = parts
    else:
        hours = 0
        minutes = 0
        seconds = parts[0]
    return days * 86400 + hours * 3600 + minutes * 60 + seconds


@dataclass
class Proc:
    pid: int
    ppid: int
    etime: str
    rss_kb: int
    cpu: float
    cmd: str
def parse_proc_list() -> Dict[int, Proc]:
    rc, out, err = run(["ps", "-axo", "pid=,ppid=,etime=,rss=,%cpu=,command="], timeout=30)
    if rc != 0:
        raise RuntimeError(f"ps failed: {err.strip()}")
    procs: Dict[int, Proc] = {}
    for line in out.splitlines():
        line = line.strip()
        if not line:
            continue
        parts = line.split(None, 5)
        if len(parts) < 6:
            continue
        pid, ppid, etime, rss, cpu, cmd = parts
        try:
            procs[int(pid)] = Proc(
                pid=int(pid),
                ppid=int(ppid),
                etime=etime,
                rss_kb=int(rss),
                cpu=float(cpu),
                cmd=cmd,
            )
        except ValueError:
            continue
    return procs
def is_main_cursor_cmd(cmd: str) -> bool:
    return bool(re.match(r"^/Applications/Cursor\.app/Contents/MacOS/Cursor(\s|$)", cmd))


def classify_specstory(proc: Proc, by_pid: Dict[int, Proc], current_cursor_pid: Optional[int]) -> str:
    """Walk the parent chain to decide which Cursor lineage (if any) owns this process."""
    if proc.ppid == 1:
        return "orphan_ppid_1"
    seen = set()
    p = proc.pid
    has_cursor = False
    on_current = False
    while p in by_pid and p not in seen:
        seen.add(p)
        cur = by_pid[p]
        if is_main_cursor_cmd(cur.cmd):
            has_cursor = True
        if current_cursor_pid and cur.pid == current_cursor_pid:
            on_current = True
        if cur.ppid in (0, 1):
            break
        p = cur.ppid
    if on_current:
        return "current_cursor_lineage"
    if has_cursor:
        return "old_cursor_lineage"
    return "non_cursor_lineage"


def mode_from_cmd(cmd: str) -> str:
    s = f" {cmd} "
    if " watch " in s:
        return "watch"
    if " sync " in s:
        return "sync"
    return "other"


def version_from_cmd(cmd: str) -> str:
    # Extension install path embeds the VSIX semver: .../specstory.specstory-vscode-<semver>-universal/...
    if "specstory-vscode-1.0.5" in cmd:
        return "1.0.5"
    if "specstory-vscode-1.0.4" in cmd:
        return "1.0.4"
    if "specstory-vscode-1.0.3" in cmd:
        return "1.0.3"
    return "unknown"
def parse_lsof(pid: int) -> Dict[str, object]:
    rc, out, err = run(["lsof", "-nP", "-p", str(pid)], timeout=60)
    if rc != 0:
        return {"error": err.strip() or f"lsof exited {rc}"}
    lines = out.splitlines()
    if not lines:
        return {"rows": 0, "max_fd": 0, "top_names": [], "raw": out}
    rows = lines[1:]
    fd_nums = []
    name_counts: Counter[str] = Counter()
    for line in rows:
        parts = line.split()
        if len(parts) < 9:
            continue
        m = re.match(r"(\d+)", parts[3])
        if m:
            fd_nums.append(int(m.group(1)))
        name = " ".join(parts[8:])
        name_counts[name] += 1
    return {
        "rows": len(rows),
        "max_fd": max(fd_nums) if fd_nums else 0,
        "top_names": name_counts.most_common(20),
        "raw": out,
    }


def fd_growth(pid: int, rounds: int = 5, interval_s: int = 2) -> List[Dict[str, int]]:
    samples = []
    for i in range(rounds):
        info = parse_lsof(pid)
        if "error" in info:
            samples.append({"t": i * interval_s, "rows": -1, "db": -1, "wal": -1, "max_fd": -1})
        else:
            top_names = dict(info["top_names"])  # type: ignore[index]
            db = top_names.get(
                "/Users/me/Library/Application Support/Cursor/User/globalStorage/state.vscdb", 0
            )
            wal = top_names.get(
                "/Users/me/Library/Application Support/Cursor/User/globalStorage/state.vscdb-wal", 0
            )
            samples.append(
                {
                    "t": i * interval_s,
                    "rows": int(info["rows"]),  # type: ignore[arg-type]
                    "db": int(db),
                    "wal": int(wal),
                    "max_fd": int(info["max_fd"]),  # type: ignore[arg-type]
                }
            )
        if i < rounds - 1:
            time.sleep(interval_s)
    return samples
def ps_detail(pid: int) -> str:
    _, out, err = run(
        [
            "ps",
            "-p",
            str(pid),
            "-o",
            "pid,ppid,user,lstart,etime,%cpu,%mem,vsz,rss,state,command",
        ],
        timeout=30,
    )
    return out if out.strip() else err


def thread_count(pid: int) -> int:
    rc, out, _ = run(["ps", "-M", "-p", str(pid)], timeout=30)
    if rc != 0:
        return -1
    lines = [ln for ln in out.splitlines() if ln.strip()]
    return max(0, len(lines) - 1)


def sample_process(pid: int, seconds: int = 5) -> Tuple[str, str]:
    _, out, err = run(["sample", str(pid), str(seconds), "1"], timeout=max(90, seconds + 30))
    combined = (out + ("\n" + err if err else "")).strip()
    m = re.search(r"written to file (.+)", combined)
    sample_excerpt = ""
    if m:
        sample_path = Path(m.group(1).strip())
        if sample_path.exists():
            text = sanitize(sample_path.read_text(encoding="utf-8", errors="replace"))
            # Keep header + early call graph for attachability
            sample_excerpt = "\n".join(text.splitlines()[:220])
    if not sample_excerpt:
        sample_excerpt = combined[:12000]
    return combined, sample_excerpt
def detect_screenshots() -> List[str]:
    candidates = [
        "/Users/me/shottr/SCR-20260406-ofeq.png",
        "/Users/me/shottr/SCR-20260406-ofvf.png",
        "/Users/me/shottr/SCR-20260406-ofvf-2.png",
        "/Users/me/shottr/SCR-20260406-ofxk.png",
        "/Users/me/shottr/SCR-20260406-ofxk-2.png",
        "/Users/me/shottr/SCR-20260406-ofyf.png",
        "/Users/me/shottr/SCR-20260406-ofyf-2.png",
        "/Users/me/.cursor/projects/Users-me-tools-specstory-and-friends/assets/image-d00b7709-84ac-44eb-b890-2763c727bf39.png",
    ]
    return [p for p in candidates if Path(p).exists()]


def markdown_table(rows: List[Dict[str, object]], keys: List[str]) -> str:
    if not rows:
        return "_none_\n"
    header = "| " + " | ".join(keys) + " |"
    sep = "| " + " | ".join(["---"] * len(keys)) + " |"
    body = []
    for row in rows:
        body.append("| " + " | ".join(str(row.get(k, "")) for k in keys) + " |")
    return "\n".join([header, sep] + body) + "\n"
def main() -> int:
    parser = argparse.ArgumentParser(description="Create sanitized SpecStory memory/FD repro report.")
    parser.add_argument("--pid", type=int, default=0,
                        help="Target specstory PID (default: auto-pick highest RSS current lineage).")
    parser.add_argument("--out", default="specstory-memory-fd-report.md", help="Markdown report output path.")
    parser.add_argument(
        "--quick",
        action="store_true",
        help="Shorter FD growth poll (2×1s) and shorter sample (1s) for smoke tests.",
    )
    args = parser.parse_args()

    now = dt.datetime.now()
    stamp = now.strftime("%Y%m%d-%H%M%S")
    evidence_dir = Path(f"specstory-evidence-{stamp}")
    evidence_dir.mkdir(parents=True, exist_ok=True)

    by_pid = parse_proc_list()
    main_cursor = [p for p in by_pid.values() if is_main_cursor_cmd(p.cmd)]
    current_cursor = min(main_cursor, key=lambda p: etime_to_seconds(p.etime)) if main_cursor else None
    current_cursor_pid = current_cursor.pid if current_cursor else None

    spec = [p for p in by_pid.values() if "specstory_darwin_arm64" in p.cmd]
    if not spec:
        write_text(Path(args.out), "# SpecStory Memory/FD Report\n\nNo `specstory_darwin_arm64` processes found.\n")
        return 0

    classified = []
    for p in spec:
        cls = classify_specstory(p, by_pid, current_cursor_pid)
        classified.append(
            {
                "pid": p.pid,
                "ppid": p.ppid,
                "etime": p.etime,
                "rss_mb": round(p.rss_kb / 1024, 1),
                "cpu": p.cpu,
                "class": cls,
                "mode": mode_from_cmd(p.cmd),
                "version": version_from_cmd(p.cmd),
                "cmd": sanitize(p.cmd),
            }
        )

    target_pid = args.pid if args.pid else 0
    if target_pid == 0:
        current_lineage = [r for r in classified if r["class"] == "current_cursor_lineage"]
        source = current_lineage if current_lineage else classified
        target_pid = int(max(source, key=lambda r: float(r["rss_mb"]))["pid"])
    if target_pid not in by_pid:
        raise RuntimeError(f"Target pid {target_pid} no longer exists.")
    target_proc = by_pid[target_pid]

    sibling_candidates = [
        r for r in classified if r["ppid"] == target_proc.ppid and int(r["pid"]) != target_pid
    ]
    sibling_pid = int(max(sibling_candidates, key=lambda r: float(r["rss_mb"]))["pid"]) if sibling_candidates else 0

    summary = {
        "timestamp": now.isoformat(),
        "current_cursor_pid": current_cursor_pid,
        "total_specstory": len(classified),
        "by_class": Counter([str(r["class"]) for r in classified]),
        "by_mode": Counter([str(r["mode"]) for r in classified]),
        "by_version": Counter([str(r["version"]) for r in classified]),
        "target_pid": target_pid,
        "sibling_pid": sibling_pid if sibling_pid else None,
    }
    write_text(evidence_dir / "summary.json", json.dumps(summary, indent=2))

    target_ps = ps_detail(target_pid)
    parent_ps = ps_detail(target_proc.ppid)
    target_threads = thread_count(target_pid)
    parent_threads = thread_count(target_proc.ppid)
    target_lsof = parse_lsof(target_pid)
    sibling_lsof = parse_lsof(sibling_pid) if sibling_pid else {"error": "no sibling with same parent"}

    fd_rounds, fd_interval = (2, 1) if args.quick else (5, 2)
    sample_secs = 1 if args.quick else 5
    growth = fd_growth(target_pid, rounds=fd_rounds, interval_s=fd_interval)
    sample_raw, sample_excerpt = sample_process(target_pid, seconds=sample_secs)

    write_text(evidence_dir / f"ps-{target_pid}.txt", target_ps)
    write_text(evidence_dir / f"ps-{target_proc.ppid}.txt", parent_ps)
    write_text(evidence_dir / f"lsof-{target_pid}.txt", str(target_lsof.get("raw", "")))
    write_text(evidence_dir / f"lsof-{sibling_pid}.txt", str(sibling_lsof.get("raw", "")))
    write_text(evidence_dir / f"sample-{target_pid}-raw.txt", sample_raw)
    write_text(evidence_dir / f"sample-{target_pid}-excerpt.txt", sample_excerpt)
    write_text(evidence_dir / f"fd-growth-{target_pid}.json", json.dumps(growth, indent=2))

    screenshots = detect_screenshots()

    class_rows = []
    class_counts = Counter([str(r["class"]) for r in classified])
    for k in sorted(class_counts.keys()):
        rss_sum = sum(float(r["rss_mb"]) for r in classified if str(r["class"]) == k)
        class_rows.append({"class": k, "count": class_counts[k], "rss_mb_sum": round(rss_sum, 1)})
    top_by_rss = sorted(classified, key=lambda r: float(r["rss_mb"]), reverse=True)[:15]
    top_simple = [{k: r[k] for k in ["pid", "ppid", "class", "mode", "version", "rss_mb", "cpu", "etime"]} for r in top_by_rss]

    lsof_top_target = []
    if "top_names" in target_lsof:
        for name, count in target_lsof["top_names"][:12]:  # type: ignore[index]
            lsof_top_target.append({"count": count, "name": name})
    lsof_top_sibling = []
    if "top_names" in sibling_lsof:
        for name, count in sibling_lsof["top_names"][:12]:  # type: ignore[index]
            lsof_top_sibling.append({"count": count, "name": name})

    md = []
    md.append("# SpecStory memory/FD repro report\n")
    md.append(f"- Generated: `{now.isoformat()}`")
    md.append(f"- Host OS: `{os.uname().sysname} {os.uname().release}`")
    md.append(f"- Cursor IDE: `{cursor_version_report_line()}`")
    md.append(f"- Current Cursor PID: `{current_cursor_pid}`")
    md.append(f"- Total `specstory_darwin_arm64` processes: `{len(classified)}`")
    md.append(f"- Target PID inspected: `{target_pid}`")
    md.append("")
    md.append("## Process classification\n")
    md.append(markdown_table(class_rows, ["class", "count", "rss_mb_sum"]))
    md.append(markdown_table(top_simple, ["pid", "ppid", "class", "mode", "version", "rss_mb", "cpu", "etime"]))
    md.append("## Target process details\n")
    md.append(f"- Target thread count (`ps -M`): `{target_threads}`")
    md.append(f"- Parent PID: `{target_proc.ppid}`")
    md.append(f"- Parent thread count (`ps -M`): `{parent_threads}`")
    md.append("")
    md.append("```text")
    md.append(target_ps.rstrip())
    md.append("```")
    md.append("")
    md.append("```text")
    md.append(parent_ps.rstrip())
    md.append("```")
    md.append("\n## Open file descriptor evidence (target)\n")
    if "error" in target_lsof:
        md.append(f"- lsof error: `{target_lsof['error']}`")
    else:
        md.append(f"- lsof rows: `{target_lsof['rows']}`")  # type: ignore[index]
        md.append(f"- max fd: `{target_lsof['max_fd']}`")  # type: ignore[index]
    md.append("")
    md.append(markdown_table(lsof_top_target, ["count", "name"]))
    md.append("## Open file descriptor evidence (sibling)\n")
    if "error" in sibling_lsof:
        md.append(f"- sibling lsof error: `{sibling_lsof['error']}`")
    else:
        md.append(f"- sibling pid: `{sibling_pid}`")
        md.append(f"- lsof rows: `{sibling_lsof['rows']}`")  # type: ignore[index]
        md.append(f"- max fd: `{sibling_lsof['max_fd']}`")  # type: ignore[index]
    md.append("")
    md.append(markdown_table(lsof_top_sibling, ["count", "name"]))
    md.append("## FD growth poll (target)\n")
    md.append(markdown_table(growth, ["t", "rows", "db", "wal", "max_fd"]))
    md.append("## Stack sample excerpt (sanitized)\n")
    md.append("```text")
    md.append(sample_excerpt.rstrip())
    md.append("```")
    md.append("\n## Screenshot references\n")
    if screenshots:
        for s in screenshots:
            md.append(f"- `{s}`")
    else:
        md.append("- _No known screenshot paths found on disk at report time._")
    md.append("\n## Sanitization notes\n")
    md.append("- All `--cloud-token` CLI values are redacted as `[REDACTED]`.")
    md.append("- JWT-like strings are redacted as `[REDACTED_JWT]`.")
    md.append("- Raw evidence files in the evidence directory are sanitized before write.")
    md.append("\n## Evidence bundle files\n")
    for p in sorted(evidence_dir.glob("*")):
        md.append(f"- `{p}`")

    report_path = Path(args.out)
    write_text(report_path, "\n".join(md) + "\n")
    print(f"Wrote report: {report_path.resolve()}")
    print(f"Wrote sanitized evidence bundle: {evidence_dir.resolve()}")
    return 0


if __name__ == "__main__":
    try:
        raise SystemExit(main())
    except KeyboardInterrupt:
        print("Interrupted.", file=sys.stderr)
        raise SystemExit(130)

SpecStory VSIX 1.0.4 — artifact provenance

Host IDE (evidence in companion incident report)

These VSIX hashes are editor-agnostic. The related Cursor / FD investigation was run with:

  • Cursor: 3.1.0-pre.12.patch.0 (arm64); commit 157ffe1c6dac02720a7c5d875229565305625f10 (cursor --version / Help → About).

Authoritative download

  • Marketplace API URL (returns gzip-compressed payload):
    https://marketplace.visualstudio.com/_apis/public/gallery/publishers/specstory/vsextensions/specstory-vscode/1.0.4/vspackage
  • Important: The HTTP body is gzip-compressed. Decompress with gunzip (or equivalent) to obtain a ZIP VSIX. Running unzip on the raw download without decompressing will fail.

Canonical local artifact

Field Value
Path specstory-investigation/vsix-1.0.4/specstory.specstory-vscode-1.0.4.vsix
SHA256 e72b6b707d429b8fed72ab470a073506ccfa16ee09cf590c47f39cf8f2eb71c6
Size ~54.7 MB

Version confirmation

  • extension/package.json has "version": "1.0.4"
  • extension/dist/version-manifest.json has "currentVersion": "1.0.4"

Bundled native CLI (SHA256)

Binary SHA256
bin/specstory_darwin_arm64 c14ef0fd618678e71292d784add1496cdd56bf539e1003f0551788fd502581b9
bin/specstory_darwin_x86_64 2312adb706707d8367332ed23f37b4f6ea18f5d750bdd597a17902db225346ef
bin/specstory_linux_arm64 ff8605e2b48ab7edc2fe801de4cd37853074cca10f289d3e5f39942b259806ec
bin/specstory_linux_x86_64 d59bf0a3cfdfb0bfe5f8bd4afec7495c484e06fcd215c6bae7bb933fb8018b94
bin/specstory_windows_arm64.exe 77ab5d65a67c48c8ef5401d3c6af16daac8b75b2868ba7e989236a59283d6c79
bin/specstory_windows_x86_64.exe d02f0964a26de4e54ff64223aae1170a9160699456af84b7237374747a8d3c67

Reproduce download + hash (macOS)

curl -fsSL -o specstory-1.0.4.vspackage.gz \
  'https://marketplace.visualstudio.com/_apis/public/gallery/publishers/specstory/vsextensions/specstory-vscode/1.0.4/vspackage'
gunzip -c specstory-1.0.4.vspackage.gz > specstory.specstory-vscode-1.0.4.vsix
shasum -a 256 specstory.specstory-vscode-1.0.4.vsix

Expected SHA256: e72b6b707d429b8fed72ab470a073506ccfa16ee09cf590c47f39cf8f2eb71c6 (verify against this document before trusting).

Request: open-source (or partially open-source) the SpecStory CLI — especially watch and Cursor storage integration

Audience: SpecStory / getspecstory product and engineering
Context: Independent analysis of SpecStory VS Code extension 1.0.4 and runtime behavior of specstory_darwin_arm64 watch under Cursor 3.1.0-pre.12.patch.0 (arm64; commit 157ffe1c6dac02720a7c5d875229565305625f10) (see UPSTREAM-INCIDENT-REPORT-1.0.4.md in the companion gist for #208).


Ask

Please consider releasing source code for the SpecStory CLI (minimum: watch and any code paths that read Cursor / VS Code globalStorage or state.vscdb), or a narrowly scoped library that implements that integration, under an OSI-approved license.

If full open-sourcing is not feasible, alternatives that still help the community:

  1. Published architecture doc for how watch interacts with editor storage (sqlite, WAL, file locks).
  2. Minimal reproducible test harness (mock state.vscdb) with expected FD bounds.
  3. Nightly / CI builds with symbols or split debug info for macOS to symbolicate sample stacks.
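Item 2 could start as small as the sketch below. This is our assumption of what a minimal harness would look like, not an existing SpecStory test; it relies on /dev/fd, which lists a process's own descriptors on both macOS and Linux:

```python
import os
import sqlite3
import tempfile


def open_fd_count() -> int:
    # /dev/fd enumerates this process's open file descriptors
    # (on Linux it is a symlink to /proc/self/fd)
    return len(os.listdir("/dev/fd"))


with tempfile.TemporaryDirectory() as tmp:
    db_path = os.path.join(tmp, "state.vscdb")
    sqlite3.connect(db_path).close()  # create the mock database file
    baseline = open_fd_count()
    conn = sqlite3.connect(db_path)   # behavior under test: one long-lived handle
    for _ in range(100):              # 100 polls against the same connection
        conn.execute("SELECT 1").fetchall()
    growth = open_fd_count() - baseline
    conn.close()
    assert growth <= 2, f"FD growth over 100 polls: {growth}"
```

Running the same loop with a fresh connection per poll (and no close) would trip the assertion, which is exactly the regression this kind of test is meant to catch.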

Why this helps SpecStory

  • Faster diagnosis of high-impact issues (FD growth, memory, CPU) without guesswork on closed binaries.
  • External review of sqlite usage patterns (connection pooling, WAL handling, retry loops).
  • Community contributions (patches, fuzzing, static analysis) for a component that is security- and stability-sensitive by nature.

Why this helps users and integrators

Cursor / VS Code extensions that ship native helpers which touch globalStorage/state.vscdb are in a high-trust category. Operators need confidence that:

  • The tool does not open unbounded sqlite connections during long watch sessions.
  • Behavior is transparent when something goes wrong (as in the FD growth incident).

Open-sourcing the relevant slice turns community energy into shared maintenance instead of one-way support tickets.


Scope suggestion (minimal viable open)

In scope Rationale
watch command implementation Long-lived process; primary suspect for handle churn
DB / storage adapters for Cursor / VS Code chat history Directly relates to state.vscdb access patterns
Tests for FD / connection counts Prevents regressions
Out of scope (optional) Rationale
Cloud sync server Can remain proprietary
Proprietary ML / ranking Not needed to audit sqlite FD behavior

Relationship to the incident report

The technical incident points to native CLI sqlite handle lifecycle as the likely locus of state.vscdb / WAL FD duplication. Open-sourcing that layer would let independent contributors verify fixes and add automated checks—reducing repeat incidents and support load.


This memo is a policy / product suggestion accompanying the technical incident materials in the same folder.
