nikola43 · April 12, 2026 19:49
diff --git a/graphify-setup.py b/graphify-setup.py
 #!/usr/bin/env python3
 """
 graphify-setup — one-command project indexing for AI-assisted development.

 Generates a knowledge graph, Obsidian vault, wiki, and CLAUDE.md with
 embedded architecture snapshot. Works with any codebase.

 Usage:
    python graphify-setup.py init   [PATH] [--no-obsidian] [--no-wiki]
    python graphify-setup.py update [PATH]
    python graphify-setup.py status [PATH]
    python graphify-setup.py refresh-claude [PATH]
    python graphify-setup.py watch  [PATH] [--debounce SEC]
 """

 __version__ = "1.0.0"

 import os
 import sys
 import json
 import time
 import signal
 import logging
 import argparse
 import subprocess
 from datetime import datetime, timezone
 from pathlib import Path

 # Console logging for the CLI: INFO and above, each record prefixed with a
 # short wall-clock timestamp and the level name.
 log = logging.getLogger("graphify-setup")
 logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
 )

 # ── configuration ─────────────────────────────────────────────────────

 # Default contents of `.graphifyignore`. write_ignore() writes this text
 # verbatim, and only when the project does not already have such a file.
 # cmd_watch() later re-reads the file on disk and skips change events under
 # every entry that ends with "/" (directory patterns only).
 IGNORE_PATTERNS = """\
 # Dependencies
 node_modules/
 vendor/
 .venv/
 venv/
 env/
 __pycache__/
 .mypy_cache/
 .pytest_cache/
 .ruff_cache/

 # Build output
 dist/
 build/
 target/
 out/
 .next/
 .nuxt/
 .output/
 coverage/
 *.egg-info/

 # Version control & CI
 .git/
 .github/
 .gitlab/

 # Graphify output (avoid self-reference)
 graphify-out/

 # Lock files (no semantic value)
 *.lock
 package-lock.json
 yarn.lock
 pnpm-lock.yaml
 Gemfile.lock
 Pipfile.lock
 poetry.lock
 composer.lock

 # Environment & secrets
 .env
 .env.*
 *.pem
 *.key

 # Logs & temp
 *.log
 *.tmp
 *.swp
 .DS_Store
 Thumbs.db

 # Graphify tooling (not part of the project)
 graphify-setup.py
 CLAUDE.md
 """

 # ── CLAUDE.md template ────────────────────────────────────────────────
 # This is the core token-saving mechanism. Every byte here is loaded into
 # every Claude session, so it must be dense and actionable.

 # Rendered by write_claude_md() through str.format(); `{architecture}` is the
 # only placeholder, so any literal brace added to this text must be doubled.
 # write_claude_md() also looks for the "# graphify" heading below in an
 # existing CLAUDE.md to decide which user-written content to keep.
 CLAUDE_MD_TEMPLATE = """\
 # graphify
 - **graphify** (`~/.claude/skills/graphify/SKILL.md`) — any input to knowledge graph. Trigger: `/graphify`
 When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
 This project has a graphify knowledge graph at graphify-out/.

 ## Architecture
 <!-- AUTO-GENERATED by graphify-setup.py — do not edit below this line -->
 {architecture}
 <!-- END AUTO-GENERATED -->

 ## Rules

 Before answering architecture or codebase questions, check graphify-out/ first.
 If graphify-out/wiki/index.md exists, navigate it instead of reading raw files.

 ### Lookup order (stop at first sufficient answer)
 1. This file — architecture snapshot above
 2. `/graphify query "<question>"` — graph traversal for relationships
 3. `/graphify explain "<node>"` or `/graphify path "A" "B"` — targeted lookups
 4. `graphify-out/wiki/<community>.md` — summarized per-module knowledge
 5. Raw file read — LAST RESORT, only the minimum files needed

 ### Raw file reads: FORBIDDEN unless
 - User explicitly asks: "show the code", "read the file", "open X"
 - User wants to EDIT a file (must read target before editing)
 - Graph query returned no matching nodes for the question
 - Debugging a runtime error that requires actual source

 ### Token discipline
 - Max 3 raw file reads per response — if you need more, ask the user
 - Never read a file just to summarize it — the wiki already has summaries
 - Never glob/grep the codebase before checking if the graph has the answer
 - Use `graphify query --budget 1500` to cap traversal output

 ### After modifying code in this session
 Run to keep graph current (AST-only, no LLM cost):
 ```
 python3 -c "from graphify.watch import _rebuild_code; from pathlib import Path; _rebuild_code(Path('.'))"
 ```

 ### Full rebuild commands
 - Code changes only: `/graphify . --update`
 - Added docs/images: `/graphify . --mode deep --update`
 - Full re-index: `python graphify-setup.py init`
 """

 # ── helpers ───────────────────────────────────────────────────────────


 def _timer():
    """Start a stopwatch and return a zero-argument reader for it.

    Every call to the returned function gives the seconds elapsed since
    ``_timer()`` itself was called, measured on the monotonic clock and
    rounded to one decimal place.
    """
    began = time.monotonic()

    def _elapsed():
        return round(time.monotonic() - began, 1)

    return _elapsed


 def ensure_graphify():
    """Make sure the ``graphify`` package is importable, installing it if needed.

    A plain import is tried first. If that fails, ``pip install graphifyy`` is
    run with the current interpreter, retried once with
    ``--break-system-packages`` when the first attempt exits non-zero, and the
    import is attempted again.

    Raises:
        SystemExit: with code 1 when ``graphify`` still cannot be imported
            after the install attempts.
    """
    import importlib

    try:
        import graphify  # noqa: F401
        return
    except ImportError:
        pass

    log.info("graphify not found — installing...")
    pip_install = [sys.executable, "-m", "pip", "install", "graphifyy", "-q"]
    ret = subprocess.run(pip_install, capture_output=True, text=True)
    if ret.returncode != 0:
        # Try with --break-system-packages for managed environments
        ret = subprocess.run(
            pip_install + ["--break-system-packages"],
            capture_output=True, text=True,
        )

    # The import system caches directory listings, so a package installed
    # while this process is running may stay invisible until the finder
    # caches are dropped.
    importlib.invalidate_caches()
    try:
        import graphify  # noqa: F401
    except ImportError:
        log.error(f"Failed to install graphify. pip output:\n{ret.stderr}")
        log.error("Try manually: pip install graphifyy")
        sys.exit(1)


 def write_ignore(project_dir: Path):
    """Write the default ``.graphifyignore`` into *project_dir* unless one exists.

    An existing file is never modified; the function only logs that it was
    skipped.
    """
    ignore_file = project_dir / ".graphifyignore"
    if not ignore_file.exists():
        ignore_file.write_text(IGNORE_PATTERNS)
        log.info("Created .graphifyignore")
    else:
        log.info(".graphifyignore exists, skipping")


 def detect_files(project_dir: Path) -> dict:
    """Detect the project's files with graphify and persist the result.

    Creates ``graphify-out/`` under *project_dir* when necessary, calls
    ``graphify.detect.detect`` on the project, dumps its return value to
    ``graphify-out/.graphify_detect.json`` and returns it unchanged.
    """
    from graphify.detect import detect

    target_dir = project_dir / "graphify-out"
    target_dir.mkdir(parents=True, exist_ok=True)

    detection = detect(project_dir)
    snapshot = json.dumps(detection, indent=2)
    (target_dir / ".graphify_detect.json").write_text(snapshot)
    return detection


 def run_ast(project_dir: Path, detection: dict) -> dict:
    """Run graphify's AST extraction over the detected code files.

    Entries of ``detection["files"]["code"]`` that are directories are
    expanded with ``collect_files``; entries that exist as paths are used
    as-is; everything else is dropped. The extraction result (or an empty
    placeholder when no code file remains) is written to
    ``graphify-out/.graphify_ast.json`` and returned.
    """
    from graphify.extract import collect_files, extract

    sources = []
    for entry in detection.get("files", {}).get("code", []):
        candidate = Path(entry)
        if candidate.is_dir():
            sources.extend(collect_files(candidate))
        elif candidate.exists():
            sources.append(candidate)

    ast_file = project_dir / "graphify-out" / ".graphify_ast.json"
    if sources:
        extraction = extract(sources)
        ast_file.write_text(json.dumps(extraction, indent=2))
        log.info(f"AST: {len(extraction['nodes'])} nodes, {len(extraction['edges'])} edges")
        return extraction

    # Nothing to parse: still leave a well-formed file for merge_extractions.
    placeholder = {"nodes": [], "edges": [], "input_tokens": 0, "output_tokens": 0}
    ast_file.write_text(json.dumps(placeholder))
    log.info("No code files — skipping AST")
    return placeholder


 def run_cache_and_semantic(project_dir: Path, detection: dict):
    """Stage cached semantic results for the merge step.

    Every detected file, across all categories, is handed to graphify's
    ``check_semantic_cache``. What it reports as cached is written raw to
    ``.graphify_cached.json`` and, with nodes de-duplicated by ``id`` (first
    occurrence wins), to ``.graphify_semantic.json``. No extraction happens
    here, so both token counters are recorded as zero.

    Returns:
        The fourth value returned by ``check_semantic_cache``, treated here
        as the files that have no cache entry yet.
    """
    from graphify.cache import check_semantic_cache

    every_file = []
    for group in detection["files"].values():
        every_file.extend(group)
    nodes, edges, hyperedges, uncached = check_semantic_cache(every_file)

    out_dir = project_dir / "graphify-out"
    cached = {
        "nodes": nodes or [],
        "edges": edges or [],
        "hyperedges": hyperedges or [],
    }
    (out_dir / ".graphify_cached.json").write_text(json.dumps(cached))

    log.info(f"Cache: {len(every_file) - len(uncached)} hit, {len(uncached)} uncached")

    # Keep the first node seen for each id; dict insertion order preserves
    # the original sequence.
    unique_by_id = {}
    for node in cached["nodes"]:
        unique_by_id.setdefault(node["id"], node)
    unique_nodes = list(unique_by_id.values())

    semantic = dict(cached, nodes=unique_nodes, input_tokens=0, output_tokens=0)
    (out_dir / ".graphify_semantic.json").write_text(json.dumps(semantic, indent=2))
    log.info(f"Semantic: {len(unique_nodes)} nodes from cache")
    return uncached


 def merge_extractions(project_dir: Path) -> dict:
    """Combine the AST and semantic extractions into one document.

    Reads ``.graphify_ast.json`` and ``.graphify_semantic.json`` from
    ``graphify-out/``. AST nodes come first; a semantic node is added only
    when its ``id`` has not been seen. Edges are concatenated, while
    hyperedges and token counts are taken from the semantic side. The result
    is written to ``.graphify_extract.json`` and returned.
    """
    out_dir = project_dir / "graphify-out"
    ast_part = json.loads((out_dir / ".graphify_ast.json").read_text())
    sem_part = json.loads((out_dir / ".graphify_semantic.json").read_text())

    nodes = list(ast_part["nodes"])
    known_ids = {node["id"] for node in nodes}
    for node in sem_part["nodes"]:
        node_id = node["id"]
        if node_id in known_ids:
            continue
        known_ids.add(node_id)
        nodes.append(node)

    merged = {
        "nodes": nodes,
        "edges": ast_part["edges"] + sem_part["edges"],
        "hyperedges": sem_part.get("hyperedges", []),
        "input_tokens": sem_part.get("input_tokens", 0),
        "output_tokens": sem_part.get("output_tokens", 0),
    }
    (out_dir / ".graphify_extract.json").write_text(json.dumps(merged, indent=2))
    log.info(f"Merged: {len(nodes)} nodes, {len(merged['edges'])} edges")
    return merged


 def build_graph(project_dir: Path, detection: dict):
    """Build the graph from the merged extraction, then cluster and analyse it.

    Reads ``graphify-out/.graphify_extract.json`` and writes
    ``GRAPH_REPORT.md``, ``graph.json``, ``.graphify_analysis.json`` and
    ``.graphify_labels.json`` into the same directory.

    Returns:
        Tuple ``(G, communities, cohesion, gods, labels)``.

    Raises:
        SystemExit: with code 1 when the built graph has no nodes.
    """
    from graphify.build import build_from_json
    from graphify.cluster import cluster, score_all
    from graphify.analyze import god_nodes, surprising_connections, suggest_questions
    from graphify.report import generate
    from graphify.export import to_json

    def _str_keys(mapping):
        # JSON object keys must be strings; community ids may not be.
        return {str(key): value for key, value in mapping.items()}

    out_dir = project_dir / "graphify-out"
    extraction = json.loads((out_dir / ".graphify_extract.json").read_text())

    graph = build_from_json(extraction)
    if not graph.number_of_nodes():
        log.error("Graph is empty — no nodes extracted. Check that files are supported.")
        sys.exit(1)

    communities = cluster(graph)
    cohesion = score_all(graph, communities)
    gods = god_nodes(graph)
    surprises = surprising_connections(graph, communities)

    # Auto-label communities from node names
    labels = _label_communities(graph, communities)
    questions = suggest_questions(graph, communities, labels)

    token_usage = {
        "input": extraction.get("input_tokens", 0),
        "output": extraction.get("output_tokens", 0),
    }
    report_text = generate(
        graph, communities, cohesion, labels, gods, surprises,
        detection, token_usage, str(project_dir), suggested_questions=questions,
    )
    (out_dir / "GRAPH_REPORT.md").write_text(report_text)
    to_json(graph, communities, str(out_dir / "graph.json"))

    # Persist analysis + labels (needed by refresh-claude and status)
    analysis = {
        "communities": _str_keys(communities),
        "cohesion": _str_keys(cohesion),
        "gods": gods,
        "surprises": surprises,
        "questions": questions,
    }
    (out_dir / ".graphify_analysis.json").write_text(json.dumps(analysis, indent=2))
    (out_dir / ".graphify_labels.json").write_text(json.dumps(_str_keys(labels)))

    log.info(
        f"Graph: {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges, "
        f"{len(communities)} communities"
    )
    return graph, communities, cohesion, gods, labels


 # Names too common to identify a community on their own; compared against a
 # candidate label lower-cased and with trailing parentheses stripped.
 _GENERIC_NAMES = frozenset({
    "main", "init", "setup", "fixture", "test", "run", "get", "set",
    "open", "close", "read", "write", "print", "check", "load", "save",
    "start", "stop", "create", "delete", "update", "parse", "format",
    "sort", "filter", "map", "reduce", "zip", "sum", "max", "min",
    "abs", "len", "str", "int", "list", "dict", "bytes", "hash",
    "decode", "encode", "match", "split", "join", "pack", "unpack",
    "constructor", "middleware", "wrapper", "handler", "callback",
 })


 def _label_communities(G, communities: dict) -> dict:
    """Derive a distinct, human-readable label for every community.

    Strategy:
    1. Prefer the longest non-generic function/class name in the community
       (comment- or string-like labels and labels over 60 chars are skipped).
    2. If no such name exists, use ``<source stem>/<first collected name>``.
    3. Otherwise fall back to the stem of a file-level node, then to
       ``Module <cid>``.
    4. When two communities pick the same base label, both get a source file
       stem appended in parentheses; if a label is still taken, the community
       id is appended too.

    Args:
        G: graph whose ``nodes[nid]`` mappings may carry ``label`` and
            ``source_file``.
        communities: mapping of community id (int or numeric str) to the
            node ids it contains.

    Returns:
        Dict mapping integer community id to its label. No two values are
        equal, so labels can safely be turned into wiki file names.
    """
    labels = {}
    used_labels = {}  # base label -> first cid using it (None once that one was re-labelled)
    taken = set()  # every label currently assigned to some community

    def _distinct(candidate, owner):
        # Appending the owning community id always terminates: the string
        # grows on every round while ``taken`` is finite.
        while candidate in taken:
            candidate = f"{candidate} {owner}"
        return candidate

    for cid, node_ids in communities.items():
        cid_int = int(cid) if isinstance(cid, str) else cid

        # Collect function/class names (non-file nodes)
        func_names = []
        file_names = []
        for nid in node_ids:
            if nid not in G.nodes:
                continue
            lbl = G.nodes[nid].get("label", nid)
            src = G.nodes[nid].get("source_file", "")
            if "." in lbl or "/" in lbl:
                # This is a file-level node — extract stem for fallback
                file_names.append(Path(lbl).stem if "." in lbl else Path(src).stem)
            else:
                func_names.append((lbl, src))

        # Pick best label: prefer non-generic, longest (more specific) name
        best = None
        for name, src in sorted(func_names, key=lambda x: len(x[0]), reverse=True):
            clean = name.rstrip("()")
            # Skip comments, strings, or labels > 60 chars
            if clean.startswith(("#", "//", "'", '"')):
                continue
            if len(clean) > 60:
                continue
            if clean.lower() not in _GENERIC_NAMES:
                best = (name, src)
                break

        if not best and func_names:
            # No usable specific name — qualify the first collected name
            # with the stem of its own source file.
            name, src = func_names[0]
            stem = Path(src).stem if src else ""
            best = (f"{stem}/{name}" if stem else name, src)

        if not best and file_names:
            best = (file_names[0], "")

        if not best:
            best = (f"Module {cid_int}", "")

        label, src = best

        # Deduplicate: if label already used, add file context
        if label in used_labels:
            # Fix the previous one too
            prev_cid = used_labels[label]
            if prev_cid is not None:
                prev_nodes = communities.get(prev_cid, communities.get(str(prev_cid), []))
                prev_src = ""
                for nid in prev_nodes:
                    if nid in G.nodes:
                        prev_src = G.nodes[nid].get("source_file", "")
                        if prev_src:
                            break
                if prev_src:
                    prev_key = int(prev_cid)
                    taken.discard(labels[prev_key])
                    labels[prev_key] = _distinct(
                        f"{labels[prev_key]} ({Path(prev_src).stem})", prev_key
                    )
                    taken.add(labels[prev_key])
                used_labels[label] = None  # mark as already fixed

            # Add context to current one
            stem = Path(src).stem if src else str(cid_int)
            label = f"{label} ({stem})"

        used_labels.setdefault(label.split(" (")[0], cid_int)
        # Two files with the same stem (pkg/a.py, lib/a.py) give the same
        # "(stem)" suffix, so enforce uniqueness explicitly.
        label = _distinct(label, cid_int)
        taken.add(label)
        labels[cid_int] = label

    return labels


 def generate_outputs(project_dir: Path, G, communities, cohesion, labels, *, obsidian=True, wiki=True, html=True):
    """Write the optional artifacts into ``graphify-out/``.

    * ``html`` — ``graph.html``, skipped with a warning above 5000 nodes.
    * ``obsidian`` — an ``obsidian/`` vault plus ``graph.canvas``.
    * ``wiki`` — the Markdown wiki produced by ``_generate_wiki``.

    An empty ``labels`` mapping is forwarded to the exporters as ``None``.
    """
    out_dir = project_dir / "graphify-out"
    label_arg = labels or None

    if html:
        if G.number_of_nodes() <= 5000:
            from graphify.export import to_html
            to_html(G, communities, str(out_dir / "graph.html"), community_labels=label_arg)
            log.info("graph.html written")
        else:
            log.warning(f"{G.number_of_nodes()} nodes — skipping HTML, use Obsidian")

    if obsidian:
        from graphify.export import to_obsidian, to_canvas
        vault = str(out_dir / "obsidian")
        note_count = to_obsidian(G, communities, vault, community_labels=label_arg, cohesion=cohesion)
        to_canvas(G, communities, f"{vault}/graph.canvas", community_labels=label_arg)
        log.info(f"Obsidian: {note_count} notes")

    if wiki:
        _generate_wiki(project_dir, G, communities, labels)


 def _generate_wiki(project_dir: Path, G, communities, labels):
    """Write the Markdown wiki under ``graphify-out/wiki``.

    Produces ``index.md`` (graph totals, every community with 3+ nodes, up to
    20 smaller ones, and up to 10 god nodes read from
    ``.graphify_analysis.json`` when that file exists) plus one article per
    community listing its nodes and their edges.

    Files are written as UTF-8 because the article text contains non-ASCII
    punctuation regardless of the node labels. Each community gets its own
    file name even when two labels slugify to the same string, so no article
    overwrites another.

    Args:
        project_dir: project root; output goes to ``graphify-out/wiki``.
        G: the graph (``nodes``, ``edges`` and ``neighbors`` are used).
        communities: mapping of community id to its node ids.
        labels: mapping of community id to display label; missing ids fall
            back to ``Community <cid>``.
    """
    out_dir = project_dir / "graphify-out"
    wiki_dir = out_dir / "wiki"
    wiki_dir.mkdir(parents=True, exist_ok=True)

    def label_of(cid):
        return labels.get(cid, f"Community {cid}")

    # One file name per community. Distinct labels can slugify to the same
    # string (e.g. "a b" and "a/b"), so clashes get the community id appended.
    # An empty slug falls back to the id as well.
    slugs = {}
    claimed = set()
    for cid in sorted(communities.keys()):
        slug = _slug(label_of(cid)) or f"community-{cid}"
        while slug in claimed:
            slug = f"{slug}-{cid}"
        claimed.add(slug)
        slugs[cid] = slug

    def index_entry(cid):
        return f"- [{label_of(cid)}]({slugs[cid]}.md) ({len(communities[cid])} nodes)"

    # Sort communities by size (largest first), then split into significant
    # (3+ nodes) and small (1-2 nodes).
    by_size = sorted(communities.keys(), key=lambda c: len(communities[c]), reverse=True)
    significant = [c for c in by_size if len(communities[c]) >= 3]
    small = [c for c in by_size if len(communities[c]) < 3]

    # Index page — only list significant communities + summary of small ones
    lines = [
        "# Project Knowledge Graph Wiki",
        "",
        f"**{G.number_of_nodes()} nodes** | **{G.number_of_edges()} edges** | **{len(communities)} communities**",
        "", "## Key Modules", "",
    ]
    lines.extend(index_entry(cid) for cid in significant)

    if small:
        lines += [
            "",
            f"## Small Modules ({len(small)} communities with 1-2 nodes)",
            "",
            "These are isolated files or functions with few connections. Articles still exist for each.",
            "",
        ]
        # List first 20 small modules, then summarize the rest
        lines.extend(index_entry(cid) for cid in small[:20])
        if len(small) > 20:
            lines.append(f"- ... and {len(small) - 20} more small modules")

    # God nodes from analysis
    analysis_path = out_dir / ".graphify_analysis.json"
    if analysis_path.exists():
        analysis = json.loads(analysis_path.read_text(encoding="utf-8"))
        gods = analysis.get("gods", [])
        if gods:
            lines += ["", "## God Nodes (most connected)", ""]
            for g in gods[:10]:
                lines.append(f"- **{g['label']}** ({g['edges']} edges)")

    (wiki_dir / "index.md").write_text("\n".join(lines), encoding="utf-8")

    # Per-community articles
    for cid in sorted(communities.keys()):
        node_ids = communities[cid]
        node_set = set(node_ids)

        art = [f"# {label_of(cid)}", "", "## Nodes", ""]
        for nid in node_ids:
            if nid in G.nodes:
                nd = G.nodes[nid]
                art.append(f"- **{nd.get('label', nid)}** — `{nd.get('source_file', '')}`")

        art += ["", "## Relationships", ""]
        for nid in node_ids:
            if nid not in G.nodes:
                continue
            for neighbor in G.neighbors(nid):
                e = G.edges[nid, neighbor]
                nl = G.nodes[nid].get("label", nid)
                tl = G.nodes[neighbor].get("label", neighbor)
                cross = "" if neighbor in node_set else " *(cross-community)*"
                art.append(f"- {nl} --**{e.get('relation', '?')}**--> {tl} [{e.get('confidence', '')}]{cross}")

        (wiki_dir / f"{slugs[cid]}.md").write_text("\n".join(art), encoding="utf-8")

    log.info(f"Wiki: {len(communities) + 1} articles")


 def _slug(label: str) -> str:
    """Turn a community label into a file-name-safe wiki slug.

    The label is lower-cased; spaces and ``/`` become ``-``; parentheses are
    removed. Characters that are invalid in Windows file names
    (``\\ : * ? " < > |``) and ``#`` (which would start a URL fragment inside
    the wiki's Markdown links) are replaced with ``-`` as well.
    """
    table = {ord(" "): "-", ord("/"): "-", ord("("): None, ord(")"): None}
    table.update((ord(ch), "-") for ch in '\\:*?"<>|#')
    return label.lower().translate(table)


 # ── architecture snapshot for CLAUDE.md ───────────────────────────────


 def _build_architecture_snapshot(project_dir: Path) -> str:
    """Summarise the stored graph as the text block embedded in CLAUDE.md.

    Loads ``graphify-out/graph.json`` and, when present, the saved labels and
    analysis. Missing community data is rebuilt from each node's
    ``community`` attribute, and missing god nodes from the five
    highest-degree nodes with degree above 1.

    Returns:
        Newline-joined text: a totals line, up to 20 communities of 3+ nodes
        with up to five member names each, then god nodes and up to three
        surprises. A fixed hint is returned when no graph exists yet.
    """
    out_dir = project_dir / "graphify-out"
    graph_file = out_dir / "graph.json"
    if not graph_file.exists():
        return "No graph built yet. Run: python graphify-setup.py init"

    from networkx.readwrite import json_graph

    G = json_graph.node_link_graph(json.loads(graph_file.read_text()), edges="links")

    # Load or recover labels and communities
    labels = {}
    labels_file = out_dir / ".graphify_labels.json"
    if labels_file.exists():
        stored = json.loads(labels_file.read_text())
        labels = {int(key): text for key, text in stored.items()}

    communities, gods, surprises = {}, [], []
    analysis_file = out_dir / ".graphify_analysis.json"
    if analysis_file.exists():
        analysis = json.loads(analysis_file.read_text())
        communities = analysis.get("communities", {})
        gods = analysis.get("gods", [])
        surprises = analysis.get("surprises", [])

    # Recover from graph.json if analysis was cleaned up
    if not communities:
        communities = {}
        for nid, attrs in G.nodes(data=True):
            communities.setdefault(str(attrs.get("community", 0)), []).append(nid)
        if not labels:
            labels = _label_communities(G, {int(k): v for k, v in communities.items()})

    if not gods:
        ranked = sorted(G.degree(), key=lambda pair: pair[1], reverse=True)
        gods = [
            {"label": G.nodes[n].get("label", n), "edges": degree}
            for n, degree in ranked[:5]
            if degree > 1
        ]

    sig_count = len([members for members in communities.values() if len(members) >= 3])
    small_count = len(communities) - sig_count

    lines = [f"{G.number_of_nodes()} nodes, {G.number_of_edges()} edges, {sig_count} key modules (+{small_count} small)", ""]

    def _member_names(nodes):
        # Labels containing "." or "/" are file-level nodes and are skipped.
        names = []
        for n in nodes:
            if n not in G.nodes:
                continue
            text = G.nodes[n].get("label", "")
            if "." not in text and "/" not in text and text:
                names.append(text)
        return names

    # Only show communities with 3+ nodes, capped at 20
    shown = 0
    largest_first = sorted(communities.items(), key=lambda item: len(item[1]), reverse=True)
    for cid, nodes in largest_first:
        if len(nodes) < 3:
            break
        label = labels.get(int(cid), f"Module {cid}")
        names = _member_names(nodes)
        if not names:
            continue
        lines.append(f"- **{label}**: {', '.join(names[:5])}")
        shown += 1
        if shown >= 20:
            remaining = sig_count - shown
            if remaining > 0:
                lines.append(f"- ... and {remaining} more (see wiki/index.md)")
            break

    if gods:
        lines.append("")
        lines.append("God nodes: " + ", ".join(f"{g['label']} ({g['edges']})" for g in gods[:5]))

    if surprises:
        lines.append("")
        for item in surprises[:3]:
            if isinstance(item, dict) and item.get("description"):
                lines.append(f"- Surprise: {item['description']}")

    return "\n".join(lines)


 def write_claude_md(project_dir: Path):
    """Write ``CLAUDE.md`` with the rendered template and architecture snapshot.

    An existing ``CLAUDE.md`` is handled as follows:

    * If it has a line that is exactly the ``# graphify`` heading, everything
      before that line is kept and the section from the heading onward is
      regenerated.
    * If it has no such line and does not start with ``# Graphify``, the
      whole file is kept and placed after the generated section.
    * Otherwise it is overwritten.

    The heading is matched as a whole line, so ``## graphify…`` sub-headings
    or the phrase inside a sentence no longer truncate user content. The file
    is written as UTF-8 because the template contains non-ASCII characters.
    """
    import re

    architecture = _build_architecture_snapshot(project_dir)
    content = CLAUDE_MD_TEMPLATE.format(architecture=architecture)
    target = project_dir / "CLAUDE.md"

    if target.exists():
        try:
            existing = target.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: fall back to the platform default encoding so
            # the existing content can still be preserved.
            existing = target.read_text()

        heading = re.search(r"^[ \t]*# graphify[ \t]*$", existing, flags=re.MULTILINE)
        if heading:
            before = existing[: heading.start()].rstrip()
            if before:
                content = before + "\n\n" + content
        elif not existing.startswith("# Graphify"):
            # Entirely different CLAUDE.md — keep it after the graphify section
            content = content + "\n" + existing

    target.write_text(content, encoding="utf-8")
    log.info("CLAUDE.md written")


 # ── manifest & cleanup ────────────────────────────────────────────────


 def save_manifest(project_dir: Path, detection: dict):
    """Hand the detected file lists to graphify's own manifest writer.

    ``project_dir`` is not used here; only ``detection["files"]`` is
    forwarded to ``graphify.detect.save_manifest``.
    """
    from graphify.detect import save_manifest as write_manifest

    detected_files = detection["files"]
    write_manifest(detected_files)


 def update_cost(project_dir: Path, detection: dict):
    """Append this run's token usage to ``graphify-out/cost.json``.

    Token counts come from ``.graphify_extract.json``; when that file is
    missing the function returns without touching anything. Each run entry
    records a UTC ISO timestamp, both token counts and
    ``detection["total_files"]`` (0 when absent), and the running totals are
    increased accordingly.
    """
    out_dir = project_dir / "graphify-out"
    extract_file = out_dir / ".graphify_extract.json"
    if not extract_file.exists():
        return

    usage = json.loads(extract_file.read_text())
    tokens_in = usage.get("input_tokens", 0)
    tokens_out = usage.get("output_tokens", 0)

    ledger_file = out_dir / "cost.json"
    if ledger_file.exists():
        ledger = json.loads(ledger_file.read_text())
    else:
        ledger = {"runs": [], "total_input_tokens": 0, "total_output_tokens": 0}

    run_entry = {
        "date": datetime.now(timezone.utc).isoformat(),
        "input_tokens": tokens_in,
        "output_tokens": tokens_out,
        "files": detection.get("total_files", 0),
    }
    ledger["runs"].append(run_entry)
    ledger["total_input_tokens"] += tokens_in
    ledger["total_output_tokens"] += tokens_out
    ledger_file.write_text(json.dumps(ledger, indent=2))


 def cleanup_temp(project_dir: Path):
    """Delete the intermediate pipeline files from ``graphify-out/``.

    Only the names listed below are removed when present; everything else in
    the directory (graph, report, analysis, labels, cost ledger) is left
    alone.
    """
    out_dir = project_dir / "graphify-out"
    temp_names = (
        ".graphify_detect.json",
        ".graphify_extract.json",
        ".graphify_ast.json",
        ".graphify_semantic.json",
        ".graphify_cached.json",
        ".graphify_uncached.txt",
        ".needs_update",
    )
    for candidate in (out_dir / name for name in temp_names):
        if candidate.exists():
            candidate.unlink()


 # ── commands ──────────────────────────────────────────────────────────


 def cmd_init(project_dir: Path, *, obsidian=True, wiki=True):
    """Run the full indexing pipeline for *project_dir*.

    Steps: ensure graphify is available, create ``.graphifyignore``, detect
    files, run AST extraction, load cached semantic data, merge, build the
    graph, write the outputs, then save the manifest, cost ledger and
    CLAUDE.md and remove temp files.

    Args:
        project_dir: project root to index.
        obsidian: also generate the Obsidian vault.
        wiki: also generate the Markdown wiki.

    Raises:
        SystemExit: with code 1 when detection reports no files.
    """
    stopwatch = _timer()
    ensure_graphify()
    write_ignore(project_dir)

    log.info(f"Indexing {project_dir} (deep mode)...")
    detection = detect_files(project_dir)

    file_count = detection.get("total_files", 0)
    word_count = detection.get("total_words", 0)
    log.info(f"Corpus: {file_count} files, ~{word_count:,} words")
    for kind, paths in detection.get("files", {}).items():
        if not paths:
            continue
        log.info(f"  {kind}: {len(paths)}")

    if file_count == 0:
        log.error("No supported files found.")
        sys.exit(1)

    # Warn on large corpora
    if file_count > 200 or word_count > 2_000_000:
        log.warning(f"Large corpus ({file_count} files, ~{word_count:,} words). This may take a while.")

    # AST extraction (code files — deterministic, free)
    run_ast(project_dir, detection)

    # Semantic step: only cached results are used here; the rest is reported
    pending = run_cache_and_semantic(project_dir, detection)
    if pending:
        for message in (
            f"{len(pending)} files need semantic extraction (docs/papers/images).",
            "Run `/graphify . --mode deep` in Claude to extract these with subagents.",
            "Proceeding with AST + cached data for now.",
        ):
            log.info(message)

    merge_extractions(project_dir)

    # Build graph + outputs
    G, communities, cohesion, _, labels = build_graph(project_dir, detection)
    generate_outputs(project_dir, G, communities, cohesion, labels, obsidian=obsidian, wiki=wiki)

    # Finalize
    save_manifest(project_dir, detection)
    update_cost(project_dir, detection)
    write_claude_md(project_dir)
    cleanup_temp(project_dir)

    log.info("")
    log.info(f"Done in {stopwatch()}s. Outputs in graphify-out/:")
    summary = [
        "  graph.html      — interactive visualization",
        "  graph.json      — queryable graph data",
        "  GRAPH_REPORT.md — audit report with god nodes",
    ]
    if obsidian:
        summary.append("  obsidian/       — Obsidian vault + canvas")
    if wiki:
        summary.append("  wiki/           — Claude memory layer")
    summary.append("  ../CLAUDE.md    — Graphify-first rules for Claude")
    if pending:
        summary += [
            "",
            f"NOTE: {len(pending)} files still need deep semantic extraction.",
            "  Run in Claude: /graphify . --mode deep --update",
        ]
    for message in summary:
        log.info(message)


 def cmd_update(project_dir: Path):
    """Rebuild the graph when graphify reports changed files.

    Falls back to ``cmd_init`` when no ``graph.json`` exists yet. Otherwise
    ``detect_incremental`` decides whether anything changed; if so, the whole
    pipeline (detect, AST, cache, merge, build, outputs, manifest, cost,
    CLAUDE.md, cleanup) is run again with all outputs enabled.
    """
    ensure_graphify()

    graph_file = project_dir / "graphify-out" / "graph.json"
    if not graph_file.exists():
        log.warning("No existing graph — running full init.")
        cmd_init(project_dir)
        return

    from graphify.detect import detect_incremental

    changed = detect_incremental(project_dir).get("new_total", 0)
    if changed == 0:
        log.info("No files changed. Graph is current.")
        return

    stopwatch = _timer()
    log.info(f"{changed} changed file(s), rebuilding...")

    detection = detect_files(project_dir)
    run_ast(project_dir, detection)
    run_cache_and_semantic(project_dir, detection)
    merge_extractions(project_dir)

    G, communities, cohesion, _, labels = build_graph(project_dir, detection)
    generate_outputs(project_dir, G, communities, cohesion, labels)

    for finalize in (save_manifest, update_cost):
        finalize(project_dir, detection)
    write_claude_md(project_dir)
    cleanup_temp(project_dir)
    log.info(f"Update complete in {stopwatch()}s.")


 def cmd_status(project_dir: Path):
    """Print a summary of the stored graph and which outputs exist.

    Reports node/edge/community counts, the modification time of
    ``graph.json``, up to five god nodes, and the run count and token totals
    from ``cost.json``. This is followed by an ok/missing checklist of the
    generated artifacts and ``CLAUDE.md``. Prints a hint and returns when no
    graph has been built.
    """
    ensure_graphify()
    out_dir = project_dir / "graphify-out"
    graph_file = out_dir / "graph.json"

    if not graph_file.exists():
        print("No graph found. Run: python graphify-setup.py init")
        return

    from networkx.readwrite import json_graph

    def _load_json(path, fallback):
        return json.loads(path.read_text()) if path.exists() else fallback

    G = json_graph.node_link_graph(json.loads(graph_file.read_text()), edges="links")
    labels = _load_json(out_dir / ".graphify_labels.json", {})

    gods = []
    analysis_file = out_dir / ".graphify_analysis.json"
    if analysis_file.exists():
        gods = json.loads(analysis_file.read_text()).get("gods", [])

    cost = _load_json(out_dir / "cost.json", {})

    built_at = datetime.fromtimestamp(graph_file.stat().st_mtime).strftime("%Y-%m-%d %H:%M")
    god_summary = ", ".join(g["label"] for g in gods[:5]) if gods else "none"

    print(f"Graph:        {G.number_of_nodes()} nodes, {G.number_of_edges()} edges, {len(labels)} communities")
    print(f"Last built:   {built_at}")
    print(f"God nodes:    {god_summary}")
    print(f"Total runs:   {len(cost.get('runs', []))}")
    print(f"Total tokens: {cost.get('total_input_tokens', 0):,} in / {cost.get('total_output_tokens', 0):,} out")
    print()

    artifacts = (
        ("graph.html", "Interactive viz"),
        ("graph.json", "Raw graph"),
        ("GRAPH_REPORT.md", "Audit report"),
        ("obsidian/", "Obsidian vault"),
        ("wiki/index.md", "Wiki memory"),
    )
    for rel_path, desc in artifacts:
        # exists() is already true for directories, so one check covers both.
        mark = "ok" if (out_dir / rel_path).exists() else "missing"
        print(f"  [{mark:>7}] {rel_path:<20} {desc}")

    mark = "ok" if (project_dir / "CLAUDE.md").exists() else "missing"
    print(f"  [{mark:>7}] {'CLAUDE.md':<20} Graphify-first rules")


 def cmd_refresh_claude(project_dir: Path):
    """Rewrite CLAUDE.md from the graph that is already on disk.

    Raises:
        SystemExit: with code 1 when ``graphify-out/graph.json`` is missing.
    """
    ensure_graphify()
    graph_file = project_dir / "graphify-out" / "graph.json"
    if graph_file.exists():
        write_claude_md(project_dir)
        return
    log.error("No graph found. Run: python graphify-setup.py init")
    sys.exit(1)


 def cmd_watch(project_dir: Path, debounce_sec: int = 5):
    """Watch *project_dir* and run ``cmd_update`` when files change.

    Runs an initial ``cmd_init`` when no graph exists. File events
    (modified, created, deleted, moved) outside the directories listed in
    ``.graphifyignore`` trigger an update. An event arriving less than
    ``debounce_sec`` seconds after the last accepted one is dropped. Blocks
    until the observer stops; SIGINT/SIGTERM stop it and exit with code 0.

    Args:
        project_dir: directory to watch recursively.
        debounce_sec: minimum number of seconds between two rebuilds.

    Raises:
        SystemExit: with code 1 when the ``watchdog`` package is missing.
    """
    try:
        from watchdog.observers import Observer
        from watchdog.events import FileSystemEventHandler
    except ImportError:
        log.error("watchdog required for watch mode. Run: pip install watchdog")
        sys.exit(1)

    ensure_graphify()
    write_ignore(project_dir)

    if not (project_dir / "graphify-out" / "graph.json").exists():
        log.info("No graph — running initial index...")
        cmd_init(project_dir)

    # Load ignore patterns (only directory entries, i.e. lines ending in "/")
    ignore_path = project_dir / ".graphifyignore"
    ignore_dirs = set()
    if ignore_path.exists():
        for line in ignore_path.read_text().splitlines():
            line = line.strip()
            if line and not line.startswith("#") and line.endswith("/"):
                ignore_dirs.add(line.rstrip("/"))

    class Handler(FileSystemEventHandler):
        def __init__(self):
            super().__init__()
            self.last_run = 0.0

        def _on_file_event(self, event):
            if event.is_directory:
                return
            src = os.fsdecode(event.src_path)
            # Events carry native paths; normalise the separator so the
            # "/"-based ignore check below also matches on Windows.
            normalized = src.replace(os.sep, "/")
            # Skip ignored directories
            for d in ignore_dirs:
                if f"/{d}/" in normalized or normalized.endswith(f"/{d}"):
                    return
            # Debounce
            now = time.time()
            if (now - self.last_run) < debounce_sec:
                return
            self.last_run = now
            log.info(f"Changed: {Path(src).name}")
            try:
                cmd_update(project_dir)
            except Exception as e:
                log.error(f"Update failed: {e}")
            except SystemExit as e:
                # cmd_update calls sys.exit() on an empty graph or corpus;
                # letting that escape would silently end the watcher thread.
                log.error(f"Update aborted (exit code {e.code})")

        # New, removed and renamed files change the graph as much as edits do.
        on_modified = on_created = on_deleted = on_moved = _on_file_event

    observer = Observer()
    observer.schedule(Handler(), str(project_dir), recursive=True)
    observer.start()
    log.info(f"Watching {project_dir} (debounce={debounce_sec}s, Ctrl+C to stop)")

    def _stop(*_):
        observer.stop()
        log.info("Stopped.")
        sys.exit(0)

    signal.signal(signal.SIGINT, _stop)
    signal.signal(signal.SIGTERM, _stop)

    while observer.is_alive():
        observer.join(timeout=1)


 # ── main ──────────────────────────────────────────────────────────────


 def main():
    """Parse the command line and run the selected sub-command.

    The project path is resolved and must be an existing directory,
    otherwise the process exits with code 1. The working directory is
    switched to the project before the command runs.
    """
    examples = """\
 examples:
  python graphify-setup.py init .               # index current directory
  python graphify-setup.py init /path/to/project  # index any project
  python graphify-setup.py init --no-obsidian   # skip Obsidian vault
  python graphify-setup.py update               # incremental rebuild
  python graphify-setup.py status               # show graph stats
  python graphify-setup.py refresh-claude       # regenerate CLAUDE.md only
  python graphify-setup.py watch --debounce 10  # auto-rebuild on changes
 """
    parser = argparse.ArgumentParser(
        description="graphify-setup — one-command project indexing for AI development",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    parser.add_argument("cmd", choices=["init", "update", "status", "refresh-claude", "watch"])
    parser.add_argument("path", nargs="?", default=".", help="Project directory (default: .)")
    parser.add_argument("--no-obsidian", action="store_true", help="Skip Obsidian vault generation")
    parser.add_argument("--no-wiki", action="store_true", help="Skip wiki generation")
    parser.add_argument("--debounce", type=int, default=5, help="Watch debounce in seconds (default: 5)")
    args = parser.parse_args()

    project_dir = Path(args.path).resolve()
    if not project_dir.is_dir():
        log.error(f"Not a directory: {project_dir}")
        sys.exit(1)

    os.chdir(project_dir)

    command = args.cmd
    if command == "init":
        cmd_init(project_dir, obsidian=not args.no_obsidian, wiki=not args.no_wiki)
    elif command == "update":
        cmd_update(project_dir)
    elif command == "status":
        cmd_status(project_dir)
    elif command == "refresh-claude":
        cmd_refresh_claude(project_dir)
    elif command == "watch":
        cmd_watch(project_dir, debounce_sec=args.debounce)
    else:
        # Unreachable through argparse (``choices`` rejects other values);
        # mirrors the KeyError a dispatch-table lookup would raise.
        raise KeyError(command)


 # Run the CLI only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
No results found