Created
August 9, 2025 17:24
-
-
Save marclove/a1d477f9ae50d12a99d0b49e2ccc7961 to your computer and use it in GitHub Desktop.
LLM Context Bundler for TypeScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //! LLM Context Bundler | |
| //! -------------------- | |
| //! A tiny, **parser-driven** concatenation bundler designed specifically for | |
| //! *prompting a coding LLM* with a complete, readable view of your codebase. | |
| //! | |
| //! Goals (tailored to doc-generation / comprehension): | |
| //! - Resolve realistic JS/TS/Paths with **oxc_resolver** (tsconfig-aware) | |
| //! - Parse code with **oxc_parser**; find ESM imports and top-level CJS `require()` | |
| //! - Inline dependencies **before** each file (DFS), preserving strong file | |
| //! boundaries and breadcrumbs for the LLM | |
| //! - Keep imports in the text (as comments) so relationships remain visible | |
| //! - Skip heavy third-party code by default (exclude `node_modules`) to save tokens | |
| //! - Optional JSON inlining (commented), optional nested `require()` discovery | |
| //! - Optional output size cap with a clear truncation notice at the top | |
| //! | |
| //! Non-goals: | |
| //! - This is *not* an executable bundle. No module wrapper, no transpile. | |
| //! It's for reading only, so we favor clarity over runtime correctness. | |
| //! | |
| //! Public API: | |
| //! - `bundle(path) -> Result<String>`: default, opinionated settings for LLM prompts | |
| //! - `bundle_with_options(path, &BundleOptions) -> Result<String>`: full control | |
| //! | |
| //! Suggested `Cargo.toml` (relevant deps): | |
| //! ```toml | |
| //! [dependencies] | |
| //! oxc_resolver = "11" | |
| //! oxc_parser = "0.29" | |
| //! oxc_ast = "0.29" | |
| //! oxc_span = "0.29" | |
| //! oxc_allocator = "0.29" | |
| //! regex = "1" | |
| //! ``` | |
use std::{
    collections::{HashMap, HashSet},
    fs,
    path::{Path, PathBuf},
};

use oxc_allocator::Allocator;
use oxc_ast::ast;
use oxc_parser::Parser;
use oxc_resolver::{ResolveOptions, Resolver, TsconfigOptions, TsconfigReferences, NODEJS_BUILTINS};
// `GetSpan` is required so `stmt.span()` (used when splicing statements) resolves.
use oxc_span::{GetSpan, SourceType, Span};
// ------------------------------ Error alias ------------------------------
/// Lightweight error alias for library ergonomics.
///
/// Boxed so any `std::error::Error` (I/O, resolver, parse) can be propagated
/// with `?`; `Send + Sync` keeps the alias usable across thread boundaries.
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
// ------------------------------ Options ----------------------------------
/// Options tuned for feeding code to an LLM, not for execution.
///
/// Construct with `BundleOptions::default()` and override individual fields,
/// then pass to `bundle_with_options`.
#[derive(Debug, Clone)]
pub struct BundleOptions {
    /// Include dependencies from `node_modules`.
    /// Default: **false** (third-party code usually wastes tokens for doc tasks)
    pub include_node_modules: bool,
    /// Try to find **any** static `require("...")` across the file, not just top-level.
    /// Default: **false** (top-level is usually enough; full scan adds work)
    pub include_all_static_requires: bool,
    /// Replace import/require statements that we inline with a comment instead of removing.
    /// Default: **true** (better breadcrumbs for an LLM)
    pub comment_instead_of_strip: bool,
    /// Maximum number of bytes to emit (not including the preface header).
    /// If reached, we stop inlining further files and mark the output as truncated.
    /// Default: **None** (no cap)
    pub max_bytes: Option<usize>,
    /// Inline `.json` files (as *commented* blocks) when imported.
    /// Default: **true** (JSON config is often helpful context)
    pub include_json: bool,
}
| impl Default for BundleOptions { | |
| fn default() -> Self { | |
| Self { | |
| include_node_modules: false, | |
| include_all_static_requires: false, | |
| comment_instead_of_strip: true, | |
| max_bytes: None, | |
| include_json: true, | |
| } | |
| } | |
| } | |
| // ------------------------------ Public API ------------------------------ | |
| /// Opinionated defaults ideal for LLM doc/comprehension prompts. | |
| /// | |
| /// *Signature preserved as requested.* | |
| pub fn bundle(ts_file_path: &Path) -> Result<String> { | |
| bundle_with_options(ts_file_path, &BundleOptions::default()) | |
| } | |
| /// Full-control entry. Prefer this if you need to tweak behavior. | |
| pub fn bundle_with_options(ts_file_path: &Path, opts: &BundleOptions) -> Result<String> { | |
| let entry = ts_file_path.canonicalize()?; | |
| let project_dir = entry | |
| .parent() | |
| .ok_or("Entry file must have a parent directory")? | |
| .canonicalize()?; | |
| // Wire tsconfig.json into the resolver if present – this enables TS path mapping. | |
| let tsconfig = find_upwards(&project_dir, "tsconfig.json"); | |
| let mut ropts = ResolveOptions { | |
| // Prefer TS/ESM first for comprehension, then common variants | |
| extensions: vec![ | |
| ".ts".into(), ".tsx".into(), ".mts".into(), ".cts".into(), | |
| ".js".into(), ".jsx".into(), ".mjs".into(), ".cjs".into(), | |
| ".json".into(), // allow JSON resolution for context | |
| ], | |
| // Node-style condition names. Include both ESM and CJS entry points. | |
| condition_names: vec!["node".into(), "import".into(), "require".into(), "default".into()], | |
| ..ResolveOptions::default() | |
| }; | |
| if let Some(config_file) = tsconfig { | |
| ropts.tsconfig = Some(TsconfigOptions { config_file, references: TsconfigReferences::Auto }); | |
| } | |
| let resolver = Resolver::new(ropts); | |
| // Internal bundler state. We keep this separate so the recursion stays clean. | |
| let mut state = State::new(resolver, opts.clone()); | |
| // Build the bundle body first; prepend a summary header at the very end | |
| // so we can include accurate counts and truncation notes. | |
| state.bundle_file(&entry)?; | |
| let body = state.out; | |
| // Compose preface summary (human- and LLM-friendly) | |
| let mut preface = String::new(); | |
| preface.push_str("/*\n"); | |
| preface.push_str("================== BUNDLE PREFACE ==================\n"); | |
| preface.push_str(&format!("Entry: {}\n", entry.display())); | |
| preface.push_str(&format!("Files inlined: {}\n", state.included_files.len())); | |
| preface.push_str(&format!("Total bytes (body): {}\n", state.total_bytes)); | |
| if let Some(max) = opts.max_bytes { | |
| preface.push_str(&format!("Max bytes: {}\n", max)); | |
| } | |
| if state.truncated { | |
| preface.push_str("NOTE: Output truncated due to max_bytes. Some dependencies were not inlined.\n"); | |
| } | |
| if !state.unresolved.is_empty() { | |
| preface.push_str("Unresolved or skipped (kept as-is):\n"); | |
| for (spec, from) in &state.unresolved { | |
| preface.push_str(&format!(" - '{}' (from {})\n", spec, from.display())); | |
| } | |
| } | |
| preface.push_str("====================================================\n"); | |
| preface.push_str("*/\n\n"); | |
| Ok(format!("{}{}", preface, body)) | |
| } | |
// ------------------------------ Core state --------------------------------
/// Internal, mutable state carried through the DFS.
///
/// Owned by a single `bundle_with_options` call; not shared across threads.
struct State {
    /// Module resolver (tsconfig-aware when a tsconfig.json was found).
    resolver: Resolver,
    /// Caller-supplied knobs (cloned so `State` is self-contained).
    opts: BundleOptions,
    // Avoid re-reading or re-emitting the same file (also breaks import cycles).
    visited: HashSet<PathBuf>,
    // Resolve cache: (context_dir, spec) -> full_path
    res_cache: HashMap<(PathBuf, String), PathBuf>,
    // Diagnostics & summary
    included_files: Vec<PathBuf>,
    /// Specifiers we could not (or chose not to) inline, paired with the file
    /// that referenced them; reported in the preface.
    unresolved: Vec<(String, PathBuf)>,
    // Output accumulation & limits
    out: String,
    /// Bytes of `out` emitted so far (tracked to enforce `max_bytes`).
    total_bytes: usize,
    /// Set once `max_bytes` was hit; all further emission is suppressed.
    truncated: bool,
}
| impl State { | |
| fn new(resolver: Resolver, opts: BundleOptions) -> Self { | |
| Self { | |
| resolver, | |
| opts, | |
| visited: HashSet::new(), | |
| res_cache: HashMap::new(), | |
| included_files: Vec::new(), | |
| unresolved: Vec::new(), | |
| out: String::new(), | |
| total_bytes: 0, | |
| truncated: false, | |
| } | |
| } | |
| /// Guard to enforce `max_bytes`. Once we hit the cap, we stop recursing. | |
| fn can_emit_more(&self) -> bool { | |
| if let Some(max) = self.opts.max_bytes { | |
| self.total_bytes < max | |
| } else { | |
| true | |
| } | |
| } | |
| /// Append bytes while tracking the cap. | |
| fn push_out(&mut self, s: &str) { | |
| if self.truncated { return; } | |
| if let Some(max) = self.opts.max_bytes { | |
| // compute how many bytes we *can* still add | |
| let remaining = max.saturating_sub(self.total_bytes); | |
| if s.len() > remaining { | |
| self.out.push_str(&s[..remaining]); | |
| self.total_bytes += remaining; | |
| self.truncated = true; | |
| return; | |
| } | |
| } | |
| self.total_bytes += s.len(); | |
| self.out.push_str(s); | |
| } | |
| /// Resolve `spec` from within `ctx_dir`, using a small cache. | |
| fn resolve(&mut self, ctx_dir: &Path, spec: &str) -> Option<PathBuf> { | |
| let key = (ctx_dir.to_path_buf(), spec.to_string()); | |
| if let Some(p) = self.res_cache.get(&key) { return Some(p.clone()); } | |
| match self.resolver.resolve(ctx_dir, spec) { | |
| Ok(res) => { | |
| let p = res.full_path().to_path_buf(); | |
| self.res_cache.insert(key, p.clone()); | |
| Some(p) | |
| } | |
| Err(_) => None | |
| } | |
| } | |
| /// Depth-first inline of a single file. | |
| fn bundle_file(&mut self, file_path: &Path) -> Result<()> { | |
| let path = file_path.canonicalize()?; | |
| if !self.visited.insert(path.clone()) { | |
| // already emitted this file | |
| return Ok(()); | |
| } | |
| if is_declaration_file(&path) { | |
| return Ok(()); | |
| } | |
| if !self.can_emit_more() { self.truncated = true; return Ok(()); } | |
| let source = fs::read_to_string(&path)?; | |
| // Parse the file into an AST for reliable import/require collection. | |
| let allocator = Allocator::default(); | |
| let source_type = SourceType::from_path(&path).unwrap_or(SourceType::ts()); | |
| let parser = Parser::new(&allocator, &source, source_type); | |
| let parsed = parser.parse(); | |
| let program = parsed.program; | |
| // 1) Collect ESM imports / re-exports (with source) and top-level CJS requires. | |
| let mut to_inline: Vec<SpecUse> = Vec::new(); | |
| // ESM: keep exact statement spans so we can comment/strip only those lines. | |
| for stmt in &program.body { | |
| match stmt { | |
| ast::Statement::ImportDeclaration(n) => { | |
| if let Some(src) = &n.source { to_inline.push(SpecUse::esm(n.span, &src.value)); } | |
| } | |
| ast::Statement::ExportNamedDeclaration(n) => { | |
| if let Some(src) = &n.source { to_inline.push(SpecUse::esm(n.span, &src.value)); } | |
| } | |
| ast::Statement::ExportAllDeclaration(n) => { | |
| if let Some(src) = &n.source { to_inline.push(SpecUse::esm(n.span, &src.value)); } | |
| } | |
| _ => {} | |
| } | |
| } | |
| // CJS: top-level require forms -> strip or comment the *entire* statement. | |
| for stmt in &program.body { | |
| if let ast::Statement::ExpressionStatement(expr_stmt) = stmt { | |
| if let Some(spec) = extract_static_require_spec(&expr_stmt.expression) { | |
| to_inline.push(SpecUse::cjs_top(expr_stmt.span, spec)); | |
| } | |
| } | |
| if let ast::Statement::VariableDeclaration(var_decl) = stmt { | |
| for decl in &var_decl.declarations { | |
| if let Some(init) = &decl.init { | |
| if let Some(spec) = extract_static_require_spec(init) { | |
| to_inline.push(SpecUse::cjs_top(var_decl.span, spec)); | |
| break; // Remove once per statement | |
| } | |
| } | |
| } | |
| } | |
| } | |
| // Optionally: discover ANY static `require("...")` (not only top-level) via a | |
| // lightweight heuristic. We do NOT strip these (could be inside expressions), | |
| // but we inline their targets before this file so the LLM sees the code too. | |
| if self.opts.include_all_static_requires { | |
| for spec in find_all_static_requires_heuristic(&source) { | |
| to_inline.push(SpecUse::cjs_nested(spec)); | |
| } | |
| } | |
| // Deduplicate by spec while keeping earliest statement span for stripping. | |
| to_inline = dedup_spec_uses(to_inline); | |
| // 2) Resolve & inline dependencies first (DFS). Track which statements we actually inlined | |
| // so we can replace those specific import/require statements with a small comment. | |
| let mut replacements: HashMap<Span, String> = HashMap::new(); | |
| for item in &to_inline { | |
| let spec = item.spec(); | |
| // Skip node builtins and URLs/data URIs – they are not project files. | |
| if is_node_builtin(spec) || looks_like_url(spec) { continue; } | |
| let ctx_dir = path.parent().ok_or("File without parent directory")?.to_path_buf(); | |
| let Some(resolved) = self.resolve(&ctx_dir, spec) else { | |
| self.unresolved.push((spec.to_string(), path.clone())); | |
| continue; | |
| }; | |
| // Skip declaration files; optionally skip node_modules to save tokens. | |
| if is_declaration_file(&resolved) { continue; } | |
| if !self.opts.include_node_modules && in_node_modules(&resolved) { continue; } | |
| if is_json_file(&resolved) { | |
| if self.opts.include_json { | |
| // Inline JSON as a commented block; ideal for documentation context. | |
| self.emit_json_block(&resolved)?; | |
| } | |
| // If we emitted JSON, treat as "inlined" for ESM/CJS statement replacement. | |
| if let Some(sp) = item.strip_span() { replacements.insert(sp, replaced_comment(spec, &resolved)); } | |
| continue; | |
| } | |
| if is_code_file(&resolved) { | |
| self.bundle_file(&resolved)?; // DFS inline | |
| if let Some(sp) = item.strip_span() { | |
| replacements.insert(sp, replaced_comment(spec, &resolved)); | |
| } | |
| } else { | |
| // Other asset types – keep import as-is, but note unresolved/unsupported. | |
| self.unresolved.push((spec.to_string(), path.clone())); | |
| } | |
| if !self.can_emit_more() { self.truncated = true; break; } | |
| } | |
| // 3) Write *this* file with optional statement replacement (comment or strip). | |
| // We reconstruct by streaming the original source and splicing where needed. | |
| self.push_out("/* ===== FILE START ===== */\n"); | |
| self.push_out(&format!("/* PATH: {} */\n", path.display())); | |
| let mut cursor = 0usize; | |
| for stmt in &program.body { | |
| let sp = stmt.span(); | |
| if let Some(repl) = replacements.get(&sp) { | |
| // Emit text *before* the statement, then our breadcrumb comment. | |
| let start = sp.start as usize; | |
| let end = sp.end as usize; | |
| if start > cursor { self.push_out(&source[cursor..start]); } | |
| if self.opts.comment_instead_of_strip { self.push_out(repl); } | |
| // else: drop the statement entirely | |
| cursor = end; | |
| } | |
| } | |
| if cursor < source.len() { self.push_out(&source[cursor..]); } | |
| self.push_out("\n/* ===== FILE END ===== */\n\n"); | |
| // Record for preface summary. | |
| self.included_files.push(path); | |
| Ok(()) | |
| } | |
| /// Emit a commented JSON block. This keeps the raw JSON visible to the LLM without | |
| /// affecting any parsing logic in downstream tools. | |
| fn emit_json_block(&mut self, json_path: &Path) -> Result<()> { | |
| if !self.can_emit_more() { self.truncated = true; return Ok(()); } | |
| let text = fs::read_to_string(json_path)?; | |
| self.push_out("/* ===== FILE START ===== */\n"); | |
| self.push_out(&format!("/* PATH: {} (JSON) */\n", json_path.display())); | |
| self.push_out("/* BEGIN JSON */\n"); | |
| self.push_out(&text); | |
| self.push_out("\n/* END JSON */\n"); | |
| self.push_out("/* ===== FILE END ===== */\n\n"); | |
| Ok(()) | |
| } | |
| } | |
// ------------------------------ Helpers -----------------------------------
/// Represents a dependency use-site we discovered.
///
/// Variants with a `span` cover a whole statement that can safely be replaced
/// or stripped; `CjsNested` has no span because the `require()` may sit inside
/// an arbitrary expression.
#[derive(Clone, Debug)]
enum SpecUse {
    /// ESM `import` or `export ... from` with the statement span.
    Esm { span: Span, spec: String },
    /// CJS top-level `require()` with the whole statement span.
    CjsTop { span: Span, spec: String },
    /// CJS nested static `require()` (no safe statement span to strip).
    CjsNested { spec: String },
}
| impl SpecUse { | |
| fn esm(span: Span, spec: &str) -> Self { Self::Esm { span, spec: spec.to_string() } } | |
| fn cjs_top(span: Span, spec: &str) -> Self { Self::CjsTop { span, spec: spec.to_string() } } | |
| fn cjs_nested(spec: String) -> Self { Self::CjsNested { spec } } | |
| fn spec(&self) -> &str { | |
| match self { Self::Esm { spec, .. } | Self::CjsTop { spec, .. } | Self::CjsNested { spec } => spec } | |
| } | |
| fn strip_span(&self) -> Option<Span> { | |
| match self { Self::Esm { span, .. } | Self::CjsTop { span, .. } => Some(*span), _ => None } | |
| } | |
| } | |
| /// Deduplicate uses by spec, keeping the earliest strip-able span if any. | |
| fn dedup_spec_uses(mut items: Vec<SpecUse>) -> Vec<SpecUse> { | |
| let mut seen: HashMap<String, SpecUse> = HashMap::new(); | |
| for it in items.drain(..) { | |
| let key = it.spec().to_string(); | |
| seen.entry(key).and_modify(|existing| { | |
| // Prefer a variant that has a span (so we can strip/comment), and keep the earliest span. | |
| match (existing.strip_span(), it.strip_span()) { | |
| (None, Some(_)) => { *existing = it.clone(); } | |
| (Some(old), Some(new)) if new.start < old.start => { *existing = it.clone(); } | |
| _ => {} | |
| } | |
| }).or_insert(it); | |
| } | |
| seen.into_values().collect() | |
| } | |
/// Extract `require("...")` if the expression is a static single-argument call,
/// possibly wrapped in a member expression (e.g. `require("x").foo`).
///
/// Returns the specifier string (borrowed from the AST arena, hence `'a`) or
/// `None` for anything dynamic (`require(variable)`, template strings, etc.).
fn extract_static_require_spec<'a>(expr: &ast::Expression<'a>) -> Option<&'a str> {
    use ast::{Argument, Expression, MemberExpression};
    match expr {
        Expression::CallExpression(call) => {
            // Only a bare `require(...)` identifier call with exactly one
            // string-literal argument counts as static.
            if let Expression::Identifier(ident) = &call.callee {
                if ident.name.as_str() == "require" && call.arguments.len() == 1 {
                    if let Argument::Expression(Expression::StringLiteral(lit)) = &call.arguments[0] {
                        return Some(lit.value.as_str());
                    }
                }
            }
            None
        }
        Expression::MemberExpression(member) => {
            // Unwrap `require("x").foo` / `require("x")["foo"]` by recursing
            // into the member expression's object.
            match member {
                MemberExpression::StaticMemberExpression(m) => extract_static_require_spec(&m.object),
                MemberExpression::ComputedMemberExpression(m) => extract_static_require_spec(&m.object),
                MemberExpression::PrivateFieldExpression(_) => None,
            }
        }
        _ => None,
    }
}
| /// Heuristic (regex) for discovering *any* `require("...")` in the file when | |
| /// `include_all_static_requires` is enabled. We don't strip these; we only inline | |
| /// their targets so the LLM has the code for context. | |
| fn find_all_static_requires_heuristic(source: &str) -> Vec<String> { | |
| // NOTE: This is intentionally simple; it is good enough for doc prompts and avoids | |
| // writing a full AST walker. It ignores comments/strings edge cases. | |
| let re = regex::Regex::new(r#"require\s*\(\s*['\"]([^'\"]+)['\"]\s*\)"#).unwrap(); | |
| let mut v = Vec::new(); | |
| let mut seen = HashSet::new(); | |
| for cap in re.captures_iter(source) { | |
| if let Some(m) = cap.get(1) { if seen.insert(m.as_str().to_string()) { v.push(m.as_str().to_string()); } } | |
| } | |
| v | |
| } | |
/// True for JS/TS source extensions we know how to parse and inline.
fn is_code_file(p: &Path) -> bool {
    let ext = p.extension().and_then(|e| e.to_str());
    matches!(
        ext,
        Some("js" | "mjs" | "cjs" | "jsx" | "ts" | "mts" | "cts" | "tsx")
    )
}
/// True when the resolved path has a `.json` extension.
fn is_json_file(p: &Path) -> bool {
    matches!(p.extension().and_then(|e| e.to_str()), Some("json"))
}
/// True for TypeScript declaration files (`*.d.ts`, `*.d.mts`, `*.d.cts`).
///
/// BUGFIX: `Path::extension()` on `foo.d.ts` returns `"ts"`, never `"d.ts"`,
/// so the old comparison against `"d.ts"` could never match and declaration
/// files were always bundled. Check the file *name* suffix instead.
fn is_declaration_file(p: &Path) -> bool {
    p.file_name()
        .and_then(|n| n.to_str())
        .map(|n| n.ends_with(".d.ts") || n.ends_with(".d.mts") || n.ends_with(".d.cts"))
        .unwrap_or(false)
}
| fn is_node_builtin(spec: &str) -> bool { | |
| let s = spec.strip_prefix("node:").unwrap_or(spec); | |
| NODEJS_BUILTINS.iter().any(|&b| b == s) | |
| } | |
/// True for http(s) URLs and data URIs, which are never project files.
fn looks_like_url(spec: &str) -> bool {
    let lower = spec.to_ascii_lowercase();
    ["http://", "https://", "data:"]
        .iter()
        .any(|prefix| lower.starts_with(prefix))
}
/// True when any path segment is `node_modules`.
fn in_node_modules(p: &Path) -> bool {
    p.iter().any(|segment| segment == "node_modules")
}
/// Walk upward from `start` (inclusive) looking for a file named `needle`.
/// Returns the first match, or `None` if no ancestor directory contains it.
fn find_upwards(start: &Path, needle: &str) -> Option<PathBuf> {
    start
        .ancestors()
        .map(|dir| dir.join(needle))
        .find(|candidate| candidate.is_file())
}
/// Breadcrumb comment left where an inlined import/require statement used to be.
fn replaced_comment(spec: &str, resolved: &Path) -> String {
    let mut note = String::from("/* inlined: '");
    note.push_str(spec);
    note.push_str("' => ");
    note.push_str(&resolved.display().to_string());
    note.push_str(" */\n");
    note
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment