mohsen1 · May 11, 2025 23:33
diff --git a/README.md b/README.md
diff --git a/ask.js b/ask.js
 #!/usr/bin/env node
 // @ts-check

 /**
 * @fileoverview
 * This script asks DeepSeek to help with debugging a Rust project.
 * It serializes the project, gets test failures, and sends the content to DeepSeek.
 * The response is then printed to the console.
 *
 * YOU WILL NEED `yek` to be installed
 * @see https://github.com/bodo-run/yek
 */

 const { spawn, execSync } = require("child_process");
 const https = require("https");
 const fs = require("fs");

 // API key for the DeepSeek endpoint, read from the environment.
 const token = process.env.DEEPSEEK_API_KEY;

 // True when `--debug` was passed on the command line.
 const debugEnabled = process.argv.includes("--debug");

 // Test command, split on spaces into the executable and its arguments.
 const testCommand = "cargo test"; // TODO: make this configurable
 const [testProgram, ...testArgs] = testCommand.split(" ");

 // System prompt text: persona, analysis checklist and answer-format rules.
 // Every string below becomes exactly one line of the final prompt; the
 // nested arrays only group related lines and are flattened before joining.
 const systemPrompt = [
   // Persona and expertise
   [
     "You are a senior Rust engineer with 10+ years of experience in systems programming.",
     "Your expertise includes:",
     "- Deep knowledge of Rust's ownership system, lifetimes, and concurrency model",
     "- Mastery of cargo, clippy, and modern Rust toolchain features",
     "- Experience debugging complex memory issues and performance bottlenecks",
     "- Familiarity with common Rust crates and idiomatic patterns",
   ],
   // How to analyze failures
   [
     "When analyzing test failures:",
     "1. First clearly identify the failure type (compiler error, runtime panic, logical error, performance issue)",
     "2. Analyze backtraces and error messages with attention to ownership boundaries",
     "3. Consider common Rust pitfalls:",
     "   - Lifetime mismatches",
     "   - Unsafe code violations",
     "   - Trait bound errors",
     "   - Concurrency race conditions",
     "   - Iterator invalidation",
     "4. Cross-reference with cargo test output and clippy warnings",
   ],
   // How to propose fixes
   [
     "For proposed fixes:",
     "- Always prioritize type safety and borrow checker rules",
     "- Prefer idiomatic solutions over clever hacks",
     "- Include exact code diffs using markdown format with file names",
     "- Explain the root cause before presenting fixes",
     "- Suggest relevant clippy lints or cargo checks to prevent regressions",
   ],
   // Answer formatting
   [
     "Response guidelines:",
     "- Structure analysis using bullet points for clarity",
     "- Use code fences for error snippets and diffs",
     "- Highlight connections between test failures and system architecture",
     "- When uncertain, propose multiple hypothesis with verification strategies",
   ],
   // Extra capabilities
   [
     "Special capabilities:",
     "- Leverage knowledge of Rust internals (MIR, drop order, etc.)",
     "- Reference similar issues in popular Rust OSS projects",
     "- Suggest property-based testing strategies for edge cases",
   ],
 ]
   .flat()
   .join("\n");

 /**
  * Logs a diagnostic line prefixed with `[ask.js]`, but only when debug
  * output is enabled.
  * @param {string} message
  */
 function debug(message) {
   if (!debugEnabled) {
     return;
   }
   console.log(`[ask.js] ${message}`);
 }

 /**
  * Rough, fast token estimate: counts "words".
  * Text is first cut on whitespace and on the bracket characters ()[]{},
  * then each piece is cut again at camelCase / PascalCase boundaries and at
  * `_` / `-`, so snake_case and kebab-case identifiers count as several tokens.
  * Empty fragments are not counted. A non-string argument is reported with
  * `console.trace` and counted as 0.
  * @param {string} str
  * @returns {number}
  */
 function reallyDumbTokenCounter(str) {
   if (typeof str !== "string") {
     console.trace("str is not a string", typeof str, str);
     str = "";
   }

   // Whitespace, newlines, and parentheses, brackets, and braces
   const separators = /[\s\n()[\]{}]+/;
   // Boundaries inside camelCase / PascalCase words
   const caseBoundary = /(?=[A-Z][a-z])|(?<=[a-z])(?=[A-Z])/;
   // snake_case and kebab-case joiners
   const joiner = /[_\-]/;

   let total = 0;
   for (const word of str.split(separators)) {
     for (const part of word.split(caseBoundary)) {
       for (const piece of part.split(joiner)) {
         if (piece) {
           total += 1;
         }
       }
     }
   }
   return total;
 }

 // Bail out before doing any work when no API key was supplied.
 if (!token) {
  console.error("DEEPSEEK_API_KEY is not set");
  process.exit(1);
 }

 // DeepSeek maximum context length is 128K tokens.
 const maxTokens = 128000;
 // Size budget passed to `yek` for the serialized repo: the context limit
 // minus 10000 tokens reserved for the test failures and minus the system
 // prompt's estimated token count.
 // Alternatively the reservation could be the word count of trimmedTestOutput,
 // but then running the tests and serializing could not happen in parallel.
 // A fixed 10000-token reservation is good enough for most cases.
 const maxSize = maxTokens - 10000 - reallyDumbTokenCounter(systemPrompt);

 /**
  * Runs a command through the shell and collects everything it writes.
  *
  * stdout and stderr chunks are appended, in arrival order, to a single
  * string. Both pipes are decoded as UTF-8 by the stream itself so that a
  * multi-byte character split across two chunks is not corrupted.
  *
  * @param {string} program - Executable (or shell snippet) to run.
  * @param {string[]} [args] - Arguments passed to `spawn` along with `program`.
  * @param {object} [options] - Spread into the `spawn` options (and may
  *   override `shell` / `stdio`). Two extra keys are read here:
  *   `printToConsole` echoes the child's output verbatim to this process's
  *   stdout/stderr, and `returnError` makes a failure resolve with the
  *   captured output instead of rejecting.
  * @returns {Promise<string>} Captured output. Rejects on a spawn error or a
  *   non-zero exit code unless `options.returnError` is set.
  */
 async function execCommand(program, args = [], options = {}) {
   const outputs = [];
   return new Promise((resolve, reject) => {
     // With `returnError`, a failure still yields whatever was captured so far.
     const fail = (error) => {
       if (options.returnError) {
         resolve(outputs.join(""));
       } else {
         reject(error);
       }
     };

     // Records one of the child's pipes and optionally mirrors it to `sink`.
     const capture = (stream, sink) => {
       stream.setEncoding("utf8");
       stream.on("data", (str) => {
         outputs.push(str);
         if (options.printToConsole) {
           // Write the chunk as-is; console.log/error would append a newline
           // to every chunk and garble the echoed output.
           sink.write(str);
         }
       });
     };

     try {
       debug(`Running: ${program} ${args.join(" ")}`);
       // Named `child` so the global `process` stays reachable in this scope.
       const child = spawn(program, args, {
         shell: true,
         stdio: ["pipe", "pipe", "pipe"], // Always pipe to capture output
         ...options,
       });

       capture(child.stdout, process.stdout);
       capture(child.stderr, process.stderr);

       child.on("error", fail);

       child.on("close", (code) => {
         const output = outputs.join("");
         if (code !== 0) {
           fail(new Error(`Command failed with code ${code}\nOutput: ${output}`));
         } else {
           resolve(output);
         }
       });
     } catch (error) {
       fail(error);
     }
   });
 }
 /**
  * Finds the Rust source files that mention any of the given test names.
  *
  * For each name, `./tests` is searched first; `./src` is only consulted when
  * `./tests` yields no match. A file matches when its text contains the name
  * as a plain substring.
  *
  * The search is done with `fs` instead of interpolating the name into a
  * `find ... grep` shell command, so a name taken from test output is never
  * interpreted by a shell, and a missing `./tests` directory no longer
  * prevents the `./src` fallback.
  *
  * @param {Array<string | undefined>} tests - Test names; empty or
  *   non-string entries are skipped.
  * @returns {Promise<string[]>} Unique matching paths (e.g. `./tests/foo.rs`)
  *   in discovery order.
  */
 const findTestFiles = async (tests) => {
   const results = new Set();
   // directory -> [{ file, text }], so every file is read at most once
   const sourcesByDir = new Map();

   // Recursively lists the regular `*.rs` files below `dir`.
   const listRustFiles = (dir) => {
     let entries;
     try {
       entries = fs.readdirSync(dir, { withFileTypes: true });
     } catch (error) {
       debug(`Error reading directory ${dir}: ${error.message}`);
       return [];
     }
     return entries.flatMap((entry) => {
       const entryPath = `${dir}/${entry.name}`;
       if (entry.isDirectory()) {
         return listRustFiles(entryPath);
       }
       return entry.isFile() && entry.name.endsWith(".rs") ? [entryPath] : [];
     });
   };

   // Loads (lazily, once) the contents of every Rust file below `dir`.
   const loadSources = (dir) => {
     if (!sourcesByDir.has(dir)) {
       const sources = [];
       for (const file of listRustFiles(dir)) {
         try {
           sources.push({ file, text: fs.readFileSync(file, "utf8") });
         } catch (error) {
           debug(`Error reading file ${file}: ${error.message}`);
         }
       }
       sourcesByDir.set(dir, sources);
     }
     return sourcesByDir.get(dir);
   };

   const filesMentioning = (dir, name) =>
     loadSources(dir)
       .filter(({ text }) => text.includes(name))
       .map(({ file }) => file);

   for (const test of tests) {
     if (typeof test !== "string" || test === "") {
       continue;
     }

     // Search in tests directory first
     let matches = filesMentioning("./tests", test);
     if (matches.length === 0) {
       // If not found in tests, search in src
       matches = filesMentioning("./src", test);
     }
     matches.forEach((file) => results.add(file));
   }

   return Array.from(results);
 };

 /**
  * JSON-encodes `str` for the request. When its estimated token count is
  * above `maxTokens`, only the last `maxTokens` characters are kept and a
  * truncation marker line is put in front of them.
  * @param {string} str
  * @returns {string} JSON string literal of the (possibly truncated) text.
  */
 const truncateAndEscape = (str) => {
   const isOversized = reallyDumbTokenCounter(str) > maxTokens;
   const kept = isOversized
     ? `... (truncated) ...\n${str.slice(-maxTokens)}`
     : str;
   return JSON.stringify(kept);
 };

 // Entry point: serialize the repo, run the tests and take the git diff in
 // parallel, then build one prompt from the results and send it to DeepSeek.
 debug("Starting serialization and testing in parallel...");
 (async () => {
   try {
     const [serialized, testOutput, gitDiff] = await Promise.all([
       execCommand("yek", [`--max-size`, maxSize.toString(), `--tokens`], {}),
       execCommand(testProgram, testArgs, {
         returnError: true,
         printToConsole: true,
       }),
       execCommand("git", ["diff", "|", "cat"]),
     ]);
     debug("Serializing and test run complete");

     // A failing run is recognised by "test result: FAILED" in the output.
     if (!testOutput.includes("test result: FAILED")) {
       console.log("All tests passed!");
       process.exit(0);
     }

     // Lines ending in "failed" name a failed test in their second word.
     const failedTests = [];
     for (const rawLine of testOutput.split("\n")) {
       const line = rawLine.trim();
       if (line.toLowerCase().endsWith("failed")) {
         failedTests.push(line.split(" ")?.[1]);
       }
     }

     if (failedTests.length === 0) {
       console.log("All tests passed!");
       process.exit(0);
     }

     debug(`Failed tests: ${failedTests.join(", ")}`);

     const testFiles = await findTestFiles(failedTests);
     if (testFiles.length === 0) {
       console.error("Could not find any test files");
       process.exit(1);
     }

     debug(`Test files: ${testFiles.join(", ")}`);

     // Unreadable and empty files are left out.
     const testContents = [];
     for (const filename of testFiles) {
       try {
         const text = fs.readFileSync(filename, "utf8");
         if (text) {
           testContents.push(text);
         }
       } catch (error) {
         debug(`Error reading file ${filename}: ${error.message}`);
       }
     }

     if (testContents.length === 0) {
       console.error("Could not read any test files");
       process.exit(1);
     }

     // Progress dots while waiting for the API.
     const timer = setInterval(() => {
       process.stdout.write(".");
     }, 1000);

     // Anything before "failures:" is output of tests that passed; drop it.
     const trimmedTestOutput = testOutput
       .split("failures:")
       .slice(1)
       .join("\n");

     const sections = [
       `# Repo:`,
       serialized,
       `# Git diff:`,
       gitDiff,
       `# Test contents:`,
       testContents.join("\n\n"),
       `# Test failures:`,
       trimmedTestOutput,
     ];
     const content = truncateAndEscape(sections.join("\n\n"));
     const contentTokens = reallyDumbTokenCounter(content);
     debug(`Content length: ${contentTokens} tokens`);
     console.log(
       `Asking DeepSeek R1 a ${contentTokens} token question. This will take a while...`
     );

     const data = JSON.stringify({
       model: "deepseek-reasoner",
       messages: [
         { role: "system", content: systemPrompt },
         { role: "user", content },
       ],
       stream: false,
     });
     const payloadBytes = Buffer.byteLength(data);

     debug(`Request payload size: ${payloadBytes} bytes`);

     const options = {
       hostname: "api.deepseek.com",
       path: "/chat/completions",
       method: "POST",
       headers: {
         "Content-Type": "application/json",
         Authorization: `Bearer ${token}`,
         "Content-Length": payloadBytes,
       },
     };

     debug("Sending request to DeepSeek API...");
     const req = https.request(options, (res) => {
       debug(`Response status: ${res.statusCode} ${res.statusMessage}`);
       let responseData = "";

       res.on("data", (chunk) => {
         responseData += chunk;
         debug(`Received chunk of ${chunk.length} bytes`);
       });

       res.on("end", () => {
         clearInterval(timer);
         debug("Response completed");
         try {
           const jsonResponse = JSON.parse(responseData);
           debug(`Parsed response successfully`);
           const answer = jsonResponse?.choices?.[0]?.message?.content;
           if (!answer) {
             console.error("No content found in the response");
             debug(`Full response: ${JSON.stringify(jsonResponse, null, 2)}`);
             return;
           }
           console.log(answer);
         } catch (error) {
           console.error("Failed to parse response:", responseData);
           debug(`Parse error: ${error.message}`);
           process.exit(1);
         }
       });
     });

     req.on("error", (error) => {
       clearInterval(timer);
       console.error("Error:", error);
       debug(`Request error: ${error.message}`);
       process.exit(1);
     });

     debug("Writing request payload...");
     req.write(data);
     debug("Ending request");
     req.end();
   } catch (error) {
     console.error("Error:", error);
     process.exit(1);
   }
 })();
	#!/usr/bin/env node
	// @ts-check

	/**
	* @fileoverview
	* This script asks DeepSeek to help with debugging a Rust project.
	* It serializes the project, gets test failures, and sends the content to DeepSeek.
	* The response is then printed to the console.
	*
	* YOU WILL NEED `yek` to be installed
	* @see https://github.com/bodo-run/yek
	*/

	const { spawn, execSync } = require("child_process");
	const https = require("https");
	const fs = require("fs");

	// API key for the DeepSeek endpoint, read from the environment.
	const token = process.env.DEEPSEEK_API_KEY;

	// True when `--debug` was passed on the command line.
	const debugEnabled = process.argv.includes("--debug");

	// Test command, split on spaces into the executable and its arguments.
	const testCommand = "cargo test"; // TODO: make this configurable
	const [testProgram, ...testArgs] = testCommand.split(" ");

	// System prompt text: persona, analysis checklist and answer-format rules.
	// Every string below becomes exactly one line of the final prompt; the
	// nested arrays only group related lines and are flattened before joining.
	const systemPrompt = [
	  // Persona and expertise
	  [
	    "You are a senior Rust engineer with 10+ years of experience in systems programming.",
	    "Your expertise includes:",
	    "- Deep knowledge of Rust's ownership system, lifetimes, and concurrency model",
	    "- Mastery of cargo, clippy, and modern Rust toolchain features",
	    "- Experience debugging complex memory issues and performance bottlenecks",
	    "- Familiarity with common Rust crates and idiomatic patterns",
	  ],
	  // How to analyze failures
	  [
	    "When analyzing test failures:",
	    "1. First clearly identify the failure type (compiler error, runtime panic, logical error, performance issue)",
	    "2. Analyze backtraces and error messages with attention to ownership boundaries",
	    "3. Consider common Rust pitfalls:",
	    " - Lifetime mismatches",
	    " - Unsafe code violations",
	    " - Trait bound errors",
	    " - Concurrency race conditions",
	    " - Iterator invalidation",
	    "4. Cross-reference with cargo test output and clippy warnings",
	  ],
	  // How to propose fixes
	  [
	    "For proposed fixes:",
	    "- Always prioritize type safety and borrow checker rules",
	    "- Prefer idiomatic solutions over clever hacks",
	    "- Include exact code diffs using markdown format with file names",
	    "- Explain the root cause before presenting fixes",
	    "- Suggest relevant clippy lints or cargo checks to prevent regressions",
	  ],
	  // Answer formatting
	  [
	    "Response guidelines:",
	    "- Structure analysis using bullet points for clarity",
	    "- Use code fences for error snippets and diffs",
	    "- Highlight connections between test failures and system architecture",
	    "- When uncertain, propose multiple hypothesis with verification strategies",
	  ],
	  // Extra capabilities
	  [
	    "Special capabilities:",
	    "- Leverage knowledge of Rust internals (MIR, drop order, etc.)",
	    "- Reference similar issues in popular Rust OSS projects",
	    "- Suggest property-based testing strategies for edge cases",
	  ],
	]
	  .flat()
	  .join("\n");

	/**
	 * Logs a diagnostic line prefixed with `[ask.js]`, but only when debug
	 * output is enabled.
	 * @param {string} message
	 */
	function debug(message) {
	  if (!debugEnabled) {
	    return;
	  }
	  console.log(`[ask.js] ${message}`);
	}

	/**
	 * Rough, fast token estimate: counts "words".
	 * Text is first cut on whitespace and on the bracket characters ()[]{},
	 * then each piece is cut again at camelCase / PascalCase boundaries and at
	 * `_` / `-`, so snake_case and kebab-case identifiers count as several tokens.
	 * Empty fragments are not counted. A non-string argument is reported with
	 * `console.trace` and counted as 0.
	 * @param {string} str
	 * @returns {number}
	 */
	function reallyDumbTokenCounter(str) {
	  if (typeof str !== "string") {
	    console.trace("str is not a string", typeof str, str);
	  }
	  str = typeof str === "string" ? str : "";
	  return (
	    str
	      // Split on whitespace, newlines, and parentheses, brackets, and braces
	      .split(/[\s\n()[\]{}]+/)
	      .flatMap((word) =>
	        // Split camelCase/PascalCase into separate words. The two
	        // lookaround patterns must be joined by a real alternation `|`:
	        // an escaped `\|` demands a literal pipe character and therefore
	        // never matches at a case boundary.
	        word
	          .split(/(?=[A-Z][a-z])|(?<=[a-z])(?=[A-Z])/)
	          // Split snake_case and kebab-case
	          .flatMap((part) => part.split(/[_\-]/))
	          // Filter out empty strings
	          .filter(Boolean)
	      ).length
	  );
	}

	// Bail out before doing any work when no API key was supplied.
	if (!token) {
	console.error("DEEPSEEK_API_KEY is not set");
	process.exit(1);
	}

	// DeepSeek maximum context length is 128K tokens.
	const maxTokens = 128000;
	// Size budget passed to `yek` for the serialized repo: the context limit
	// minus 10000 tokens reserved for the test failures and minus the system
	// prompt's estimated token count.
	// Alternatively the reservation could be the word count of trimmedTestOutput,
	// but then running the tests and serializing could not happen in parallel.
	// A fixed 10000-token reservation is good enough for most cases.
	const maxSize = maxTokens - 10000 - reallyDumbTokenCounter(systemPrompt);

	/**
	 * Runs a command through the shell and collects everything it writes.
	 *
	 * stdout and stderr chunks are appended, in arrival order, to a single
	 * string. Both pipes are decoded as UTF-8 by the stream itself so that a
	 * multi-byte character split across two chunks is not corrupted.
	 *
	 * @param {string} program - Executable (or shell snippet) to run.
	 * @param {string[]} [args] - Arguments passed to `spawn` along with `program`.
	 * @param {object} [options] - Spread into the `spawn` options (and may
	 *   override `shell` / `stdio`). Two extra keys are read here:
	 *   `printToConsole` echoes the child's output verbatim to this process's
	 *   stdout/stderr, and `returnError` makes a failure resolve with the
	 *   captured output instead of rejecting.
	 * @returns {Promise<string>} Captured output. Rejects on a spawn error or a
	 *   non-zero exit code unless `options.returnError` is set.
	 */
	async function execCommand(program, args = [], options = {}) {
	  const outputs = [];
	  return new Promise((resolve, reject) => {
	    // With `returnError`, a failure still yields whatever was captured so far.
	    const fail = (error) => {
	      if (options.returnError) {
	        resolve(outputs.join(""));
	      } else {
	        reject(error);
	      }
	    };

	    // Records one of the child's pipes and optionally mirrors it to `sink`.
	    const capture = (stream, sink) => {
	      stream.setEncoding("utf8");
	      stream.on("data", (str) => {
	        outputs.push(str);
	        if (options.printToConsole) {
	          // Write the chunk as-is; console.log/error would append a newline
	          // to every chunk and garble the echoed output.
	          sink.write(str);
	        }
	      });
	    };

	    try {
	      debug(`Running: ${program} ${args.join(" ")}`);
	      // Named `child` so the global `process` stays reachable in this scope.
	      const child = spawn(program, args, {
	        shell: true,
	        stdio: ["pipe", "pipe", "pipe"], // Always pipe to capture output
	        ...options,
	      });

	      capture(child.stdout, process.stdout);
	      capture(child.stderr, process.stderr);

	      child.on("error", fail);

	      child.on("close", (code) => {
	        const output = outputs.join("");
	        if (code !== 0) {
	          fail(new Error(`Command failed with code ${code}\nOutput: ${output}`));
	        } else {
	          resolve(output);
	        }
	      });
	    } catch (error) {
	      fail(error);
	    }
	  });
	}
	/**
	 * Finds the Rust source files that mention any of the given test names.
	 *
	 * For each name, `./tests` is searched first; `./src` is only consulted when
	 * `./tests` yields no match. A file matches when its text contains the name
	 * as a plain substring.
	 *
	 * The search is done with `fs` instead of interpolating the name into a
	 * `find ... grep` shell command, so a name taken from test output is never
	 * interpreted by a shell, and a missing `./tests` directory no longer
	 * prevents the `./src` fallback.
	 *
	 * @param {Array<string | undefined>} tests - Test names; empty or
	 *   non-string entries are skipped.
	 * @returns {Promise<string[]>} Unique matching paths (e.g. `./tests/foo.rs`)
	 *   in discovery order.
	 */
	const findTestFiles = async (tests) => {
	  const results = new Set();
	  // directory -> [{ file, text }], so every file is read at most once
	  const sourcesByDir = new Map();

	  // Recursively lists the regular `*.rs` files below `dir`.
	  const listRustFiles = (dir) => {
	    let entries;
	    try {
	      entries = fs.readdirSync(dir, { withFileTypes: true });
	    } catch (error) {
	      debug(`Error reading directory ${dir}: ${error.message}`);
	      return [];
	    }
	    return entries.flatMap((entry) => {
	      const entryPath = `${dir}/${entry.name}`;
	      if (entry.isDirectory()) {
	        return listRustFiles(entryPath);
	      }
	      return entry.isFile() && entry.name.endsWith(".rs") ? [entryPath] : [];
	    });
	  };

	  // Loads (lazily, once) the contents of every Rust file below `dir`.
	  const loadSources = (dir) => {
	    if (!sourcesByDir.has(dir)) {
	      const sources = [];
	      for (const file of listRustFiles(dir)) {
	        try {
	          sources.push({ file, text: fs.readFileSync(file, "utf8") });
	        } catch (error) {
	          debug(`Error reading file ${file}: ${error.message}`);
	        }
	      }
	      sourcesByDir.set(dir, sources);
	    }
	    return sourcesByDir.get(dir);
	  };

	  const filesMentioning = (dir, name) =>
	    loadSources(dir)
	      .filter(({ text }) => text.includes(name))
	      .map(({ file }) => file);

	  for (const test of tests) {
	    if (typeof test !== "string" || test === "") {
	      continue;
	    }

	    // Search in tests directory first
	    let matches = filesMentioning("./tests", test);
	    if (matches.length === 0) {
	      // If not found in tests, search in src
	      matches = filesMentioning("./src", test);
	    }
	    matches.forEach((file) => results.add(file));
	  }

	  return Array.from(results);
	};

	/**
	 * JSON-encodes `str` for the request. When its estimated token count is
	 * above `maxTokens`, only the last `maxTokens` characters are kept and a
	 * truncation marker line is put in front of them.
	 * @param {string} str
	 * @returns {string} JSON string literal of the (possibly truncated) text.
	 */
	const truncateAndEscape = (str) => {
	  const isOversized = reallyDumbTokenCounter(str) > maxTokens;
	  const kept = isOversized
	    ? `... (truncated) ...\n${str.slice(-maxTokens)}`
	    : str;
	  return JSON.stringify(kept);
	};

	// Entry point: serialize the repo, run the tests and take the git diff in
	// parallel, then build one prompt from the results and send it to DeepSeek.
	debug("Starting serialization and testing in parallel...");
	(async () => {
	  try {
	    const [serialized, testOutput, gitDiff] = await Promise.all([
	      execCommand("yek", [`--max-size`, maxSize.toString(), `--tokens`], {}),
	      execCommand(testProgram, testArgs, {
	        returnError: true,
	        printToConsole: true,
	      }),
	      // "\|" is an identity escape: the argument value is a plain "|".
	      execCommand("git", ["diff", "\|", "cat"]),
	    ]);
	    debug("Serializing and test run complete");

	    // A failing run is recognised by "test result: FAILED" in the output.
	    if (!testOutput.includes("test result: FAILED")) {
	      console.log("All tests passed!");
	      process.exit(0);
	    }

	    // Lines ending in "failed" name a failed test in their second word.
	    const failedTests = [];
	    for (const rawLine of testOutput.split("\n")) {
	      const line = rawLine.trim();
	      if (line.toLowerCase().endsWith("failed")) {
	        failedTests.push(line.split(" ")?.[1]);
	      }
	    }

	    if (failedTests.length === 0) {
	      console.log("All tests passed!");
	      process.exit(0);
	    }

	    debug(`Failed tests: ${failedTests.join(", ")}`);

	    const testFiles = await findTestFiles(failedTests);
	    if (testFiles.length === 0) {
	      console.error("Could not find any test files");
	      process.exit(1);
	    }

	    debug(`Test files: ${testFiles.join(", ")}`);

	    // Unreadable and empty files are left out.
	    const testContents = [];
	    for (const filename of testFiles) {
	      try {
	        const text = fs.readFileSync(filename, "utf8");
	        if (text) {
	          testContents.push(text);
	        }
	      } catch (error) {
	        debug(`Error reading file ${filename}: ${error.message}`);
	      }
	    }

	    if (testContents.length === 0) {
	      console.error("Could not read any test files");
	      process.exit(1);
	    }

	    // Progress dots while waiting for the API.
	    const timer = setInterval(() => {
	      process.stdout.write(".");
	    }, 1000);

	    // Anything before "failures:" is output of tests that passed; drop it.
	    const trimmedTestOutput = testOutput
	      .split("failures:")
	      .slice(1)
	      .join("\n");

	    const sections = [
	      `# Repo:`,
	      serialized,
	      `# Git diff:`,
	      gitDiff,
	      `# Test contents:`,
	      testContents.join("\n\n"),
	      `# Test failures:`,
	      trimmedTestOutput,
	    ];
	    const content = truncateAndEscape(sections.join("\n\n"));
	    const contentTokens = reallyDumbTokenCounter(content);
	    debug(`Content length: ${contentTokens} tokens`);
	    console.log(
	      `Asking DeepSeek R1 a ${contentTokens} token question. This will take a while...`
	    );

	    const data = JSON.stringify({
	      model: "deepseek-reasoner",
	      messages: [
	        { role: "system", content: systemPrompt },
	        { role: "user", content },
	      ],
	      stream: false,
	    });
	    const payloadBytes = Buffer.byteLength(data);

	    debug(`Request payload size: ${payloadBytes} bytes`);

	    const options = {
	      hostname: "api.deepseek.com",
	      path: "/chat/completions",
	      method: "POST",
	      headers: {
	        "Content-Type": "application/json",
	        Authorization: `Bearer ${token}`,
	        "Content-Length": payloadBytes,
	      },
	    };

	    debug("Sending request to DeepSeek API...");
	    const req = https.request(options, (res) => {
	      debug(`Response status: ${res.statusCode} ${res.statusMessage}`);
	      let responseData = "";

	      res.on("data", (chunk) => {
	        responseData += chunk;
	        debug(`Received chunk of ${chunk.length} bytes`);
	      });

	      res.on("end", () => {
	        clearInterval(timer);
	        debug("Response completed");
	        try {
	          const jsonResponse = JSON.parse(responseData);
	          debug(`Parsed response successfully`);
	          const answer = jsonResponse?.choices?.[0]?.message?.content;
	          if (!answer) {
	            console.error("No content found in the response");
	            debug(`Full response: ${JSON.stringify(jsonResponse, null, 2)}`);
	            return;
	          }
	          console.log(answer);
	        } catch (error) {
	          console.error("Failed to parse response:", responseData);
	          debug(`Parse error: ${error.message}`);
	          process.exit(1);
	        }
	      });
	    });

	    req.on("error", (error) => {
	      clearInterval(timer);
	      console.error("Error:", error);
	      debug(`Request error: ${error.message}`);
	      process.exit(1);
	    });

	    debug("Writing request payload...");
	    req.write(data);
	    debug("Ending request");
	    req.end();
	  } catch (error) {
	    console.error("Error:", error);
	    process.exit(1);
	  }
	})();