|
#!/usr/bin/env node |
|
// @ts-check |
|
|
|
/**
 * @fileoverview
 * This script asks DeepSeek to help debug a Rust project.
 * It serializes the project, collects the test failures, and sends both to DeepSeek.
 * The response is then printed to the console.
 *
 * YOU WILL NEED `yek` to be installed.
 * @see https://github.com/bodo-run/yek
 */
|
|
|
const { spawn, execSync } = require("child_process"); |
|
const https = require("https"); |
|
const fs = require("fs"); |
|
|
|
const token = process.env.DEEPSEEK_API_KEY; |
|
|
|
const debugEnabled = process.argv.includes("--debug"); |
|
const testCommand = "cargo test"; // TODO: make this configurable |
|
const testProgram = testCommand.split(" ")[0]; |
|
const testArgs = testCommand.split(" ").slice(1); |
|
|
|
const systemPrompt = [ |
|
"You are a senior Rust engineer with 10+ years of experience in systems programming.", |
|
"Your expertise includes:", |
|
"- Deep knowledge of Rust's ownership system, lifetimes, and concurrency model", |
|
"- Mastery of cargo, clippy, and modern Rust toolchain features", |
|
"- Experience debugging complex memory issues and performance bottlenecks", |
|
"- Familiarity with common Rust crates and idiomatic patterns", |
|
|
|
"When analyzing test failures:", |
|
"1. First clearly identify the failure type (compiler error, runtime panic, logical error, performance issue)", |
|
"2. Analyze backtraces and error messages with attention to ownership boundaries", |
|
"3. Consider common Rust pitfalls:", |
|
" - Lifetime mismatches", |
|
" - Unsafe code violations", |
|
" - Trait bound errors", |
|
" - Concurrency race conditions", |
|
" - Iterator invalidation", |
|
"4. Cross-reference with cargo test output and clippy warnings", |
|
|
|
"For proposed fixes:", |
|
"- Always prioritize type safety and borrow checker rules", |
|
"- Prefer idiomatic solutions over clever hacks", |
|
"- Include exact code diffs using markdown format with file names", |
|
"- Explain the root cause before presenting fixes", |
|
"- Suggest relevant clippy lints or cargo checks to prevent regressions", |
|
|
|
"Response guidelines:", |
|
"- Structure analysis using bullet points for clarity", |
|
"- Use code fences for error snippets and diffs", |
|
"- Highlight connections between test failures and system architecture", |
|
"- When uncertain, propose multiple hypothesis with verification strategies", |
|
|
|
"Special capabilities:", |
|
"- Leverage knowledge of Rust internals (MIR, drop order, etc.)", |
|
"- Reference similar issues in popular Rust OSS projects", |
|
"- Suggest property-based testing strategies for edge cases", |
|
].join("\n"); |
|
|
|
function debug(message) { |
|
if (debugEnabled) { |
|
console.log(`[ask.js] ${message}`); |
|
} |
|
} |
|
|
|
/**
 * Cheap token estimator: every "word" counts as one token.
 *
 * Words are separated by whitespace and by any of `()[]{}`. Each word is then
 * broken up further at camelCase / PascalCase boundaries and at `_` / `-`
 * (snake_case, kebab-case). Empty fragments are not counted.
 * Crude, but fast and good enough for sizing a prompt.
 *
 * A non-string argument is reported with `console.trace` and counts as 0 tokens.
 *
 * @param {string} str
 * @returns {number}
 */
function reallyDumbTokenCounter(str) {
  const isText = typeof str === "string";
  if (!isText) {
    console.trace("str is not a string", typeof str, str);
  }
  const text = isText ? str : "";

  // Whitespace, newlines, parentheses, brackets and braces separate words
  const wordBoundary = /[\s\n()[\]{}]+/;
  // camelCase / PascalCase boundaries (zero-width, nothing is consumed)
  const caseBoundary = /(?=[A-Z][a-z])|(?<=[a-z])(?=[A-Z])/;
  // snake_case and kebab-case joiners
  const joiner = /[_\-]/;

  let tokenCount = 0;
  for (const word of text.split(wordBoundary)) {
    for (const part of word.split(caseBoundary)) {
      for (const fragment of part.split(joiner)) {
        // Skip the empty strings produced by leading/trailing separators
        if (fragment) {
          tokenCount += 1;
        }
      }
    }
  }
  return tokenCount;
}
|
|
|
// Nothing can be sent without an API key, so stop right away when it is missing.
if (!token) {
  console.error("DEEPSEEK_API_KEY is not set");
  process.exit(1);
}

// DeepSeek maximum context length is 128K tokens.
const maxTokens = 128000;

// Tokens held back for the test failure output. Sizing this from the real test
// output would be more precise, but then the test run and the serialization
// could no longer happen in parallel; 10k tokens is good enough for most cases.
const reservedForTestFailures = 10000;

// Budget handed to `yek` for the serialized repository: whatever is left after
// the test-failure reserve and the system prompt.
const maxSize =
  maxTokens - reservedForTestFailures - reallyDumbTokenCounter(systemPrompt);
|
|
|
/**
 * Runs a command and resolves with everything it wrote to stdout and stderr,
 * in the order the chunks arrived.
 *
 * The command runs through a shell by default (`shell: true`), so `program`
 * and `args` are simply joined with spaces and interpreted by the shell;
 * nothing is escaped. Only pass trusted values.
 *
 * @param {string} program Executable (or shell snippet) to run.
 * @param {string[]} [args] Arguments appended to `program`.
 * @param {import("child_process").SpawnOptions & { returnError?: boolean, printToConsole?: boolean }} [options]
 *   Options forwarded to `spawn`, plus two wrapper-only flags:
 *   - `returnError`: resolve with the captured output instead of rejecting
 *     when the command cannot be started or exits with a non-zero code.
 *   - `printToConsole`: also echo the output to this process's stdout/stderr
 *     while the command runs.
 * @returns {Promise<string>} The combined stdout/stderr output.
 */
async function execCommand(program, args = [], options = {}) {
  // Keep the wrapper-only flags out of the options handed to spawn().
  const {
    returnError = false,
    printToConsole = false,
    ...spawnOverrides
  } = options;
  const spawnOptions = {
    shell: true,
    stdio: ["pipe", "pipe", "pipe"], // Always pipe to capture output
    ...spawnOverrides,
  };
  const outputs = [];

  return new Promise((resolve, reject) => {
    // Single place that decides between "best effort" and "fail loudly".
    const fail = (error) => {
      if (returnError) {
        resolve(outputs.join(""));
      } else {
        reject(error);
      }
    };

    try {
      debug(`Running: ${program} ${args.join(" ")}`);

      // With a shell, Node only space-joins the args anyway; building the
      // command line here keeps that behavior without passing an args array
      // alongside `shell: true`.
      const child = spawnOptions.shell
        ? spawn([program, ...args].join(" "), spawnOptions)
        : spawn(program, args, spawnOptions);

      const capture = (stream, sink) => {
        if (!stream) {
          return; // stdio was overridden by the caller; nothing to capture
        }
        // Decode on the stream so a multi-byte character split across two
        // chunks is not corrupted.
        stream.setEncoding("utf8");
        stream.on("data", (text) => {
          outputs.push(text);
          if (printToConsole) {
            // write() echoes the chunk verbatim; console.log would append a
            // newline after every chunk.
            sink.write(text);
          }
        });
      };
      capture(child.stdout, process.stdout);
      capture(child.stderr, process.stderr);

      child.on("error", fail);

      child.on("close", (code) => {
        const output = outputs.join("");
        if (code === 0) {
          resolve(output);
        } else {
          fail(new Error(`Command failed with code ${code}\nOutput: ${output}`));
        }
      });
    } catch (error) {
      fail(error);
    }
  });
}
|
/**
 * Finds the Rust source files that mention the given failed tests.
 *
 * For every test, `./tests` is searched first; `./src` is searched only when
 * `./tests` yields no match. A missing or unreadable directory is skipped, so
 * a project without a `tests/` directory still gets its `src/` searched.
 *
 * Only the last `::` segment of a test name is searched for, because the test
 * runner reports a path such as `module::tests::name` while the source file
 * contains just `name`. The search is a plain substring match done in-process,
 * so test names never reach a shell.
 *
 * @param {string[]} tests Failed test names as reported by the test runner.
 * @returns {Promise<string[]>} Unique file paths (e.g. `./src/lib.rs`).
 */
const findTestFiles = async (tests) => {
  const results = new Set();
  // Directory -> [{ file, text }], filled lazily so each file is read once.
  const sourcesByDir = new Map();

  // Recursively lists the regular `*.rs` files below `dir`.
  const listRustFiles = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (error) {
      debug(`Error reading directory ${dir}: ${error.message}`);
      return [];
    }
    return entries.flatMap((entry) => {
      const entryPath = `${dir}/${entry.name}`;
      if (entry.isDirectory()) {
        return listRustFiles(entryPath);
      }
      return entry.isFile() && entry.name.endsWith(".rs") ? [entryPath] : [];
    });
  };

  // Returns the Rust sources of `dir`, reading them on first use.
  const loadSources = (dir) => {
    if (!sourcesByDir.has(dir)) {
      const sources = listRustFiles(dir).map((file) => {
        try {
          return { file, text: fs.readFileSync(file, "utf8") };
        } catch (error) {
          debug(`Error reading file ${file}: ${error.message}`);
          return { file, text: "" };
        }
      });
      sourcesByDir.set(dir, sources);
    }
    return sourcesByDir.get(dir);
  };

  for (const test of tests) {
    if (typeof test !== "string") {
      continue;
    }
    const needle = test.split("::").pop();
    if (!needle) {
      continue;
    }

    // Search in tests directory first; fall back to src if nothing matched
    for (const dir of ["./tests", "./src"]) {
      const matches = loadSources(dir).filter(({ text }) =>
        text.includes(needle)
      );
      if (matches.length > 0) {
        matches.forEach(({ file }) => results.add(file));
        break;
      }
    }
  }

  return Array.from(results);
};
|
|
|
/**
 * Caps the prompt text so it fits the model's context window.
 *
 * When the estimated token count exceeds `maxTokens`, only the tail of the
 * text is kept (its last `maxTokens` characters) behind a
 * "... (truncated) ..." marker. Cutting by characters is conservative: the
 * estimator never counts more tokens than there are characters.
 *
 * The text is returned as-is and is NOT JSON-encoded. The caller places it in
 * the request object that is serialized with `JSON.stringify`, which already
 * escapes it; encoding here as well would send the model a quoted string full
 * of literal `\n` and `\"` sequences. The name is kept so callers keep working.
 *
 * @param {string} str Prompt text.
 * @returns {string} The text, truncated from the top if it was too large.
 */
const truncateAndEscape = (str) => {
  if (reallyDumbTokenCounter(str) <= maxTokens) {
    return str;
  }
  return `... (truncated) ...\n${str.slice(-maxTokens)}`;
};
|
|
|
/**
 * Extracts the names of the failed tests from the test runner output.
 *
 * Only per-test status lines of the form `test <name> ... FAILED` are used, so
 * other lines that merely end in "failed" (for example the panic message
 * "assertion `left == right` failed") are not mistaken for test names.
 *
 * @param {string} testOutput Combined output of the test run.
 * @returns {string[]} Unique failed test names, in order of first appearance.
 */
function extractFailedTests(testOutput) {
  const failedLine = /^test (\S+).* \.\.\. FAILED$/;
  const names = new Set();
  for (const line of testOutput.split("\n")) {
    const match = failedLine.exec(line.trim());
    if (match) {
      names.add(match[1]);
    }
  }
  return [...names];
}

/**
 * POSTs the chat-completion payload to the DeepSeek API.
 *
 * @param {string} payload JSON request body.
 * @returns {Promise<{ statusCode: number | undefined, body: string }>}
 *   The HTTP status and the raw response body; rejects on a transport error.
 */
function requestCompletion(payload) {
  const options = {
    hostname: "api.deepseek.com",
    path: "/chat/completions",
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${token}`,
      "Content-Length": Buffer.byteLength(payload),
    },
  };

  return new Promise((resolve, reject) => {
    debug("Sending request to DeepSeek API...");
    const req = https.request(options, (res) => {
      debug(`Response status: ${res.statusCode} ${res.statusMessage}`);
      // Decode on the stream so multi-byte characters that straddle two
      // chunks are not corrupted.
      res.setEncoding("utf8");
      let body = "";

      res.on("data", (chunk) => {
        body += chunk;
        debug(`Received chunk of ${Buffer.byteLength(chunk)} bytes`);
      });
      res.on("error", reject);
      res.on("end", () => resolve({ statusCode: res.statusCode, body }));
    });

    req.on("error", (error) => {
      debug(`Request error: ${error.message}`);
      reject(error);
    });

    debug("Writing request payload...");
    req.write(payload);
    debug("Ending request");
    req.end();
  });
}

/**
 * Serializes the repository, runs the tests and collects the git diff in
 * parallel, then asks DeepSeek about the failures and prints its answer.
 * Exits with status 0 when no test failed and with a non-zero status when the
 * question cannot be built or no usable answer comes back.
 */
async function main() {
  // Run serialization and testing in parallel
  debug("Starting serialization and testing in parallel...");
  const [serialized, testOutput, gitDiff] = await Promise.all([
    execCommand("yek", ["--max-size", maxSize.toString(), "--tokens"], {}),
    execCommand(testProgram, testArgs, {
      returnError: true,
      printToConsole: true,
    }),
    // Output is captured through a pipe; `--no-pager` avoids depending on a
    // shell pipeline. A failing `git` is tolerated, its output is used as-is.
    execCommand("git", ["--no-pager", "diff"], { returnError: true }),
  ]);
  debug("Serializing and test run complete");

  // Check if any test failed by looking for "test result: FAILED" in the output.
  // NOTE(review): a run that prints no such line (e.g. one that stops before
  // any test executes) is also reported as passing; confirm that is intended.
  if (!testOutput.includes("test result: FAILED")) {
    console.log("All tests passed!");
    process.exit(0);
  }

  const failedTests = extractFailedTests(testOutput);
  if (failedTests.length === 0) {
    // The run did fail, so claiming success here would be wrong.
    console.error(
      "Tests failed, but no failed test names could be parsed from the output"
    );
    process.exit(1);
  }
  debug(`Failed tests: ${failedTests.join(", ")}`);

  const testFiles = await findTestFiles(failedTests);
  if (testFiles.length === 0) {
    console.error("Could not find any test files");
    process.exit(1);
  }
  debug(`Test files: ${testFiles.join(", ")}`);

  const testContents = testFiles
    .map((filename) => {
      try {
        return fs.readFileSync(filename, "utf8");
      } catch (error) {
        debug(`Error reading file ${filename}: ${error.message}`);
        return "";
      }
    })
    .filter(Boolean);
  if (testContents.length === 0) {
    console.error("Could not read any test files");
    process.exit(1);
  }

  // Any lines before "failures:" are not needed. Those are tests that passed.
  const trimmedTestOutput = testOutput.split("failures:").slice(1).join("\n");

  const content = truncateAndEscape(
    [
      `# Repo:`,
      serialized,
      `# Git diff:`,
      gitDiff,
      `# Test contents:`,
      testContents.join("\n\n"),
      `# Test failures:`,
      trimmedTestOutput,
    ].join("\n\n")
  );
  const questionTokens = reallyDumbTokenCounter(content);
  debug(`Content length: ${questionTokens} tokens`);
  console.log(
    `Asking DeepSeek R1 a ${questionTokens} token question. This will take a while...`
  );

  const payload = JSON.stringify({
    model: "deepseek-reasoner",
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content },
    ],
    stream: false,
  });
  debug(`Request payload size: ${Buffer.byteLength(payload)} bytes`);

  // Print a dot every second so the user can see the script is still waiting.
  const timer = setInterval(() => {
    process.stdout.write(".");
  }, 1000);
  let response;
  try {
    response = await requestCompletion(payload);
  } finally {
    clearInterval(timer);
  }
  debug("Response completed");

  let jsonResponse;
  try {
    jsonResponse = JSON.parse(response.body);
  } catch (error) {
    console.error("Failed to parse response:", response.body);
    debug(`Parse error: ${error.message}`);
    process.exit(1);
  }
  debug(`Parsed response successfully`);

  const answer = jsonResponse?.choices?.[0]?.message?.content;
  if (answer) {
    console.log(answer);
    return;
  }

  console.error("No content found in the response");
  debug(`Full response: ${JSON.stringify(jsonResponse, null, 2)}`);
  // No answer was produced, so do not report success to the calling shell.
  process.exitCode = 1;
}

main().catch((error) => {
  console.error("Error:", error);
  process.exit(1);
});