Last active
May 14, 2025 21:09
-
-
Save nfarina/4360ef0231666a061dd5dc0c654b01b4 to your computer and use it in GitHub Desktop.
Exports a repository as one giant text file suitable for feeding into a long-context LLM (o3, o4-mini, Grok, Gemini 2.5), and can then re-import the model's output back into the working tree.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
/** | |
* BigFile: A utility for working with entire codebases as a single file | |
* | |
* This script provides two main functions: | |
* 1. export - Crawls through your repository and outputs all files in a single "big file" format | |
* - Skips binary files, large files, and common ignore patterns | |
* - Writes to stdout so you can pipe or redirect the output | |
* | |
* 2. import - Reads a previously exported "big file" and writes files to disk | |
* - Creates directories as needed | |
* - Expects the special === BEGIN/END markers to denote files | |
* | |
* Usage: | |
* $ bigfile export > mycode.txt # Save all code to a text file | |
* $ bigfile export | pbcopy # Copy all code to clipboard | |
* $ bigfile import < mycode.txt # Restore files from a text file | |
* | |
* IMPORTANT: Make sure your code is checked into git before importing | |
* the result! Additionally, there is no mechanism for the import to delete | |
* files; you may need to do that manually. | |
* | |
* Here's an example of how to use the import function without intermediate files: | |
* | |
* $ bigfile import | |
* Starting import to: /path/to/project | |
* Paste your exported file content below, press enter one last time, then Ctrl+D when finished: | |
* (paste your content here) | |
* (press enter one last time, then Ctrl+D when done) | |
* | |
* The output format uses markers to denote the beginning and end of each file: | |
* === BEGIN path/to/file.js === | |
* (file contents here) | |
* === END path/to/file.js === | |
* | |
* This format makes it easy to share entire codebases with AI assistants or | |
* other developers without needing to zip files or use version control. | |
*/ | |
const fs = require("fs").promises; | |
const path = require("path"); | |
const readline = require("readline"); | |
// --- Configuration --- | |
const ROOT_DIR = process.cwd(); // Assumes you run the script from the repo root | |
const IGNORE_PATTERNS = [ | |
".git", | |
"node_modules", | |
".vscode", | |
".idea", | |
"dist", | |
"build", | |
"coverage", | |
"package-lock.json", | |
]; | |
const IGNORE_EXTENSIONS = [ | |
".cache", | |
// Binary file extensions (add more as needed) | |
".png", | |
".jpg", | |
".jpeg", | |
".gif", | |
".bmp", | |
".tiff", | |
".ico", | |
".mp3", | |
".wav", | |
".ogg", | |
".flac", | |
".mp4", | |
".avi", | |
".mov", | |
".wmv", | |
".flv", | |
".pdf", | |
".doc", | |
".docx", | |
".xls", | |
".xlsx", | |
".ppt", | |
".pptx", | |
".zip", | |
".tar", | |
".gz", | |
".rar", | |
".7z", | |
".exe", | |
".dll", | |
".so", | |
".o", | |
".a", | |
".lib", | |
".class", | |
".jar", | |
".pyc", | |
".pyo", | |
".lock", | |
]; | |
const MAX_FILE_SIZE_BYTES = 5 * 1024 * 1024; // 5MB limit per file (adjust as needed) | |
// --- End Configuration --- | |
// Helper to check if a file is likely binary by looking for null bytes | |
async function isLikelyBinary(filePath) { | |
try { | |
const buffer = await fs.readFile(filePath, { encoding: null }); // Read as buffer | |
// Check first 1024 bytes for a null character | |
const sample = buffer.subarray(0, Math.min(1024, buffer.length)); | |
for (let i = 0; i < sample.length; i++) { | |
if (sample[i] === 0) { | |
return true; // Found a null byte, likely binary | |
} | |
} | |
return false; // No null bytes in sample, likely text | |
} catch (err) { | |
console.warn( | |
`Warning: Could not read ${filePath} for binary check: ${err.message}` | |
); | |
return true; // Assume binary on error to be safe | |
} | |
} | |
async function* walk(dir) { | |
const entries = await fs.readdir(dir, { withFileTypes: true }); | |
for (const entry of entries) { | |
const fullPath = path.join(dir, entry.name); | |
const relativePath = path.relative(ROOT_DIR, fullPath); | |
if (IGNORE_PATTERNS.includes(entry.name)) { | |
continue; | |
} | |
if (entry.isDirectory()) { | |
yield* walk(fullPath); | |
} else if (entry.isFile()) { | |
const ext = path.extname(entry.name).toLowerCase(); | |
if (IGNORE_EXTENSIONS.includes(ext)) { | |
console.error(`Skipping (extension): ${relativePath}`); | |
continue; | |
} | |
try { | |
const stats = await fs.stat(fullPath); | |
if (stats.size > MAX_FILE_SIZE_BYTES) { | |
console.error( | |
`Skipping (too large: ${Math.round( | |
stats.size / 1024 / 1024 | |
)}MB): ${relativePath}` | |
); | |
continue; | |
} | |
if (await isLikelyBinary(fullPath)) { | |
console.error(`Skipping (likely binary): ${relativePath}`); | |
continue; | |
} | |
} catch (statErr) { | |
console.error( | |
`Warning: Could not stat ${relativePath}: ${statErr.message}. Skipping.` | |
); | |
continue; | |
} | |
yield { path: relativePath, fullPath }; | |
} | |
} | |
} | |
async function exportFiles() { | |
let fileCount = 0; | |
let totalSize = 0; | |
console.error(`Starting export from: ${ROOT_DIR}`); | |
console.error(`Ignoring patterns: ${IGNORE_PATTERNS.join(", ")}`); | |
console.error(`Ignoring extensions: ${IGNORE_EXTENSIONS.join(", ")}`); | |
console.error(`Max file size: ${MAX_FILE_SIZE_BYTES / 1024 / 1024} MB\n`); | |
try { | |
for await (const file of walk(ROOT_DIR)) { | |
try { | |
const content = await fs.readFile(file.fullPath, "utf-8"); | |
// Normalize path separators for consistent output | |
const normalizedRelativePath = file.path.split(path.sep).join("/"); | |
const fileBlob = `\n\n=== BEGIN ${normalizedRelativePath} ===\n${content}\n=== END ${normalizedRelativePath} ===\n\n`; | |
process.stdout.write(fileBlob); | |
fileCount++; | |
totalSize += Buffer.byteLength(fileBlob, "utf-8"); | |
process.stderr.write( | |
`Processed ${fileCount} files. Current total size: ${Math.round( | |
totalSize / 1024 | |
)} KB\r` | |
); | |
} catch (readErr) { | |
console.error( | |
`\nWarning: Could not read file ${file.path}: ${readErr.message}. Skipping.` | |
); | |
} | |
} | |
process.stderr.write("\n"); // New line after progress indicator | |
if (fileCount === 0) { | |
console.error("No files found or all files were skipped."); | |
return; | |
} | |
process.stderr.write(`\nProcessed ${fileCount} files.\n`); | |
process.stderr.write(`Total size: ${Math.round(totalSize / 1024)} KB\n`); | |
process.stderr.write(` | |
Remember, you can use unix pipes to process the output, like "bigfile export | pbcopy" to put it in your clipboard, or "bigfile export > bigfile.txt" to save it to a file. | |
Use a prompt like this: | |
--- | |
Hi, can you please refactor this project and clean it up? Output only the changed files in the exact same "big file" format. | |
\`\`\` | |
<paste the export here> | |
\`\`\` | |
`); | |
} catch (error) { | |
console.error("\nAn error occurred:", error); | |
} | |
} | |
async function importFiles() { | |
console.error("Starting import to: " + ROOT_DIR); | |
console.error( | |
"Paste your exported file content below, press enter one last time, then Ctrl+D when finished:" | |
); | |
const rl = readline.createInterface({ | |
input: process.stdin, | |
output: process.stdout, | |
terminal: false, | |
}); | |
let buffer = ""; | |
let currentFile = null; | |
let fileContent = ""; | |
let fileCount = 0; | |
// Process the input line by line | |
for await (const line of rl) { | |
buffer += line + "\n"; | |
// Check for file start marker | |
if (line.startsWith("=== BEGIN ")) { | |
currentFile = line.substring(10, line.length - 4).trim(); | |
fileContent = ""; | |
continue; | |
} | |
// Check for file end marker | |
if (line.startsWith("=== END ") && currentFile) { | |
// Write the file | |
try { | |
const fullPath = path.join(ROOT_DIR, currentFile); | |
const dirPath = path.dirname(fullPath); | |
// Create directory if it doesn't exist | |
await fs.mkdir(dirPath, { recursive: true }); | |
// Write file content | |
await fs.writeFile(fullPath, fileContent); | |
console.error(`Imported: ${currentFile}`); | |
fileCount++; | |
} catch (error) { | |
console.error(`Error writing file ${currentFile}: ${error.message}`); | |
} | |
currentFile = null; | |
fileContent = ""; | |
continue; | |
} | |
// If we're within a file block, accumulate the content | |
if (currentFile) { | |
fileContent += line + "\n"; | |
} | |
} | |
console.error(`\nImport complete. Wrote ${fileCount} files.`); | |
} | |
async function main() { | |
// Get the operation from command line arguments | |
const operation = process.argv[2]?.toLowerCase(); | |
if (operation === "export") { | |
await exportFiles(); | |
} else if (operation === "import") { | |
await importFiles(); | |
} else { | |
console.error(` | |
Usage: ${path.basename(process.argv[1])} [export|import] | |
export - Crawl the repository and output file contents | |
import - Read file contents from stdin and write to the repository | |
`); | |
process.exit(1); | |
} | |
} | |
main(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment