Skip to content

Instantly share code, notes, and snippets.

@nfarina
Last active May 14, 2025 21:09
Show Gist options
  • Save nfarina/4360ef0231666a061dd5dc0c654b01b4 to your computer and use it in GitHub Desktop.
Save nfarina/4360ef0231666a061dd5dc0c654b01b4 to your computer and use it in GitHub Desktop.
Exports a repository as one giant text file suitable for feeding into a long-context LLM (o3, o4-mini, Grok, Gemini 2.5), and can then import the result back into the repository.
#!/usr/bin/env node
/**
* BigFile: A utility for working with entire codebases as a single file
*
* This script provides two main functions:
* 1. export - Crawls through your repository and outputs all files in a single "big file" format
* - Skips binary files, large files, and common ignore patterns
* - Writes to stdout so you can pipe or redirect the output
*
* 2. import - Reads a previously exported "big file" and writes files to disk
* - Creates directories as needed
* - Expects the special === BEGIN/END markers to denote files
*
* Usage:
* $ bigfile export > mycode.txt # Save all code to a text file
* $ bigfile export | pbcopy # Copy all code to clipboard
* $ bigfile import < mycode.txt # Restore files from a text file
*
* IMPORTANT: Make sure your code is checked into git before importing
* the result! Additionally, there is no mechanism for the import to delete
* files; you may need to do that manually.
*
* Here's an example of how to use the import function without intermediate files:
*
* $ bigfile import
* Starting import to: /path/to/project
* Paste your exported file content below, press enter one last time, then Ctrl+D when finished:
* (paste your content here)
* (press enter one last time, then Ctrl+D when done)
*
* The output format uses markers to denote the beginning and end of each file:
* === BEGIN path/to/file.js ===
* (file contents here)
* === END path/to/file.js ===
*
* This format makes it easy to share entire codebases with AI assistants or
* other developers without needing to zip files or use version control.
*/
const fs = require("fs").promises;
const path = require("path");
const readline = require("readline");
// --- Configuration ---

// Directory that export crawls and import writes into. The script is meant to
// be launched from the repository root, so the working directory is used.
const ROOT_DIR = process.cwd();

// Entry names (files or directories) that are skipped wherever they appear.
const IGNORE_PATTERNS = [
  // Version control and editor metadata
  ".git", "node_modules", ".vscode", ".idea",
  // Generated output
  "dist", "build", "coverage",
  // Lockfile
  "package-lock.json",
];

// File extensions that are never exported (compared in lower case).
// Mostly binary formats; extend as needed.
const IGNORE_EXTENSIONS = [
  ".cache",
  // Images
  ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".ico",
  // Audio
  ".mp3", ".wav", ".ogg", ".flac",
  // Video
  ".mp4", ".avi", ".mov", ".wmv", ".flv",
  // Documents
  ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
  // Archives
  ".zip", ".tar", ".gz", ".rar", ".7z",
  // Executables, shared libraries and object code
  ".exe", ".dll", ".so", ".o", ".a", ".lib",
  // JVM and Python bytecode
  ".class", ".jar", ".pyc", ".pyo",
  // Lock files
  ".lock",
];

// Files larger than this many bytes (5 MB) are skipped; adjust as needed.
const MAX_FILE_SIZE_BYTES = 5 * 1024 ** 2;

// --- End Configuration ---
/**
 * Heuristically decides whether a file is binary by looking for a NUL byte
 * in its first 1024 bytes.
 *
 * Only the leading sample is read from disk, so the cost of the check does
 * not grow with the size of the file.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {Promise<boolean>} `true` when the sample contains a NUL byte or
 *   the file cannot be opened/read (treated as binary to be safe); `false`
 *   otherwise, including for empty files.
 */
async function isLikelyBinary(filePath) {
  const SAMPLE_SIZE = 1024;
  let handle;
  try {
    handle = await fs.open(filePath, "r");
    const sample = Buffer.alloc(SAMPLE_SIZE);
    // Read from offset 0 of the file; bytesRead is smaller than SAMPLE_SIZE
    // for short files, so only the filled part of the buffer is examined.
    const { bytesRead } = await handle.read(sample, 0, SAMPLE_SIZE, 0);
    return sample.subarray(0, bytesRead).includes(0);
  } catch (err) {
    console.warn(
      `Warning: Could not read ${filePath} for binary check: ${err.message}`
    );
    return true; // Assume binary on error to be safe
  } finally {
    // Always release the descriptor, even when the read failed.
    await handle?.close();
  }
}
/**
 * Recursively walks `dir` and yields every file that should be exported.
 *
 * Entries whose name is listed in IGNORE_PATTERNS are skipped, as are entries
 * that are neither directories nor regular files. Files are also skipped
 * (with a message on stderr) when their extension is in IGNORE_EXTENSIONS,
 * when they exceed MAX_FILE_SIZE_BYTES, when they look binary, or when they
 * cannot be stat'ed.
 *
 * @param {string} dir - Directory to traverse.
 * @yields {{ path: string, fullPath: string }} `path` is relative to
 *   ROOT_DIR; `fullPath` is `dir` joined with the entry name.
 */
async function* walk(dir) {
  const dirents = await fs.readdir(dir, { withFileTypes: true });

  for (const dirent of dirents) {
    const { name } = dirent;
    if (IGNORE_PATTERNS.includes(name)) {
      continue;
    }

    const absolute = path.join(dir, name);

    if (dirent.isDirectory()) {
      yield* walk(absolute);
      continue;
    }
    if (!dirent.isFile()) {
      continue;
    }

    const shown = path.relative(ROOT_DIR, absolute);

    const extension = path.extname(name).toLowerCase();
    if (IGNORE_EXTENSIONS.includes(extension)) {
      console.error(`Skipping (extension): ${shown}`);
      continue;
    }

    // Work out whether (and why) the file has to be skipped, then report once.
    let skipMessage = null;
    try {
      const { size } = await fs.stat(absolute);
      if (size > MAX_FILE_SIZE_BYTES) {
        skipMessage = `Skipping (too large: ${Math.round(
          size / 1024 / 1024
        )}MB): ${shown}`;
      } else if (await isLikelyBinary(absolute)) {
        skipMessage = `Skipping (likely binary): ${shown}`;
      }
    } catch (statErr) {
      skipMessage = `Warning: Could not stat ${shown}: ${statErr.message}. Skipping.`;
    }

    if (skipMessage !== null) {
      console.error(skipMessage);
      continue;
    }

    yield { path: shown, fullPath: absolute };
  }
}
/**
 * Exports every file yielded by `walk(ROOT_DIR)` to stdout in "big file"
 * format, reporting progress and a summary on stderr.
 *
 * Each file is written as:
 *
 *   === BEGIN path/to/file ===
 *   (file contents)
 *   === END path/to/file ===
 *
 * A newline is inserted before the END marker only when the content is
 * non-empty and does not already end with one. The importer appends "\n"
 * after every content line, so emitting an extra blank line here would make
 * files grow by one trailing newline on each export/import round trip.
 *
 * Files that cannot be read are skipped with a warning. If the traversal
 * itself fails, the error is logged and the process exit code is set to 1.
 *
 * @returns {Promise<void>}
 */
async function exportFiles() {
  let fileCount = 0;
  let totalSize = 0;
  console.error(`Starting export from: ${ROOT_DIR}`);
  console.error(`Ignoring patterns: ${IGNORE_PATTERNS.join(", ")}`);
  console.error(`Ignoring extensions: ${IGNORE_EXTENSIONS.join(", ")}`);
  console.error(`Max file size: ${MAX_FILE_SIZE_BYTES / 1024 / 1024} MB\n`);
  try {
    for await (const file of walk(ROOT_DIR)) {
      try {
        const content = await fs.readFile(file.fullPath, "utf-8");
        // Normalize path separators for consistent output
        const normalizedRelativePath = file.path.split(path.sep).join("/");
        // Keep the END marker on its own line without adding a blank line
        // that the importer would turn into extra file content.
        const separator =
          content === "" || content.endsWith("\n") ? "" : "\n";
        const fileBlob = `\n\n=== BEGIN ${normalizedRelativePath} ===\n${content}${separator}=== END ${normalizedRelativePath} ===\n\n`;
        process.stdout.write(fileBlob);
        fileCount++;
        totalSize += Buffer.byteLength(fileBlob, "utf-8");
        // "\r" keeps the progress indicator on a single terminal line.
        process.stderr.write(
          `Processed ${fileCount} files. Current total size: ${Math.round(
            totalSize / 1024
          )} KB\r`
        );
      } catch (readErr) {
        console.error(
          `\nWarning: Could not read file ${file.path}: ${readErr.message}. Skipping.`
        );
      }
    }
    process.stderr.write("\n"); // New line after progress indicator
    if (fileCount === 0) {
      console.error("No files found or all files were skipped.");
      return;
    }
    process.stderr.write(`\nProcessed ${fileCount} files.\n`);
    process.stderr.write(`Total size: ${Math.round(totalSize / 1024)} KB\n`);
    process.stderr.write(`
Remember, you can use unix pipes to process the output, like "bigfile export | pbcopy" to put it in your clipboard, or "bigfile export > bigfile.txt" to save it to a file.
Use a prompt like this:
---
Hi, can you please refactor this project and clean it up? Output only the changed files in the exact same "big file" format.
\`\`\`
<paste the export here>
\`\`\`
`);
  } catch (error) {
    console.error("\nAn error occurred:", error);
    // Signal failure to shells and pipelines instead of exiting with 0.
    process.exitCode = 1;
  }
}
/**
 * Maps a path taken from an import marker to a location under ROOT_DIR.
 *
 * @param {string} relativePath - File path as written in a BEGIN marker.
 * @returns {string|null} The normalized target path, or `null` when the path
 *   would resolve to ROOT_DIR itself or to somewhere outside of it.
 */
function resolveImportTarget(relativePath) {
  const target = path.join(ROOT_DIR, relativePath);
  const fromRoot = path.relative(ROOT_DIR, target);
  const escapesRoot =
    fromRoot === "" ||
    fromRoot === ".." ||
    fromRoot.startsWith(`..${path.sep}`) ||
    path.isAbsolute(fromRoot);
  return escapesRoot ? null : target;
}

/**
 * Reads "big file" content from stdin and writes each contained file below
 * ROOT_DIR, creating directories as needed.
 *
 * A file block starts at a `=== BEGIN <path> ===` line and ends at the next
 * line starting with `=== END `; every line in between becomes file content,
 * each terminated by "\n". Lines outside a block are ignored.
 *
 * The input is typically LLM-generated, so it is treated as untrusted:
 * paths that would land outside ROOT_DIR are skipped with a message rather
 * than written. Blocks that never reach an END marker are reported and
 * discarded. Write errors are logged per file and do not stop the import.
 *
 * @returns {Promise<void>}
 */
async function importFiles() {
  console.error("Starting import to: " + ROOT_DIR);
  console.error(
    "Paste your exported file content below, press enter one last time, then Ctrl+D when finished:"
  );
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    terminal: false,
    // Always treat CR LF as a single line break, regardless of chunk timing.
    crlfDelay: Infinity,
  });
  // Captures the path between "=== BEGIN " and the closing " ===", tolerating
  // trailing whitespace after the marker.
  const beginMarker = /^=== BEGIN\s+(\S.*?)\s*===\s*$/;
  let currentFile = null;
  let fileContent = "";
  let fileCount = 0;
  // Process the input line by line
  for await (const line of rl) {
    // Check for file start marker
    const begin = beginMarker.exec(line);
    if (begin) {
      if (currentFile !== null) {
        console.error(
          `Warning: ${currentFile} had no END marker before the next BEGIN; its content was discarded.`
        );
      }
      currentFile = begin[1];
      fileContent = "";
      continue;
    }
    // Check for file end marker
    if (currentFile !== null && line.startsWith("=== END ")) {
      const fullPath = resolveImportTarget(currentFile);
      if (fullPath === null) {
        console.error(
          `Skipping ${currentFile}: path is outside of ${ROOT_DIR}`
        );
      } else {
        try {
          // Create directory if it doesn't exist
          await fs.mkdir(path.dirname(fullPath), { recursive: true });
          // Write file content
          await fs.writeFile(fullPath, fileContent);
          console.error(`Imported: ${currentFile}`);
          fileCount++;
        } catch (error) {
          console.error(`Error writing file ${currentFile}: ${error.message}`);
        }
      }
      currentFile = null;
      fileContent = "";
      continue;
    }
    // If we're within a file block, accumulate the content
    if (currentFile !== null) {
      fileContent += `${line}\n`;
    }
  }
  if (currentFile !== null) {
    console.error(
      `Warning: input ended before the END marker of ${currentFile}; the file was not written.`
    );
  }
  console.error(`\nImport complete. Wrote ${fileCount} files.`);
}
/**
 * CLI entry point: runs the operation named by the first command-line
 * argument (matched case-insensitively).
 *
 * "export" runs exportFiles(), "import" runs importFiles(); anything else
 * prints usage to stderr and exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Get the operation from command line arguments
  const operation = process.argv[2]?.toLowerCase();
  if (operation === "export") {
    await exportFiles();
  } else if (operation === "import") {
    await importFiles();
  } else {
    console.error(`
Usage: ${path.basename(process.argv[1])} [export|import]
export - Crawl the repository and output file contents
import - Read file contents from stdin and write to the repository
`);
    process.exit(1);
  }
}

// Never leave the promise floating: report unexpected failures and exit
// with a non-zero status so callers can detect them.
main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment