Skip to content

Instantly share code, notes, and snippets.

@tinfoil-knight
Created August 31, 2024 20:23
Show Gist options
  • Save tinfoil-knight/eee70f2613fa0a0263b61ee00bbcfc98 to your computer and use it in GitHub Desktop.
Save tinfoil-knight/eee70f2613fa0a0263b61ee00bbcfc98 to your computer and use it in GitHub Desktop.
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
/**
 * Reads a text file synchronously and returns its lines.
 *
 * Both LF and CRLF line endings are accepted. A trailing line terminator does
 * not produce an extra empty entry, so "a\nb\n" yields ["a", "b"] and an
 * empty file yields []. Blank lines in the middle of the file are kept.
 *
 * @param {string} absolutePath - Path of the file to read.
 * @returns {string[]} The file's lines without their line terminators.
 * @throws {Error} Whatever `fs.readFileSync` throws (e.g. the file is missing).
 */
function readLines(absolutePath) {
  const contents = fs.readFileSync(absolutePath, "utf8");
  const lines = contents.split(/\r?\n/);
  // A file ending in a newline splits into a final "" that is not a real line.
  if (lines[lines.length - 1] === "") {
    lines.pop();
  }
  return lines;
}
/**
 * Finds the indexes of lines that are repeated later in the list.
 *
 * Only the last occurrence of each distinct line is kept; every earlier
 * occurrence is reported as a duplicate.
 *
 * @param {string[]} lines - Lines to scan, in file order.
 * @returns {Set<number>} Indexes of all occurrences except the last one.
 */
function findDuplicates(lines) {
  const duplicates = new Set();
  // A Map (not a plain object) so that lines such as "toString", "constructor"
  // or "__proto__" are treated as ordinary keys instead of hitting members
  // inherited from Object.prototype.
  const lastSeenAt = new Map();
  lines.forEach((line, idx) => {
    const earlierIdx = lastSeenAt.get(line);
    if (earlierIdx !== undefined) {
      duplicates.add(earlierIdx);
    }
    lastSeenAt.set(line, idx);
  });
  return duplicates;
}
/**
 * Computes the Levenshtein (edit) distance between two strings using
 * memoized recursion over their prefixes.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @param {Object} [memo] - Cache keyed by "<prefixLen1>,<prefixLen2>". It is
 *   only meaningful for a single pair of strings, so callers normally omit it.
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions needed to turn one string into the other.
 */
function levenshteinDistance(str1, str2, memo = {}) {
  // Distance between the first `len1` chars of str1 and the first `len2` chars of str2.
  const solve = (len1, len2) => {
    if (len1 === 0) return len2;
    if (len2 === 0) return len1;

    const cacheKey = `${len1},${len2}`;
    if (cacheKey in memo) return memo[cacheKey];

    let result;
    if (str1[len1 - 1] === str2[len2 - 1]) {
      // Matching last characters cost nothing; compare what is left.
      result = solve(len1 - 1, len2 - 1);
    } else {
      const dropFromFirst = solve(len1 - 1, len2);
      const dropFromSecond = solve(len1, len2 - 1);
      const substitute = solve(len1 - 1, len2 - 1);
      result = 1 + Math.min(dropFromFirst, dropFromSecond, substitute);
    }

    memo[cacheKey] = result;
    return result;
  };

  return solve(str1.length, str2.length);
}
/**
 * Finds lines that look like a mistyped command which was corrected on the
 * very next line.
 *
 * Heuristic: if two consecutive lines have exactly the same argument text and
 * their command names differ by a small edit distance, the earlier line was
 * likely an invalid typo. Two lines with no arguments at all count as having
 * the same arguments, so they are compared by command name alone.
 *
 * @param {string[]} lines - Lines to scan, in file order.
 * @returns {Set<number>} Indexes of the lines judged to be typos.
 */
function findInvalidCommands(lines) {
  const STRING_DISTANCE_THRESHOLD = 2;

  // Split on the FIRST space only so the second element holds the complete
  // argument text. `line.split(" ", 2)` would keep just the first argument and
  // drop the rest, making "gti push origin" and "git push upstream" look alike.
  const splitCommand = (line) => {
    const spaceIdx = line.indexOf(" ");
    if (spaceIdx === -1) {
      return [line, undefined];
    }
    return [line.slice(0, spaceIdx), line.slice(spaceIdx + 1)];
  };

  const invalids = new Set();
  for (let i = 1; i < lines.length; i++) {
    const [prevCmd, prevArgs] = splitCommand(lines[i - 1]);
    const [currCmd, currArgs] = splitCommand(lines[i]);
    if (prevArgs !== currArgs) {
      continue;
    }
    const dist = levenshteinDistance(prevCmd, currCmd);
    // dist === 0 means the same command; only near-misses are typos.
    if (dist > 0 && dist <= STRING_DISTANCE_THRESHOLD) {
      invalids.add(i - 1);
    }
  }
  return invalids;
}
/**
 * Collects the indexes of lines that start with one of the git shortcuts
 * `git cm`, `git brD` or `git cob`, which are treated as not worth keeping.
 *
 * @param {string[]} lines - Lines to scan.
 * @returns {Set<number>} Indexes of the matching lines.
 */
function findNonReusableGitCommands(lines) {
  const oneOffGitCommand = /^git (cm|brD|cob)/;
  return lines.reduce((found, text, position) => {
    if (oneOffGitCommand.test(text)) {
      found.add(position);
    }
    return found;
  }, new Set());
}
function writeLinesToFile(absolutePath, lines) {
return new Promise((resolve, reject) => {
const writeStream = fs.createWriteStream(absolutePath);
writeStream.on("error", (error) => {
reject(`Error writing to file: ${error.message}`);
});
lines.forEach((line) => {
writeStream.write(line + "\n");
});
writeStream.end(() => {
resolve();
});
});
}
/**
 * CLI entry point.
 *
 * Expects exactly two arguments: the source history file and the destination
 * file. Reads the source, then successively drops duplicate lines, lines that
 * look like corrected typos, and non-reusable git shortcuts. Prints a short
 * report and writes the remaining lines to the destination.
 *
 * Exits with status 1 on wrong usage, and sets the exit code to 1 when
 * writing the destination file fails.
 */
function main() {
  if (process.argv.length !== 4) {
    console.log(
      `Usage:
./shell_history_compactor.js <src-file-path> <dest-file-path>
`,
    );
    process.exit(1);
  }
  const srcFilePath = path.resolve(process.argv[2]);
  const dstFilePath = path.resolve(process.argv[3]);

  const lines = readLines(srcFilePath);
  console.log(`Out of ${lines.length} lines:`);

  // Each pass works on the output of the previous one, so the indexes a
  // finder returns always refer to the array it was given.
  const duplicates = findDuplicates(lines);
  const uniqueLines = lines.filter((_, idx) => !duplicates.has(idx));
  console.log(`${duplicates.size} lines are duplicate.`);

  const invalids = findInvalidCommands(uniqueLines);
  const validLines = uniqueLines.filter((_, idx) => !invalids.has(idx));
  console.log(`${invalids.size} lines have invalid commands.`);

  const nonReusables = findNonReusableGitCommands(validLines);
  const reusableLines = validLines.filter((_, idx) => !nonReusables.has(idx));
  console.log(`${nonReusables.size} lines are not reusable.`);

  const finalLines = reusableLines;
  // Guard the division so an input with no lines reports 0% instead of NaN%.
  const saved =
    lines.length === 0
      ? 0
      : ((lines.length - finalLines.length) * 100) / lines.length;
  console.log(
    `\n${finalLines.length} lines will be written to new file. ${saved.toFixed(2)}% savings.`,
  );

  writeLinesToFile(dstFilePath, finalLines).catch((error) => {
    console.error(error);
    // Report the failure to the calling shell instead of exiting with status 0.
    process.exitCode = 1;
  });
}
// Run the compactor when this file is executed as a script.
main();
@eclectic-memory
Copy link

Feedback for Self:

  • git revert can be added to the list of non-reusable commands

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment