Created
August 31, 2024 20:23
-
-
Save tinfoil-knight/eee70f2613fa0a0263b61ee00bbcfc98 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
const fs = require("fs"); | |
const path = require("path"); | |
/**
 * Reads a text file synchronously and returns its lines.
 *
 * Both "\n" and "\r\n" are accepted as line terminators. A terminator at the
 * very end of the file does not produce a trailing empty entry, so a file
 * containing "a\nb\n" yields ["a", "b"] and an empty file yields [].
 * Blank lines inside the file are preserved as empty strings.
 *
 * @param {string} absolutePath - Path of the file to read.
 * @returns {string[]} The lines of the file, without terminators.
 * @throws {Error} Whatever fs.readFileSync throws (e.g. file not found).
 */
function readLines(absolutePath) {
  const contents = fs.readFileSync(absolutePath, "utf8");
  const lines = contents.split(/\r?\n/);
  // split() always reports an empty string after a final terminator (and for
  // empty input); that entry is not a real line, so drop it.
  if (lines[lines.length - 1] === "") {
    lines.pop();
  }
  return lines;
}
/**
 * Finds the indexes of lines that are repeated later in the list.
 *
 * Only the latest occurrence of a line is meant to be kept, so for every
 * repeated line all occurrences except the last one are reported.
 *
 * @param {string[]} lines - Lines to inspect.
 * @returns {Set<number>} Indexes of the occurrences that are superseded by a
 *   later identical line.
 */
function findDuplicates(lines) {
  const duplicates = new Set();
  // A Map (not a plain object) so that lines such as "constructor",
  // "toString" or "__proto__" are treated as ordinary keys instead of
  // colliding with properties inherited from Object.prototype.
  const lastSeenAt = new Map();
  lines.forEach((line, idx) => {
    const previousIdx = lastSeenAt.get(line);
    if (previousIdx !== undefined) {
      duplicates.add(previousIdx);
    }
    lastSeenAt.set(line, idx);
  });
  return duplicates;
}
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other. Characters are compared per UTF-16 code unit.
 *
 * Implemented iteratively with two rolling rows, so long inputs cannot
 * exhaust the call stack and no intermediate substrings are created.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @param {object} [memo] - Unused; kept only so existing three-argument
 *   calls remain valid.
 * @returns {number} The edit distance between str1 and str2.
 */
function levenshteinDistance(str1, str2, memo = {}) {
  if (str1.length === 0) return str2.length;
  if (str2.length === 0) return str1.length;

  // The distance is symmetric, so iterate rows over the longer string and
  // keep the row width equal to the shorter one to minimise memory.
  const [shorter, longer] =
    str1.length <= str2.length ? [str1, str2] : [str2, str1];

  // prevRow[j] = distance between the first (i - 1) chars of `longer` and the
  // first j chars of `shorter`; row 0 is the cost of j insertions.
  let prevRow = Array.from({ length: shorter.length + 1 }, (_, j) => j);

  for (let i = 1; i <= longer.length; i++) {
    const currRow = [i];
    for (let j = 1; j <= shorter.length; j++) {
      if (longer[i - 1] === shorter[j - 1]) {
        currRow[j] = prevRow[j - 1];
      } else {
        currRow[j] = 1 + Math.min(prevRow[j], currRow[j - 1], prevRow[j - 1]);
      }
    }
    prevRow = currRow;
  }

  return prevRow[shorter.length];
}
/**
 * Finds the indexes of commands that look like mistyped versions of the
 * command that immediately follows them.
 *
 * Heuristic: if a line has exactly the same argument string as the next line
 * and the two command names differ by a small edit distance, the earlier line
 * was most likely a typo that the user corrected right away.
 *
 * @param {string[]} lines - Shell history lines, oldest first.
 * @returns {Set<number>} Indexes of the lines considered invalid typos.
 */
function findInvalidCommands(lines) {
  const STRING_DISTANCE_THRESHOLD = 2;

  // Split at the FIRST space only. String.prototype.split(" ", 2) would
  // truncate to the first two tokens, so only the first argument would be
  // compared instead of the whole argument string.
  const splitCommand = (line) => {
    const spaceIdx = line.indexOf(" ");
    if (spaceIdx === -1) {
      return [line, undefined];
    }
    return [line.slice(0, spaceIdx), line.slice(spaceIdx + 1)];
  };

  const invalids = new Set();
  const parsed = lines.map(splitCommand);
  for (let i = 1; i < parsed.length; i++) {
    const [prevCmd, prevArgs] = parsed[i - 1];
    const [currCmd, currArgs] = parsed[i];
    // NOTE(review): two consecutive commands without any arguments also
    // count as "same args", so short argless commands (e.g. "cd" then "ls")
    // can be flagged — confirm whether that is intended.
    if (prevArgs === currArgs) {
      const dist = levenshteinDistance(prevCmd, currCmd);
      if (dist > 0 && dist <= STRING_DISTANCE_THRESHOLD) {
        invalids.add(i - 1);
      }
    }
  }
  return invalids;
}
/**
 * Finds the indexes of git invocations that are not worth keeping in the
 * history because they are unlikely to be reused verbatim.
 *
 * A line matches when it starts with "git " immediately followed by one of
 * the given subcommand prefixes.
 *
 * @param {string[]} lines - Shell history lines.
 * @param {string[]} [subcommandPrefixes] - Git subcommand (or alias) prefixes
 *   to treat as non-reusable. Defaults to the aliases "cm", "brD" and "cob";
 *   pass a longer list (e.g. including "revert") to filter more commands.
 * @returns {Set<number>} Indexes of the matching lines.
 */
function findNonReusableGitCommands(
  lines,
  subcommandPrefixes = ["cm", "brD", "cob"],
) {
  const linePrefixes = subcommandPrefixes.map((prefix) => `git ${prefix}`);
  const indexes = new Set();
  lines.forEach((line, idx) => {
    if (linePrefixes.some((prefix) => line.startsWith(prefix))) {
      indexes.add(idx);
    }
  });
  return indexes;
}
/**
 * Writes the given lines to a file, each followed by a newline, replacing any
 * existing content.
 *
 * @param {string} absolutePath - Destination file path.
 * @param {string[]} lines - Lines to write, without terminators.
 * @returns {Promise<void>} Resolves once all data has been flushed to the
 *   stream; rejects with an Error (original error attached as `cause`) if the
 *   file cannot be opened or written.
 */
function writeLinesToFile(absolutePath, lines) {
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(absolutePath);

    writeStream.on("error", (error) => {
      // Reject with a real Error (not a string) so callers get a stack trace
      // and can still reach the underlying failure through `cause`.
      reject(
        new Error(`Error writing to file: ${error.message}`, { cause: error }),
      );
    });
    // "finish" fires only after end() was called and every chunk was flushed.
    writeStream.on("finish", () => resolve());

    for (const line of lines) {
      writeStream.write(`${line}\n`);
    }
    writeStream.end();
  });
}
/**
 * Command-line entry point.
 *
 * Expects exactly two arguments: the source history file and the destination
 * file. Reads the source, drops superseded duplicate lines, likely typos and
 * non-reusable git commands (in that order), prints a summary of each step
 * and writes the remaining lines to the destination.
 *
 * The process exit code is set to 1 on wrong usage, on a read failure and on
 * a write failure.
 */
function main() {
  if (process.argv.length !== 4) {
    console.error(
      `Usage:
  ./shell_history_compactor.js <src-file-path> <dest-file-path>
`,
    );
    process.exit(1);
  }

  const srcFilePath = path.resolve(process.argv[2]);
  const dstFilePath = path.resolve(process.argv[3]);

  let lines;
  try {
    lines = readLines(srcFilePath);
  } catch (error) {
    console.error(`Error reading file: ${error.message}`);
    process.exitCode = 1;
    return;
  }
  console.log(`Out of ${lines.length} lines:`);

  // Each pass works on the output of the previous one, so the index sets are
  // only valid for the array they were computed from.
  const duplicates = findDuplicates(lines);
  const uniqueLines = lines.filter((_, idx) => !duplicates.has(idx));
  console.log(`${duplicates.size} lines are duplicate.`);

  const invalids = findInvalidCommands(uniqueLines);
  const validLines = uniqueLines.filter((_, idx) => !invalids.has(idx));
  console.log(`${invalids.size} lines have invalid commands.`);

  const nonReusables = findNonReusableGitCommands(validLines);
  const reusableLines = validLines.filter((_, idx) => !nonReusables.has(idx));
  console.log(`${nonReusables.size} lines are not reusable.`);

  const finalLines = reusableLines;
  // Guard against dividing by zero so an empty input never prints "NaN".
  const saved =
    lines.length === 0
      ? 0
      : ((lines.length - finalLines.length) * 100) / lines.length;
  console.log(
    `\n${finalLines.length} lines will be written to new file. ${saved.toFixed(2)}% savings.`,
  );

  writeLinesToFile(dstFilePath, finalLines).catch((error) => {
    console.error(error);
    // Signal the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  });
}

main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Feedback for self: `revert` can be added to the list of non-reusable git commands.