Last active
April 17, 2026 00:26
-
-
Save tofrankie/aa26659072111913fc052fd38bdc9d85 to your computer and use it in GitHub Desktop.
Remove duplicate entries from macOS zsh history file, keeping only the most recent instance of each command.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| /** | |
| * 去重并按时间戳升序排序 zsh_history(支持多行命令记录) | |
| * | |
| * 规则: | |
| * - 以 command(`;` 后面的所有内容,含换行)作为去重 key | |
| * - 保留同 command 的最新 timestamp 的那条记录 | |
| * - 最终按 timestamp 升序输出(timestamp 相同按出现顺序、再按 command 兜底) | |
| * | |
| * 默认处理脚本同目录的 `zsh_history`,并覆盖写回;会先备份为 `zsh_history.bak` | |
| * | |
| * 用法: | |
| * node zsh/remove_duplicate_zsh_history.js | |
| * node zsh/remove_duplicate_zsh_history.js --file /path/to/.zsh_history | |
| * node zsh/remove_duplicate_zsh_history.js --stdout # 仅输出到 stdout,不写文件 | |
| */ | |
| import fs from 'node:fs' | |
| import path from 'node:path' | |
| import { fileURLToPath } from 'node:url' | |
| // Run the CLI as soon as the module loads; `main` is a function declaration, so it is hoisted and callable before its definition below. | |
| main() | |
/**
 * CLI entry point: read a zsh history file, drop duplicate commands (keeping the
 * newest occurrence of each), sort by timestamp, and either print the result or
 * write it back in place.
 *
 * The target file is `--file <path>` when given, otherwise `zsh_history` next to
 * this script. With `--stdout` nothing is written to disk. Otherwise the original
 * is first copied to `<file>.bak`, the new content is written to `<file>.tmp`,
 * and the temp file is renamed over the original.
 *
 * Exits the process with status 1 when the history file does not exist.
 *
 * @returns {void}
 */
function main() {
  const args = parseArgs(process.argv)
  const scriptDir = path.dirname(fileURLToPath(import.meta.url))
  const historyFile = args.file ? path.resolve(args.file) : path.join(scriptDir, 'zsh_history')

  if (!fs.existsSync(historyFile)) {
    console.error(`zsh history file not found at ${historyFile}`)
    process.exit(1)
  }

  // zsh stores some bytes >= 0x80 in an escaped ("metafied") form, so the file is
  // not guaranteed to be valid UTF-8. Decoding it as UTF-8 turns the invalid
  // sequences into U+FFFD and writing that back destroys non-ASCII commands.
  // latin1 maps every byte to exactly one code unit and back, so the bytes
  // round-trip untouched while the ASCII structure (`\n`, `:`, `;`, digits) that
  // the parser relies on stays intact.
  const encoding = 'latin1'

  const original = fs.readFileSync(historyFile, encoding)
  const { records, trailingNewline } = splitZshHistoryRecords(original)
  const outputRecords = dedupeAndSort(records)
  const out = outputRecords.join('\n') + (trailingNewline ? '\n' : '')

  if (args.stdout) {
    // Emit the same raw bytes that would have been written to the file.
    process.stdout.write(out, encoding)
    return
  }

  const backupFile = `${historyFile}.bak`
  fs.copyFileSync(historyFile, backupFile)

  // The temp file replaces the history file via rename, so it must carry the
  // original permission bits; otherwise a private (e.g. 0600) history file would
  // end up with the default creation mode and could become readable by others.
  const permissions = fs.statSync(historyFile).mode & 0o777
  const tmpFile = `${historyFile}.tmp`
  fs.writeFileSync(tmpFile, out, { encoding, mode: permissions })
  // `mode` above is subject to the umask and is ignored when the temp file
  // already exists, so set the permissions explicitly as well.
  fs.chmodSync(tmpFile, permissions)
  fs.renameSync(tmpFile, historyFile)

  console.log(
    `Duplicates removed and sorted.\nRecords: ${records.length} -> ${outputRecords.length}.\nBackup saved as ${backupFile}`
  )
}
/**
 * Parse the command-line flags understood by this script.
 *
 * Recognised flags:
 *   --stdout        print the result instead of writing the file
 *   --file <path>   history file to process
 *
 * @param {string[]} argv - Full argument vector (as in `process.argv`); the first
 *   two entries are skipped.
 * @returns {{file: string | null, stdout: boolean}} The parsed options.
 * @throws {Error} When `--file` has no value or an unknown argument is given.
 */
function parseArgs(argv) {
  const parsed = { file: null, stdout: false }
  let pos = 2

  while (pos < argv.length) {
    const flag = argv[pos]
    switch (flag) {
      case '--stdout':
        parsed.stdout = true
        pos += 1
        break
      case '--file': {
        // The value is the next argument; consume both entries.
        const value = argv[pos + 1]
        if (!value) throw new Error('缺少 --file 参数值')
        parsed.file = value
        pos += 2
        break
      }
      default:
        throw new Error(`未知参数: ${flag}`)
    }
  }

  return parsed
}
/**
 * Split the raw text of a zsh history file into individual records.
 *
 * A record starts at a line that looks like an extended-history header
 * (`: <timestamp>:<duration>;`) and runs until the next such line, so the
 * continuation lines of a multi-line command stay attached to the record that
 * owns them. Text before the first header (if any) is returned as a record of
 * its own. Newlines at the end of a record are separators, not part of the
 * command, and are removed; blank records are skipped.
 *
 * @param {string} content - Full text of the history file.
 * @returns {{records: string[], trailingNewline: boolean}} The records in file
 *   order, plus whether `content` ended with a newline (so the caller can
 *   restore it when re-joining the records).
 */
function splitZshHistoryRecords(content) {
  if (!content) return { records: [], trailingNewline: false }
  const trailingNewline = content.endsWith('\n')

  // Split only at a newline that is followed by something parseable as a record
  // header: ": ", digits, an optional ":<duration>" part, then ";". Matching the
  // bare prefix "\n: " is not enough: a continuation line of a multi-line
  // command may itself start with ": " (the shell no-op builtin, heredoc text),
  // which would cut the command in two and leave an unparseable fragment.
  const recordBoundary = /\n(?=: [ \t]*\d+[ \t]*(?::[^;\n]*)?;)/

  const records = []
  for (const piece of content.split(recordBoundary)) {
    // Strip the newline(s) that terminate the record. Without this the last
    // record keeps the file's final "\n", so its command never compares equal to
    // an earlier identical command and the output gains an extra blank line on
    // every run.
    let end = piece.length
    while (end > 0 && piece[end - 1] === '\n') end--
    const rec = piece.slice(0, end)
    if (rec.trim().length === 0) continue
    records.push(rec)
  }
  return { records, trailingNewline }
}
/**
 * Parse one history record of the form `: <timestamp>:<duration>;<command...>`.
 *
 * A record that does not begin with `: ` gets that prefix prepended before
 * parsing. The timestamp is the text between the prefix and the first `:` (or
 * the first `;` when there is no `:`), trimmed, and must consist of digits only.
 * Everything after the first `;` — including newlines — is the command.
 *
 * @param {string} rec - Raw record text.
 * @returns {{timestamp: number, command: string, record: string} | null} The
 *   parsed record (with `record` holding the prefix-normalised text), or `null`
 *   when there is no `;`, the timestamp is missing or non-numeric, or the
 *   command is blank.
 */
function parseRecord(rec) {
  const PREFIX = ': '
  const line = rec.startsWith(PREFIX) ? rec : PREFIX + rec

  const sep = line.indexOf(';')
  if (sep < 0) return null

  // Header is "<timestamp>[:<duration>]"; only the timestamp part matters here.
  const header = line.slice(PREFIX.length, sep)
  if (header === '') return null

  const [rawTimestamp] = header.split(':')
  const tsStr = rawTimestamp.trim()
  if (!/^\d+$/.test(tsStr)) return null

  // Records with an empty command (e.g. ": 1767030772:0;") carry no information
  // for deduplication or sorting, so they are rejected.
  const command = line.slice(sep + 1)
  if (command.trim() === '') return null

  return { timestamp: Number(tsStr), command, record: line }
}
/**
 * Deduplicate history records by command and return them in ascending
 * timestamp order.
 *
 * Each record is parsed with `parseRecord`; records it rejects are dropped. The
 * complete command text (newlines included) is the deduplication key, so a
 * multi-line command counts as a single command. For each command the record
 * with the greatest timestamp is kept; on equal timestamps the earlier record
 * wins.
 *
 * @param {string[]} records - Raw records in file order.
 * @returns {string[]} The surviving records (prefix-normalised), sorted by
 *   timestamp, then by original position, then by record text.
 */
function dedupeAndSort(records) {
  /** @type {Map<string, {timestamp: number, ord: number, record: string}>} */
  const latestByCommand = new Map()

  for (const [index, raw] of records.entries()) {
    const entry = parseRecord(raw)
    if (!entry) continue

    const known = latestByCommand.get(entry.command)
    // Keep what is already stored unless this record is strictly newer.
    if (known && entry.timestamp <= known.timestamp) continue

    latestByCommand.set(entry.command, {
      timestamp: entry.timestamp,
      ord: index + 1, // 1-based position in the input; sort tie-breaker
      record: entry.record,
    })
  }

  const byTimeThenPosition = (left, right) => {
    // Primary key: timestamp, so the output is in ascending time order.
    const timeDelta = left.timestamp - right.timestamp
    if (left.timestamp !== right.timestamp) return timeDelta

    // Equal timestamps: fall back to input position for a stable result.
    if (left.ord !== right.ord) return left.ord - right.ord

    // Last resort so that fully equal keys still order deterministically.
    if (left.record < right.record) return -1
    if (left.record > right.record) return 1
    return 0
  }

  const survivors = [...latestByCommand.values()]
  survivors.sort(byTimeThenPosition)
  return survivors.map(({ record }) => record)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment