Skip to content

Instantly share code, notes, and snippets.

@tofrankie
Last active April 17, 2026 00:26
Show Gist options
  • Select an option

  • Save tofrankie/aa26659072111913fc052fd38bdc9d85 to your computer and use it in GitHub Desktop.

Select an option

Save tofrankie/aa26659072111913fc052fd38bdc9d85 to your computer and use it in GitHub Desktop.
Remove duplicate entries from macOS zsh history file, keeping only the most recent instance of each command.
#!/usr/bin/env node
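/**
* Deduplicate a zsh_history file and sort it by timestamp in ascending order
* (multi-line command records are supported).
*
* Rules:
* - The command (everything after the `;`, including newlines) is the dedupe key
* - For each command, only the record with the newest timestamp is kept
* - Output is sorted by ascending timestamp (ties are broken by order of
*   appearance, then by record text as a last resort)
*
* By default the `zsh_history` file next to this script is processed and
* overwritten in place; a backup is written to `zsh_history.bak` first.
*
* Usage:
* node zsh/remove_duplicate_zsh_history.js
* node zsh/remove_duplicate_zsh_history.js --file /path/to/.zsh_history
* node zsh/remove_duplicate_zsh_history.js --stdout # print to stdout only, do not write any file
*/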
import fs from 'node:fs'
import path from 'node:path'
import { fileURLToPath } from 'node:url'
main()

/**
 * Script entry point: read the history file, dedupe and sort its records,
 * then either print the result (`--stdout`) or replace the file in place.
 *
 * In-place mode first copies the original to `<file>.bak`, writes the new
 * content to `<file>.tmp`, and finally renames the temp file over the
 * original.
 *
 * Exits with status 1 when the history file does not exist. Errors thrown by
 * `parseArgs` or by the `fs` calls are not caught here.
 */
function main() {
  const args = parseArgs(process.argv)
  const scriptDir = path.dirname(fileURLToPath(import.meta.url))
  const historyFile = args.file ? path.resolve(args.file) : path.join(scriptDir, 'zsh_history')

  if (!fs.existsSync(historyFile)) {
    console.error(`zsh history file not found at ${historyFile}`)
    process.exit(1)
  }

  // History files are not guaranteed to be valid UTF-8 (zsh escapes certain
  // bytes when saving). Decoding as UTF-8 would turn such bytes into U+FFFD
  // and corrupt them on write-back. latin1 maps every byte to exactly one
  // code unit and back, so the round trip is lossless, and all parsing below
  // only looks at ASCII characters.
  const historyEncoding = 'latin1'

  const original = fs.readFileSync(historyFile, historyEncoding)
  const { records, trailingNewline } = splitZshHistoryRecords(original)
  const outputRecords = dedupeAndSort(records)
  const out = outputRecords.join('\n') + (trailingNewline ? '\n' : '')

  if (args.stdout) {
    process.stdout.write(out, historyEncoding)
    return
  }

  // Shell history may contain secrets and is usually mode 0600. Carry the
  // original permission bits over to the replacement file instead of letting
  // it be created with the process default.
  const permissionBits = fs.statSync(historyFile).mode & 0o777

  const backupFile = `${historyFile}.bak`
  fs.copyFileSync(historyFile, backupFile)

  const tmpFile = `${historyFile}.tmp`
  fs.writeFileSync(tmpFile, out, { encoding: historyEncoding, mode: permissionBits })
  // `mode` above only applies when the file is created and is filtered by the
  // umask; chmod makes the result exact even if a stale temp file existed.
  fs.chmodSync(tmpFile, permissionBits)
  fs.renameSync(tmpFile, historyFile)

  console.log(
    `Duplicates removed and sorted.\nRecords: ${records.length} -> ${outputRecords.length}.\nBackup saved as ${backupFile}`
  )
}
/**
 * Parse the command-line flags understood by this script.
 *
 * Recognised flags:
 * - `--stdout`      print the result instead of writing any file
 * - `--file <path>` history file to process
 *
 * @param {string[]} argv Full argument vector; the first two entries are skipped.
 * @returns {{file: string | null, stdout: boolean}} Parsed options.
 * @throws {Error} When `--file` has no value or an unknown argument is seen.
 */
function parseArgs(argv) {
  const parsed = { file: null, stdout: false }
  let index = 2

  while (index < argv.length) {
    const flag = argv[index]

    switch (flag) {
      case '--stdout':
        parsed.stdout = true
        index += 1
        break

      case '--file': {
        // The value is whatever token follows, consumed together with the flag.
        const value = argv[index + 1]
        if (!value) throw new Error('缺少 --file 参数值')
        parsed.file = value
        index += 2
        break
      }

      default:
        throw new Error(`未知参数: ${flag}`)
    }
  }

  return parsed
}
/**
 * Split the raw text of a zsh history file into individual records.
 *
 * Each record starts with a `: <timestamp>:<duration>;` header. A multi-line
 * command keeps its continuation lines directly after the header line, so a
 * record may span several lines.
 *
 * @param {string} content Raw file content.
 * @returns {{records: string[], trailingNewline: boolean}} The records in file
 *   order (without the newline that separates them) and whether the input
 *   ended with a newline. Whitespace-only records are dropped.
 */
function splitZshHistoryRecords(content) {
  if (!content) return { records: [], trailingNewline: false }

  const trailingNewline = content.endsWith('\n')

  // Drop the newline(s) terminating the file before splitting. Otherwise the
  // last record would carry them, so its command would never match an earlier
  // identical command and the output would gain a blank line on every run.
  let bodyEnd = content.length
  while (bodyEnd > 0 && content[bodyEnd - 1] === '\n') bodyEnd--
  const body = content.slice(0, bodyEnd)

  // A new record begins only where a newline is followed by a real header:
  // `: <digits>` with an optional `:<duration>` part, then `;`. Matching just
  // `\n: ` would also split at continuation lines that happen to start with
  // `: ` (e.g. `: ${VAR:=default}`), truncating the command and losing its tail.
  const recordBoundary = /\n(?=: [ \t]*\d+[ \t]*(?::[^;\n]*)?;)/

  const records = body.split(recordBoundary).filter(rec => rec.trim().length > 0)

  return { records, trailingNewline }
}
/**
 * Parse one history record of the form `: <timestamp>:<duration>;<command...>`.
 *
 * A record that does not start with `: ` gets that prefix prepended before
 * parsing; the returned `record` is the prefixed text.
 *
 * @param {string} rec Raw record text.
 * @returns {{timestamp: number, command: string, record: string} | null}
 *   The parsed record, or `null` when there is no `;`, the timestamp is not a
 *   run of digits, or the command is empty or whitespace-only.
 */
function parseRecord(rec) {
  const prefix = ': '
  const record = rec.startsWith(prefix) ? rec : prefix + rec

  const separatorIndex = record.indexOf(';')
  if (separatorIndex < 0) return null

  // Header is what sits between the leading ": " and the first ";".
  const header = record.slice(prefix.length, separatorIndex)
  if (header === '') return null

  // Only the part before the first ":" is the timestamp; the rest is ignored.
  const [rawTimestamp] = header.split(':')
  const digits = rawTimestamp.trim()
  if (!/^\d+$/.test(digits)) return null

  const command = record.slice(separatorIndex + 1)
  // Skip empty commands (e.g. ": 1767030772:0;"): they carry nothing worth
  // deduplicating or sorting and would only clutter the history.
  if (command.trim() === '') return null

  return { timestamp: Number(digits), command, record }
}
/**
 * Keep one record per distinct command and return them in ascending
 * timestamp order.
 *
 * Records that `parseRecord` rejects are dropped. For a repeated command the
 * record with the strictly greatest timestamp is kept; on equal timestamps the
 * earlier occurrence stays.
 *
 * @param {string[]} records Raw record strings in file order.
 * @returns {string[]} Normalised record strings, deduplicated and sorted.
 */
function dedupeAndSort(records) {
  /** @type {Map<string, {timestamp: number, ord: number, record: string}>} */
  const newestByCommand = new Map()

  records.forEach((raw, index) => {
    const parsed = parseRecord(raw)
    if (!parsed) return

    // The complete command text (newlines included) is the key, so a
    // multi-line command is compared as a single unit.
    const { command, timestamp, record } = parsed
    const known = newestByCommand.get(command)
    if (known && timestamp <= known.timestamp) return

    // `ord` is the 1-based position in the input, used to break timestamp ties.
    newestByCommand.set(command, { timestamp, ord: index + 1, record })
  })

  const byTimeThenPosition = (left, right) => {
    // Primary key: timestamp, so the output file is chronologically ascending.
    if (left.timestamp !== right.timestamp) return left.timestamp - right.timestamp
    // Same timestamp: fall back to input position for a deterministic order.
    if (left.ord !== right.ord) return left.ord - right.ord
    // Last resort: compare the record text itself.
    if (left.record < right.record) return -1
    if (left.record > right.record) return 1
    return 0
  }

  return [...newestByCommand.values()].sort(byTimeThenPosition).map(entry => entry.record)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment