tofrankie · April 17, 2026 00:26
diff --git a/remove_duplicate_zsh_history b/remove_duplicate_zsh_history
 #!/usr/bin/env node

 /**
 * 去重并按时间戳升序排序 zsh_history（支持多行命令记录）
 *
 * 规则：
 * - 以 command（`;` 后面的所有内容，含换行）作为去重 key
 * - 保留同 command 的最新 timestamp 的那条记录
 * - 最终按 timestamp 升序输出（timestamp 相同按出现顺序、再按整条记录兜底）
 *
 * 默认处理脚本同目录的 `zsh_history`，并覆盖写回；会先备份为 `zsh_history.bak`
 *
 * 用法：
 *   node zsh/remove_duplicate_zsh_history.js
 *   node zsh/remove_duplicate_zsh_history.js --file /path/to/.zsh_history
 *   node zsh/remove_duplicate_zsh_history.js --stdout        # 仅输出到 stdout，不写文件
 */
 
 import fs from 'node:fs'
 import path from 'node:path'
 import { fileURLToPath } from 'node:url'

 // Run the CLI. `main` is a function declaration, so it is hoisted and can be
 // invoked here before its definition further down the file.
 main()

 /**
  * CLI entry point: de-duplicates and sorts a zsh history file.
  *
  * The target is the `--file` argument when given, otherwise `zsh_history` in
  * the directory of this script. With `--stdout` the result is only printed.
  * Otherwise the original is copied to `<file>.bak`, the result is written to
  * `<file>.tmp` and that temp file is renamed over the original.
  *
  * Exits with status 1 when the history file does not exist.
  */
 function main() {
  const args = parseArgs(process.argv)
  const scriptDir = path.dirname(fileURLToPath(import.meta.url))
  const historyFile = args.file ? path.resolve(args.file) : path.join(scriptDir, 'zsh_history')

  if (!fs.existsSync(historyFile)) {
    console.error(`zsh history file not found at ${historyFile}`)
    process.exit(1)
  }

  // zsh can store non-ASCII bytes in its own "metafied" escaping, which is not
  // valid UTF-8. Decoding as UTF-8 would replace such bytes with U+FFFD and the
  // rewrite would corrupt them. latin1 maps every byte to exactly one code
  // unit, so reading and writing with it round-trips the file byte for byte.
  const original = fs.readFileSync(historyFile, 'latin1')
  const { records, trailingNewline } = splitZshHistoryRecords(original)
  const outputRecords = dedupeAndSort(records)

  const out = Buffer.from(outputRecords.join('\n') + (trailingNewline ? '\n' : ''), 'latin1')

  if (args.stdout) {
    process.stdout.write(out)
    return
  }

  const backupFile = `${historyFile}.bak`
  fs.copyFileSync(historyFile, backupFile)

  const tmpFile = `${historyFile}.tmp`
  // Shell history is usually private (e.g. 0600); carry the permission bits
  // over so the replacement file is not created with wider default access.
  const mode = fs.statSync(historyFile).mode & 0o777
  try {
    fs.writeFileSync(tmpFile, out, { mode })
    // `mode` above only applies when the file is created and is subject to the
    // umask, so set it explicitly as well.
    fs.chmodSync(tmpFile, mode)
    fs.renameSync(tmpFile, historyFile)
  } catch (err) {
    // Do not leave a stray temp file behind when writing or renaming fails.
    fs.rmSync(tmpFile, { force: true })
    throw err
  }

  console.log(
    `Duplicates removed and sorted.\nRecords: ${records.length} -> ${outputRecords.length}.\nBackup saved as ${backupFile}`
  )
 }

 /**
  * Parses the command-line flags understood by this script.
  *
  * @param {string[]} argv - Full argument vector; entries 0 and 1 are skipped.
  * @returns {{file: string | null, stdout: boolean}} `file` is the value given
  *   after `--file` (or null), `stdout` is true when `--stdout` was passed.
  * @throws {Error} When `--file` has no value or an unknown argument is seen.
  */
 function parseArgs(argv) {
  const parsed = { file: null, stdout: false }

  let pos = 2
  while (pos < argv.length) {
    const flag = argv[pos]
    switch (flag) {
      case '--stdout':
        parsed.stdout = true
        pos += 1
        break
      case '--file': {
        const value = argv[pos + 1]
        if (!value) throw new Error('缺少 --file 参数值')
        parsed.file = value
        // Skip both the flag and the value it consumed.
        pos += 2
        break
      }
      default:
        throw new Error(`未知参数: ${flag}`)
    }
  }

  return parsed
 }

 /**
  * Splits the text of a zsh history file into individual records.
  *
  * A new record is assumed to start at every line beginning with `: `; lines
  * that do not start that way are kept as part of the preceding record, which
  * is how multi-line commands stay in one piece. This is a heuristic: a
  * continuation line that itself begins with `: ` is treated as a new record.
  *
  * @param {string} content - Full text of the history file.
  * @returns {{records: string[], trailingNewline: boolean}} The non-blank
  *   records in file order (none of them carries the file's final newline) and
  *   whether `content` ended with a newline.
  */
 function splitZshHistoryRecords(content) {
  if (!content) return { records: [], trailingNewline: false }

  const trailingNewline = content.endsWith('\n')
  // The final newline only terminates the last record. Left attached, the last
  // command would be "cmd\n" and never match an earlier "cmd" during
  // de-duplication, and the joined output would gain an extra blank line.
  const body = trailingNewline ? content.slice(0, -1) : content

  // Split on each newline that is immediately followed by ": ". The lookahead
  // keeps the ": " prefix on the record that follows.
  const records = body.split(/\n(?=: )/).filter(rec => rec.trim().length > 0)

  return { records, trailingNewline }
 }

 /**
  * Parses one history record of the form `: <timestamp>:<duration>;<command>`.
  *
  * A record that does not start with `: ` gets that prefix prepended before
  * parsing. The command is everything after the first `;`, newlines included.
  *
  * @param {string} rec - Raw record text.
  * @returns {{timestamp: number, command: string, record: string} | null}
  *   The parsed parts (`record` is the prefixed text), or null when there is no
  *   `;`, the timestamp is not all digits, or the command is blank.
  */
 function parseRecord(rec) {
  let line = rec
  if (!line.startsWith(': ')) line = `: ${line}`

  const sep = line.indexOf(';')
  if (sep < 0) return null

  // Text between the leading ": " and the first ";", e.g. "1767030772:0".
  const header = line.slice(2, sep)
  if (header.length === 0) return null

  // The timestamp is whatever precedes the first ":" (or the whole header).
  const [rawTimestamp] = header.split(':')
  const digits = rawTimestamp.trim()
  if (!/^\d+$/.test(digits)) return null

  const command = line.slice(sep + 1)
  // Records with a blank command (such as ": 1767030772:0;") are rejected.
  if (command.trim() === '') return null

  return { timestamp: Number(digits), command, record: line }
 }

 /**
  * Removes duplicate commands and orders the survivors by timestamp.
  *
  * Records are parsed with `parseRecord`; those it rejects are dropped. Records
  * are grouped by their full command text (newlines included) and, per command,
  * the one with the greatest timestamp is kept (the earliest-seen one on a tie).
  *
  * @param {string[]} records - Raw records in file order.
  * @returns {string[]} Kept record texts sorted by ascending timestamp, then by
  *   original position, then by record text.
  */
 function dedupeAndSort(records) {
  /** @type {Map<string, {timestamp: number, ord: number, record: string}>} */
  const newestByCommand = new Map()

  records.forEach((rec, index) => {
    const parsed = parseRecord(rec)
    if (!parsed) return

    const { timestamp, command, record } = parsed
    const known = newestByCommand.get(command)
    // Only a strictly newer timestamp replaces an entry already stored.
    if (known && !(timestamp > known.timestamp)) return

    // `ord` is the 1-based position of the record in the input.
    newestByCommand.set(command, { timestamp, ord: index + 1, record })
  })

  const ordered = [...newestByCommand.values()]
  ordered.sort((left, right) => {
    if (left.timestamp !== right.timestamp) return left.timestamp - right.timestamp
    // Equal timestamps: keep input order so the output is deterministic.
    if (left.ord !== right.ord) return left.ord - right.ord
    // Last-resort tie-break on the record text.
    if (left.record < right.record) return -1
    if (left.record > right.record) return 1
    return 0
  })

  return ordered.map(item => item.record)
 }
	#!/usr/bin/env node

	/**
	* 去重并按时间戳升序排序 zsh_history（支持多行命令记录）
	*
	* 规则：
	* - 以 command（`;` 后面的所有内容，含换行）作为去重 key
	* - 保留同 command 的最新 timestamp 的那条记录
	* - 最终按 timestamp 升序输出（timestamp 相同按出现顺序、再按整条记录兜底）
	*
	* 默认处理脚本同目录的 `zsh_history`，并覆盖写回；会先备份为 `zsh_history.bak`
	*
	* 用法：
	* node zsh/remove_duplicate_zsh_history.js
	* node zsh/remove_duplicate_zsh_history.js --file /path/to/.zsh_history
	* node zsh/remove_duplicate_zsh_history.js --stdout # 仅输出到 stdout，不写文件
	*/

	import fs from 'node:fs'
	import path from 'node:path'
	import { fileURLToPath } from 'node:url'

	// Run the CLI. `main` is a function declaration, so it is hoisted and can be
	// invoked here before its definition further down the file.
	main()

	/**
	 * CLI entry point: de-duplicates and sorts a zsh history file.
	 *
	 * The target is the `--file` argument when given, otherwise `zsh_history` in
	 * the directory of this script. With `--stdout` the result is only printed.
	 * Otherwise the original is copied to `<file>.bak`, the result is written to
	 * `<file>.tmp` and that temp file is renamed over the original.
	 *
	 * Exits with status 1 when the history file does not exist.
	 */
	function main() {
	  const args = parseArgs(process.argv)
	  const scriptDir = path.dirname(fileURLToPath(import.meta.url))
	  const historyFile = args.file ? path.resolve(args.file) : path.join(scriptDir, 'zsh_history')

	  if (!fs.existsSync(historyFile)) {
	    console.error(`zsh history file not found at ${historyFile}`)
	    process.exit(1)
	  }

	  // zsh can store non-ASCII bytes in its own "metafied" escaping, which is not
	  // valid UTF-8. Decoding as UTF-8 would replace such bytes with U+FFFD and the
	  // rewrite would corrupt them. latin1 maps every byte to exactly one code
	  // unit, so reading and writing with it round-trips the file byte for byte.
	  const original = fs.readFileSync(historyFile, 'latin1')
	  const { records, trailingNewline } = splitZshHistoryRecords(original)
	  const outputRecords = dedupeAndSort(records)

	  const out = Buffer.from(outputRecords.join('\n') + (trailingNewline ? '\n' : ''), 'latin1')

	  if (args.stdout) {
	    process.stdout.write(out)
	    return
	  }

	  const backupFile = `${historyFile}.bak`
	  fs.copyFileSync(historyFile, backupFile)

	  const tmpFile = `${historyFile}.tmp`
	  // Shell history is usually private (e.g. 0600); carry the permission bits
	  // over so the replacement file is not created with wider default access.
	  const mode = fs.statSync(historyFile).mode & 0o777
	  try {
	    fs.writeFileSync(tmpFile, out, { mode })
	    // `mode` above only applies when the file is created and is subject to the
	    // umask, so set it explicitly as well.
	    fs.chmodSync(tmpFile, mode)
	    fs.renameSync(tmpFile, historyFile)
	  } catch (err) {
	    // Do not leave a stray temp file behind when writing or renaming fails.
	    fs.rmSync(tmpFile, { force: true })
	    throw err
	  }

	  console.log(
	    `Duplicates removed and sorted.\nRecords: ${records.length} -> ${outputRecords.length}.\nBackup saved as ${backupFile}`
	  )
	}

	/**
	 * Parses the command-line flags understood by this script.
	 *
	 * @param {string[]} argv - Full argument vector; entries 0 and 1 are skipped.
	 * @returns {{file: string | null, stdout: boolean}} `file` is the value given
	 *   after `--file` (or null), `stdout` is true when `--stdout` was passed.
	 * @throws {Error} When `--file` has no value or an unknown argument is seen.
	 */
	function parseArgs(argv) {
	  const parsed = { file: null, stdout: false }

	  let pos = 2
	  while (pos < argv.length) {
	    const flag = argv[pos]
	    switch (flag) {
	      case '--stdout':
	        parsed.stdout = true
	        pos += 1
	        break
	      case '--file': {
	        const value = argv[pos + 1]
	        if (!value) throw new Error('缺少 --file 参数值')
	        parsed.file = value
	        // Skip both the flag and the value it consumed.
	        pos += 2
	        break
	      }
	      default:
	        throw new Error(`未知参数: ${flag}`)
	    }
	  }

	  return parsed
	}

	/**
	 * Splits the text of a zsh history file into individual records.
	 *
	 * A new record is assumed to start at every line beginning with `: `; lines
	 * that do not start that way are kept as part of the preceding record, which
	 * is how multi-line commands stay in one piece. This is a heuristic: a
	 * continuation line that itself begins with `: ` is treated as a new record.
	 *
	 * @param {string} content - Full text of the history file.
	 * @returns {{records: string[], trailingNewline: boolean}} The non-blank
	 *   records in file order (none of them carries the file's final newline) and
	 *   whether `content` ended with a newline.
	 */
	function splitZshHistoryRecords(content) {
	  if (!content) return { records: [], trailingNewline: false }

	  const trailingNewline = content.endsWith('\n')
	  // The final newline only terminates the last record. Left attached, the last
	  // command would be "cmd\n" and never match an earlier "cmd" during
	  // de-duplication, and the joined output would gain an extra blank line.
	  const body = trailingNewline ? content.slice(0, -1) : content

	  // Split on each newline that is immediately followed by ": ". The lookahead
	  // keeps the ": " prefix on the record that follows.
	  const records = body.split(/\n(?=: )/).filter(rec => rec.trim().length > 0)

	  return { records, trailingNewline }
	}

	/**
	 * Parses one history record of the form `: <timestamp>:<duration>;<command>`.
	 *
	 * A record that does not start with `: ` gets that prefix prepended before
	 * parsing. The command is everything after the first `;`, newlines included.
	 *
	 * @param {string} rec - Raw record text.
	 * @returns {{timestamp: number, command: string, record: string} | null}
	 *   The parsed parts (`record` is the prefixed text), or null when there is no
	 *   `;`, the timestamp is not all digits, or the command is blank.
	 */
	function parseRecord(rec) {
	  let line = rec
	  if (!line.startsWith(': ')) line = `: ${line}`

	  const sep = line.indexOf(';')
	  if (sep < 0) return null

	  // Text between the leading ": " and the first ";", e.g. "1767030772:0".
	  const header = line.slice(2, sep)
	  if (header.length === 0) return null

	  // The timestamp is whatever precedes the first ":" (or the whole header).
	  const [rawTimestamp] = header.split(':')
	  const digits = rawTimestamp.trim()
	  if (!/^\d+$/.test(digits)) return null

	  const command = line.slice(sep + 1)
	  // Records with a blank command (such as ": 1767030772:0;") are rejected.
	  if (command.trim() === '') return null

	  return { timestamp: Number(digits), command, record: line }
	}

	/**
	 * Removes duplicate commands and orders the survivors by timestamp.
	 *
	 * Records are parsed with `parseRecord`; those it rejects are dropped. Records
	 * are grouped by their full command text (newlines included) and, per command,
	 * the one with the greatest timestamp is kept (the earliest-seen one on a tie).
	 *
	 * @param {string[]} records - Raw records in file order.
	 * @returns {string[]} Kept record texts sorted by ascending timestamp, then by
	 *   original position, then by record text.
	 */
	function dedupeAndSort(records) {
	  /** @type {Map<string, {timestamp: number, ord: number, record: string}>} */
	  const seen = new Map()

	  let ord = 0
	  for (const rec of records) {
	    // `ord` is the 1-based position of the record in the input.
	    ord++
	    const parsed = parseRecord(rec)
	    if (!parsed) continue

	    // The whole command, newlines included, is the de-duplication key, so a
	    // multi-line command counts as a single command.
	    const prev = seen.get(parsed.command)
	    // Fixed: the operator here was the escaped text `\|\|`, which is a syntax
	    // error; it must be the logical OR `||`.
	    if (!prev || parsed.timestamp > prev.timestamp) {
	      seen.set(parsed.command, { timestamp: parsed.timestamp, ord, record: parsed.record })
	    }
	  }

	  const items = Array.from(seen.values())
	  items.sort((a, b) => {
	    // Primary key: ascending timestamp.
	    if (a.timestamp !== b.timestamp) return a.timestamp - b.timestamp
	    // Equal timestamps: keep input order so the output is deterministic.
	    if (a.ord !== b.ord) return a.ord - b.ord
	    // Last-resort tie-break on the record text.
	    return a.record < b.record ? -1 : a.record > b.record ? 1 : 0
	  })

	  return items.map(x => x.record)
	}
No results found