Created
May 14, 2026 16:20
-
-
Save leek/d0a4e8a9ae26ffa544b660af37b89fa7 to your computer and use it in GitHub Desktop.
Remove dead bookmarks (404 / DNS fail / timeout) from a Chrome profile
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# chrome-deadlink-purge.sh — remove dead bookmarks (404 / DNS fail / timeout)
# from a Chrome profile.
#
# Usage:
#   chrome-deadlink-purge.sh [PROFILE_DIR]
#     PROFILE_DIR defaults to:
#       ~/Library/Application Support/Google/Chrome/Default   (macOS)
#
# Environment (all optional):
#   PARALLEL         number of concurrent probes              (default 30)
#   CONNECT_TIMEOUT  curl --connect-timeout value, seconds    (default 8)
#   MAX_TIME         curl --max-time value per URL, seconds   (default 15)
#
# Requirements: bash, jq, curl, xargs, pgrep.
# Quit Chrome before running — Chrome rewrites the Bookmarks file on exit and
# will clobber your edits otherwise.
#
# What it does:
#   1. Backs up the Bookmarks file (into a fresh temporary work directory).
#   2. Extracts every URL.
#   3. Probes each URL in parallel (30-way by default) with a browser-like
#      User-Agent.
#   4. Marks dead = HTTP 404 OR DNS fail (curl exit 6) OR timeout (curl exit 28).
#   5. Rewrites Bookmarks with dead URLs removed and the checksum field dropped
#      (Chrome recomputes it on next launch).
#   6. Deletes Bookmarks.bak so Chrome doesn't auto-restore the old file.
set -euo pipefail

# --- configuration -----------------------------------------------------------
PROFILE_DIR="${1:-$HOME/Library/Application Support/Google/Chrome/Default}"
BOOKMARKS="$PROFILE_DIR/Bookmarks"
PARALLEL="${PARALLEL:-30}"
CONNECT_TIMEOUT="${CONNECT_TIMEOUT:-8}"
MAX_TIME="${MAX_TIME:-15}"

# --- preflight ---------------------------------------------------------------
# All checks run before the work directory is created, so an aborted run does
# not leave an empty temp directory behind.
[[ -f "$BOOKMARKS" ]] || { echo "no Bookmarks file at: $BOOKMARKS" >&2; exit 1; }

# Every external tool the script relies on must be present; a missing curl
# would otherwise only show up later as a wall of per-URL error statuses.
for tool in jq curl xargs; do
  command -v "$tool" >/dev/null || { echo "$tool required" >&2; exit 1; }
done

# PARALLEL is handed to `xargs -P`, which needs a plain integer.
[[ "$PARALLEL" =~ ^[0-9]+$ ]] || {
  echo "PARALLEL must be a non-negative integer, got: $PARALLEL" >&2
  exit 1
}

if pgrep -x "Google Chrome" >/dev/null 2>&1; then
  echo "Chrome is running. Quit it first (File > Quit, or Cmd+Q). Aborting." >&2
  exit 1
fi

# --- work directory and backup -----------------------------------------------
WORK="$(mktemp -d -t chrome-deadlink-XXXXXX)"
echo "==> workdir: $WORK"
echo "==> profile: $PROFILE_DIR"

# Timestamped copy of the untouched Bookmarks file, kept with the other
# artifacts in the work directory.
STAMP="$(date +%Y%m%d-%H%M%S)"
BACKUP="$WORK/Bookmarks.backup.$STAMP"
cp "$BOOKMARKS" "$BACKUP"
echo "==> backup: $BACKUP"
# Generate the per-URL probe helper. The heredoc delimiter is quoted, so
# nothing below is expanded here; the helper reads MAX_TIME and CONNECT_TIMEOUT
# from its own environment at run time.
cat > "$WORK/probe.sh" <<'PROBE'
#!/usr/bin/env bash
# Probe one URL. Output: STATUS<TAB>URL on stdout.
#   STATUS is one of:
#     SKIP         URL is not http:// or https:// (never probed)
#     <http code>  curl succeeded; final status code after following redirects
#     DNS          curl exit 6
#     CONNREFUSED  curl exit 7
#     TIMEOUT      curl exit 28
#     ERR<n>       any other curl exit status n
# curl's own error text goes to stderr, so the caller can collect it in a log.
url="$1"
case "$url" in
  http://*|https://*) ;;
  *) printf 'SKIP\t%s\n' "$url"; exit 0 ;;
esac

# -sS: no progress meter, but still print errors. stderr is deliberately NOT
# discarded here; silencing it would cancel -S and leave the caller's error
# log empty.
code=$(curl -sS -o /dev/null -L \
  --max-time "${MAX_TIME:-15}" --connect-timeout "${CONNECT_TIMEOUT:-8}" \
  -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" \
  -w "%{http_code}" "$url")
rc=$?

case $rc in
  0)  printf '%s\t%s\n' "$code" "$url" ;;
  6)  printf 'DNS\t%s\n' "$url" ;;
  28) printf 'TIMEOUT\t%s\n' "$url" ;;
  7)  printf 'CONNREFUSED\t%s\n' "$url" ;;
  *)  printf 'ERR%s\t%s\n' "$rc" "$url" ;;
esac
PROBE
chmod +x "$WORK/probe.sh"
# Collect every distinct bookmark URL, one per line.
jq -r '[.. | objects | select(.type=="url") | .url] | unique | .[]' "$BOOKMARKS" > "$WORK/urls.txt"
TOTAL=$(wc -l < "$WORK/urls.txt" | tr -d ' ')
echo "==> probing $TOTAL unique URLs ($PARALLEL parallel) ..."

# The probe helper reads these from its environment.
export MAX_TIME CONNECT_TIMEOUT

# Start from empty result files so later steps work even if no probe runs.
: > "$WORK/results.tsv"
: > "$WORK/probe.err"

# Skip xargs entirely for an empty list: GNU xargs would otherwise run the
# helper once with no argument.
if [[ -s "$WORK/urls.txt" ]]; then
  # -0 with NUL-separated input passes each URL as exactly one argument, and
  # -n1 appends it to the command line. No -I replacement string is used, so
  # the BSD-only -S buffer option is unnecessary and the same call works with
  # both BSD and GNU xargs.
  probe_rc=0
  tr '\n' '\0' < "$WORK/urls.txt" \
    | xargs -0 -n1 -P "$PARALLEL" "$WORK/probe.sh" \
      > "$WORK/results.tsv" 2> "$WORK/probe.err" \
    || probe_rc=$?

  # Best effort: URLs without a result line are simply kept, so a partial run
  # is safe, but say so instead of hiding the failure.
  if (( probe_rc != 0 )); then
    echo "warning: probe run exited with status $probe_rc; results may be incomplete (see $WORK/probe.err)" >&2
  fi
fi

echo "==> status counts:"
awk -F'\t' '{print $1}' "$WORK/results.tsv" | sort | uniq -c | sort -rn | sed 's/^/ /'

# Dead = HTTP 404, DNS failure, or timeout. Every other status is kept.
awk -F'\t' '$1=="404" || $1=="DNS" || $1=="TIMEOUT" {print $2}' "$WORK/results.tsv" > "$WORK/dead.txt"
DEAD=$(wc -l < "$WORK/dead.txt" | tr -d ' ')
echo "==> dead URLs (404 + DNS + TIMEOUT): $DEAD"

if [[ "$DEAD" -eq 0 ]]; then
  echo "nothing to remove."
  exit 0
fi
# Turn the dead-URL list (one URL per line) into a JSON array of strings.
jq -R . "$WORK/dead.txt" | jq -s . > "$WORK/dead.json"

# Build a lookup object {url: true} from that array, then walk the whole
# document: in every object that has a "children" array, drop children whose
# type is "url" and whose url is in the lookup. Children of any other type are
# kept. Finally remove the top-level checksum field.
jq --slurpfile dead "$WORK/dead.json" '
  ($dead[0] | map({(.): true}) | add) as $deadset
  | walk(
      if (type == "object") and (.children | type == "array")
      then .children |= map(select((.type != "url") or (($deadset[.url] // false) | not)))
      else .
      end
    )
  | del(.checksum)
' "$BOOKMARKS" > "$WORK/Bookmarks.cleaned"

# Count url-type entries before and after (duplicates included).
BEFORE=$(jq '[.. | objects | select(.type=="url") | .url] | length' "$BOOKMARKS")
AFTER=$(jq '[.. | objects | select(.type=="url") | .url] | length' "$WORK/Bookmarks.cleaned")
echo "==> bookmarks: $BEFORE -> $AFTER (removed $((BEFORE-AFTER)))"

# Probing can take a while; re-check that Chrome has not been started again.
if pgrep -x "Google Chrome" >/dev/null 2>&1; then
  echo "Chrome restarted mid-run. Aborting before write." >&2
  exit 1
fi

# Install atomically: stage the cleaned file next to the target, then rename
# it into place. Copying straight over the live file could leave it truncated
# if the copy were interrupted; a rename within one directory cannot.
STAGED="$BOOKMARKS.new.$$"
if ! { cp "$WORK/Bookmarks.cleaned" "$STAGED" \
       && chmod 600 "$STAGED" \
       && mv -f "$STAGED" "$BOOKMARKS"; }; then
  rm -f "$STAGED"
  echo "failed to install cleaned Bookmarks; original left untouched." >&2
  exit 1
fi

# Remove Chrome's own backup only after the new file is in place, so Chrome
# doesn't auto-restore the old bookmarks.
rm -f "$PROFILE_DIR/Bookmarks.bak"
echo "==> deployed. Reopen Chrome to verify."
echo "==> artifacts kept in: $WORK"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment