Last active
March 17, 2026 17:05
-
-
Save cleanhands/c245de814209de4db0d7e3884ee76e0d to your computer and use it in GitHub Desktop.
Download from tuckercarlson.com using browserless and yt-dlp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# tucker-dl.sh - Download Tucker Carlson videos using browserless + yt-dlp

# Strict mode: stop on errors, on unset variables and on failed pipeline
# stages; -E lets the ERR trap below also fire inside functions and subshells.
set -Eeuo pipefail
# Best effort only: a failure to set these options is deliberately ignored.
shopt -s inherit_errexit nullglob 2>/dev/null || true
trap 'echo "❌ ERROR: Script failed at line $LINENO (exit code $?)" >&2' ERR

# Refuse to start unless every external tool in this list is on PATH.
for cmd in curl jq yt-dlp; do
  if ! command -v "$cmd" >/dev/null; then
    echo "❌ ERROR: '$cmd' is required." >&2
    exit 1
  fi
done
# ==================== CONFIG (ENV > config file > defaults) ====================
#######################################
# Resolve BROWSERLESS_ORIGIN and BROWSERLESS_TOKEN.
# Precedence: a non-empty value already present in the variable (normally
# from the environment) wins, then the config file, then the built-in default.
# Config lines have the form KEY=VALUE with optional leading whitespace; the
# value is taken verbatim (no quote stripping). When a key is repeated, the
# last occurrence in the file is used.
# Globals:   BROWSERLESS_ORIGIN, BROWSERLESS_TOKEN (read and written)
# Arguments: $1 - path to the config file (may be missing or unreadable)
# Returns:   0
#######################################
load_browserless_config() {
  local file="$1" line
  local cfg_origin="" cfg_token=""

  if [[ -r "$file" ]]; then
    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      if [[ $line =~ ^[[:space:]]*(BROWSERLESS_(TOKEN|ORIGIN))=(.*)$ ]]; then
        case "${BASH_REMATCH[2]}" in
          TOKEN) cfg_token="${BASH_REMATCH[3]}" ;;
          ORIGIN) cfg_origin="${BASH_REMATCH[3]}" ;;
        esac
      fi
    done < "$file"
  fi

  # File values are only fallbacks, so they can never clobber the environment.
  BROWSERLESS_ORIGIN="${BROWSERLESS_ORIGIN:-${cfg_origin:-http://localhost:3000}}"
  BROWSERLESS_TOKEN="${BROWSERLESS_TOKEN:-$cfg_token}"
}

config_file="${XDG_CONFIG_HOME:-$HOME/.config}/tuckerdl.conf"
load_browserless_config "$config_file"
# ==================== CLI PARSING ====================
# Flags assign directly to BROWSERLESS_TOKEN / BROWSERLESS_ORIGIN, replacing
# whatever those variables held before. Exactly one positional URL is accepted.
url=""
save_json=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    -t|--token)
      # Fail with a readable message instead of bash's "parameter null or not set".
      if [[ -z "${2:-}" ]]; then
        echo "❌ Option $1 requires a value" >&2
        exit 1
      fi
      BROWSERLESS_TOKEN="$2"
      shift
      ;;
    -o|--origin)
      if [[ -z "${2:-}" ]]; then
        echo "❌ Option $1 requires a value" >&2
        exit 1
      fi
      BROWSERLESS_ORIGIN="$2"
      shift
      ;;
    -j|--json)
      save_json=true
      ;;
    -h|--help)
      cat <<EOF
Usage: $(basename "$0") [OPTIONS] <URL>
Options:
-t, --token TOKEN Browserless token
-o, --origin ORIGIN Browserless base URL (default: http://localhost:3000)
-j, --json Also save pretty-printed JSON-LD (+ sourceUrl) as <title>.json
-h, --help Show this help
-v, --verbose Enable debug output
EOF
      exit 0
      ;;
    -v|--verbose)
      # NOTE: xtrace prints every expanded command from here on, so a token
      # passed after this flag will show up in the trace output.
      set -x
      ;;
    -*)
      echo "❌ Unknown option $1" >&2
      exit 1
      ;;
    *)
      if [[ -n "$url" ]]; then
        echo "❌ Only one URL allowed" >&2
        exit 1
      fi
      url="$1"
      ;;
  esac
  shift
done

if [[ -z "$url" ]]; then
  echo "❌ URL is required" >&2
  exit 1
fi
# ==================== STRICT URL VALIDATION ====================
# Anything that does not start with the exact https://tuckercarlson.com/
# prefix is rejected before any request is made.
case "$url" in
  https://tuckercarlson.com/*)
    ;;
  *)
    echo "❌ ERROR: Only https://tuckercarlson.com/ URLs are allowed" >&2
    echo " Received: $url" >&2
    exit 1
    ;;
esac
# ==================== BROWSERLESS CALL ====================
# POST the page URL to the browserless scrape endpoint and ask for the first
# <script type="application/ld+json"> element; the token, when set, is sent
# as a query parameter.
scrape_url="${BROWSERLESS_ORIGIN}/chromium/scrape?stealth=true"
if [[ -n "$BROWSERLESS_TOKEN" ]]; then
  scrape_url+="&token=${BROWSERLESS_TOKEN}"
fi
payload=$(jq -n --arg url "$url" '{url: $url, elements: [{selector: "script[type=\"application/ld+json\"]:first-of-type"}]}')

echo "🔄 Fetching metadata from browserless..."
tmp=$(mktemp)
# The body goes to the temp file and the status code to stdout, so the two can
# be inspected separately. -S keeps curl's own error text visible although -s
# hides the progress meter.
if ! http_code=$(curl -sS -o "$tmp" -w "%{http_code}" -X POST "$scrape_url" \
  -H "Content-Type: application/json" -d "$payload" --max-time 30); then
  rm -f -- "$tmp"
  echo "❌ ERROR: Request to browserless failed" >&2
  exit 1
fi
response=$(<"$tmp")
rm -f -- "$tmp"

if [[ "$http_code" != 200 ]]; then
  echo "❌ Browserless error $http_code:" >&2
  # Pretty-print the error body when it is JSON, otherwise show it raw.
  jq . <<<"$response" >&2 2>/dev/null || echo "$response" >&2
  exit 1
fi

# jq -e exits non-zero when the filter yields nothing (missing/null text) or
# when the response is not valid JSON.
if ! jq -e '.data[0].results[0].text // empty' <<<"$response" >/dev/null 2>&1; then
  echo "❌ No JSON-LD found" >&2
  exit 1
fi
# ==================== EXTRACT & CLEAN ====================
# The scraped element text is parsed as JSON in its own right: .contentUrl is
# the stream to download and .name is the title used for output file names.
video_json=$(jq -r '.data[0].results[0].text' <<<"$response")
content_url=$(jq -r '.contentUrl // empty' <<<"$video_json")
title=$(jq -r '.name // empty' <<<"$video_json")
if [[ -z "$content_url" || -z "$title" ]]; then
  # Diagnostics belong on stderr, like the script's other error messages.
  echo "❌ Missing video URL or title" >&2
  exit 1
fi
#######################################
# Normalise a scraped title: decode common HTML entities, turn typographic
# quotes into plain ASCII quotes, and trim surrounding whitespace.
# NOTE(review): the entity names (&quot; &#39; &apos; &amp;) are reconstructed;
# the listing this was taken from showed them already decoded - confirm
# against the real file.
# Arguments: $1 - raw title
# Outputs:   cleaned title on stdout, without a trailing newline
#######################################
clean_title() {
  local t="$1"
  # Replacement characters live in variables so no backslash escaping is
  # needed inside the double-quoted expansions below.
  local sq="'" dq='"'
  local decoded="" rest

  t="${t//&quot;/$dq}"
  t="${t//&#39;/$sq}"
  t="${t//&apos;/$sq}"
  t="${t//’/$sq}"
  t="${t//‘/$sq}"
  t="${t//“/$dq}"
  t="${t//”/$dq}"

  # Decode &amp; last and in a single left-to-right pass, so "&amp;quot;"
  # becomes the literal text "&quot;" rather than a quote. Done by splitting
  # instead of ${t//...} because a bare "&" in a replacement string can stand
  # for the matched text in newer bash versions.
  rest="$t"
  while [[ "$rest" == *"&amp;"* ]]; do
    decoded+="${rest%%&amp;*}&"
    rest="${rest#*&amp;}"
  done
  t="${decoded}${rest}"

  # Trim: the inner expansion isolates the leading (resp. trailing) run of
  # whitespace, the outer one strips exactly that run.
  t="${t#"${t%%[![:space:]]*}"}"
  t="${t%"${t##*[![:space:]]}"}"
  printf '%s' "$t"
}
cleaned_title=$(clean_title "$title")
# The title comes from the scraped page, so make it safe to use as a single
# file name: "/" would otherwise be read as a directory separator.
file_stem="${cleaned_title//\//_}"
# yt-dlp parses "%" in the -o argument as the start of a template field;
# doubling it yields a literal percent sign.
outtmpl_stem="${file_stem//[%]/%%}"

# ==================== SAVE JSON (if requested) ====================
if [[ "$save_json" == true ]]; then
  # Store the JSON-LD plus the page URL it was scraped from.
  jq --arg sourceUrl "$url" '. + {sourceUrl: $sourceUrl}' <<<"$video_json" \
    > "${file_stem}.json"
  echo "📄 Saved JSON metadata: ${file_stem}.json"
fi

echo "📥 Downloading: $cleaned_title"
echo " Stream: $content_url"

# ==================== DOWNLOAD ====================
# "--" ends option parsing so the remote-supplied stream URL can never be
# mistaken for a yt-dlp flag.
yt-dlp \
  --ignore-config \
  --no-warnings \
  --restrict-filenames \
  --merge-output-format mp4 \
  --no-mtime \
  -o "${outtmpl_stem}.%(ext)s" \
  -- "$content_url"
echo "✅ Done: ${file_stem}.mp4"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment