cleanhands · March 17, 2026 17:05
diff --git a/tucker.sh b/tucker.sh
 #!/usr/bin/env bash
 # tucker-dl.sh - Download Tucker Carlson videos using browserless + yt-dlp

 # Strict mode, spelled out: exit on error, let the ERR trap reach functions
 # and subshells, reject unset variables, fail a pipeline on any failing stage.
 set -o errexit -o errtrace -o nounset -o pipefail
 # Best effort: the error output is hidden and a failure here is ignored.
 shopt -s inherit_errexit nullglob 2>/dev/null || true
 trap 'echo "❌ ERROR: Script failed at line $LINENO (exit code $?)" >&2' ERR

 # Stop at the first external tool that cannot be found.
 for required_tool in curl jq yt-dlp; do
     if ! command -v "$required_tool" >/dev/null; then
         echo "❌ ERROR: '$required_tool' is required." >&2
         exit 1
     fi
 done

 # ==================== CONFIG (ENV > config file > defaults) ====================
 #######################################
 # Resolve the browserless settings with the precedence named in the banner:
 # a non-empty environment value wins, then the config file, then the default.
 # Globals:   BROWSERLESS_ORIGIN, BROWSERLESS_TOKEN (read and written)
 # Arguments: $1 - path of the config file; a missing/unreadable file is fine
 # Returns:   0
 #######################################
 load_browserless_config() {
     local conf_path="$1" line
     local cfg_origin="" cfg_token=""
     local setting_re='^[[:space:]]*BROWSERLESS_(TOKEN|ORIGIN)=(.*)$'

     if [[ -r "$conf_path" ]]; then
         # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
         while IFS= read -r line || [[ -n "$line" ]]; do
             if [[ $line =~ $setting_re ]]; then
                 # When a key is repeated, the last occurrence in the file wins.
                 case "${BASH_REMATCH[1]}" in
                     TOKEN)  cfg_token="${BASH_REMATCH[2]}" ;;
                     ORIGIN) cfg_origin="${BASH_REMATCH[2]}" ;;
                 esac
             fi
         done < "$conf_path"
     fi

     # File values are only used where the environment left a gap.
     BROWSERLESS_ORIGIN="${BROWSERLESS_ORIGIN:-${cfg_origin:-http://localhost:3000}}"
     BROWSERLESS_TOKEN="${BROWSERLESS_TOKEN:-$cfg_token}"
 }

 config_file="${XDG_CONFIG_HOME:-$HOME/.config}/tuckerdl.conf"
 load_browserless_config "$config_file"

 # ==================== CLI PARSING ====================
 # Print the help text on stdout. printf is used instead of a here-document
 # so the text cannot be broken by an indented terminator line.
 usage() {
     printf '%s\n' \
         "Usage: $(basename "$0") [OPTIONS] <URL>" \
         '' \
         'Options:' \
         '  -t, --token TOKEN     Browserless token' \
         '  -o, --origin ORIGIN   Browserless base URL (default: http://localhost:3000)' \
         '  -j, --json            Also save pretty-printed JSON-LD (+ sourceUrl) as <title>.json' \
         '  -h, --help            Show this help' \
         '  -v, --verbose         Enable debug output'
 }

 # Abort with a readable message when option $1 is not followed by a
 # non-empty value ($2). Call as: require_value "$@".
 require_value() {
     if [[ -z "${2:-}" ]]; then
         echo "❌ Option $1 requires a value" >&2
         exit 1
     fi
 }

 url=""
 save_json=false
 while [[ $# -gt 0 ]]; do
     case "$1" in
         -t|--token)
             require_value "$@"
             BROWSERLESS_TOKEN="$2"
             shift   # drop the option; its value is dropped by the shift below
             ;;
         -o|--origin)
             require_value "$@"
             BROWSERLESS_ORIGIN="$2"
             shift
             ;;
         -j|--json)
             save_json=true
             ;;
         -h|--help)
             usage
             exit 0
             ;;
         -v|--verbose)
             set -x
             ;;
         -*)
             echo "❌ Unknown option $1" >&2
             exit 1
             ;;
         *)
             # Exactly one positional argument (the page URL) is accepted.
             if [[ -n "$url" ]]; then
                 echo "❌ Only one URL allowed" >&2
                 exit 1
             fi
             url="$1"
             ;;
     esac
     shift
 done

 if [[ -z "$url" ]]; then
     echo "❌ URL is required" >&2
     exit 1
 fi

 # ==================== STRICT URL VALIDATION ====================
 # Accept only URLs that begin with the site's https origin; reject the rest.
 case "$url" in
     https://tuckercarlson.com/*)
         ;;
     *)
         echo "❌ ERROR: Only https://tuckercarlson.com/ URLs are allowed" >&2
         echo "   Received: $url" >&2
         exit 1
         ;;
 esac

 # ==================== BROWSERLESS CALL ====================
 # POST the page URL to the browserless scrape endpoint and leave the raw
 # response body in $response. Every diagnostic goes to stderr.
 # ${BROWSERLESS_ORIGIN%/} tolerates an origin given with a trailing slash.
 scrape_url="${BROWSERLESS_ORIGIN%/}/chromium/scrape?stealth=true"
 if [[ -n "$BROWSERLESS_TOKEN" ]]; then
     scrape_url+="&token=${BROWSERLESS_TOKEN}"
 fi

 payload=$(jq -n --arg url "$url" '{url: $url, elements: [{selector: "script[type=\"application/ld+json\"]:first-of-type"}]}')

 echo "🔄 Fetching metadata from browserless..."

 # curl appends the status code on its own last line, so body and status are
 # captured together and no temp file is needed. -S keeps curl's own error
 # message visible even though -s silences the progress meter.
 newline=$'\n'
 raw_reply=$(curl -sS -X POST "$scrape_url" \
     -H "Content-Type: application/json" -d "$payload" --max-time 30 \
     -w '\n%{http_code}') || { echo "❌ Request to browserless failed" >&2; exit 1; }
 http_code="${raw_reply##*"$newline"}"
 response="${raw_reply%"$newline"*}"

 if [[ "$http_code" != 200 ]]; then
     echo "❌ Browserless error $http_code:" >&2
     # Pretty-print the body when it is JSON, otherwise show it verbatim.
     jq . <<<"$response" >&2 2>/dev/null || echo "$response" >&2
     exit 1
 fi

 if [[ -z "$(jq -e '.data[0].results[0].text // empty' <<<"$response" 2>/dev/null)" ]]; then
     echo "❌ No JSON-LD found" >&2
     exit 1
 fi

 # ==================== EXTRACT & CLEAN ====================
 # The scraped element text is itself a JSON document (the page's JSON-LD).
 video_json=$(jq -r '.data[0].results[0].text' <<<"$response")
 # `objects` lets only a JSON object through, so JSON-LD of another shape
 # (e.g. a top-level array) reaches the message below instead of a jq crash.
 content_url=$(jq -r 'objects | .contentUrl // empty' <<<"$video_json")
 title=$(jq -r 'objects | .name // empty' <<<"$video_json")

 if [[ -z "$content_url" || -z "$title" ]]; then
     echo "❌ Missing video URL or title" >&2
     exit 1
 fi

 #######################################
 # Normalise a scraped title: decode the &amp; / &quot; / &apos; entities,
 # turn curly quotes into plain ASCII quotes, and trim surrounding whitespace.
 # Arguments: $1 - raw title
 # Outputs:   the cleaned title on stdout, without a trailing newline
 #######################################
 clean_title() {
     local t="$1"
     # Replacements are kept in quoted variables so '&' is always taken
     # literally, whatever the shell's pattern-substitution settings are.
     local amp='&' dquote='"' squote="'"
     t=${t//'&amp;'/"$amp"}
     t=${t//'&quot;'/"$dquote"}
     t=${t//'&apos;'/"$squote"}
     t=${t//’/"$squote"}
     t=${t//‘/"$squote"}
     t=${t//“/"$dquote"}
     t=${t//”/"$dquote"}
     # Trim: the inner expansion isolates the leading (then trailing) run of
     # whitespace, the outer one strips exactly that run. The extra space the
     # old patterns carried meant they could never match.
     t="${t#"${t%%[![:space:]]*}"}"
     t="${t%"${t##*[![:space:]]}"}"
     printf '%s' "$t"
 }

 cleaned_title=$(clean_title "$title")

 # The title comes from the scraped page, so it is made safe before it is
 # used as a file name: '/' would otherwise be taken as a path separator.
 file_stem=${cleaned_title//\//_}
 # yt-dlp treats '%' as the start of a template field; '%%' is a literal '%'.
 percent='%'
 output_template=${file_stem//"$percent"/"$percent$percent"}
 output_template+='.%(ext)s'

 # ==================== SAVE JSON (if requested) ====================
 if [[ "$save_json" == true ]]; then
     # Store the JSON-LD pretty-printed, with the page URL added as sourceUrl.
     jq --arg sourceUrl "$url" '. + {sourceUrl: $sourceUrl}' \
         <<<"$video_json" > "${file_stem}.json"
     echo "📄 Saved JSON metadata: ${file_stem}.json"
 fi

 echo "📥 Downloading: $cleaned_title"
 echo "   Stream: $content_url"

 # ==================== DOWNLOAD ====================
 # '--' ends option parsing, so a stream URL that starts with '-' cannot be
 # read as a yt-dlp option.
 yt-dlp \
     --ignore-config \
     --no-warnings \
     --restrict-filenames \
     --merge-output-format mp4 \
     --no-mtime \
     -o "$output_template" \
     -- "$content_url"

 echo "✅ Done: ${file_stem}.mp4"
	#!/usr/bin/env bash
	# tucker-dl.sh - Download Tucker Carlson videos using browserless + yt-dlp

	# Strict mode: exit on error, let the ERR trap reach functions and
	# subshells, reject unset variables, fail a pipeline on any failing stage.
	set -Eeuo pipefail
	# Best effort: the error output is hidden and a failure here is ignored.
	shopt -s inherit_errexit nullglob 2>/dev/null || true
	trap 'echo "❌ ERROR: Script failed at line $LINENO (exit code $?)" >&2' ERR

	for cmd in curl jq yt-dlp; do
	    command -v "$cmd" >/dev/null || { echo "❌ ERROR: '$cmd' is required." >&2; exit 1; }
	done

	# ==================== CONFIG (ENV > config file > defaults) ====================
	#######################################
	# Resolve the browserless settings with the precedence named in the banner:
	# a non-empty environment value wins, then the config file, then the default.
	# Globals:   BROWSERLESS_ORIGIN, BROWSERLESS_TOKEN (read and written)
	# Arguments: $1 - path of the config file; a missing/unreadable file is fine
	# Returns:   0
	#######################################
	load_browserless_config() {
	    local conf_path="$1" line
	    local cfg_origin="" cfg_token=""
	    local setting_re='^[[:space:]]*BROWSERLESS_(TOKEN|ORIGIN)=(.*)$'

	    if [[ -r "$conf_path" ]]; then
	        # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
	        while IFS= read -r line || [[ -n "$line" ]]; do
	            if [[ $line =~ $setting_re ]]; then
	                # When a key is repeated, the last occurrence in the file wins.
	                case "${BASH_REMATCH[1]}" in
	                    TOKEN)  cfg_token="${BASH_REMATCH[2]}" ;;
	                    ORIGIN) cfg_origin="${BASH_REMATCH[2]}" ;;
	                esac
	            fi
	        done < "$conf_path"
	    fi

	    # File values are only used where the environment left a gap.
	    BROWSERLESS_ORIGIN="${BROWSERLESS_ORIGIN:-${cfg_origin:-http://localhost:3000}}"
	    BROWSERLESS_TOKEN="${BROWSERLESS_TOKEN:-$cfg_token}"
	}

	config_file="${XDG_CONFIG_HOME:-$HOME/.config}/tuckerdl.conf"
	load_browserless_config "$config_file"

	# ==================== CLI PARSING ====================
	# Print the help text on stdout. printf is used instead of a here-document
	# so the text cannot be broken by an indented terminator line.
	usage() {
	    printf '%s\n' \
	        "Usage: $(basename "$0") [OPTIONS] <URL>" \
	        '' \
	        'Options:' \
	        '  -t, --token TOKEN     Browserless token' \
	        '  -o, --origin ORIGIN   Browserless base URL (default: http://localhost:3000)' \
	        '  -j, --json            Also save pretty-printed JSON-LD (+ sourceUrl) as <title>.json' \
	        '  -h, --help            Show this help' \
	        '  -v, --verbose         Enable debug output'
	}

	# Abort with a readable message when option $1 is not followed by a
	# non-empty value ($2). Call as: require_value "$@".
	require_value() {
	    if [[ -z "${2:-}" ]]; then
	        echo "❌ Option $1 requires a value" >&2
	        exit 1
	    fi
	}

	url=""
	save_json=false
	while [[ $# -gt 0 ]]; do
	    case "$1" in
	        -t|--token)
	            require_value "$@"
	            BROWSERLESS_TOKEN="$2"
	            shift   # drop the option; its value is dropped by the shift below
	            ;;
	        -o|--origin)
	            require_value "$@"
	            BROWSERLESS_ORIGIN="$2"
	            shift
	            ;;
	        -j|--json)
	            save_json=true
	            ;;
	        -h|--help)
	            usage
	            exit 0
	            ;;
	        -v|--verbose)
	            set -x
	            ;;
	        -*)
	            echo "❌ Unknown option $1" >&2
	            exit 1
	            ;;
	        *)
	            # Exactly one positional argument (the page URL) is accepted.
	            if [[ -n "$url" ]]; then
	                echo "❌ Only one URL allowed" >&2
	                exit 1
	            fi
	            url="$1"
	            ;;
	    esac
	    shift
	done

	if [[ -z "$url" ]]; then
	    echo "❌ URL is required" >&2
	    exit 1
	fi

	# ==================== STRICT URL VALIDATION ====================
	if [[ "$url" != https://tuckercarlson.com/* ]]; then
	    echo "❌ ERROR: Only https://tuckercarlson.com/ URLs are allowed" >&2
	    echo "   Received: $url" >&2
	    exit 1
	fi

	# ==================== BROWSERLESS CALL ====================
	# POST the page URL to the browserless scrape endpoint and leave the raw
	# response body in $response. Every diagnostic goes to stderr.
	# ${BROWSERLESS_ORIGIN%/} tolerates an origin given with a trailing slash.
	scrape_url="${BROWSERLESS_ORIGIN%/}/chromium/scrape?stealth=true"
	if [[ -n "$BROWSERLESS_TOKEN" ]]; then
	    scrape_url+="&token=${BROWSERLESS_TOKEN}"
	fi

	payload=$(jq -n --arg url "$url" '{url: $url, elements: [{selector: "script[type=\"application/ld+json\"]:first-of-type"}]}')

	echo "🔄 Fetching metadata from browserless..."

	# curl appends the status code on its own last line, so body and status are
	# captured together and no temp file is needed. -S keeps curl's own error
	# message visible even though -s silences the progress meter.
	newline=$'\n'
	raw_reply=$(curl -sS -X POST "$scrape_url" \
	    -H "Content-Type: application/json" -d "$payload" --max-time 30 \
	    -w '\n%{http_code}') || { echo "❌ Request to browserless failed" >&2; exit 1; }
	http_code="${raw_reply##*"$newline"}"
	response="${raw_reply%"$newline"*}"

	if [[ "$http_code" != 200 ]]; then
	    echo "❌ Browserless error $http_code:" >&2
	    # Pretty-print the body when it is JSON, otherwise show it verbatim.
	    jq . <<<"$response" >&2 2>/dev/null || echo "$response" >&2
	    exit 1
	fi

	if [[ -z "$(jq -e '.data[0].results[0].text // empty' <<<"$response" 2>/dev/null)" ]]; then
	    echo "❌ No JSON-LD found" >&2
	    exit 1
	fi

	# ==================== EXTRACT & CLEAN ====================
	# The scraped element text is itself a JSON document (the page's JSON-LD).
	video_json=$(jq -r '.data[0].results[0].text' <<<"$response")
	# `objects` lets only a JSON object through, so JSON-LD of another shape
	# (e.g. a top-level array) reaches the message below instead of a jq crash.
	content_url=$(jq -r 'objects | .contentUrl // empty' <<<"$video_json")
	title=$(jq -r 'objects | .name // empty' <<<"$video_json")

	if [[ -z "$content_url" || -z "$title" ]]; then
	    echo "❌ Missing video URL or title" >&2
	    exit 1
	fi

	#######################################
	# Normalise a scraped title: decode the &amp; / &quot; / &apos; entities,
	# turn curly quotes into plain ASCII quotes, and trim surrounding whitespace.
	# Arguments: $1 - raw title
	# Outputs:   the cleaned title on stdout, without a trailing newline
	#######################################
	clean_title() {
	    local t="$1"
	    # Replacements are kept in quoted variables so '&' is always taken
	    # literally, whatever the shell's pattern-substitution settings are.
	    local amp='&' dquote='"' squote="'"
	    t=${t//'&amp;'/"$amp"}
	    t=${t//'&quot;'/"$dquote"}
	    t=${t//'&apos;'/"$squote"}
	    t=${t//’/"$squote"}
	    t=${t//‘/"$squote"}
	    t=${t//“/"$dquote"}
	    t=${t//”/"$dquote"}
	    # Trim: the inner expansion isolates the leading (then trailing) run of
	    # whitespace, the outer one strips exactly that run. The extra space the
	    # old patterns carried meant they could never match.
	    t="${t#"${t%%[![:space:]]*}"}"
	    t="${t%"${t##*[![:space:]]}"}"
	    printf '%s' "$t"
	}

	cleaned_title=$(clean_title "$title")

	# The title comes from the scraped page, so it is made safe before it is
	# used as a file name: '/' would otherwise be taken as a path separator.
	file_stem=${cleaned_title//\//_}
	# yt-dlp treats '%' as the start of a template field; '%%' is a literal '%'.
	percent='%'
	output_template=${file_stem//"$percent"/"$percent$percent"}
	output_template+='.%(ext)s'

	# ==================== SAVE JSON (if requested) ====================
	if [[ "$save_json" == true ]]; then
	    # Store the JSON-LD pretty-printed, with the page URL added as sourceUrl.
	    jq --arg sourceUrl "$url" '. + {sourceUrl: $sourceUrl}' \
	        <<<"$video_json" > "${file_stem}.json"
	    echo "📄 Saved JSON metadata: ${file_stem}.json"
	fi

	echo "📥 Downloading: $cleaned_title"
	echo "   Stream: $content_url"

	# ==================== DOWNLOAD ====================
	# '--' ends option parsing, so a stream URL that starts with '-' cannot be
	# read as a yt-dlp option.
	yt-dlp \
	    --ignore-config \
	    --no-warnings \
	    --restrict-filenames \
	    --merge-output-format mp4 \
	    --no-mtime \
	    -o "$output_template" \
	    -- "$content_url"

	echo "✅ Done: ${file_stem}.mp4"
No results found