pedramamini · April 21, 2026 17:55
diff --git a/youtube_to_obsidian.sh b/youtube_to_obsidian.sh
 #!/bin/bash
 #
 # Example script: walks a YouTube playlist and, for every entry, records a
 # line in a Table of Contents note, creates a per-video markdown note, and
 # appends a summary plus label/rating generated from the video transcript.
 #
 # NOTE: THERE ARE CUSTOM FABRIC PATTERNS IN USE BELOW.
 #     /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_sitrep
 # THIS SCRIPT WILL NOT WORK FOR YOU. IT'S BUT AN EXAMPLE.

 # Do we want to ingest the videos into our vault?
 DOWNLOAD_VIDEOS=false

 # Work from the vault folder that holds the YouTube notes. Every path used
 # below is relative to it, so abort rather than write notes somewhere else.
 cd "/Users/pedram/Pedsidian/Content Farm/YouTube" || {
     echo "[$(date)] ERROR: cannot change into the vault directory." >&2
     exit 1
 }

 # Set the playlist URL
 playlist_url="" # <<<<< ADD YOUR PLAYLIST URL HERE

 # Without a playlist there is nothing to fetch; fail with a clear message.
 if [[ -z "$playlist_url" ]]; then
     echo "[$(date)] ERROR: playlist_url is empty; set it before running." >&2
     exit 1
 fi

 # Specify the Table of Contents file
 toc_file="📇 YouTube Index.md"

 # Initialize TOC file only if it doesn't exist
 if [[ ! -f "$toc_file" ]]; then
     echo "# Table of Contents" > "$toc_file"
     echo "Created Table of Contents file."
 fi

 # Initialize an array to keep track of the video files downloaded in this run
 processed_videos=()

 # Fetch playlist data using yt-dlp and flatten it with jq into one
 # "<id> <title>" line per playlist entry.
 entries=$(/opt/homebrew/bin/yt-dlp -J --flat-playlist "$playlist_url" | jq -r '.entries[] | "\(.id) \(.title)"')

 # An empty result means the fetch or the JSON parsing produced nothing. Stop
 # here: counting the lines of an empty string would report one bogus video.
 if [[ -z "$entries" ]]; then
     echo "[$(date)] ERROR: no playlist entries retrieved for '$playlist_url'." >&2
     exit 1
 fi

 # Count total videos (one entry per line; grep -c prints a bare number)
 total=$(grep -c '' <<< "$entries")
 echo "[$(date)] Total videos in playlist: $total"

 # Counter for progress
 count=0

 # Process each video in the playlist.
 #
 # The entry list is fed to the loop on file descriptor 3 instead of through a
 # pipe. A piped `while` runs in a subshell, so its updates to count and
 # processed_videos would be discarded when the loop ends. Keeping stdin free
 # also stops commands run inside the loop from consuming the remaining lines.
 while IFS=' ' read -r id title <&3; do
     # Skip deleted and private videos
     title_lower=$(printf '%s\n' "$title" | tr '[:upper:]' '[:lower:]')

     if [[ "$title_lower" == "deleted video" || "$title" == "[Private video]" || -z "$id" ]]; then
         echo "[$(date)] Skipping deleted/private video or invalid entry."
         continue
     fi

     # Increment counter
     count=$((count + 1))

     # Clean title to remove any non-alphanumeric characters except spaces,
     # replace multiple spaces with a single space, and trim trailing spaces
     clean_title=$(printf '%s\n' "$title" | sed -e 's/[^a-zA-Z0-9 ]//g' -e 's/  */ /g' -e 's/[[:space:]]*$//')

     # A title with no ASCII letters or digits cleans down to nothing, which
     # would make every such video share ".md" and "- [[]]". Use the id instead.
     if [[ -z "$clean_title" ]]; then
         clean_title="$id"
     fi

     # Construct the video URL
     video_url="https://www.youtube.com/watch?v=$id"

     # Log processing
     echo "[$(date)] Processing ($count/$total): $title"

     markdown_file="${clean_title}.md"

     # Check for the existence of the video file using the cleaned title
     if [ "$DOWNLOAD_VIDEOS" = true ]; then
         video_file="./Videos/${clean_title}.mp4"
         if [[ -f "$video_file" ]]; then
             echo "[$(date)] Video already downloaded: $title"
         else
             echo "[$(date)] Downloading video: $title"
             # Only record the file as processed when the download succeeded
             if /opt/homebrew/bin/yt-dlp --cookies-from-browser chrome -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' -o "$video_file" "$video_url"; then
                 processed_videos+=("$video_file")
             else
                 echo "[$(date)] Download failed: $title" >&2
             fi
         fi
     fi

     # Add the title to the ToC if not already present.
     # NOTE(review): the ToC line is written before the transcript/summary step,
     # so an entry whose summary fails below is skipped on every later run.
     # Confirm that is intended.
     if ! grep -Fq -- "- [[$clean_title]]" "$toc_file"; then
         echo "- [[$clean_title]]" >> "$toc_file"
         echo "[$(date)] Added '$clean_title' to Table of Contents."
     else
         # A ToC entry means this video was handled in an earlier run; move on.
         echo "[$(date)] Entry already exists in ToC, skipping processing for: $title"
         continue
     fi

     # Create markdown file if it doesn't exist
     if [[ ! -f "$markdown_file" ]]; then
         echo "Creating markdown file: $markdown_file"
         echo "[YouTube Link]($video_url)" >> "$markdown_file"
         echo "- [ ] Watched" >> "$markdown_file"
     fi

     # Skip processing if #no-transcript tag exists
     # (-- guards against a file name that begins with a dash)
     if grep -q -- "^#no-transcript" "$markdown_file"; then
         echo "[$(date)] No transcript available, skipping further processing for $title."
         continue
     fi

     # Generate transcript if missing
     if ! grep -q -- "^# Summary" "$markdown_file"; then
         echo "[$(date)] Retrieving or generating transcript for $title..."

         transcript_available=$(ytt "$video_url" 2>/dev/null)
         if [[ -z "$transcript_available" ]] || [[ $transcript_available == *"Transcript not available"* ]] || [[ $transcript_available == *"Failed to generate transcript"* ]]; then
             echo "[$(date)] No transcript available."
             continue
         else
             # Generate summary
             summary=$(printf '%s\n' "$transcript_available" | /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_sitrep)

             # Generate label and rating
             label_rating=$(printf '%s\n' "$transcript_available" | /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_label_and_rate)

             if [ -n "$summary" ] && [ -n "$label_rating" ]; then
                 # printf with %s writes the generated text verbatim; echo -e
                 # would expand backslash sequences that appear inside it.
                 printf '\n# Summary\n%s\n' "$summary" >> "$markdown_file"
                 printf '\n# Label and Rating\n%s\n' "$label_rating" >> "$markdown_file"
                 echo "[$(date)] Transcript successfully processed with summary and label/rating."
             else
                 echo "[$(date)] Failed to generate summary or label/rating."
                 continue
             fi
         fi
     fi

     echo "[$(date)] Processed $count of $total videos."
 done 3<<< "$entries"

 echo "[$(date)] Download complete. Markdown files generated."

 # Report which video files, if any, were recorded in processed_videos
 if (( ${#processed_videos[@]} == 0 )); then
     echo "[$(date)] No new video files were processed in this run."
 else
     echo "[$(date)] Processed new video files in this run:"
     for new_file in "${processed_videos[@]}"; do
         echo "[$(date)] $new_file"
     done
 fi
	#!/bin/bash
	#
	# Example script: walks a YouTube playlist and, for every entry, records a
	# line in a Table of Contents note, creates a per-video markdown note, and
	# appends a summary plus label/rating generated from the video transcript.
	#
	# NOTE: THERE ARE CUSTOM FABRIC PATTERNS IN USE BELOW.
	#     /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_sitrep
	# THIS SCRIPT WILL NOT WORK FOR YOU. IT'S BUT AN EXAMPLE.

	# Do we want to ingest the videos into our vault?
	DOWNLOAD_VIDEOS=false

	# Work from the vault folder that holds the YouTube notes. Every path used
	# below is relative to it, so abort rather than write notes somewhere else.
	cd "/Users/pedram/Pedsidian/Content Farm/YouTube" || {
	    echo "[$(date)] ERROR: cannot change into the vault directory." >&2
	    exit 1
	}

	# Set the playlist URL
	playlist_url="" # <<<<< ADD YOUR PLAYLIST URL HERE

	# Without a playlist there is nothing to fetch; fail with a clear message.
	if [[ -z "$playlist_url" ]]; then
	    echo "[$(date)] ERROR: playlist_url is empty; set it before running." >&2
	    exit 1
	fi

	# Specify the Table of Contents file
	toc_file="📇 YouTube Index.md"

	# Initialize TOC file only if it doesn't exist
	if [[ ! -f "$toc_file" ]]; then
	    echo "# Table of Contents" > "$toc_file"
	    echo "Created Table of Contents file."
	fi

	# Initialize an array to keep track of the video files downloaded in this run
	processed_videos=()

	# Fetch playlist data using yt-dlp and flatten it with jq into one
	# "<id> <title>" line per playlist entry. The pipes must be real `|`
	# operators; an escaped `\|` is passed to the command as a literal argument.
	entries=$(/opt/homebrew/bin/yt-dlp -J --flat-playlist "$playlist_url" | jq -r '.entries[] | "\(.id) \(.title)"')

	# An empty result means the fetch or the JSON parsing produced nothing. Stop
	# here: counting the lines of an empty string would report one bogus video.
	if [[ -z "$entries" ]]; then
	    echo "[$(date)] ERROR: no playlist entries retrieved for '$playlist_url'." >&2
	    exit 1
	fi

	# Count total videos (one entry per line; grep -c prints a bare number)
	total=$(grep -c '' <<< "$entries")
	echo "[$(date)] Total videos in playlist: $total"

	# Counter for progress
	count=0

	# Process each video in the playlist.
	#
	# The entry list is fed to the loop on file descriptor 3 instead of through a
	# pipe. A piped `while` runs in a subshell, so its updates to count and
	# processed_videos would be discarded when the loop ends. Keeping stdin free
	# also stops commands run inside the loop from consuming the remaining lines.
	while IFS=' ' read -r id title <&3; do
	    # Skip deleted and private videos
	    title_lower=$(printf '%s\n' "$title" | tr '[:upper:]' '[:lower:]')

	    if [[ "$title_lower" == "deleted video" || "$title" == "[Private video]" || -z "$id" ]]; then
	        echo "[$(date)] Skipping deleted/private video or invalid entry."
	        continue
	    fi

	    # Increment counter
	    count=$((count + 1))

	    # Clean title to remove any non-alphanumeric characters except spaces,
	    # replace multiple spaces with a single space, and trim trailing spaces
	    clean_title=$(printf '%s\n' "$title" | sed -e 's/[^a-zA-Z0-9 ]//g' -e 's/  */ /g' -e 's/[[:space:]]*$//')

	    # A title with no ASCII letters or digits cleans down to nothing, which
	    # would make every such video share ".md" and "- [[]]". Use the id instead.
	    if [[ -z "$clean_title" ]]; then
	        clean_title="$id"
	    fi

	    # Construct the video URL
	    video_url="https://www.youtube.com/watch?v=$id"

	    # Log processing
	    echo "[$(date)] Processing ($count/$total): $title"

	    markdown_file="${clean_title}.md"

	    # Check for the existence of the video file using the cleaned title
	    if [ "$DOWNLOAD_VIDEOS" = true ]; then
	        video_file="./Videos/${clean_title}.mp4"
	        if [[ -f "$video_file" ]]; then
	            echo "[$(date)] Video already downloaded: $title"
	        else
	            echo "[$(date)] Downloading video: $title"
	            # Only record the file as processed when the download succeeded
	            if /opt/homebrew/bin/yt-dlp --cookies-from-browser chrome -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' -o "$video_file" "$video_url"; then
	                processed_videos+=("$video_file")
	            else
	                echo "[$(date)] Download failed: $title" >&2
	            fi
	        fi
	    fi

	    # Add the title to the ToC if not already present.
	    # NOTE(review): the ToC line is written before the transcript/summary step,
	    # so an entry whose summary fails below is skipped on every later run.
	    # Confirm that is intended.
	    if ! grep -Fq -- "- [[$clean_title]]" "$toc_file"; then
	        echo "- [[$clean_title]]" >> "$toc_file"
	        echo "[$(date)] Added '$clean_title' to Table of Contents."
	    else
	        # A ToC entry means this video was handled in an earlier run; move on.
	        echo "[$(date)] Entry already exists in ToC, skipping processing for: $title"
	        continue
	    fi

	    # Create markdown file if it doesn't exist
	    if [[ ! -f "$markdown_file" ]]; then
	        echo "Creating markdown file: $markdown_file"
	        echo "[YouTube Link]($video_url)" >> "$markdown_file"
	        echo "- [ ] Watched" >> "$markdown_file"
	    fi

	    # Skip processing if #no-transcript tag exists
	    # (-- guards against a file name that begins with a dash)
	    if grep -q -- "^#no-transcript" "$markdown_file"; then
	        echo "[$(date)] No transcript available, skipping further processing for $title."
	        continue
	    fi

	    # Generate transcript if missing
	    if ! grep -q -- "^# Summary" "$markdown_file"; then
	        echo "[$(date)] Retrieving or generating transcript for $title..."

	        transcript_available=$(ytt "$video_url" 2>/dev/null)
	        # Substring matches: the failure text may be embedded in longer output
	        if [[ -z "$transcript_available" ]] || [[ $transcript_available == *"Transcript not available"* ]] || [[ $transcript_available == *"Failed to generate transcript"* ]]; then
	            echo "[$(date)] No transcript available."
	            continue
	        else
	            # Generate summary
	            summary=$(printf '%s\n' "$transcript_available" | /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_sitrep)

	            # Generate label and rating
	            label_rating=$(printf '%s\n' "$transcript_available" | /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_label_and_rate)

	            if [ -n "$summary" ] && [ -n "$label_rating" ]; then
	                # printf with %s writes the generated text verbatim; echo -e
	                # would expand backslash sequences that appear inside it.
	                printf '\n# Summary\n%s\n' "$summary" >> "$markdown_file"
	                printf '\n# Label and Rating\n%s\n' "$label_rating" >> "$markdown_file"
	                echo "[$(date)] Transcript successfully processed with summary and label/rating."
	            else
	                echo "[$(date)] Failed to generate summary or label/rating."
	                continue
	            fi
	        fi
	    fi

	    echo "[$(date)] Processed $count of $total videos."
	done 3<<< "$entries"

	echo "[$(date)] Download complete. Markdown files generated."

	# Report which video files, if any, were recorded in processed_videos
	if (( ${#processed_videos[@]} == 0 )); then
	    echo "[$(date)] No new video files were processed in this run."
	else
	    echo "[$(date)] Processed new video files in this run:"
	    for new_file in "${processed_videos[@]}"; do
	        echo "[$(date)] $new_file"
	    done
	fi
No results found