Skip to content

Instantly share code, notes, and snippets.

@pedramamini
Created April 21, 2026 17:55
Show Gist options
  • Select an option

  • Save pedramamini/f93d368f043fa792809dec16da8c0fb0 to your computer and use it in GitHub Desktop.

Select an option

Save pedramamini/f93d368f043fa792809dec16da8c0fb0 to your computer and use it in GitHub Desktop.
YouTube ingestion to Obsidian with custom Fabric prompts
#!/bin/bash
#
# Ingest a YouTube playlist into an Obsidian vault.
#
# For every entry of the playlist this script:
#   1. optionally downloads the video into ./Videos (see DOWNLOAD_VIDEOS),
#   2. appends a "- [[title]]" line to the Table of Contents note,
#   3. creates a markdown note holding the video link and a "Watched" checkbox,
#   4. fetches a transcript with `ytt` and appends a summary and a
#      label/rating generated by two custom Fabric patterns.
#
# NOTE: THERE ARE CUSTOM FABRIC PATTERNS IN USE BELOW.
# /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_sitrep
# THIS SCRIPT WILL NOT WORK FOR YOU. IT'S BUT AN EXAMPLE.
#
# External tools used: yt-dlp, jq, fabric, and `ytt` (must be resolvable as a
# command from this non-interactive shell).

# Do we want to ingest the videos into our vault?
DOWNLOAD_VIDEOS=false
# Vault directory that receives the notes; the script works from inside it.
VAULT_DIR="/Users/pedram/Pedsidian/Content Farm/YouTube"
# Set the playlist URL
playlist_url="" # <<<<< ADD YOUR PLAYLIST URL HERE
# Specify the Table of Contents file (relative to VAULT_DIR)
toc_file="📇 YouTube Index.md"
# Absolute paths of the external binaries
YT_DLP="/opt/homebrew/bin/yt-dlp"
FABRIC="/Users/pedram/go/bin/fabric"
# Video files downloaded during this run (reported at the end)
processed_videos=()

# Print a message prefixed with the current date, like "[<date>] message".
log() {
  printf '[%s] %s\n' "$(date)" "$*"
}

# Turn a video title into a note/file name: drop everything except ASCII
# letters, digits and spaces, squeeze runs of spaces into one, and strip
# trailing whitespace. Leading spaces are intentionally left untouched.
# Arguments: $1 - raw title
# Outputs:   cleaned title on stdout (may be empty)
sanitize_title() {
  # ' \{1,\}' (one or more spaces) is used on purpose: a pattern such as
  # ' *' also matches the empty string and would insert a space between
  # every character.
  printf '%s\n' "$1" |
    sed -e 's/[^a-zA-Z0-9 ]//g' -e 's/ \{1,\}/ /g' -e 's/[[:space:]]*$//'
}

# Succeed when the title is a placeholder for a deleted or private video.
# The comparison is case-insensitive and accepts the title with or without
# the surrounding square brackets.
# Arguments: $1 - raw title
is_unavailable_title() {
  local lowered
  lowered=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
  case "$lowered" in
    "deleted video" | "[deleted video]" | "private video" | "[private video]")
      return 0
      ;;
  esac
  return 1
}

# Succeed when the text looks like a real transcript, i.e. it is non-empty
# and does not contain one of the failure messages checked for below.
# Arguments: $1 - output captured from `ytt`
transcript_is_usable() {
  [[ -n "$1" &&
    "$1" != *"Transcript not available"* &&
    "$1" != *"Failed to generate transcript"* ]]
}

# Process the whole playlist.
# Globals:  DOWNLOAD_VIDEOS, VAULT_DIR, playlist_url, toc_file, YT_DLP,
#           FABRIC (read); processed_videos (appended to)
# Returns:  0 on success, 1 when the configuration is unusable or the
#           playlist could not be fetched
main() {
  local entries total count=0
  local id title clean_title video_url markdown_file video_file
  local transcript summary label_rating

  if [[ -z "$playlist_url" ]]; then
    printf 'error: playlist_url is empty; set it near the top of this script\n' >&2
    return 1
  fi

  # Work from inside the vault; bail out instead of scattering notes into
  # whatever directory the script happened to be started from.
  cd "$VAULT_DIR" || {
    printf 'error: cannot cd to %s\n' "$VAULT_DIR" >&2
    return 1
  }

  # Initialize TOC file only if it doesn't exist
  if [[ ! -f "$toc_file" ]]; then
    echo "# Table of Contents" > "$toc_file"
    echo "Created Table of Contents file."
  fi

  # Fetch playlist data using yt-dlp and flatten it to "<id> <title>" lines
  entries=$("$YT_DLP" -J --flat-playlist "$playlist_url" |
    jq -r '.entries[] | "\(.id) \(.title)"')
  if [[ -z "$entries" ]]; then
    printf 'error: no playlist entries returned for %s\n' "$playlist_url" >&2
    return 1
  fi

  # Count total videos
  total=$(printf '%s\n' "$entries" | grep -c '')
  log "Total videos in playlist: $total"

  # The entries are read from file descriptor 3 rather than through a pipe:
  # a piped loop runs in a subshell (losing count and processed_videos), and
  # keeping stdin free stops child commands from swallowing the entry list.
  while IFS=' ' read -r -u 3 id title; do
    # Skip deleted and private videos
    if [[ -z "$id" ]] || is_unavailable_title "$title"; then
      log "Skipping deleted/private video or invalid entry."
      continue
    fi

    count=$((count + 1))

    clean_title=$(sanitize_title "$title")
    # A title without any ASCII letters or digits cleans to nothing; fall
    # back to the video id so such videos do not all share the name ".md".
    if [[ -z "$clean_title" ]]; then
      clean_title="$id"
    fi

    video_url="https://www.youtube.com/watch?v=$id"
    markdown_file="${clean_title}.md"
    log "Processing ($count/$total): $title"

    # Download the video unless a file for the cleaned title already exists
    if [[ "$DOWNLOAD_VIDEOS" == true ]]; then
      video_file="./Videos/${clean_title}.mp4"
      if [[ -f "$video_file" ]]; then
        log "Video already downloaded: $title"
      else
        log "Downloading video: $title"
        # Only successful downloads are recorded for the final report.
        if "$YT_DLP" --cookies-from-browser chrome \
          -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' \
          -o "$video_file" "$video_url"; then
          processed_videos+=("$video_file")
        else
          log "Download failed: $title" >&2
        fi
      fi
    fi

    # A ToC entry marks the video as already handled: skip it.
    # NOTE(review): the entry is written before the transcript step, so a
    # video whose transcript or summary fails is not retried on later runs.
    if grep -Fq -- "- [[$clean_title]]" "$toc_file"; then
      log "Entry already exists in ToC, skipping processing for: $title"
      continue
    fi
    printf '%s\n' "- [[$clean_title]]" >> "$toc_file"
    log "Added '$clean_title' to Table of Contents."

    # Create markdown file if it doesn't exist
    if [[ ! -f "$markdown_file" ]]; then
      echo "Creating markdown file: $markdown_file"
      printf '%s\n' "[YouTube Link]($video_url)" "- [ ] Watched" >> "$markdown_file"
    fi

    # Skip processing if #no-transcript tag exists
    # ("--" because an id-based file name may start with a dash)
    if grep -q -- "^#no-transcript" "$markdown_file"; then
      log "No transcript available, skipping further processing for $title."
      continue
    fi

    # Summarize only when the note has no "# Summary" heading yet
    if ! grep -q -- "^# Summary" "$markdown_file"; then
      log "Retrieving or generating transcript for $title..."
      transcript=$(ytt "$video_url" 2> /dev/null)
      if ! transcript_is_usable "$transcript"; then
        log "No transcript available."
        continue
      fi

      # Generate summary
      summary=$(printf '%s\n' "$transcript" |
        "$FABRIC" --model gpt-4o --pattern ped_sitrep)
      # Generate label and rating
      label_rating=$(printf '%s\n' "$transcript" |
        "$FABRIC" --model gpt-4o --pattern ped_label_and_rate)

      if [[ -z "$summary" || -z "$label_rating" ]]; then
        log "Failed to generate summary or label/rating."
        continue
      fi

      # printf '%s' keeps backslashes in the generated text literal.
      printf '\n# Summary\n%s\n' "$summary" >> "$markdown_file"
      printf '\n# Label and Rating\n%s\n' "$label_rating" >> "$markdown_file"
      log "Transcript successfully processed with summary and label/rating."
    fi

    log "Processed $count of $total videos."
  done 3<<< "$entries"

  log "Download complete. Markdown files generated."

  # Log the list of new video files processed
  if ((${#processed_videos[@]} > 0)); then
    log "Processed new video files in this run:"
    for video_file in "${processed_videos[@]}"; do
      log "$video_file"
    done
  else
    log "No new video files were processed in this run."
  fi
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment