Created
April 21, 2026 17:55
-
-
Save pedramamini/f93d368f043fa792809dec16da8c0fb0 to your computer and use it in GitHub Desktop.
YouTube ingestion to Obsidian with custom Fabric prompts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# YouTube playlist -> Obsidian notes.
# Setup section: configuration, working directory, Table of Contents file,
# and the playlist listing that the rest of the script iterates over.

# Do we want to ingest the videos into our vault?
DOWNLOAD_VIDEOS=false

# NOTE: THERE ARE CUSTOM FABRIC PATTERNS IN USE BELOW.
# /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_sitrep
# THIS SCRIPT WILL NOT WORK FOR YOU. IT'S BUT AN EXAMPLE.

# Switch to the (hard-coded) vault folder. Every path used later is relative,
# so abort instead of writing notes into whatever directory we started in.
cd "/Users/pedram/Pedsidian/Content Farm/YouTube" || {
  echo "[$(date)] Cannot cd to '/Users/pedram/Pedsidian/Content Farm/YouTube'; aborting." >&2
  exit 1
}

# Set the playlist URL
playlist_url="" # <<<<< ADD YOUR PLAYLIST URL HERE
if [[ -z "$playlist_url" ]]; then
  echo "[$(date)] playlist_url is empty; set it before running this script." >&2
  exit 1
fi

# Specify the Table of Contents file
toc_file="📇 YouTube Index.md"

# Initialize TOC file only if it doesn't exist
if [[ ! -f "$toc_file" ]]; then
  echo "# Table of Contents" > "$toc_file"
  echo "Created Table of Contents file."
fi

# Initialize an array to keep track of processed videos
processed_videos=()

# Fetch playlist data using yt-dlp and parse it with jq.
# Each output line is "<video id> <title>".
entries=$(/opt/homebrew/bin/yt-dlp -J --flat-playlist "$playlist_url" \
  | jq -r '.entries[] | "\(.id) \(.title)"')

# Count total videos. An empty result must count as 0: piping an empty string
# through `echo | wc -l` would report one line.
if [[ -n "$entries" ]]; then
  total=$(printf '%s\n' "$entries" | wc -l | awk '{print $1}')
else
  total=0
  echo "[$(date)] No playlist entries were returned by yt-dlp/jq." >&2
fi
echo "[$(date)] Total videos in playlist: $total"

# Counter for progress
count=0
# Process each video in the playlist

#######################################
# Reduce a video title to a name usable as a file name and wikilink target.
# Drops every character other than a-z, A-Z, 0-9 and space, squeezes runs of
# spaces to one space, and trims trailing whitespace.
# Arguments: $1 - raw video title
# Outputs:   cleaned title on stdout (empty if nothing survives)
#######################################
sanitize_title() {
  # 's/  */ /g' (two spaces before the star) only matches real runs of
  # spaces; a single-space 's/ */ /g' also matches the empty string and
  # would insert a space between every character.
  printf '%s\n' "$1" \
    | sed -e 's/[^a-zA-Z0-9 ]//g' -e 's/  */ /g' -e 's/[[:space:]]*$//'
}

#######################################
# Walk the playlist listing and, for each video not yet in the ToC, create its
# markdown note and append a summary plus label/rating generated by fabric.
# Globals:
#   entries          (read)    one "<id> <title>" line per video
#   total            (read)    number of entries, used for progress output
#   toc_file         (read)    Table of Contents file; new entries appended
#   DOWNLOAD_VIDEOS  (read)    "true" to download videos into ./Videos
#   count            (written) number of non-skipped entries seen so far
#   processed_videos (written) video files downloaded during this run
# Outputs:   progress log on stdout, download failures on stderr
#
# NOTE: the ToC entry is written before the transcript step, so a video whose
# transcript or summary step fails is still skipped as "already in ToC" on
# later runs.
#######################################
process_playlist_entries() {
  local id title title_lower clean_title video_url markdown_file video_file
  local transcript_available summary label_rating

  # The loop is fed through fd 3 rather than `echo "$entries" | while ...`:
  # a pipeline runs the loop in a subshell, which discards the updates to
  # count and processed_videos. Using fd 3 also keeps the entry list off the
  # stdin of the commands run inside the loop.
  while IFS=' ' read -r id title <&3; do
    # Skip deleted and private videos (both compared case-insensitively)
    title_lower=$(printf '%s\n' "$title" | tr '[:upper:]' '[:lower:]')
    if [[ -z "$id" || "$title_lower" == "deleted video" || "$title_lower" == "[private video]" ]]; then
      echo "[$(date)] Skipping deleted/private video or invalid entry."
      continue
    fi

    # Increment counter
    count=$((count + 1))

    # Clean the title; if nothing survives (e.g. a title made only of
    # punctuation), fall back to the video id so that such videos do not all
    # collide on ".md" and "- [[]]".
    clean_title=$(sanitize_title "$title")
    if [[ -z "$clean_title" ]]; then
      clean_title="$id"
    fi

    # Construct the video URL
    video_url="https://www.youtube.com/watch?v=$id"

    # Log processing
    echo "[$(date)] Processing ($count/$total): $title"
    markdown_file="${clean_title}.md"

    # Check for the existence of the video file using the cleaned title
    if [[ "$DOWNLOAD_VIDEOS" == true ]]; then
      video_file="./Videos/${clean_title}.mp4"
      if [[ -f "$video_file" ]]; then
        echo "[$(date)] Video already downloaded: $title"
      else
        echo "[$(date)] Downloading video: $title"
        # Only record the file as processed when the download succeeded.
        if /opt/homebrew/bin/yt-dlp --cookies-from-browser chrome \
          -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' \
          -o "$video_file" "$video_url"; then
          processed_videos+=("$video_file")
        else
          echo "[$(date)] Download failed: $title" >&2
        fi
      fi
    fi

    # A ToC entry means this video was handled by an earlier run: skip it.
    if grep -Fq -- "- [[$clean_title]]" "$toc_file"; then
      echo "[$(date)] Entry already exists in ToC, skipping processing for: $title"
      continue
    fi
    printf '%s\n' "- [[$clean_title]]" >> "$toc_file"
    echo "[$(date)] Added '$clean_title' to Table of Contents."

    # Create markdown file if it doesn't exist
    if [[ ! -f "$markdown_file" ]]; then
      echo "Creating markdown file: $markdown_file"
      {
        printf '%s\n' "[YouTube Link]($video_url)"
        printf '%s\n' "- [ ] Watched"
      } >> "$markdown_file"
    fi

    # Skip processing if #no-transcript tag exists.
    # `--` guards against a file name that starts with '-' (ids may).
    if grep -q -- "^#no-transcript" "$markdown_file"; then
      echo "[$(date)] No transcript available, skipping further processing for $title."
      continue
    fi

    # Generate transcript, summary and label/rating if the note has no summary
    if ! grep -q -- "^# Summary" "$markdown_file"; then
      echo "[$(date)] Retrieving or generating transcript for $title..."
      transcript_available=$(ytt "$video_url" 2>/dev/null)
      if [[ -z "$transcript_available" \
        || "$transcript_available" == *"Transcript not available"* \
        || "$transcript_available" == *"Failed to generate transcript"* ]]; then
        echo "[$(date)] No transcript available."
        continue
      fi

      # Generate summary
      summary=$(printf '%s\n' "$transcript_available" \
        | /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_sitrep)
      # Generate label and rating
      label_rating=$(printf '%s\n' "$transcript_available" \
        | /Users/pedram/go/bin/fabric --model gpt-4o --pattern ped_label_and_rate)

      if [[ -n "$summary" && -n "$label_rating" ]]; then
        # printf '%s' writes the generated text verbatim; `echo -e` would
        # interpret backslash sequences that happen to appear in it.
        printf '\n# Summary\n%s\n' "$summary" >> "$markdown_file"
        printf '\n# Label and Rating\n%s\n' "$label_rating" >> "$markdown_file"
        echo "[$(date)] Transcript successfully processed with summary and label/rating."
      else
        echo "[$(date)] Failed to generate summary or label/rating."
        continue
      fi
    fi

    echo "[$(date)] Processed $count of $total videos."
  done 3<<< "$entries"
}

process_playlist_entries
echo "[$(date)] Download complete. Markdown files generated."

# Final report: list every video file recorded in processed_videos during
# this run, or state that there were none.
if [[ ${#processed_videos[@]} -eq 0 ]]; then
  echo "[$(date)] No new video files were processed in this run."
else
  echo "[$(date)] Processed new video files in this run:"
  for downloaded_file in "${processed_videos[@]}"; do
    echo "[$(date)] $downloaded_file"
  done
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment