#!/bin/bash
#
# Downloads the videos of a YouTube playlist and maintains one markdown note
# per video plus a shared Table of Contents file.
#
# Requirements:
#   Fabric (https://github.com/danielmiessler/fabric)
#   brew install yt-dlp
#   jq (parses the playlist JSON below)
#   yt (invoked later in the script to fetch transcripts)

# Ensure we're running in the directory where this script is located;
# every relative path used afterwards depends on it.
cd "$(dirname "$0")" || {
  echo "Cannot change to the script directory." >&2
  exit 1
}

# Fail early, with a clear message, if a tool needed for the playlist fetch
# is missing.
for tool in yt-dlp jq; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    echo "Required tool not found: $tool" >&2
    exit 1
  fi
done

# Set the playlist URL
playlist_url="https://www.youtube.com/playlist?list=PLubfvXZfGDEvWQDQjvUXO5DXNVuWQhCKn"

# Specify the Table of Contents file
toc_file="../YouTube Table of Contents.md"

# Initialize TOC file only if it doesn't exist
if [[ ! -f "$toc_file" ]]; then
  echo "# Table of Contents" > "$toc_file"
  echo "Created Table of Contents file."
fi

# Initialize an array to keep track of processed videos
processed_videos=()

# Fetch playlist data using yt-dlp and parse it with jq.
# The two steps are run separately so a yt-dlp failure is not hidden behind
# jq's exit status.
if ! playlist_json=$(yt-dlp -J --flat-playlist "$playlist_url"); then
  echo "Failed to fetch playlist data from $playlist_url" >&2
  exit 1
fi
if ! entries=$(printf '%s\n' "$playlist_json" \
  | jq -r '.entries[] | "\(.id) \(.title)"'); then
  echo "Failed to parse playlist data." >&2
  exit 1
fi

# Count total videos. An empty result must count as 0: piping an empty
# string through `wc -l` would report 1.
if [[ -z "$entries" ]]; then
  total=0
else
  total=$(printf '%s\n' "$entries" | wc -l | awk '{print $1}')
fi
echo "Total videos in playlist: $total"

# Counter for progress
count=0
# Process each video in the playlist.
#
# Uses these script-level variables: entries (one "<id> <title>" line per
# playlist item), total, count, toc_file and the processed_videos array.

#######################################
# Reduce a video title to the name used for its video and markdown files.
# Removes everything except ASCII letters, digits and spaces, squeezes runs
# of spaces into one, and trims trailing whitespace.
# Arguments: $1 - raw title
# Outputs:   cleaned title on stdout (empty if nothing survives)
#######################################
sanitize_title() {
  printf '%s\n' "$1" \
    | sed 's/[^a-zA-Z0-9 ]//g' \
    | tr -s ' ' \
    | sed 's/[[:space:]]*$//'
}

#######################################
# Print the transcript already stored in a markdown note: the lines after
# the "# Transcript" heading, up to a "# Synopsis" or "# Glossary" heading.
# Arguments: $1 - markdown file
# Outputs:   transcript text on stdout (empty if there is no such section)
#######################################
stored_transcript() {
  awk '
    /^# Transcript/          { in_section = 1; next }
    /^# (Synopsis|Glossary)/ { in_section = 0 }
    in_section
  ' < "$1"
}

#######################################
# Download each playlist entry and bring its markdown note up to date
# (embed + link, ToC entry, transcript, synopsis, glossary).
# Globals:   entries, total, toc_file (read);
#            count, processed_videos (written)
# Outputs:   progress messages on stdout, problems on stderr
#######################################
process_playlist() {
  local id title title_lower clean_title video_url video_file markdown_file
  local transcript transcript_available synopsis glossary
  local needs_synopsis needs_glossary

  # Entries arrive on FD 3, not stdin: the tools called in the body inherit
  # stdin and could otherwise swallow the remaining playlist lines. Feeding
  # the loop by redirection instead of a pipe also keeps it in the current
  # shell, so count and processed_videos are still set after the loop.
  while IFS=' ' read -r -u 3 id title; do
    # Skip deleted/unavailable entries and blank lines.
    # NOTE(review): the bracketed forms are what yt-dlp is believed to
    # report for unavailable entries - confirm against real playlist output.
    title_lower=$(printf '%s' "$title" | tr '[:upper:]' '[:lower:]')
    case "$title_lower" in
      "deleted video" | "[deleted video]" | "[private video]")
        echo "Skipping deleted video or invalid entry."
        continue
        ;;
    esac
    if [[ -z "$id" ]]; then
      echo "Skipping deleted video or invalid entry."
      continue
    fi

    # Increment counter
    count=$((count + 1))

    clean_title=$(sanitize_title "$title")
    # A title with no ASCII alphanumerics cleans to "", which would map every
    # such video onto the same ".mp4"/".md"; fall back to the video id.
    if [[ -z "$clean_title" ]]; then
      clean_title="$id"
    fi

    # Construct the video URL
    video_url="https://www.youtube.com/watch?v=$id"

    # Log processing
    echo "Processing ($count/$total): $title"

    video_file="./Videos/${clean_title}.mp4"
    markdown_file="${clean_title}.md"
    # Never carry a transcript over from the previous video.
    transcript=""

    # Check for the existence of the video file using the cleaned title
    if [[ -f "$video_file" ]]; then
      echo "Video already downloaded: $title"
    else
      echo "Downloading video: $title"
      if yt-dlp -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' \
        -o "$video_file" "$video_url"; then
        # Only successful downloads count as processed.
        processed_videos+=("$video_file")
      else
        echo "Download failed: $title" >&2
      fi
    fi

    # Create markdown file if it doesn't exist
    if [[ ! -f "$markdown_file" ]]; then
      echo "Creating markdown file: $markdown_file"
      {
        printf '![[%s.mp4]]\n' "$clean_title"
        printf '[YouTube Link](%s)\n' "$video_url"
        printf '%s\n' "- [ ] Watched"
      } > "$markdown_file"
    fi

    # Add the title to the ToC if not already present
    if ! grep -Fq -- "- [[$clean_title]]" "$toc_file"; then
      echo "- [[$clean_title]]" >> "$toc_file"
      echo "Added '$clean_title' to Table of Contents."
    fi

    # Skip processing if #no-transcript tag exists
    if grep -q -- "^#no-transcript" "$markdown_file"; then
      echo "No transcript available, skipping further processing for $title."
      continue
    fi

    # Generate transcript if missing; otherwise reload the stored one so the
    # synopsis/glossary steps below work from this video's own text.
    if ! grep -q -- "^# Transcript" "$markdown_file"; then
      echo "Checking for available transcript for $title..."
      transcript_available=$(yt --transcript "$video_url" 2> /dev/null)
      if [[ "$transcript_available" == *"Transcript not available"* ]]; then
        echo "No transcript available for $title."
        echo "#no-transcript" >> "$markdown_file"
        continue
      fi
      if [[ -z "$transcript_available" ]]; then
        # Empty output is treated as a fetch failure, not as "no transcript",
        # so the note is not tagged and the next run tries again.
        echo "Could not fetch a transcript for $title; will retry next run." >&2
        continue
      fi
      echo "Transcript available from YouTube."
      transcript=$(printf '%s\n' "$transcript_available" \
        | fabric --pattern ped_bjj_transcript_cleanup)
      if [[ -z "$transcript" ]]; then
        echo "Transcript cleanup returned nothing for $title; will retry next run." >&2
        continue
      fi
      echo "Transcript cleaned."
      # printf, not echo -e: backslashes in the transcript stay literal.
      printf '\n# Transcript\n%s\n' "$transcript" >> "$markdown_file"
    else
      transcript=$(stored_transcript "$markdown_file")
    fi

    needs_synopsis=false
    needs_glossary=false
    grep -q -- "^# Synopsis" "$markdown_file" || needs_synopsis=true
    grep -q -- "^# Glossary" "$markdown_file" || needs_glossary=true

    # Without transcript text there is nothing to summarise.
    if [[ -z "$transcript" ]] \
      && [[ "$needs_synopsis" == true || "$needs_glossary" == true ]]; then
      echo "No transcript text found in $markdown_file; skipping synopsis/glossary." >&2
      continue
    fi

    # Generate synopsis if missing
    if [[ "$needs_synopsis" == true ]]; then
      echo "Generating synopsis for $title..."
      synopsis=$(printf '%s\n' "$transcript" \
        | fabric --pattern ped_bjj_synopsis | sed '/^#/d')
      echo "Synopsis generated."
      printf '\n# Synopsis\n%s\n' "$synopsis" >> "$markdown_file"
    fi

    # Generate glossary if missing
    if [[ "$needs_glossary" == true ]]; then
      echo "Generating glossary for $title..."
      glossary=$(printf '%s\n' "$transcript" | fabric --pattern ped_bjj_glossary)
      if [[ -n "$glossary" ]]; then
        echo "Glossary generated."
        # The "is it already there?" check greps for a "# Glossary" line, so
        # make sure one is written; otherwise every run appends another copy.
        if grep -q '^# Glossary' <<< "$glossary"; then
          printf '%s\n' "$glossary" >> "$markdown_file"
        else
          printf '\n# Glossary\n%s\n' "$glossary" >> "$markdown_file"
        fi
      else
        echo "No new glossary content to add."
      fi
    fi

    echo "Processed $count of $total videos."
  done 3<<< "$entries"
}

process_playlist
#######################################
# Print the end-of-run report: a completion line followed by the video files
# recorded in processed_videos, or a note that none were recorded.
# Globals:   processed_videos (read)
# Outputs:   report on stdout
#######################################
print_run_summary() {
  echo "Download complete. Markdown files generated."

  # Log the list of new video files processed
  if ((${#processed_videos[@]} > 0)); then
    echo "Processed new video files in this run:"
    printf '%s\n' "${processed_videos[@]}"
  else
    echo "No new video files were processed in this run."
  fi
}

print_run_summary