pedramamini · June 11, 2024 02:23
diff --git a/ped_bjj_glossary.md b/ped_bjj_glossary.md
diff --git a/ped_bjj_synopsis.md b/ped_bjj_synopsis.md
diff --git a/ped_bjj_transcript_cleanup.md b/ped_bjj_transcript_cleanup.md
diff --git a/pull_latest_bjj_content.sh b/pull_latest_bjj_content.sh
 #!/bin/bash
 #
 # Sync a YouTube playlist into local notes. For every playlist entry the
 # script downloads the video into ./Videos, creates a markdown note next to
 # this script, links the note from a table-of-contents file, and appends a
 # transcript, synopsis and glossary produced through Fabric patterns.
 #
 # Requirements:
 #   Fabric (https://github.com/danielmiessler/fabric)
 #   brew install yt-dlp
 #   brew install jq

 # Ensure we're running in the directory where this script is located.
 # All relative paths below depend on it, so a failed cd must stop the run
 # instead of writing files into whatever directory we were started from.
 cd "$(dirname "$0")" || {
    echo "Error: cannot change into the script directory." >&2
    exit 1
 }

 # Fail early, with a clear message, when a tool needed to read the playlist
 # is missing.
 for tool in yt-dlp jq; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        echo "Error: required tool '$tool' was not found in PATH." >&2
        exit 1
    fi
 done

 # Set the playlist URL
 playlist_url="https://www.youtube.com/playlist?list=PLubfvXZfGDEvWQDQjvUXO5DXNVuWQhCKn"

 # Specify the Table of Contents file
 toc_file="../YouTube Table of Contents.md"

 # Initialize TOC file only if it doesn't exist
 if [[ ! -f "$toc_file" ]]; then
    echo "# Table of Contents" > "$toc_file"
    echo "Created Table of Contents file."
 fi

 # Initialize an array to keep track of processed videos
 processed_videos=()

 # Fetch playlist data using yt-dlp and parse it with jq into one
 # "<id> <title>" line per entry. The two steps are checked separately so a
 # network/yt-dlp failure is not mistaken for an empty playlist.
 if ! playlist_json=$(yt-dlp -J --flat-playlist "$playlist_url"); then
    echo "Error: failed to fetch playlist data from $playlist_url" >&2
    exit 1
 fi
 if ! entries=$(printf '%s\n' "$playlist_json" | jq -r '.entries[] | "\(.id) \(.title)"'); then
    echo "Error: failed to parse playlist data." >&2
    exit 1
 fi

 # Count total videos. An empty result must count as 0: piping an empty
 # string through `wc -l` would report 1 because of the trailing newline.
 if [[ -z "$entries" ]]; then
    total=0
 else
    total=$(printf '%s\n' "$entries" | wc -l | awk '{print $1}')
 fi
 echo "Total videos in playlist: $total"

 # Counter for progress
 count=0

 # Process each video in the playlist.
 #
 # Reads the "<id> <title>" lines in $entries and, for each one, downloads the
 # video, creates its markdown note, links it from $toc_file and appends the
 # transcript, synopsis and glossary sections that are still missing.
 # Updates the globals `count` and `processed_videos`.

 # Turn a raw video title ($1) into the name shared by the video file, the
 # markdown note and the ToC link: drop everything except ASCII letters,
 # digits and spaces, squeeze runs of spaces, and trim trailing whitespace.
 clean_video_title() {
    printf '%s\n' "$1" \
        | sed -e 's/[^a-zA-Z0-9 ]//g' -e 's/  */ /g' -e 's/[[:space:]]*$//'
 }

 # Print the text stored under the "# Transcript" heading of note $1, up to
 # the next "# Synopsis" / "# Glossary" heading or the end of the file.
 read_saved_transcript() {
    awk '
        /^# (Synopsis|Glossary)/ { in_section = 0 }
        in_section               { print }
        /^# Transcript/          { in_section = 1 }
    ' < "$1"
 }

 # Walk over every playlist entry. The entries are fed through file
 # descriptor 3 rather than a pipeline: a `cmd | while` loop runs in a
 # subshell, which discarded every update to `count` and `processed_videos`,
 # and keeping stdin free means a child process that reads stdin cannot
 # swallow the remaining entries.
 process_playlist() {
    local id title title_lower clean_title video_url video_file markdown_file
    local transcript transcript_available synopsis glossary

    while IFS=' ' read -r id title <&3; do
        # Skip "deleted video" entries
        title_lower=$(printf '%s' "$title" | tr '[:upper:]' '[:lower:]')

        # NOTE(review): the bracketed placeholders are believed to be what
        # yt-dlp reports for removed/private entries - confirm against real
        # playlist output.
        if [[ -z "$id" || "$title_lower" == "deleted video" ||
              "$title_lower" == "[deleted video]" ||
              "$title_lower" == "[private video]" ]]; then
            echo "Skipping deleted video or invalid entry."
            continue
        fi

        # Increment counter
        count=$((count + 1))

        clean_title=$(clean_video_title "$title")
        # A title made only of stripped characters would give every such
        # video the same empty file name; fall back to the unique video id.
        if [[ -z "$clean_title" ]]; then
            clean_title=$id
        fi

        # Construct the video URL
        video_url="https://www.youtube.com/watch?v=$id"

        # Log processing
        echo "Processing ($count/$total): $title"

        video_file="./Videos/${clean_title}.mp4"
        markdown_file="${clean_title}.md"

        # Check for the existence of the video file using the cleaned title
        if [[ -f "$video_file" ]]; then
            echo "Video already downloaded: $title"
        else
            echo "Downloading video: $title"
            # Only successful downloads are reported in the final summary.
            if yt-dlp -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' -o "$video_file" "$video_url"; then
                processed_videos+=("$video_file")
            else
                echo "Warning: download failed for $title ($video_url)" >&2
            fi
        fi

        # Create markdown file if it doesn't exist
        if [[ ! -f "$markdown_file" ]]; then
            echo "Creating markdown file: $markdown_file"
            {
                echo "![[${clean_title}.mp4]]"
                echo "[YouTube Link]($video_url)"
                echo "- [ ] Watched"
            } > "$markdown_file"
        fi

        # Add the title to the ToC if not already present
        if ! grep -Fq -- "- [[$clean_title]]" "$toc_file"; then
            echo "- [[$clean_title]]" >> "$toc_file"
            echo "Added '$clean_title' to Table of Contents."
        fi

        # Skip processing if #no-transcript tag exists
        if grep -q -- "^#no-transcript" "$markdown_file"; then
            echo "No transcript available, skipping further processing for $title."
            continue
        fi

        # Obtain the transcript: reuse the one already saved in the note, or
        # fetch and clean a new one. Resetting it per video prevents the
        # synopsis/glossary steps from using the previous video's transcript.
        transcript=""
        if grep -q -- "^# Transcript" "$markdown_file"; then
            transcript=$(read_saved_transcript "$markdown_file")
        else
            echo "Checking for available transcript for $title..."
            transcript_available=$(yt --transcript "$video_url" 2>/dev/null)

            if [[ "$transcript_available" == *"Transcript not available"* ]]; then
                echo "No transcript available for $title."
                echo "#no-transcript" >> "$markdown_file"
                continue
            fi
            if [[ -z "$transcript_available" ]]; then
                # No output at all means the fetch itself failed; leave the
                # note untagged so the next run retries.
                echo "Warning: could not fetch a transcript for $title; will retry on the next run." >&2
                continue
            fi

            echo "Transcript available from YouTube."
            transcript=$(printf '%s\n' "$transcript_available" | fabric --pattern ped_bjj_transcript_cleanup)
            if [[ -z "$transcript" ]]; then
                echo "Warning: transcript cleanup returned nothing for $title; will retry on the next run." >&2
                continue
            fi
            echo "Transcript cleaned."
            # printf writes the text verbatim; `echo -e` would interpret
            # backslash sequences that happen to occur in the transcript.
            printf '\n# Transcript\n%s\n' "$transcript" >> "$markdown_file"
        fi

        # Generate synopsis if missing
        if ! grep -q -- "^# Synopsis" "$markdown_file"; then
            if [[ -z "$transcript" ]]; then
                echo "Warning: saved transcript for $title is empty; skipping synopsis." >&2
            else
                echo "Generating synopsis for $title..."
                synopsis=$(printf '%s\n' "$transcript" | fabric --pattern ped_bjj_synopsis | sed '/^#/d')
                if [[ -n "$synopsis" ]]; then
                    echo "Synopsis generated."
                    printf '\n# Synopsis\n%s\n' "$synopsis" >> "$markdown_file"
                else
                    echo "Warning: synopsis generation returned nothing for $title." >&2
                fi
            fi
        fi

        # Generate glossary if missing
        if ! grep -q -- "^# Glossary" "$markdown_file"; then
            if [[ -z "$transcript" ]]; then
                echo "Warning: saved transcript for $title is empty; skipping glossary." >&2
            else
                echo "Generating glossary for $title..."
                glossary=$(printf '%s\n' "$transcript" | fabric --pattern ped_bjj_glossary)
                if [[ -n "$glossary" ]]; then
                    echo "Glossary generated."
                    # The "missing glossary" test above looks for this
                    # heading, so add it when the generated text lacks one;
                    # otherwise the glossary is appended again on every run.
                    if ! printf '%s\n' "$glossary" | grep -q -- "^# Glossary"; then
                        printf '\n# Glossary\n' >> "$markdown_file"
                    fi
                    printf '%s\n' "$glossary" >> "$markdown_file"
                else
                    echo "No new glossary content to add."
                fi
            fi
        fi

        echo "Processed $count of $total videos."
    done 3<<< "$entries"
 }

 process_playlist

 echo "Download complete. Markdown files generated."

 # Report which video files were added to processed_videos during this run,
 # one path per line, or say so when the list is empty.
 if (( ${#processed_videos[@]} == 0 )); then
    echo "No new video files were processed in this run."
 else
    echo "Processed new video files in this run:"
    for new_file in "${processed_videos[@]}"; do echo "$new_file"; done
 fi
	#!/bin/bash
	#
	# Sync a YouTube playlist into local notes. For every playlist entry the
	# script downloads the video into ./Videos, creates a markdown note next to
	# this script, links the note from a table-of-contents file, and appends a
	# transcript, synopsis and glossary produced through Fabric patterns.
	#
	# Requirements:
	#   Fabric (https://github.com/danielmiessler/fabric)
	#   brew install yt-dlp
	#   brew install jq

	# Ensure we're running in the directory where this script is located.
	# All relative paths below depend on it, so a failed cd must stop the run
	# instead of writing files into whatever directory we were started from.
	cd "$(dirname "$0")" || {
	    echo "Error: cannot change into the script directory." >&2
	    exit 1
	}

	# Fail early, with a clear message, when a tool needed to read the playlist
	# is missing.
	for tool in yt-dlp jq; do
	    if ! command -v "$tool" >/dev/null 2>&1; then
	        echo "Error: required tool '$tool' was not found in PATH." >&2
	        exit 1
	    fi
	done

	# Set the playlist URL
	playlist_url="https://www.youtube.com/playlist?list=PLubfvXZfGDEvWQDQjvUXO5DXNVuWQhCKn"

	# Specify the Table of Contents file
	toc_file="../YouTube Table of Contents.md"

	# Initialize TOC file only if it doesn't exist
	if [[ ! -f "$toc_file" ]]; then
	    echo "# Table of Contents" > "$toc_file"
	    echo "Created Table of Contents file."
	fi

	# Initialize an array to keep track of processed videos
	processed_videos=()

	# Fetch playlist data using yt-dlp and parse it with jq into one
	# "<id> <title>" line per entry. The pipes must be real (unescaped) pipes:
	# written as `\|` they are passed to yt-dlp as literal arguments. The two
	# steps are checked separately so a network/yt-dlp failure is not mistaken
	# for an empty playlist.
	if ! playlist_json=$(yt-dlp -J --flat-playlist "$playlist_url"); then
	    echo "Error: failed to fetch playlist data from $playlist_url" >&2
	    exit 1
	fi
	if ! entries=$(printf '%s\n' "$playlist_json" | jq -r '.entries[] | "\(.id) \(.title)"'); then
	    echo "Error: failed to parse playlist data." >&2
	    exit 1
	fi

	# Count total videos. An empty result must count as 0: piping an empty
	# string through `wc -l` would report 1 because of the trailing newline.
	if [[ -z "$entries" ]]; then
	    total=0
	else
	    total=$(printf '%s\n' "$entries" | wc -l | awk '{print $1}')
	fi
	echo "Total videos in playlist: $total"

	# Counter for progress
	count=0

	# Process each video in the playlist.
	#
	# Reads the "<id> <title>" lines in $entries and, for each one, downloads the
	# video, creates its markdown note, links it from $toc_file and appends the
	# transcript, synopsis and glossary sections that are still missing.
	# Updates the globals `count` and `processed_videos`.

	# Turn a raw video title ($1) into the name shared by the video file, the
	# markdown note and the ToC link: drop everything except ASCII letters,
	# digits and spaces, squeeze runs of spaces, and trim trailing whitespace.
	clean_video_title() {
	    printf '%s\n' "$1" \
	        | sed -e 's/[^a-zA-Z0-9 ]//g' -e 's/  */ /g' -e 's/[[:space:]]*$//'
	}

	# Print the text stored under the "# Transcript" heading of note $1, up to
	# the next "# Synopsis" / "# Glossary" heading or the end of the file.
	read_saved_transcript() {
	    awk '
	        /^# (Synopsis|Glossary)/ { in_section = 0 }
	        in_section               { print }
	        /^# Transcript/          { in_section = 1 }
	    ' < "$1"
	}

	# Walk over every playlist entry. The entries are fed through file
	# descriptor 3 rather than a pipeline: a `cmd | while` loop runs in a
	# subshell, which discarded every update to `count` and `processed_videos`,
	# and keeping stdin free means a child process that reads stdin cannot
	# swallow the remaining entries.
	process_playlist() {
	    local id title title_lower clean_title video_url video_file markdown_file
	    local transcript transcript_available synopsis glossary

	    while IFS=' ' read -r id title <&3; do
	        # Skip "deleted video" entries
	        title_lower=$(printf '%s' "$title" | tr '[:upper:]' '[:lower:]')

	        # NOTE(review): the bracketed placeholders are believed to be what
	        # yt-dlp reports for removed/private entries - confirm against real
	        # playlist output.
	        if [[ -z "$id" || "$title_lower" == "deleted video" ||
	              "$title_lower" == "[deleted video]" ||
	              "$title_lower" == "[private video]" ]]; then
	            echo "Skipping deleted video or invalid entry."
	            continue
	        fi

	        # Increment counter
	        count=$((count + 1))

	        clean_title=$(clean_video_title "$title")
	        # A title made only of stripped characters would give every such
	        # video the same empty file name; fall back to the unique video id.
	        if [[ -z "$clean_title" ]]; then
	            clean_title=$id
	        fi

	        # Construct the video URL
	        video_url="https://www.youtube.com/watch?v=$id"

	        # Log processing
	        echo "Processing ($count/$total): $title"

	        video_file="./Videos/${clean_title}.mp4"
	        markdown_file="${clean_title}.md"

	        # Check for the existence of the video file using the cleaned title
	        if [[ -f "$video_file" ]]; then
	            echo "Video already downloaded: $title"
	        else
	            echo "Downloading video: $title"
	            # Only successful downloads are reported in the final summary.
	            if yt-dlp -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' -o "$video_file" "$video_url"; then
	                processed_videos+=("$video_file")
	            else
	                echo "Warning: download failed for $title ($video_url)" >&2
	            fi
	        fi

	        # Create markdown file if it doesn't exist
	        if [[ ! -f "$markdown_file" ]]; then
	            echo "Creating markdown file: $markdown_file"
	            {
	                echo "![[${clean_title}.mp4]]"
	                echo "[YouTube Link]($video_url)"
	                echo "- [ ] Watched"
	            } > "$markdown_file"
	        fi

	        # Add the title to the ToC if not already present
	        if ! grep -Fq -- "- [[$clean_title]]" "$toc_file"; then
	            echo "- [[$clean_title]]" >> "$toc_file"
	            echo "Added '$clean_title' to Table of Contents."
	        fi

	        # Skip processing if #no-transcript tag exists
	        if grep -q -- "^#no-transcript" "$markdown_file"; then
	            echo "No transcript available, skipping further processing for $title."
	            continue
	        fi

	        # Obtain the transcript: reuse the one already saved in the note, or
	        # fetch and clean a new one. Resetting it per video prevents the
	        # synopsis/glossary steps from using the previous video's transcript.
	        transcript=""
	        if grep -q -- "^# Transcript" "$markdown_file"; then
	            transcript=$(read_saved_transcript "$markdown_file")
	        else
	            echo "Checking for available transcript for $title..."
	            transcript_available=$(yt --transcript "$video_url" 2>/dev/null)

	            # Substring match: the notice may be surrounded by other text.
	            if [[ "$transcript_available" == *"Transcript not available"* ]]; then
	                echo "No transcript available for $title."
	                echo "#no-transcript" >> "$markdown_file"
	                continue
	            fi
	            if [[ -z "$transcript_available" ]]; then
	                # No output at all means the fetch itself failed; leave the
	                # note untagged so the next run retries.
	                echo "Warning: could not fetch a transcript for $title; will retry on the next run." >&2
	                continue
	            fi

	            echo "Transcript available from YouTube."
	            transcript=$(printf '%s\n' "$transcript_available" | fabric --pattern ped_bjj_transcript_cleanup)
	            if [[ -z "$transcript" ]]; then
	                echo "Warning: transcript cleanup returned nothing for $title; will retry on the next run." >&2
	                continue
	            fi
	            echo "Transcript cleaned."
	            # printf writes the text verbatim; `echo -e` would interpret
	            # backslash sequences that happen to occur in the transcript.
	            printf '\n# Transcript\n%s\n' "$transcript" >> "$markdown_file"
	        fi

	        # Generate synopsis if missing
	        if ! grep -q -- "^# Synopsis" "$markdown_file"; then
	            if [[ -z "$transcript" ]]; then
	                echo "Warning: saved transcript for $title is empty; skipping synopsis." >&2
	            else
	                echo "Generating synopsis for $title..."
	                synopsis=$(printf '%s\n' "$transcript" | fabric --pattern ped_bjj_synopsis | sed '/^#/d')
	                if [[ -n "$synopsis" ]]; then
	                    echo "Synopsis generated."
	                    printf '\n# Synopsis\n%s\n' "$synopsis" >> "$markdown_file"
	                else
	                    echo "Warning: synopsis generation returned nothing for $title." >&2
	                fi
	            fi
	        fi

	        # Generate glossary if missing
	        if ! grep -q -- "^# Glossary" "$markdown_file"; then
	            if [[ -z "$transcript" ]]; then
	                echo "Warning: saved transcript for $title is empty; skipping glossary." >&2
	            else
	                echo "Generating glossary for $title..."
	                glossary=$(printf '%s\n' "$transcript" | fabric --pattern ped_bjj_glossary)
	                if [[ -n "$glossary" ]]; then
	                    echo "Glossary generated."
	                    # The "missing glossary" test above looks for this
	                    # heading, so add it when the generated text lacks one;
	                    # otherwise the glossary is appended again on every run.
	                    if ! printf '%s\n' "$glossary" | grep -q -- "^# Glossary"; then
	                        printf '\n# Glossary\n' >> "$markdown_file"
	                    fi
	                    printf '%s\n' "$glossary" >> "$markdown_file"
	                else
	                    echo "No new glossary content to add."
	                fi
	            fi
	        fi

	        echo "Processed $count of $total videos."
	    done 3<<< "$entries"
	}

	process_playlist

	echo "Download complete. Markdown files generated."

	# Report which video files were added to processed_videos during this run,
	# one path per line, or say so when the list is empty.
	if (( ${#processed_videos[@]} == 0 )); then
	    echo "No new video files were processed in this run."
	else
	    echo "Processed new video files in this run:"
	    for new_file in "${processed_videos[@]}"; do echo "$new_file"; done
	fi