Skip to content

Instantly share code, notes, and snippets.

@milleniumbug
Last active February 13, 2025 21:29
Show Gist options
  • Save milleniumbug/2a0462d9524a9c05576dab3a93941f52 to your computer and use it in GitHub Desktop.
Save milleniumbug/2a0462d9524a9c05576dab3a93941f52 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Summarize a YouTube video: fetch its transcript, cut it into chunks, and ask
# a chat-completions endpoint to summarize each chunk.
#
# Usage: pass the video URL as the first argument.
# External commands used here: gettranscript, yt-dlp, jq, curl, sed, split, mktemp.

if [[ $# -lt 1 ]]; then
  printf 'usage: %s VIDEO_URL\n' "${0##*/}" >&2
  exit 2
fi

API="http://127.0.0.1:5000"
EMITHEADER=1 # set to 0 to skip the title/duration/model header below
PROMPTSTART=$'Write detailed free-form summary in six paragraphs of this part of the transcript of the following YouTube video.\n\nTEXT: '
SPLIT_LINES=48000

PROMPTFILE=$(mktemp)
TRANSCRIPTDIR=$(mktemp -d)
REQUESTFILE=$(mktemp)

# Delete the scratch files when the script exits, including early failures.
# -f keeps this quiet when the files have already been removed.
remove_scratch_files() {
  rm -rf -- "$PROMPTFILE" "$REQUESTFILE" "$TRANSCRIPTDIR"
}
trap remove_scratch_files EXIT

# Fetch the "en" transcript, turn every </b> into "-", strip the remaining
# <...> tags, and split the result into files of at most SPLIT_LINES lines
# whose names start with "tr" inside TRANSCRIPTDIR.
# gettranscript's stderr is discarded (presumably to hide tool chatter).
gettranscript "$1" en 2> /dev/null \
  | sed -e 's/<\/b>/-/g' -e 's/<[^>]*>//g' \
  | split -l "$SPLIT_LINES" - "${TRANSCRIPTDIR}/tr"

if [[ "$EMITHEADER" != "0" ]]; then
  VIDEOMETA=$(yt-dlp --skip-download -j -- "$1" | jq '{title,duration,uploader}')
  # Whole seconds. A null duration becomes 0 and a fractional one is floored,
  # so the integer arithmetic below always gets a plain integer.
  DURATION=$(printf '%s' "$VIDEOMETA" | jq -r '(.duration // 0) | floor')
  DURATION=${DURATION:-0} # jq printed nothing (e.g. the metadata lookup failed)
  TITLE=$(printf '%s' "$VIDEOMETA" | jq -r '.title')
  UPLOADER=$(printf '%s' "$VIDEOMETA" | jq -r '.uploader')

  SECS=$((DURATION % 60))
  MINS=$((DURATION / 60 % 60))
  HOURS=$((DURATION / 3600))
  printf 'The following is an AI-generated summary of the YouTube video titled "%s" (%d:%02d:%02d) by "%s"' "$TITLE" "$HOURS" "$MINS" "$SECS" "$UPLOADER"

  # Send a throwaway one-word chat request only to read back the model name
  # the server reports in its response.
  MODEL=$(curl "${API}/v1/chat/completions" -s -H "Content-Type: application/json" --data '{ "messages": [ {"role": "user", "content": "Hello"} ] }' | jq -r '.model')
  printf '\nModel used: %s' "$MODEL"
  printf "\n\n"
fi
# Summarize one transcript chunk and stream the generated text to stdout.
# Globals:   PROMPTSTART, PROMPTFILE, REQUESTFILE, API (read);
#            DEBUG (read, optional: non-empty prints the request JSON first)
# Arguments: $1 - path of the file holding the transcript chunk
# Outputs:   the content deltas of the streamed response, concatenated
issue_summarize() {
  local line payload

  # Prompt = instruction text, a newline, then the chunk itself.
  printf '%s\n' "$PROMPTSTART" > "$PROMPTFILE"
  cat -- "$1" >> "$PROMPTFILE"
  jq -n --rawfile PROMPT "$PROMPTFILE" '{ "messages": [ { "role": "user", "content": $PROMPT } ], "mode": "instruct", "max_tokens": 2400, "stream": true }' > "$REQUESTFILE"

  if [[ -n "${DEBUG:-}" ]]; then
    cat -- "$REQUESTFILE"
  fi

  # -N stops curl from buffering its output, so each event reaches the loop
  # as soon as it arrives instead of in large blocks.
  curl "${API}/v1/chat/completions" -s -N -H "Content-Type: application/json" \
    --data-binary @"$REQUESTFILE" \
    | while IFS= read -r line || [[ -n "$line" ]]; do
      # "|| -n" above keeps a final line that lacks a trailing newline.
      line=${line%$'\r'} # tolerate CRLF-framed streams
      case "$line" in
        '' | :*)
          # Blank event separators and comment lines (e.g. ": ping" keep-alives).
          continue
          ;;
        data:*)
          payload=${line#data:}
          payload=${payload# } # a single space after the colon is optional
          ;;
        *)
          # Not event-framed: hand it to jq unchanged.
          payload=$line
          ;;
      esac
      # End-of-stream marker; it is not JSON, so jq must never see it.
      if [[ "$payload" == "[DONE]" ]]; then
        continue
      fi
      # Print only the text delta, with no added newline (-j).
      printf '%s' "$payload" | jq -j 'select(.choices[0].delta.content != null) | .choices[0].delta.content'
    done
}
# Summarize every transcript chunk (glob expansion order), then delete the
# scratch files.
for TRANSCRIPT_PART in "${TRANSCRIPTDIR}"/tr*; do
  # When no chunk exists the glob stays a literal pattern; skip it rather
  # than sending the model a prompt with no transcript in it.
  [[ -e "$TRANSCRIPT_PART" ]] || continue
  issue_summarize "$TRANSCRIPT_PART"
done
# -f: stay quiet if a file is already gone; --: never parse a path as an option.
rm -f -- "$PROMPTFILE" "$REQUESTFILE"
rm -rf -- "$TRANSCRIPTDIR"
#!/bin/bash
# gettranscript.sh url [lang]
#
# Download the subtitles (uploaded or automatic) of the video at `url` in
# language `lang` (default: en), convert them to SRT, and print the caption
# text on stdout re-wrapped to 120 columns. yt-dlp's own output goes to stderr.
set -euo pipefail

URL="${1:?usage: gettranscript.sh url [lang]}"
# Deliberately not called LANG: that name is the locale environment variable,
# and assigning to it can change the locale seen by every command run below.
sub_lang="${2:-en}"

# The temp file only supplies a unique base name; yt-dlp and ffmpeg write
# "<name>.<lang>.vtt" and "<name>.<lang>.srt" next to it.
p=$(mktemp)
dir=$(dirname -- "$p")
file=$(basename -- "$p")
cd -- "$dir"
# The working directory is fixed from here on, so relative names are safe.
trap 'rm -f -- "$file" "${file}.${sub_lang}.vtt" "${file}.${sub_lang}.srt"' EXIT

# >&2 duplicates the existing stderr instead of reopening /dev/stderr;
# "--" keeps a URL or video id that starts with "-" from being read as an option.
yt-dlp \
  -o "$file" \
  --write-subs \
  --write-auto-subs \
  --sub-langs "$sub_lang" \
  --skip-download \
  -- "$URL" >&2

# -nostdin: do not let ffmpeg read from (or wait on) the caller's stdin.
ffmpeg -nostdin -i "${file}.${sub_lang}.vtt" "${file}.${sub_lang}.srt"

# Reduce the SRT to plain text:
#   1. drop timestamp lines ("-->"),
#   2. drop cue-number lines (digits only; caption text that merely starts
#      with a number is kept),
#   3. drop blank lines,
#   4. normalize line endings, collapse adjacent duplicate lines,
#   5. join everything with spaces and re-wrap to 120 columns.
grep -v -- "-->" "${file}.${sub_lang}.srt" \
  | grep -v -P '^[0-9]+\s*$' \
  | grep -v -P -- '^\s*$' \
  | dos2unix \
  | uniq \
  | tr '\n' ' ' \
  | fmt -w 120
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment