reasonableperson · May 8, 2025 01:09
diff --git a/whisper-stream.sh b/whisper-stream.sh
 #!/bin/bash

 # whisper-stream.sh
 #
 # Take a url supported by yt-dlp, dump 30-second segments to the current
 # directory named by unix timestamp, and transcribe each segment using Whisper.
 #
 # example: TZ=Australia/Canberra ./whisper-stream.sh "https://..."
 #
 # The time displayed is the time when ffmpeg first opens the segment for
 # writing (not when the 30 seconds are up), so adding the offset printed by
 # Whisper should give you the approximate time when your computer received the
 # broadcast words. Set the TZ environment variable to the timezone where the
 # video was recorded for an estimate of when the words were spoken which does
 # not account for broadcast delay.

 # A stream URL is required. yt-dlp's own diagnostics are discarded below, so
 # without this check a missing argument would fail with no message at all.
 if [[ $# -lt 1 || -z "$1" ]]; then
   printf 'usage: %s URL\n' "${0##*/}" >&2
   exit 2
 fi

 # Print a blank line and the segment's start time (decoded from the epoch
 # seconds that make up its file name), then transcribe that segment.
 #   $1 - epoch-seconds stem of the segment file (<stem>.mp4)
 transcribe_segment() {
   local stamp=$1
   printf '\n'
   date -d "@${stamp}"
   # stdin is detached so nothing started here can consume the timestamp
   # lines that the surrounding loop is reading.
   whisper --model medium.en "${stamp}.mp4" </dev/null
 }

 # Pipeline:
 #   1. yt-dlp writes the media to stdout; its stderr output is dropped.
 #   2. ffmpeg cuts stdin into 30-second files named <epoch>.mp4 and reports
 #      each file it opens ("Opening '<epoch>.mp4' ...") in its log output.
 #   3. grep extracts the epoch from every such log line as soon as it appears.
 #   4. The loop transcribes each segment once it is complete.
 #
 # The segment muxer finishes one file before it opens the next, so a segment
 # is handed to Whisper when the following "Opening" line arrives, or at end
 # of input for the last one. Waiting for an inotify CLOSE event instead
 # blocks forever whenever the file was already closed before the wait began
 # (Whisper slower than real time, or a source that is not live).
 yt-dlp -o - -- "$1" 2>/dev/null |
   ffmpeg -v verbose -i - -f segment -segment_time 30 -strftime 1 %s.mp4 2>&1 |
   grep -Po --line-buffered "Opening '\K\d+" |
   {
     pending=
     while IFS= read -r stamp; do
       if [[ -n "$pending" ]]; then
         transcribe_segment "$pending"
       fi
       pending=$stamp
     done
     # ffmpeg has exited here, so the final segment is complete as well.
     if [[ -n "$pending" ]]; then
       transcribe_segment "$pending"
     fi
   }
	#!/bin/bash

	# whisper-stream.sh
	#
	# Take a url supported by yt-dlp, dump 30-second segments to the current
	# directory named by unix timestamp, and transcribe each segment using Whisper.
	#
	# example: TZ=Australia/Canberra ./whisper-stream.sh "https://..."
	#
	# The time displayed is the time when ffmpeg first opens the segment for
	# writing (not when the 30 seconds are up), so adding the offset printed by
	# Whisper should give you the approximate time when your computer received the
	# broadcast words. Set the TZ environment variable to the timezone where the
	# video was recorded for an estimate of when the words were spoken which does
	# not account for broadcast delay.

	# A stream URL is required. yt-dlp's own diagnostics are discarded below, so
	# without this check a missing argument would fail with no message at all.
	if [[ $# -lt 1 || -z "$1" ]]; then
	  printf 'usage: %s URL\n' "${0##*/}" >&2
	  exit 2
	fi

	# Print a blank line and the segment's start time (decoded from the epoch
	# seconds that make up its file name), then transcribe that segment.
	#   $1 - epoch-seconds stem of the segment file (<stem>.mp4)
	transcribe_segment() {
	  local stamp=$1
	  printf '\n'
	  date -d "@${stamp}"
	  # stdin is detached so nothing started here can consume the timestamp
	  # lines that the surrounding loop is reading.
	  whisper --model medium.en "${stamp}.mp4" </dev/null
	}

	# Pipeline (stages joined with real pipes; a backslash-escaped "|" would be
	# passed to each command as a literal argument and run the stages
	# separately):
	#   1. yt-dlp writes the media to stdout; its stderr output is dropped.
	#   2. ffmpeg cuts stdin into 30-second files named <epoch>.mp4 and reports
	#      each file it opens ("Opening '<epoch>.mp4' ...") in its log output.
	#   3. grep extracts the epoch from every such log line as soon as it appears.
	#   4. The loop transcribes each segment once it is complete.
	#
	# The segment muxer finishes one file before it opens the next, so a segment
	# is handed to Whisper when the following "Opening" line arrives, or at end
	# of input for the last one. Waiting for an inotify CLOSE event instead
	# blocks forever whenever the file was already closed before the wait began
	# (Whisper slower than real time, or a source that is not live).
	yt-dlp -o - -- "$1" 2>/dev/null |
	  ffmpeg -v verbose -i - -f segment -segment_time 30 -strftime 1 %s.mp4 2>&1 |
	  grep -Po --line-buffered "Opening '\K\d+" |
	  {
	    pending=
	    while IFS= read -r stamp; do
	      if [[ -n "$pending" ]]; then
	        transcribe_segment "$pending"
	      fi
	      pending=$stamp
	    done
	    # ffmpeg has exited here, so the final segment is complete as well.
	    if [[ -n "$pending" ]]; then
	      transcribe_segment "$pending"
	    fi
	  }