davidteren · November 9, 2024 16:25
diff --git a/transcribe-whisper-1.rb b/transcribe-whisper-1.rb
 #!/usr/bin/env ruby

 require 'open3'
 require 'signal'
 require 'openai'

 # Define the silence threshold and duration for detection.
 # start_ffmpeg interpolates both values into ffmpeg's silencedetect filter
 # argument as "n=<threshold>:d=<duration>".
 @silence_threshold = '-30dB'
 @silence_duration = 0.5

 # Wrapper around OpenAI::Client that transcribes an audio file with the
 # "whisper-1" model and retries failed requests with exponential backoff.
 class AudioTranscriberApi
   # @param access_token [String] API key handed to OpenAI::Client
   def initialize(access_token:)
     @access_token = access_token
     @client = OpenAI::Client.new(
       access_token: @access_token,
       request_timeout: 20
     )
   end

   # Transcribes +audio_file+ and returns the "text" entry of the response.
   #
   # The file is opened in block form so its handle is closed after every
   # attempt, including failed attempts that are retried.
   #
   # @param audio_file [String] path of the audio file to upload
   # @return [Object] the value stored under "text" in the API response
   # @raise [StandardError] the last error once all retries are used up
   def transcribe(audio_file)
     retries = 0
     max_retries = 5

     begin
       File.open(audio_file, "rb") do |file|
         response = @client.audio.transcribe(
           parameters: { model: "whisper-1", file: file, language: "en" }
         )
         response["text"]
       end
     rescue StandardError => e
       puts "Error transcribing audio file: #{e.message}"
       # Give up (re-raising the current error) after max_retries retries.
       raise if retries >= max_retries

       retries += 1
       puts "Retrying audio transcription for #{audio_file} (#{retries} times) after exponential backoff (#{2 ** retries} seconds)"
       sleep 2 ** retries
       retry
     end
   end
 end

 # Builds the WAV file name for a recording segment; the index is
 # left-padded with zeros to a width of at least three characters.
 def filename(file_index)
   padded_index = file_index.to_s.rjust(3, '0')
   "output_" + padded_index + ".wav"
 end

 # Starts one recording segment: sox captures from the default audio input
 # and writes WAV data to its stdout, which is relayed into ffmpeg. ffmpeg
 # runs the silencedetect filter and writes the audio to filename(file_index).
 #
 # @param file_index [Integer] segment number used to build the output name
 # @return [Array] sox stdin, ffmpeg stdout, ffmpeg stderr, sox wait thread
 #   and ffmpeg wait thread, in that order
 def start_ffmpeg(file_index)
   file_name = filename(file_index)
   sox_cmd = [
     'sox',
     '-t', 'coreaudio',
     '-d', # Use default audio input device
     '-c', '2', # Stereo channels
     '-r', '44100', # Sample rate
     '-b', '16', # Bit depth
     '-e', 'signed-integer',
     '-t', 'wav', # Output as WAV format for piping
     '-'
   ]

   ffmpeg_cmd = [
     'ffmpeg',
     '-f', 'wav',
     '-i', '-', # Input from sox via pipe
     '-af', "silencedetect=n=#{@silence_threshold}:d=#{@silence_duration}",
     '-c:a', 'pcm_s16le',
     '-y', # Overwrite output files
     file_name
   ]

   # Start sox and ffmpeg; the relay thread below connects them.
   sox_stdin, sox_stdout, sox_stderr, sox_wait_thr = Open3.popen3(*sox_cmd)
   ffmpeg_stdin, ffmpeg_stdout, ffmpeg_stderr, ffmpeg_wait_thr = Open3.popen3(*ffmpeg_cmd)

   # The stream is binary audio, so relay raw bytes instead of text lines
   # (gets/puts splits on newline bytes and may append an extra newline).
   Thread.new do
     begin
       IO.copy_stream(sox_stdout, ffmpeg_stdin)
     rescue Errno::EPIPE, IOError
       # ffmpeg went away (or a pipe was closed) while audio was still flowing.
     ensure
       # Closing ffmpeg's stdin signals end of input; also frees both pipe ends.
       [sox_stdout, ffmpeg_stdin].each do |io|
         begin
           io.close unless io.closed?
         rescue Errno::EPIPE, IOError
           # Nothing left to flush to a process that has already exited.
         end
       end
     end
   end

   # Nobody reads sox's diagnostics; drain them so sox cannot block on a
   # full stderr pipe.
   Thread.new do
     begin
       IO.copy_stream(sox_stderr, File::NULL)
     rescue IOError, SystemCallError
       # The pipe was closed underneath us; there is nothing more to drain.
     ensure
       sox_stderr.close unless sox_stderr.closed?
     end
   end

   # Return the process handles in the order callers destructure them.
   [sox_stdin, ffmpeg_stdout, ffmpeg_stderr, sox_wait_thr, ffmpeg_wait_thr]
 end

 # Transcribes the recording for +file_index+ on a background thread,
 # prints the transcription and then deletes the audio file.
 #
 # @param file_index [Integer] index of the segment to transcribe
 # @return [Thread] the thread performing the work
 def transcribe_audio(file_index)
   Thread.new do
     audio_path = filename(file_index)
     api = AudioTranscriberApi.new(access_token: ENV['OPENAI_API_KEY'])
     puts api.transcribe(audio_path)
     File.delete(audio_path)
   end
 end

 # Sends TERM to the process behind +wait_thr+ if it is still running.
 def terminate_process(wait_thr)
   Process.kill("TERM", wait_thr.pid) if wait_thr.alive?
 rescue Errno::ESRCH
   # The process exited between the alive? check and the kill.
 end

 # Monitor for silence detection in stderr.
 #
 # Reads @stderr on a background thread until a line reports "silence_start".
 # Lines containing "silence_start: 0" are skipped; note that this substring
 # also matches timestamps that merely begin with "0." (presumably meant to
 # ignore silence at the very start of a segment - confirm). On a match the
 # current sox/ffmpeg pair is stopped, the finished file is handed to
 # transcribe_audio, and a new segment plus a new monitor are started.
 #
 # @return [Thread] the monitoring thread
 def monitor_for_silence
   Thread.new do
     silence_detected = @stderr.any? do |line|
       line.include?("silence_start") && !line.include?("silence_start: 0")
     end

     if silence_detected
       finished_streams = [@stdin, @stdout, @stderr]

       # Gracefully interrupt the current recording processes
       terminate_process(@sox_wait_thr)
       terminate_process(@ffmpeg_wait_thr)

       # Wait (bounded) for ffmpeg to exit so the output file is complete
       # before it is read for transcription.
       @ffmpeg_wait_thr.join(5)

       # Release the finished segment's pipe handles.
       finished_streams.each { |io| io.close unless io.closed? }

       # Start a new recording
       transcribe_audio(@file_index)
       @file_index += 1
       @stdin, @stdout, @stderr, @sox_wait_thr, @ffmpeg_wait_thr = start_ffmpeg(@file_index)

       monitor_for_silence
     end
   end
 end

 # Kick off the first recording segment and watch it for silence.
 @file_index = 1
 recording_handles = start_ffmpeg(@file_index)
 @stdin, @stdout, @stderr, @sox_wait_thr, @ffmpeg_wait_thr = recording_handles
 monitor_for_silence

 # Block the main thread so the background threads keep running.
 sleep
	#!/usr/bin/env ruby

	require 'open3'
	require 'signal'
	require 'openai'

	# Define the silence threshold and duration for detection.
	# start_ffmpeg interpolates both values into ffmpeg's silencedetect filter
	# argument as "n=<threshold>:d=<duration>".
	@silence_threshold = '-30dB'
	@silence_duration = 0.5

	# Wrapper around OpenAI::Client that transcribes an audio file with the
	# "whisper-1" model and retries failed requests with exponential backoff.
	class AudioTranscriberApi
	  # @param access_token [String] API key handed to OpenAI::Client
	  def initialize(access_token:)
	    @access_token = access_token
	    @client = OpenAI::Client.new(
	      access_token: @access_token,
	      request_timeout: 20
	    )
	  end

	  # Transcribes +audio_file+ and returns the "text" entry of the response.
	  #
	  # The file is opened in block form so its handle is closed after every
	  # attempt, including failed attempts that are retried.
	  #
	  # @param audio_file [String] path of the audio file to upload
	  # @return [Object] the value stored under "text" in the API response
	  # @raise [StandardError] the last error once all retries are used up
	  def transcribe(audio_file)
	    retries = 0
	    max_retries = 5

	    begin
	      File.open(audio_file, "rb") do |file|
	        response = @client.audio.transcribe(
	          parameters: { model: "whisper-1", file: file, language: "en" }
	        )
	        response["text"]
	      end
	    rescue StandardError => e
	      puts "Error transcribing audio file: #{e.message}"
	      # Give up (re-raising the current error) after max_retries retries.
	      raise if retries >= max_retries

	      retries += 1
	      puts "Retrying audio transcription for #{audio_file} (#{retries} times) after exponential backoff (#{2 ** retries} seconds)"
	      sleep 2 ** retries
	      retry
	    end
	  end
	end

	# Builds the WAV file name for a recording segment; the index is
	# left-padded with zeros to a width of at least three characters.
	def filename(file_index)
	  padded_index = file_index.to_s.rjust(3, '0')
	  "output_" + padded_index + ".wav"
	end

	# Starts one recording segment: sox captures from the default audio input
	# and writes WAV data to its stdout, which is relayed into ffmpeg. ffmpeg
	# runs the silencedetect filter and writes the audio to filename(file_index).
	#
	# @param file_index [Integer] segment number used to build the output name
	# @return [Array] sox stdin, ffmpeg stdout, ffmpeg stderr, sox wait thread
	#   and ffmpeg wait thread, in that order
	def start_ffmpeg(file_index)
	  file_name = filename(file_index)
	  sox_cmd = [
	    'sox',
	    '-t', 'coreaudio',
	    '-d', # Use default audio input device
	    '-c', '2', # Stereo channels
	    '-r', '44100', # Sample rate
	    '-b', '16', # Bit depth
	    '-e', 'signed-integer',
	    '-t', 'wav', # Output as WAV format for piping
	    '-'
	  ]

	  ffmpeg_cmd = [
	    'ffmpeg',
	    '-f', 'wav',
	    '-i', '-', # Input from sox via pipe
	    '-af', "silencedetect=n=#{@silence_threshold}:d=#{@silence_duration}",
	    '-c:a', 'pcm_s16le',
	    '-y', # Overwrite output files
	    file_name
	  ]

	  # Start sox and ffmpeg; the relay thread below connects them.
	  sox_stdin, sox_stdout, sox_stderr, sox_wait_thr = Open3.popen3(*sox_cmd)
	  ffmpeg_stdin, ffmpeg_stdout, ffmpeg_stderr, ffmpeg_wait_thr = Open3.popen3(*ffmpeg_cmd)

	  # The stream is binary audio, so relay raw bytes instead of text lines
	  # (gets/puts splits on newline bytes and may append an extra newline).
	  Thread.new do
	    begin
	      IO.copy_stream(sox_stdout, ffmpeg_stdin)
	    rescue Errno::EPIPE, IOError
	      # ffmpeg went away (or a pipe was closed) while audio was still flowing.
	    ensure
	      # Closing ffmpeg's stdin signals end of input; also frees both pipe ends.
	      [sox_stdout, ffmpeg_stdin].each do |io|
	        begin
	          io.close unless io.closed?
	        rescue Errno::EPIPE, IOError
	          # Nothing left to flush to a process that has already exited.
	        end
	      end
	    end
	  end

	  # Nobody reads sox's diagnostics; drain them so sox cannot block on a
	  # full stderr pipe.
	  Thread.new do
	    begin
	      IO.copy_stream(sox_stderr, File::NULL)
	    rescue IOError, SystemCallError
	      # The pipe was closed underneath us; there is nothing more to drain.
	    ensure
	      sox_stderr.close unless sox_stderr.closed?
	    end
	  end

	  # Return the process handles in the order callers destructure them.
	  [sox_stdin, ffmpeg_stdout, ffmpeg_stderr, sox_wait_thr, ffmpeg_wait_thr]
	end

	# Transcribes the recording for +file_index+ on a background thread,
	# prints the transcription and then deletes the audio file.
	#
	# @param file_index [Integer] index of the segment to transcribe
	# @return [Thread] the thread performing the work
	def transcribe_audio(file_index)
	  Thread.new do
	    audio_path = filename(file_index)
	    api = AudioTranscriberApi.new(access_token: ENV['OPENAI_API_KEY'])
	    puts api.transcribe(audio_path)
	    File.delete(audio_path)
	  end
	end

	# Sends TERM to the process behind +wait_thr+ if it is still running.
	def terminate_process(wait_thr)
	  Process.kill("TERM", wait_thr.pid) if wait_thr.alive?
	rescue Errno::ESRCH
	  # The process exited between the alive? check and the kill.
	end

	# Monitor for silence detection in stderr.
	#
	# Reads @stderr on a background thread until a line reports "silence_start".
	# Lines containing "silence_start: 0" are skipped; note that this substring
	# also matches timestamps that merely begin with "0." (presumably meant to
	# ignore silence at the very start of a segment - confirm). On a match the
	# current sox/ffmpeg pair is stopped, the finished file is handed to
	# transcribe_audio, and a new segment plus a new monitor are started.
	#
	# @return [Thread] the monitoring thread
	def monitor_for_silence
	  Thread.new do
	    silence_detected = @stderr.any? do |line|
	      line.include?("silence_start") && !line.include?("silence_start: 0")
	    end

	    if silence_detected
	      finished_streams = [@stdin, @stdout, @stderr]

	      # Gracefully interrupt the current recording processes
	      terminate_process(@sox_wait_thr)
	      terminate_process(@ffmpeg_wait_thr)

	      # Wait (bounded) for ffmpeg to exit so the output file is complete
	      # before it is read for transcription.
	      @ffmpeg_wait_thr.join(5)

	      # Release the finished segment's pipe handles.
	      finished_streams.each { |io| io.close unless io.closed? }

	      # Start a new recording
	      transcribe_audio(@file_index)
	      @file_index += 1
	      @stdin, @stdout, @stderr, @sox_wait_thr, @ffmpeg_wait_thr = start_ffmpeg(@file_index)

	      monitor_for_silence
	    end
	  end
	end

	# Kick off the first recording segment and watch it for silence.
	@file_index = 1
	recording_handles = start_ffmpeg(@file_index)
	@stdin, @stdout, @stderr, @sox_wait_thr, @ffmpeg_wait_thr = recording_handles
	monitor_for_silence

	# Block the main thread so the background threads keep running.
	sleep
No results found