kalharbi · January 26, 2025 11:22
diff --git a/README.md b/README.md
diff --git a/Dockerfile b/Dockerfile
 # Image for the Whisper transcription script (transcribe.py).
 FROM python:3.9-slim

 WORKDIR /app

 # OS packages and the unprivileged user change rarely, so they live in an
 # early layer. The apt lists are removed in the same layer so they never
 # reach the image, and /app itself is handed to appuser so it stays writable.
 RUN apt-get update \
     && apt-get install -y --no-install-recommends ffmpeg \
     && rm -rf /var/lib/apt/lists/* \
     && useradd -m appuser \
     && chown appuser /app

 # Copy only the manifest first: the pip layer is reused until
 # requirements.txt itself changes, instead of on every source edit.
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt

 # Set ownership while copying rather than with a recursive chown, which
 # would duplicate every file in an extra layer.
 COPY --chown=appuser . /app

 USER appuser

 EXPOSE 8080

 # transcribe.py reads the audio file path from its first command-line
 # argument; override this command at `docker run` time to supply one.
 CMD ["python", "transcribe.py"]
diff --git a/requirements.txt b/requirements.txt
 numpy
 soundfile
 pydub
 openai-whisper
 setuptools-rust
diff --git a/transcribe.py b/transcribe.py
 import numpy as np
 import io
 import soundfile as sf
 import whisper
 import os
 from pydub import AudioSegment


 # Load the Whisper "tiny" model once, at module import time, and keep it in
 # the module-level name `model`, which transcribe() reads. The surrounding
 # prints report progress on stdout before and after the load.
 print("loading model ...")
 model = whisper.load_model("tiny")
 print("model loaded.")

 def transcribe(audio_file):
     """Transcribe an audio file to text with the module-level Whisper model.

     The file is decoded with pydub, converted to a single channel at a
     16000 Hz frame rate, exported as WAV into an in-memory buffer, read back
     with soundfile, and passed to ``model.transcribe`` as a float32 array.

     Args:
         audio_file: Path of the audio file to transcribe.

     Returns:
         The ``"text"`` entry of the result returned by ``model.transcribe``.

     Raises:
         SystemExit: With status 1 when ``audio_file`` does not exist.
     """
     if not os.path.exists(audio_file):
         print("No audio provided")
         # The os module has no exit() function, so the previous os.exit(1)
         # failed with AttributeError; raising SystemExit ends the process
         # with status 1 as intended.
         raise SystemExit(1)

     # Read the audio file with pydub and normalise it to mono / 16000 Hz.
     audio = AudioSegment.from_file(audio_file)
     audio = audio.set_channels(1)
     audio = audio.set_frame_rate(16000)

     # Convert the audio to WAV in memory, then rewind so it can be read back.
     wav_io = io.BytesIO()
     audio.export(wav_io, format="wav")
     wav_io.seek(0)

     # Use soundfile to read the WAV data; the sample rate is not needed here.
     audio_data, _sample_rate = sf.read(wav_io)
     audio_array = np.array(audio_data, dtype=np.float32)

     # Transcribe the audio with the Whisper model.
     result = model.transcribe(audio_array)
     transcript_text = result["text"]

     return transcript_text


 # Read the audio file path from the command line and print its transcript.
 import sys

 if len(sys.argv) < 2:
     # Show the usage line only when the argument is missing, and stop here
     # instead of failing with IndexError on sys.argv[1]. Status 2 signals a
     # command-line usage error.
     print("Usage: python transcribe.py <audio_file>")
     sys.exit(2)

 audio_file = sys.argv[1]
 print(transcribe(audio_file))
	# Image for the Whisper transcription script (transcribe.py).
	FROM python:3.9-slim

	WORKDIR /app

	# OS packages and the unprivileged user change rarely, so they live in an
	# early layer. The apt lists are removed in the same layer so they never
	# reach the image, and /app itself is handed to appuser so it stays writable.
	RUN apt-get update \
	    && apt-get install -y --no-install-recommends ffmpeg \
	    && rm -rf /var/lib/apt/lists/* \
	    && useradd -m appuser \
	    && chown appuser /app

	# Copy only the manifest first: the pip layer is reused until
	# requirements.txt itself changes, instead of on every source edit.
	COPY requirements.txt ./
	RUN pip install --no-cache-dir -r requirements.txt

	# Set ownership while copying rather than with a recursive chown, which
	# would duplicate every file in an extra layer.
	COPY --chown=appuser . /app

	USER appuser

	EXPOSE 8080

	# transcribe.py reads the audio file path from its first command-line
	# argument; override this command at `docker run` time to supply one.
	CMD ["python", "transcribe.py"]
	import numpy as np
	import io
	import soundfile as sf
	import whisper
	import os
	from pydub import AudioSegment


	# Load the Whisper "tiny" model once, at module import time, and keep it in
	# the module-level name `model`, which transcribe() reads. The surrounding
	# prints report progress on stdout before and after the load.
	print("loading model ...")
	model = whisper.load_model("tiny")
	print("model loaded.")

	def transcribe(audio_file):
		"""Transcribe an audio file to text with the module-level Whisper model.

		The file is decoded with pydub, converted to a single channel at a
		16000 Hz frame rate, exported as WAV into an in-memory buffer, read back
		with soundfile, and passed to ``model.transcribe`` as a float32 array.

		Args:
			audio_file: Path of the audio file to transcribe.

		Returns:
			The ``"text"`` entry of the result returned by ``model.transcribe``.

		Raises:
			SystemExit: With status 1 when ``audio_file`` does not exist.
		"""
		if not os.path.exists(audio_file):
			print("No audio provided")
			# The os module has no exit() function, so the previous os.exit(1)
			# failed with AttributeError; raising SystemExit ends the process
			# with status 1 as intended.
			raise SystemExit(1)

		# Read the audio file with pydub and normalise it to mono / 16000 Hz.
		audio = AudioSegment.from_file(audio_file)
		audio = audio.set_channels(1)
		audio = audio.set_frame_rate(16000)

		# Convert the audio to WAV in memory, then rewind so it can be read back.
		wav_io = io.BytesIO()
		audio.export(wav_io, format="wav")
		wav_io.seek(0)

		# Use soundfile to read the WAV data; the sample rate is not needed here.
		audio_data, _sample_rate = sf.read(wav_io)
		audio_array = np.array(audio_data, dtype=np.float32)

		# Transcribe the audio with the Whisper model.
		result = model.transcribe(audio_array)
		transcript_text = result["text"]

		return transcript_text


	# Read the audio file path from the command line and print its transcript.
	import sys

	if len(sys.argv) < 2:
		# Show the usage line only when the argument is missing, and stop here
		# instead of failing with IndexError on sys.argv[1]. Status 2 signals a
		# command-line usage error.
		print("Usage: python transcribe.py <audio_file>")
		sys.exit(2)

	audio_file = sys.argv[1]
	print(transcribe(audio_file))