Skip to content

Instantly share code, notes, and snippets.

@kalharbi
Created January 26, 2025 11:22
Show Gist options
  • Save kalharbi/8460bf2fe8f19c9ff80122c91a1cf6bd to your computer and use it in GitHub Desktop.
Save kalharbi/8460bf2fe8f19c9ff80122c91a1cf6bd to your computer and use it in GitHub Desktop.
Whisper-STT CLI in Docker

Usage

  1. Download and install Docker Desktop
  2. Create an audio file (.wav) and save it in your Desktop folder. Example: ~/Desktop/sample-audio.wav
  3. Build the Docker image and transcribe the audio
docker build -t whisper-app .
docker run -it --rm -v ~/Desktop:/app/audio whisper-app python transcribe.py /app/audio/sample-audio.wav
  4. To use a different model, change the model name passed to whisper.load_model in transcribe.py, then rebuild the Docker image and re-run the container.
# Image for the Whisper speech-to-text CLI (transcribe.py).
FROM python:3.9-slim
WORKDIR /app

# Install ffmpeg as a system package (presumably needed by pydub/whisper to
# decode audio -- confirm against their docs). The apt package index is removed
# in the same layer so it never ends up in the image, and recommended-but-
# optional packages are skipped to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Install the Python dependencies before copying the rest of the sources, so
# this (slow) layer is reused from cache when only application code changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application and hand it to an unprivileged user.
COPY . /app
RUN useradd -m appuser \
    && chown -R appuser /app
USER appuser

# NOTE(review): nothing in transcribe.py as shown listens on a port; EXPOSE is
# kept unchanged in case other tooling relies on it.
EXPOSE 8080
CMD ["python", "transcribe.py"]
numpy
soundfile
pydub
openai-whisper
setuptools-rust
import io
import os
import sys

import numpy as np
import soundfile as sf
import whisper
from pydub import AudioSegment
# Load the Whisper model once, at import time, so transcribe() can reuse it.
# The model name defaults to "tiny"; setting the WHISPER_MODEL environment
# variable (e.g. `docker run -e WHISPER_MODEL=base ...`) selects another model
# without editing this file. An empty value falls back to the default.
print("loading model ...")
model = whisper.load_model(os.environ.get("WHISPER_MODEL") or "tiny")
print("model loaded.")
def transcribe(audio_file):
    """Transcribe an audio file to text with the module-level Whisper model.

    The file is decoded with pydub, downmixed to one channel, resampled to
    16 kHz, re-encoded as in-memory WAV, read back with soundfile, and passed
    to ``model.transcribe`` as a float32 NumPy array.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The value stored under the ``"text"`` key of the model's result.

    Raises:
        SystemExit: With exit status 1 when ``audio_file`` does not exist.
    """
    if not os.path.exists(audio_file):
        print(f"Audio file not found: {audio_file}")
        # The os module has no exit() function, so the previous os.exit(1)
        # crashed with AttributeError instead of exiting. Raising SystemExit
        # is what sys.exit() does and needs no extra import.
        raise SystemExit(1)

    # Read the audio file with pydub and normalise it to mono, 16 kHz.
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_channels(1)
    audio = audio.set_frame_rate(16000)

    # Convert the audio to WAV in memory so soundfile can parse it.
    wav_io = io.BytesIO()
    audio.export(wav_io, format="wav")
    wav_io.seek(0)  # rewind: export() leaves the cursor at the end of the data

    # Use soundfile to decode the WAV bytes into samples; the reported sample
    # rate is not needed because the audio was already resampled above.
    audio_data, _sample_rate = sf.read(wav_io)
    audio_array = np.array(audio_data, dtype=np.float32)

    # Transcribe the audio with the Whisper model.
    result = model.transcribe(audio_array)
    return result["text"]
# Command-line entry point: read the audio file path from the command line.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Show the usage text only when the argument is missing (it used to be
        # printed on every run, followed by an IndexError when no path was
        # given). sys.exit() with a string writes it to stderr and exits with
        # status 1, keeping stdout reserved for the transcript.
        sys.exit("Usage: python transcribe.py <audio_file>")
    print(transcribe(sys.argv[1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment