Skip to content

Instantly share code, notes, and snippets.

@sethwololo
Last active April 20, 2023 20:30
Show Gist options
  • Save sethwololo/1a9e6a55334a4709e4e6a659d822923a to your computer and use it in GitHub Desktop.
Save sethwololo/1a9e6a55334a4709e4e6a659d822923a to your computer and use it in GitHub Desktop.
whispercpp transcription using python bindings
import ffmpeg
import numpy as np
import functools as f
from whispercpp import Whisper
from pathlib import Path
def convert_to_numpy_array(file_path: str | Path):
    """Decode an audio file into a float32 sample array using ffmpeg.

    The file is decoded by ffmpeg to raw signed 16-bit little-endian PCM,
    downmixed to one channel (``ac=1``) and resampled to 16 kHz
    (``ar="16k"``). The raw bytes are read from ffmpeg's stdout and scaled
    by ``1 / 32768`` so the int16 samples become float32 values.

    Args:
        file_path: Path of the audio file handed to ``ffmpeg.input``.

    Returns:
        A 1-D ``numpy.float32`` array of the decoded samples.

    Raises:
        RuntimeError: If ffmpeg fails; the message carries ffmpeg's stderr
            output and the original ``ffmpeg.Error`` is chained as the cause.
    """
    try:
        # "-" makes ffmpeg write the decoded stream to stdout so it can be
        # captured in memory instead of going through a temporary file.
        out, _ = (
            ffmpeg.input(file_path, threads=0)
            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar="16k")
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # decode() must actually be called: interpolating the bound method
        # would print its repr rather than ffmpeg's diagnostics.
        # errors="replace" keeps undecodable bytes from raising a second
        # exception while the first one is being reported.
        stderr_text = e.stderr.decode(errors="replace")
        raise RuntimeError(f"Failed to load audio: {stderr_text}") from e

    # np.frombuffer already yields a 1-D array, so no flatten is needed.
    return np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
@f.lru_cache(maxsize=1)
def get_whisper() -> Whisper:
    """Return the shared Whisper model, creating it on first use.

    The model is built from the ``"base"`` pretrained weights and its
    params are given the language code ``"pt"`` (presumably Portuguese --
    confirm against the whispercpp language list). ``lru_cache`` stores the
    result, so later calls reuse the instance created by the first call.
    """
    model = Whisper.from_pretrained("base")
    model.params.with_language("pt")
    return model
def transcribe_audio(file_path: str | Path):
    """Transcribe the audio file at ``file_path`` with the cached model.

    The audio is decoded to a sample array first; only afterwards is the
    Whisper instance fetched, so a decoding failure is raised before the
    model is requested.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        Whatever ``Whisper.transcribe`` returns for the decoded samples.
    """
    samples = convert_to_numpy_array(file_path)
    return get_whisper().transcribe(samples)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment