sethwololo · April 20, 2023 20:30
diff --git a/whispercpp.py b/whispercpp.py
 import ffmpeg
 import numpy as np
 import functools as f
 from whispercpp import Whisper
 from pathlib import Path


 def convert_to_numpy_array(file_path: str | Path) -> np.ndarray:
     """Decode an audio file into a 1-D float32 NumPy array via ffmpeg.

     ffmpeg is asked for single-channel (``ac=1``), 16 kHz (``ar="16k"``),
     signed 16-bit little-endian PCM written to stdout. The raw samples are
     then scaled by 1/32768 so the values fall in ``[-1.0, 1.0)``.

     Args:
         file_path: Location of the audio file handed to ``ffmpeg.input``.

     Returns:
         The decoded samples as ``np.float32``.

     Raises:
         RuntimeError: If ffmpeg raises ``ffmpeg.Error``; the message carries
             ffmpeg's captured stderr text.
     """
     try:
         # "-" as the output target makes ffmpeg write to stdout, which is
         # captured and returned by run() instead of landing in a file.
         out, _ = (
             ffmpeg.input(file_path, threads=0)
             .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar="16k")
             .overwrite_output()
             .run(capture_stdout=True, capture_stderr=True)
         )
     except ffmpeg.Error as e:
         # stderr was captured as bytes; decode() must actually be called,
         # otherwise the message shows the bound-method repr instead of the
         # ffmpeg diagnostics. errors="replace" keeps odd bytes from masking
         # the real failure with a UnicodeDecodeError.
         detail = e.stderr.decode(errors="replace")
         raise RuntimeError(f"Failed to load audio: {detail}") from e

     # np.frombuffer already yields a 1-D array, so no flatten() is needed;
     # astype() produces a fresh float32 copy that is then normalized.
     return np.frombuffer(out, np.int16).astype(np.float32) / 32768.0


 @f.lru_cache(maxsize=1)
 def get_whisper() -> Whisper:
     """Return the shared ``Whisper`` instance, building it on first use.

     The instance comes from ``Whisper.from_pretrained("base")`` and has
     ``params.with_language("pt")`` called on it before being returned.
     Because the function takes no arguments, the ``lru_cache`` wrapper
     hands back the same object on every later call.
     """
     model = Whisper.from_pretrained("base")
     model.params.with_language("pt")
     return model


 def transcribe_audio(file_path: str | Path):
     """Transcribe the audio file at *file_path*.

     The file is first decoded with ``convert_to_numpy_array``; the samples
     are then passed to the ``transcribe`` method of the instance returned
     by ``get_whisper``, and that method's result is returned unchanged.
     """
     # Decode before fetching the model so a bad file fails without first
     # triggering model construction.
     samples = convert_to_numpy_array(file_path)
     return get_whisper().transcribe(samples)
	import ffmpeg
	import numpy as np
	import functools as f
	from whispercpp import Whisper
	from pathlib import Path


	def convert_to_numpy_array(file_path: str | Path) -> np.ndarray:
	    """Decode an audio file into a 1-D float32 NumPy array via ffmpeg.

	    ffmpeg is asked for single-channel (``ac=1``), 16 kHz (``ar="16k"``),
	    signed 16-bit little-endian PCM written to stdout. The raw samples are
	    then scaled by 1/32768 so the values fall in ``[-1.0, 1.0)``.

	    Args:
	        file_path: Location of the audio file handed to ``ffmpeg.input``.

	    Returns:
	        The decoded samples as ``np.float32``.

	    Raises:
	        RuntimeError: If ffmpeg raises ``ffmpeg.Error``; the message carries
	            ffmpeg's captured stderr text.
	    """
	    try:
	        # "-" as the output target makes ffmpeg write to stdout, which is
	        # captured and returned by run() instead of landing in a file.
	        out, _ = (
	            ffmpeg.input(file_path, threads=0)
	            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar="16k")
	            .overwrite_output()
	            .run(capture_stdout=True, capture_stderr=True)
	        )
	    except ffmpeg.Error as e:
	        # stderr was captured as bytes; decode() must actually be called,
	        # otherwise the message shows the bound-method repr instead of the
	        # ffmpeg diagnostics.
	        detail = e.stderr.decode(errors="replace")
	        raise RuntimeError(f"Failed to load audio: {detail}") from e

	    # np.frombuffer already yields a 1-D array; astype() makes the float32
	    # copy that is then normalized.
	    return np.frombuffer(out, np.int16).astype(np.float32) / 32768.0


	@f.lru_cache(maxsize=1)
	def get_whisper() -> Whisper:
	    """Return the shared ``Whisper`` instance, building it on first use.

	    The instance comes from ``Whisper.from_pretrained("base")`` and has
	    ``params.with_language("pt")`` called on it before being returned.
	    Because the function takes no arguments, the ``lru_cache`` wrapper
	    hands back the same object on every later call.
	    """
	    whisper_instance = Whisper.from_pretrained("base")
	    whisper_instance.params.with_language("pt")
	    return whisper_instance


	def transcribe_audio(file_path: str | Path):
	    """Transcribe the audio file at *file_path*.

	    The file is first decoded with ``convert_to_numpy_array``; the samples
	    are then passed to the ``transcribe`` method of the instance returned
	    by ``get_whisper``, and that method's result is returned unchanged.
	    """
	    file_data = convert_to_numpy_array(file_path)
	    whisper = get_whisper()
	    return whisper.transcribe(file_data)