Created
August 9, 2024 23:40
-
-
Save thewh1teagle/2ad98796179dfdde46801832fed930b1 to your computer and use it in GitHub Desktop.
whisper onnxruntime all in one
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Export the model with the Olive Whisper example: https://github.com/microsoft/Olive/tree/main/examples/whisper | |
| Audio should be 16 kHz mono; convert with: | |
| ffmpeg -i input.wav -ar 16000 -ac 1 -c:a pcm_s16le output.wav | |
| """ | |
| import numpy as np | |
| import onnxruntime | |
| from onnxruntime_extensions import get_library_path | |
# Input recording and the exported Whisper ONNX model to run it through.
audio_file = "single.wav"
model = "whisper_cpu_int8_cpu-cpu_model.onnx"

# The model is fed the raw bytes of the audio file as a uint8 tensor.
# np.frombuffer views the bytes directly instead of first building a Python
# list with one int object per byte, which is very costly for large files.
with open(audio_file, "rb") as f:
    audio = np.frombuffer(f.read(), dtype=np.uint8)

# Every input is a tensor with a leading axis of length 1.
# np.array([audio]) copies the read-only buffer view into a fresh
# (1, n_bytes) array.
inputs = {
    "audio_stream": np.array([audio]),
    "max_length": np.array([30], dtype=np.int32),
    "min_length": np.array([1], dtype=np.int32),
    "num_beams": np.array([5], dtype=np.int32),
    "num_return_sequences": np.array([1], dtype=np.int32),
    "length_penalty": np.array([1.0], dtype=np.float32),
    "repetition_penalty": np.array([1.0], dtype=np.float32),
}

# The custom-op library from onnxruntime_extensions must be registered on the
# session options before the session is created.
# NOTE(review): presumably the model uses these ops for audio decoding and
# tokenization - confirm against the exported graph.
options = onnxruntime.SessionOptions()
options.register_custom_ops_library(get_library_path())
session = onnxruntime.InferenceSession(model, options, providers=["CPUExecutionProvider"])

# Take the first output and index its first two axes at 0.
# NOTE(review): presumably this is the transcription text for the single
# input / single returned sequence - confirm against the model's outputs.
outputs = session.run(None, inputs)[0][0][0]
print(outputs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment