Created
July 1, 2021 18:20
-
-
Save seidler2547/0ebfd65f858695e97d7f6f070f3d1425 to your computer and use it in GitHub Desktop.
Speech to text from mp3/mp4/url using vosk+ffmpeg
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Speech to text from an mp3/mp4/URL input using vosk + ffmpeg.

Usage: pass the input (anything ffmpeg accepts after ``-i``) as the first
command-line argument. ffmpeg decodes it to raw mono signed 16-bit
little-endian PCM on its stdout; that stream is fed chunk by chunk to a
Vosk ``KaldiRecognizer`` and the recognized text is printed to stdout as
each utterance is finalized.

A Vosk model must be unpacked as ``model`` in the current folder.
"""
from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import wave  # not referenced below; kept so the file's import set is unchanged
import subprocess
import json

SetLogLevel(0)

# Fail early with a usage hint instead of an IndexError after the
# (slow) model load.
if len(sys.argv) < 2:
    print("Usage: {} <input file or URL>".format(sys.argv[0]), file=sys.stderr)
    sys.exit(1)

if not os.path.exists("model"):
    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
    # sys.exit rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. under ``python -S``).
    sys.exit(1)

sample_rate = 16000
# One second of audio per read: mono ('-ac 1'), 2 bytes per sample ('s16le').
chunk_bytes = sample_rate * 2

model = Model("model")
rec = KaldiRecognizer(model, sample_rate)

try:
    process = subprocess.Popen(['ffmpeg', '-loglevel', 'quiet', '-i',
                                sys.argv[1],
                                '-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-'],
                               stdout=subprocess.PIPE)
except FileNotFoundError:
    print("ffmpeg executable not found; please install ffmpeg and make sure it is on PATH.",
          file=sys.stderr)
    sys.exit(1)

# The context manager closes the pipe and waits for ffmpeg on exit, so the
# child is always reaped even if recognition raises.
with process:
    while True:
        data = process.stdout.read(chunk_bytes)
        if not data:
            # Empty read means ffmpeg closed its stdout: end of audio.
            break
        if rec.AcceptWaveform(data):
            # AcceptWaveform returned true: a result for the audio so far is
            # available; print it immediately.
            res = json.loads(rec.Result())
            print(res['text'], end=' ', flush=True)

# Flush whatever the recognizer still holds after the last chunk.
res = json.loads(rec.FinalResult())
print(res['text'])

# ffmpeg runs with '-loglevel quiet', so a decode failure would otherwise be
# indistinguishable from silent audio; surface it through stderr and the
# exit status.
if process.returncode != 0:
    print("ffmpeg exited with status {}".format(process.returncode), file=sys.stderr)
    sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment