Skip to content

Instantly share code, notes, and snippets.

@HimbeersaftLP
Created March 13, 2021 17:10
Show Gist options
  • Save HimbeersaftLP/06a3497fcfd9ee9fd142e992e1235e40 to your computer and use it in GitHub Desktop.
Save HimbeersaftLP/06a3497fcfd9ee9fd142e992e1235e40 to your computer and use it in GitHub Desktop.
Minimal Python DeepSpeech Example
import sys
import deepspeech as ds
# https://deepspeech.readthedocs.io/en/v0.9.3/Python-Examples.html
# English and Chinese: https://github.com/mozilla/DeepSpeech/releases
# German: https://github.com/AASHISHAG/deepspeech-german#trained-models
# Sample rate in Hz that both the live recording and any supplied wav file
# have to use.
fs = 16000

# To use the German model instead of the English one, swap in these files:
#   model:  "deepspeech-german-0.9.0-output_graph.pbmm"
#   scorer: "deepspeech-german-0.9.0-kenlm.scorer"
model_file = "deepspeech-0.9.3-models.pbmm"
scorer_file = "deepspeech-0.9.3-models.scorer"

print("Loading model...")
model = ds.Model(model_file)
print("Loading scorer...")
model.enableExternalScorer(scorer_file)
# https://stackoverflow.com/a/65378537/
def read_wav(path):
    """Read a PCM wav file and return the samples of its first channel.

    Args:
        path: Path of the wav file to read.

    Returns:
        A ``(values, framerate, bits)`` tuple. ``values`` is a list holding
        one integer sample per frame, taken from the first channel only (for
        stereo input the remaining channels are dropped). ``framerate`` is
        the sampling rate reported by the file and ``bits`` is the sample
        width in bits.
    """
    import wave

    with wave.open(path, "rb") as wav:
        nchannels, sampwidth, framerate, nframes, _, _ = wav.getparams()
        # One bulk read instead of one readframes() call per frame.
        raw = wav.readframes(nframes)

    signed = sampwidth > 1  # 8 bit wavs are unsigned
    byteorder = sys.byteorder  # wave module uses sys.byteorder for bytes
    framesize = nchannels * sampwidth
    # Decode complete frames only: if the file holds less data than its
    # header announces, no phantom zero-valued samples are produced.
    usable = len(raw) - len(raw) % framesize
    values = [
        # The first `sampwidth` bytes of every frame belong to channel 0.
        int.from_bytes(raw[start:start + sampwidth], byteorder, signed=signed)
        for start in range(0, usable, framesize)
    ]
    return values, framerate, sampwidth * 8


def _record_audio(duration, samplerate):
    """Record `duration` seconds of 1-channel int16 audio as a list of samples."""
    # https://python-sounddevice.readthedocs.io/en/latest/usage.html#recording
    # Imported here so sounddevice is only required when actually recording.
    import sounddevice as sd

    print("No wav file provided, recording now...")
    recording = sd.rec(
        int(duration * samplerate),
        samplerate=samplerate,
        channels=1,
        dtype="int16",
    )
    sd.wait()  # block until the recording has finished
    # Each recorded frame is a one-element row; keep just the single channel.
    samples = [frame[0] for frame in recording]
    print("Done recording!")
    return samples


def main():
    """Transcribe the wav file given as first argument, or a fresh recording.

    Without a command line argument, 5 seconds of audio are recorded.
    Otherwise the named wav file is loaded and must match the module-level
    sample rate ``fs`` and be 16 bit; if not, a message is printed and the
    script exits with status 1. The audio is then passed to the module-level
    ``model`` and the transcription is printed.
    """
    if len(sys.argv) < 2:
        duration = 5  # seconds
        audio_data = _record_audio(duration, fs)
    else:
        audio_data, framerate, bits = read_wav(sys.argv[1])
        if framerate != fs:
            print("Must be 16khz!")
            sys.exit(1)
        if bits != 16:
            print("Must be 16bit!")
            sys.exit(1)
    print("Processing...")
    print(model.stt(audio_data))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment