Last active
April 25, 2023 17:11
-
-
Save SealtielFreak/e7d94a22de0688fe9411c82aa67d2c55 to your computer and use it in GitHub Desktop.
Inscription voice with Tensorflow and Pyaduio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import contextlib | |
import numpy as np | |
import pyaudio | |
from tensorflow import keras | |
CHUNK = 1024 | |
FORMAT = pyaudio.paInt16 | |
CHANNELS = 1 | |
RATE = 44100 | |
RECORD_SECONDS = 10 | |
FILENAME_SAVE_MODEL = "inscription_voice_model.h5" | |
EPOCHS = 10 | |
@contextlib.contextmanager | |
def open_pyaudio(): | |
_p = pyaudio.PyAudio() | |
yield _p | |
_p.terminate() | |
@contextlib.contextmanager | |
def open_stream(p, **kwargs): | |
stream = p.open(**kwargs) | |
yield stream | |
stream.stop_stream() | |
stream.close() | |
def find_working_microphone(): | |
with open_pyaudio() as p: | |
for i in range(p.get_device_count()): | |
device_info = p.get_device_info_by_index(i) | |
if device_info['maxInputChannels'] > 0: | |
try: | |
with open_stream(p, input_device_index=i, format=FORMAT, channels=CHANNELS, rate=RATE, input=True): | |
pass | |
return i | |
except OSError: | |
continue | |
return None | |
def recording_data_audio(record_seconds, output_device_index): | |
frames = [] | |
with open_pyaudio() as p: | |
with open_stream(p, format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK, | |
output_device_index=output_device_index) as stream: | |
for i in range(0, int(RATE / CHUNK * record_seconds)): | |
frames.append(stream.read(CHUNK)) | |
audio = np.frombuffer(b''.join(frames), dtype=np.int16) | |
audio = audio.reshape((1, audio.shape[0], 1)) | |
return audio | |
def inscription_voice(record_seconds, output_device_index): | |
print(f"Please speak for the user voice enrollment...") | |
audio = recording_data_audio(record_seconds, output_device_index) | |
model = keras.Sequential([ | |
keras.layers.Flatten(input_shape=(audio.shape[1], 1)), | |
keras.layers.Dense(128, activation='relu'), | |
keras.layers.Dense(64, activation='relu'), | |
keras.layers.Dense(10, activation='softmax') | |
]) | |
print("Ok... Training voice model") | |
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) | |
model.fit(audio, np.array([0]), epochs=EPOCHS, verbose=0) | |
model.save(FILENAME_SAVE_MODEL) | |
print("Finalize training voice model") | |
return model | |
def check_voice(model, record_seconds, output_device_index): | |
print("The voice belongs to the registered speaker...") | |
audio = recording_data_audio(record_seconds, output_device_index) | |
print("Ok... Checking voice") | |
prediction = model.predict(audio) | |
if np.argmax(prediction) == 0: | |
print("Success! The voice belongs to the registered speaker") | |
else: | |
print("Failure. The voice does not belong to the registered speaker") | |
if __name__ == "__main__": | |
print("Checking microphone...") | |
working_microphone_index = find_working_microphone() | |
if working_microphone_index is None: | |
print("No microphone was found that works") | |
else: | |
print(f'The index of the working microphone is: {working_microphone_index}') | |
print("=" * 80) | |
voice_model = inscription_voice(RECORD_SECONDS, working_microphone_index) | |
print("=" * 80) | |
check_voice(voice_model, RECORD_SECONDS, working_microphone_index) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment