# Created July 9, 2019 00:43
import os
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play
from gtts import gTTS as tts
def speak(text):
    """Say something.

    Prints ``text`` to the console, synthesizes it to an MP3 file with
    gTTS, and plays that file back through pydub.

    Args:
        text: The English sentence the assistant should say.
    """
    # Write assistant's output to console
    print(text)

    # Save audio file; it must exist on disk before pydub can load it.
    speech = tts(text=text, lang='en')
    speech_file = 'input.mp3'
    speech.save(speech_file)

    # Play audio file
    sound = AudioSegment.from_mp3(speech_file)
    play(sound)
def capture():
    """Capture audio from the microphone and transcribe it.

    Listens for a single phrase (at most 5 seconds long) and sends it to
    the Google speech recognition service.

    Returns:
        The recognized text as a string, or the sentinel ``0`` when the
        audio could not be transcribed. Callers must check for ``0``
        before treating the result as a string.
    """
    rec = sr.Recognizer()

    with sr.Microphone() as source:
        print('I\'M LISTENING...')
        audio = rec.listen(source, phrase_time_limit=5)

    try:
        return rec.recognize_google(audio, language='en-US')
    except (sr.UnknownValueError, sr.RequestError):
        # Unintelligible speech or an unreachable recognition service:
        # tell the user and report failure instead of crashing the loop.
        speak('Sorry, I could not understand what you said.')
        return 0
def process_text(name, input):
    """Echo the captured phrase back to the user, addressed by name.

    Args:
        name: The user's name, used as the prefix of the reply.
        input: The captured text to repeat back inside double quotes.
    """
    reply = name + ', you said: "' + input + '".'
    speak(reply)
if __name__ == "__main__":
    # First get name. capture() may return a non-string sentinel (0) on
    # failure, so keep asking until a usable string comes back.
    speak('What is your name?')
    name = capture()
    while not isinstance(name, str):
        speak('What is your name?')
        name = capture()
    speak('Hello, ' + name + '.')

    # Then just keep listening & responding
    while True:
        speak('What do you have to say?')
        captured_text = capture()

        # Recognition failed: check before calling .lower(), which the
        # failure sentinel does not support, then ask again.
        if not isinstance(captured_text, str):
            continue
        captured_text = captured_text.lower()

        if 'quit' in captured_text:
            speak('OK, bye, ' + name + '.')
            break

        # Process captured text
        process_text(name, captured_text)
