Created
September 19, 2019 04:37
-
-
Save ntuaha/d4598a73791c393fbfa4809c113762a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyaudio | |
import wave | |
from array import array | |
import subprocess | |
import numpy | |
import soundfile as sf | |
import io | |
import base64 | |
import requests | |
import json | |
from AI_ESB import ai | |
import subprocess | |
# Audio capture settings shared by the recorder and the FLAC encoder.
FORMAT = pyaudio.paInt16  # sample format passed to PyAudio when opening the stream
CHANNELS = 1              # number of input channels
RATE = 16000              # sample rate in Hz
CHUNK = RATE // 2         # frames read per stream.read() call
RECORD_SECONDS = 15       # not referenced by the code visible in this file
THRESHOLD = 5000          # peak sample value at or above which a chunk counts as speech
def main():
    """Script entry point: hand control to the ``talk`` loop."""
    talk()
def google_stt(audio):
    """Transcribe FLAC audio through the Google Cloud Speech REST endpoint.

    Args:
        audio: FLAC-encoded audio as ``bytes``. The request declares a
            16000 Hz sample rate and the ``cmn-Hant-TW`` language code.

    Returns:
        The transcripts of every alternative of every result, concatenated
        in response order. Returns ``''`` when the response body is not
        JSON, is not a JSON object, or carries no ``results`` (for example
        an empty object or an error payload).
    """
    # Build the body with json.dumps instead of string formatting so the
    # payload is always well-formed JSON.
    payload = {
        "config": {
            "encoding": "FLAC",
            "sampleRateHertz": 16000,
            "languageCode": "cmn-Hant-TW",
        },
        "audio": {"content": base64.b64encode(audio).decode('utf-8')},
    }
    url = "https://speech.googleapis.com/v1/speech:recognize"
    querystring = {"key": "要填自己的喔"}
    headers = {'cache-control': "no-cache"}
    response = requests.request(
        "POST", url, data=json.dumps(payload), headers=headers,
        params=querystring)

    try:
        ans = json.loads(response.text)
    except ValueError:
        # A non-JSON body (e.g. an HTML error page) means nothing was
        # recognised; report it instead of crashing the caller's loop.
        print('speech API returned a non-JSON response (HTTP %s)'
              % response.status_code)
        return ''
    if not isinstance(ans, dict):
        return ''

    results = ans.get('results')
    if not results:
        # Error payloads have an "error" key and no "results"; indexing
        # ans['results'] directly would raise KeyError here.
        if 'error' in ans:
            print('speech API error: %s' % ans['error'])
        return ''

    text = ''.join(
        item.get('transcript', '')
        for result in results
        for item in result.get('alternatives', [])
    )
    print(text)
    return text
def t2s(text):
    """Fetch speech for *text* from Google Translate TTS and play it.

    The audio is streamed into ``output2.mp3`` in the current working
    directory and then played with ``/usr/bin/afplay``. If the HTTP request
    does not succeed, a message is printed and nothing is written or played.

    Args:
        text: The text to speak; sent as the ``q`` query parameter with
            target language ``zh-TW``.

    Returns:
        None.
    """
    out_path = "output2.mp3"
    url = "https://translate.google.com/translate_tts"
    # NOTE(review): "textlen" is a fixed "128" regardless of len(text);
    # left unchanged because the endpoint's contract is not visible here.
    querystring = {"ie": "UTF-8", "total": "1", "idx": "0",
                   "textlen": "128", "client": "tw-ob", "q": text,
                   "tl": "zh-TW"}
    headers = {'cache-control': "no-cache"}
    response = requests.request(
        "GET", url, headers=headers, params=querystring, stream=True)
    try:
        if not response.ok:
            # Without this check an error page would be saved as the .mp3
            # and handed to the player.
            print('TTS request failed: HTTP %s' % response.status_code)
            return
        with open(out_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
        # Leaving the with-block flushes and closes the file before playback,
        # so no per-chunk flush is needed.
    finally:
        # Release the streamed connection even on failure.
        response.close()
    subprocess.call(['/usr/bin/afplay', out_path])
def _record_utterance(stream):
    """Read chunks from *stream* until one utterance has been captured.

    A chunk is "loud" when its maximum sample value is >= THRESHOLD. While
    waiting, only the most recent quiet chunk is kept, so the recording
    starts one chunk before the first loud one. Recording ends with the
    first quiet chunk after speech began; that chunk is included.

    Args:
        stream: An open input stream whose ``read(CHUNK)`` returns raw
            16-bit sample bytes.

    Returns:
        A list of raw byte chunks making up the utterance.
    """
    frames = []
    status = 'ready'
    while True:
        data = stream.read(CHUNK)
        vol = max(array('h', data))
        print(vol)
        loud = vol >= THRESHOLD
        if status == 'ready':
            if loud:
                print("something said")
                frames.append(data)
                status = 'recording'
                print(status)
            else:
                # Still silent: keep only the latest chunk.
                frames = [data]
        elif loud:
            frames.append(data)
        else:
            status = 'stop'
            frames.append(data)
            print(status)
            return frames


def talk():
    """Run the listen -> transcribe -> reply -> speak loop forever.

    Each iteration opens a fresh PyAudio input stream, records one
    utterance, encodes it as FLAC in memory, sends it to ``google_stt``,
    passes any recognised text to ``ai.talk`` and speaks the reply with
    ``t2s``. The stream and the PyAudio instance are released at the end of
    every iteration, including when a step raises.

    Returns:
        Never returns normally; exceptions from any step propagate after
        the audio resources have been released.
    """
    while True:
        audio = pyaudio.PyAudio()  # instantiate the pyaudio
        stream = None
        try:
            stream = audio.open(format=FORMAT, channels=CHANNELS,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            frames = _record_utterance(stream)
            print('save...\n')
            # Turn the raw wave buffer into a numpy array. frombuffer with
            # an explicit int16 dtype replaces the deprecated binary mode
            # of numpy.fromstring and the 'Int16' dtype alias.
            decoded = numpy.frombuffer(b''.join(frames), dtype=numpy.int16)
            flac_buffer = io.BytesIO()
            # Encode the samples as FLAC into the in-memory buffer.
            sf.write(flac_buffer, decoded, samplerate=RATE,
                     format="FLAC", subtype="PCM_16")
            print('save...\n')
            google_text = google_stt(flac_buffer.getvalue())
            if google_text != '':
                t = ai.talk(google_text)
                t2s(t)
            else:
                print("QQ沒有回應")
        finally:
            # Always release the audio device, even if a step above failed.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            audio.terminate()
# Start the assistant only when this file is executed as a script, so that
# importing it does not begin recording.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment