Skip to content

Instantly share code, notes, and snippets.

@dauuricus
Created March 6, 2021 21:39
Show Gist options
  • Save dauuricus/79e8415c8425243398f03588dafb5de3 to your computer and use it in GitHub Desktop.
Save dauuricus/79e8415c8425243398f03588dafb5de3 to your computer and use it in GitHub Desktop.
GoogleColab
#!/usr/bin/env python3
from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import wave
import json
from googletrans import Translator
import h2.connection
import h2.config
import datetime
# Directory that holds the sample recordings (vosk-api checkout on Colab).
path = '/content/vosk-api/python/example/'

# Lower Vosk's log verbosity; call SetLogLevel(0) instead for default output.
SetLogLevel(-1)

if not os.path.exists("model"):
    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is intended for interactive sessions.
    sys.exit(1)

# Recording to transcribe. test1.wav is the Chinese-language sample; use
# path+'/test.wav' for the English sample instead.
sound = path+'/test1.wav'
wf = wave.open(sound, "rb")

# The recognizer is fed raw frames, so only uncompressed 16-bit mono is accepted.
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
    print ("Audio file must be WAV format mono PCM.")
    wf.close()  # do not leave the file handle open on the error path
    sys.exit(1)

model = Model("model")
rec = KaldiRecognizer(model, wf.getframerate())

# The transcription loop reads per-word timings from res['result'].
# NOTE(review): recent Vosk releases only emit that key after SetWords(True);
# the hasattr guard keeps older bindings without the method working.
if hasattr(rec, "SetWords"):
    rec.SetWords(True)
#obj_num = 1
def composit(line):
    """Merge consecutive pairs of strings into single space-joined strings.

    Items 1 and 2 become one entry, items 3 and 4 the next, and so on.
    When the input has an odd number of items, the last one is kept as is.

    Args:
        line: Iterable of strings (for example recognised utterances).

    Returns:
        A new list holding the merged strings; the input is not modified.
    """
    items = list(line)  # accept any iterable, not only sequences
    return [' '.join(items[start:start + 2]) for start in range(0, len(items), 2)]
def google_a(line):
    """Print a numbered transcript, then its 'ja' and 'zh-tw' translations.

    Output layout: a blank line, the heading ``contents``, a blank line, the
    cleaned source lines labelled ``en``, a blank line, the ``ja``
    translations, a blank line, and finally the ``zh-tw`` translations
    (labelled ``tw``). Every line carries a 4-digit, 1-based line number.

    Args:
        line: List of strings. It is normalised IN PLACE: surrounding
            whitespace is stripped, empty entries are removed and embedded
            newlines are replaced by spaces.

    Returns:
        None. All results are written to stdout.
    """
    # Single-pass equivalent of: strip -> drop empties -> flatten newlines.
    line[:] = [
        text.replace('\n', ' ')
        for text in (raw.strip() for raw in line)
        if text
    ]

    print()
    print('contents')
    print()
    for number, text in enumerate(line, 1):
        print('en ', str(number).zfill(4), ' ', text)  # original text

    # (printed label, googletrans destination code)
    for label, dest in (('ja ', 'ja'), ('tw ', 'zh-tw')):
        print()
        _print_translations(line, label, dest)


def _print_translations(texts, label, dest, refresh_every=5):
    """Translate each entry of ``texts`` to ``dest`` and print it with ``label``."""
    translator = Translator()
    for number, text in enumerate(texts, 1):
        translated = translator.translate(text, dest=dest)
        print(label, str(number).zfill(4), ' ', translated.text)
        # A fresh Translator is created after every ``refresh_every`` lines.
        # NOTE(review): presumably this works around problems with long-lived
        # googletrans sessions -- confirm before removing.
        if number % refresh_every == 0:
            translator = Translator()
def google(line):
    """Print the newest entry of ``line`` with its 'ja' and 'zh-tw' translations.

    Only the LAST entry that remains after clean-up is translated; earlier
    entries are ignored. Three lines are printed, labelled ``en`` (source
    text), ``ja`` and ``tw``. Nothing is printed when no entry remains.

    Args:
        line: List of strings. It is normalised IN PLACE: surrounding
            whitespace is stripped, empty entries are removed and embedded
            newlines are replaced by spaces.

    Returns:
        None. All results are written to stdout.
    """
    # Single-pass equivalent of: strip -> drop empties -> flatten newlines.
    line[:] = [
        text.replace('\n', ' ')
        for text in (raw.strip() for raw in line)
        if text
    ]
    if not line:
        return  # nothing to translate, so no translator is needed either

    latest = line[-1]
    translator = Translator()
    translated_1 = translator.translate(latest, dest='ja')
    translated_2 = translator.translate(latest, dest='zh-tw')
    print('en ',' ', latest) # original text
    print('ja ',' ', translated_1.text)
    print('tw ',' ', translated_2.text)
def fmttime(seconds):
    """Format an offset in seconds as a SubRip-style ``HH:MM:SS,mmm`` stamp.

    The offset is rounded to whole microseconds (``datetime.timedelta``
    semantics) and then truncated to milliseconds. The hour field is not
    limited to 23, so offsets of 24 hours or more are rendered correctly
    (for example ``25:00:00,000``) instead of wrapping around.

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The formatted timestamp string.

    Raises:
        ValueError: If the offset is negative.
    """
    offset = datetime.timedelta(seconds=seconds)
    if offset < datetime.timedelta(0):
        raise ValueError('time offset must not be negative: {!r}'.format(seconds))

    # Integer arithmetic from here on; floor division truncates to milliseconds.
    total_ms = offset // datetime.timedelta(milliseconds=1)
    total_secs, milli = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, mins, secs, milli)
lines = []       # every recognised utterance, in spoken order
line = []        # legacy staging list; always empty, kept so the name stays defined
oder = 1         # running subtitle cue number (1-based)
text_compo = []  # pair-merged transcript; build it with composit(lines) when needed

# Feed the recording to the recognizer in 4000-frame chunks and print each
# completed utterance as a cue: number, "start --> end", text, blank line.
while True:
    data = wf.readframes(4000)
    if not data:
        break  # end of the audio stream
    if not rec.AcceptWaveform(data):
        continue  # utterance still in progress; only a partial result exists

    res = json.loads(rec.Result())
    words = res.get('result')
    if not words:
        continue  # no per-word timings in this result, so no cue can be printed

    starttime = fmttime(words[0]['start'])
    endtime = fmttime(words[-1]['end'])
    print(oder)
    oder += 1
    print(starttime,'-->',endtime)
    if res['text'] != '':
        print(res['text'])
        lines.append(res['text'])
        print()

# Flush whatever the recognizer is still holding after the last chunk.
res = json.loads(rec.FinalResult())
words = res.get('result')
if words:
    # The final result has no 'result' key when nothing was recognised (for
    # example when the audio ends in silence), hence the guard above.
    starttime = fmttime(words[0]['start'])
    endtime = fmttime(words[-1]['end'])
    print(oder)
    print(starttime,'-->',endtime)
    print(res['text'])
    lines.append(res['text'])

wf.close()

# To translate the whole transcript afterwards:
#text_compo = composit(lines)
#google_a(text_compo)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment