Last active
January 2, 2022 15:46
-
-
Save dauuricus/33e882d8c5db6eef407ad2ab34d6b958 to your computer and use it in GitHub Desktop.
DeepSpeech 0.9.3 JSON to SubRip (.srt) converter for Google Colab
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import datetime | |
from google.colab import files | |
import copy | |
import sys | |
#sys.setrecursionlimit(30000) | |
# Ask the user to upload the DeepSpeech JSON file through the Colab widget.
# `uploaded` is indexed by file name; each value's length is reported below.
uploaded = files.upload()

# Name of the file to convert. If several files are uploaded, the last one
# reported by the loop is used.
upfilename = None
for fn in uploaded:
    print('User uploaded file "{name}" with length {length} bytes'.format(name=fn, length=len(uploaded[fn])))
    upfilename = fn

# Without this guard an empty upload would only surface later as an
# unrelated NameError when the script tries to open `upfilename`.
if upfilename is None:
    raise SystemExit('No file was uploaded; nothing to convert.')
def fmttime(seconds):
    """Format an offset in seconds as a SubRip timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Non-negative offset from the start of the media, in seconds
            (int or float).

    Returns:
        The timestamp string. Fractions finer than one millisecond are
        truncated, not rounded. Offsets of 24 hours or more keep counting
        hours (e.g. ``25:00:00,000``) instead of wrapping back to ``00``.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    if seconds < 0:
        raise ValueError('seconds must be non-negative, got {!r}'.format(seconds))

    # timedelta normalises the value into days / seconds / microseconds, which
    # gives exact integer arithmetic from here on.
    delta = datetime.timedelta(seconds=seconds)
    total_ms = (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds // 1000

    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, milli = divmod(remainder, 1000)
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, minutes, secs, milli)
original_stdout = sys.stdout  # saved so stdout can be restored once the .srt is written
filename = 'subtitle.srt'  # the subtitle text is printed to this file

# Read the uploaded transcript. The encoding is given explicitly so the input
# is decoded the same way the .srt output is encoded (UTF-8), independent of
# the platform default.
with open(upfilename, 'r', encoding='utf8') as up_f:
    line = up_f.read()
jso = json.loads(line)
###print(jso['transcripts'][0]['words']) | |
# Open the .srt output file and initialise the caption-building state.
# NOTE(review): indentation is missing in this listing; the statements below
# are presumably the body of this `with` -- confirm against the original gist.
with open(filename,'w',encoding='utf8') as down_f: | |
sys.stdout = down_f # from here on print() writes into the .srt file
# Sum of the 'duration' values of the words in the caption being built.
totaltime = 0 | |
# Words collected for the caption being built.
sentence = [] | |
# Formatted end / start timestamps of the current caption.
endtime = '' | |
starttime = '' | |
# Computed end time (seconds) of the most recently processed word.
lastword_time = 0 | |
# Sequence number printed before each caption; starts at 1.
lineNum = 1 | |
def list_copy(n):
    """Print the words of the first ``n`` transcripts via ``print_word``.

    Each transcript's ``'words'`` list is deep-copied first, because
    ``print_word`` empties the lists it is given; the parsed JSON in ``jso``
    is left untouched.

    Args:
        n: Number of transcripts (taken from index 0 upwards) to include.
    """
    word_lists = [
        copy.deepcopy(jso['transcripts'][idx]['words'])
        for idx in range(n)
    ]
    print_word(word_lists, 0)
#""" check confidence | |
def print_word(copy, n):
    """Print the entries of several word lists in round-robin order.

    Starting with list ``n``, one entry is removed from the front of the
    current list and the first value of that entry (a dict) is printed as
    ``confidence: <list index>: <value>``; then the next list is visited,
    wrapping around to index 0 after the last one. Exhausted lists are
    skipped. The function returns once every list is empty.

    The work is done in a loop rather than by recursing once per printed
    word, so long transcripts do not hit Python's recursion limit.

    Args:
        copy: List of lists of dicts. It is consumed: every inner list is
            empty on return. (Inside this function the name shadows the
            ``copy`` module; it is kept for interface compatibility.)
        n: Index of the list to start with.
    """
    last = len(copy) - 1
    while True:
        if copy[n]:
            dic = copy[n].pop(0)
            values = list(dic.values())
            print("confidence:", str(n) + ':', values[0])
            # Advance to the next list, wrapping around after the last one.
            n += 1
            if n > last:
                n = 0
            continue

        # The current list is exhausted: probe each list once, round-robin,
        # for one that still has entries.
        for _ in range(len(copy)):
            n += 1
            if n > last:
                n = 0
            if copy[n]:
                break
        else:
            # A full cycle found nothing left to print.
            return
# Main conversion: turn the word entries of the first transcript into numbered
# SubRip captions, printed to the redirected stdout.
# NOTE(review): indentation is missing in this listing; the nesting described
# in the comments below is inferred from the statements -- confirm against
# the original gist.
#
# list_copy(3)   (disabled debug call)
# Only the transcript at index 0 is converted.
confidence =jso['transcripts'][0] | |
#print(confidence)
# `i` is used further down to detect the last word entry.
for i,ob in enumerate(confidence['words']): | |
#print(i,ob)
# talk_start / talk_end are assigned in several places but are not read
# anywhere in the visible code.
talk_start = True | |
talk_end = False | |
# The keys of each entry are handled in the dict's iteration order.
# NOTE(review): the state updates appear to rely on the order
# 'word', 'start_time', 'duration' -- confirm against the JSON.
for key in ob: | |
if key == 'word': | |
###print(jso['transcripts'][0]['words'][i][key])
# Empty words are not added to the caption.
if ob[key] != '': | |
sentence.append(ob.get(key)) | |
###print(*sentence)
elif key == 'start_time': | |
###print(jso['transcripts'][0]['words'][i][key])
time = ob[key] | |
# Gap between this word's start and the computed end of the previous word.
if time - lastword_time < 1: | |
talk_start = False | |
talk_end = False | |
elif time - lastword_time >= 1: # 1 second of silence
talk_start = False | |
talk_end = True | |
# --- flush: close the caption that ended before the silence ---
totaltime = 0 | |
endtime = fmttime(lastword_time) | |
# More than one word collected: all but the newest form the finished caption.
if len(sentence) > 1: | |
# The word appended just now belongs to the next caption.
temp = sentence.pop() | |
print(lineNum) | |
lineNum += 1 | |
# endtime2 is assigned in the 'duration' branch below; presumably it still
# holds the value computed for the previous word entry here.
print(starttime,'-->',endtime2) | |
# kotoba: the caption text, built from the words separated by spaces.
kotoba = '' | |
for word in sentence: | |
kotoba += word + ' ' | |
print(kotoba.rstrip()) | |
# Blank line after the caption.
print() | |
sentence.clear() | |
sentence.append(temp) # first word of the new caption
# --- end of flush ---
# Exactly one collected word means a caption has just started: record its
# start timestamp and its start time in seconds.
if len(sentence) == 1 : | |
talk_start = True | |
talk_end = False | |
starttime = fmttime(time) | |
p_time = time | |
elif key == 'duration': | |
###print(jso['transcripts'][0]['words'][i][key])
totaltime += ob[key] | |
# Caption start plus the summed word durations; only 'duration' values are
# added to totaltime.
lastword_time = p_time + totaltime | |
endtime2 = fmttime(lastword_time) | |
#print('in :',fmttime(time),'>>',*sentence)
#print('end :',fmttime(time+totaltime))
#print('bt :',fmttime(totaltime))
if totaltime >= 4: # 4 seconds of speech go into one caption
# --- flush: the caption reached the length limit ---
totaltime = 0 | |
endtime = fmttime(lastword_time) | |
print(lineNum) | |
lineNum += 1 | |
print(starttime,'-->',endtime) | |
kotoba = '' | |
for word in sentence: | |
kotoba += word + ' ' | |
print(kotoba.rstrip()) | |
print() | |
sentence.clear() | |
# --- end of flush ---
# Last word entry of the transcript: emit whatever is still collected.
elif totaltime < 4 and i + 1 == len(confidence['words']): | |
# --- flush: final caption ---
totaltime = 0 | |
endtime = fmttime(lastword_time) | |
print(lineNum) | |
lineNum += 1 | |
print(starttime,'-->',endtime) | |
kotoba = '' | |
for word in sentence: | |
kotoba += word + ' ' | |
print(kotoba.rstrip()) | |
print() | |
sentence.clear() | |
# --- end of flush ---
sys.stdout = original_stdout # restore the saved stdout
files.download(filename) # offer the .srt file for download
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://discourse.mozilla.org/t/do-deepspeech-have-subtitle-srt-output-mode-how-can-i-merge-words-into-the-proper-sentences/68749/12?u=haywhnk