Last active
December 30, 2021 14:50
-
-
Save royshil/3c62fb0d72d7ec083f488425da314bb0 to your computer and use it in GitHub Desktop.
Google Cloud Speech JSON to .srt converter script, with timestamp keeping and multiple files support
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
import os | |
import json | |
import sys | |
import argparse | |
import math | |
import subprocess | |
from tqdm import tqdm | |
# Command-line interface: one or more input JSON files plus two boolean
# switches, both off unless given on the command line.
parser = argparse.ArgumentParser(
    description='Convert google ml speech .json to .srt subtitles.')
parser.add_argument(
    'files', metavar='F', type=str, nargs='+',
    help='json files for converting')
parser.add_argument(
    '--concat', dest='concat', action='store_true',
    help='concatenate the subtitles to a single .srt output file')
parser.add_argument(
    '--fix_timestamps', '-t', dest='fix_timestamps', action='store_true',
    help='fix timestamps of consequent files')
# Parsed once at start-up; the rest of the script reads args.files,
# args.concat and args.fix_timestamps.
args = parser.parse_args()
def mktime(x):
    """Format a time offset in seconds as an SRT timestamp.

    Args:
        x: Non-negative offset in seconds (int or float).

    Returns:
        A string of the form ``HH:MM:SS,mmm`` with every field zero-padded,
        e.g. ``mktime(5.5) == '00:00:05,500'``.
    """
    # Work in whole milliseconds so that rounding carries correctly into the
    # seconds/minutes/hours fields (59.9996 s becomes 00:01:00,000 rather
    # than 00:00:60,000) and so the seconds field is always two digits wide.
    total_ms = int(round(x * 1000))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    seconds, millis = divmod(remainder, 1000)
    return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, millis)
# Handle of the .srt file currently being written; write_sub() reads this
# module-level name. It stays None until a file is opened.
output_file = None
if args.concat and args.files:
    # In --concat mode every input is written to a single .srt file named
    # after the first input. The handle is intentionally left open here
    # because the conversion loop further down keeps writing to it.
    output_filename = os.path.splitext(args.files[0])[0] + '.srt'
    # Explicit UTF-8 so non-ASCII transcripts do not depend on the
    # platform's default encoding.
    output_file = open(output_filename, 'w', encoding='utf-8')

# Running cue number, incremented by write_sub().
sub_id = 0

# A single subtitle cue: index line, time-range line, text line, followed by
# the blank line that the SubRip format requires between consecutive cues.
sub_line_template = "%d\n%s --> %s\n%s\n\n"
def write_sub(sub, sub_start_time, sub_end_time):
    """Write one numbered subtitle cue to the currently open output file.

    Args:
        sub: Sequence of word strings; they are joined with single spaces.
        sub_start_time: Cue start, passed to mktime() for formatting.
        sub_end_time: Cue end, passed to mktime() for formatting.

    Side effects:
        Increments the module-level ``sub_id`` counter and writes to the
        module-level ``output_file``.
    """
    global sub_id
    text = ' '.join(sub)
    sub_id += 1
    start_stamp = mktime(sub_start_time)
    # The end stamp gets the same '.' -> ',' normalisation as before.
    end_stamp = mktime(sub_end_time).replace('.', ',')
    cue = sub_line_template % (sub_id, start_stamp, end_stamp, text)
    output_file.write(cue)
# Limits for a single subtitle cue.
MAX_SUB_WORDS = 10      # at most this many words per cue
MAX_SUB_SECONDS = 10    # a word starting this long after the cue start opens a new cue


def _to_seconds(stamp, offset=0.0):
    """Convert a duration string such as '1.500s' to float seconds plus *offset*."""
    # The input JSON stores times as text with a trailing 's' unit marker.
    return float(stamp.rstrip('s')) + offset


def _split_into_subs(words, offset):
    """Yield ``(word_list, start, end)`` cues built from one result's words.

    A new cue is started when the current one already holds MAX_SUB_WORDS
    words, or when the next word starts MAX_SUB_SECONDS or more after the
    current cue's start. Nothing is yielded for an empty *words* list.
    """
    sub = []
    sub_start = sub_end = 0.0
    for word in words:
        word_start = _to_seconds(word['startTime'], offset)
        word_end = _to_seconds(word['endTime'], offset)
        cue_full = len(sub) >= MAX_SUB_WORDS
        cue_too_long = word_start - sub_start >= MAX_SUB_SECONDS
        if sub and (cue_full or cue_too_long):
            # Emit the cue without this word; the word opens the next cue.
            yield sub, sub_start, sub_end
            sub = []
        if not sub:
            sub_start = word_start
        sub.append(word['word'])
        sub_end = word_end
    if sub:
        # Remaining words of the sentence.
        yield sub, sub_start, sub_end


try:
    for file_name in tqdm(args.files):
        # Explicit UTF-8 so non-ASCII transcripts do not depend on the
        # platform's default encoding.
        with open(file_name, 'r', encoding='utf-8') as in_file:
            trans_json = json.load(in_file)

        if not args.concat:
            # One .srt per input file, with cue numbering restarted.
            output_filename = os.path.splitext(file_name)[0] + '.srt'
            output_file = open(output_filename, 'w', encoding='utf-8')
            sub_id = 0

        try:
            # sometimes the results are in /results and sometimes in /response/results
            results_element = trans_json['results'] if 'results' in trans_json else \
                trans_json['response']['results']

            start_timestamp = 0
            if args.fix_timestamps:
                # Shift every word by the first frame timestamp that ffprobe
                # reports for the .flac file sharing this JSON file's stem.
                flac_file = os.path.splitext(file_name)[0] + '.flac'
                p = subprocess.run(["ffprobe", "-i", flac_file, "-show_frames", "-show_entries",
                                    "frame=pkt_pts_time", "-of", "csv=p=0", "-hide_banner", "-v", "0"],
                                   capture_output=True)
                start_timestamp = float(p.stdout.splitlines()[0])

            for result in results_element:
                # Skip results that carry no alternatives or no word timings
                # instead of crashing on them.
                alternatives = result.get('alternatives') or []
                if not alternatives:
                    continue
                words = alternatives[0].get('words')  # usually 20-30 words
                if not words:
                    continue
                for sub, sub_start_time, sub_end_time in _split_into_subs(words, start_timestamp):
                    write_sub(sub, sub_start_time, sub_end_time)
        finally:
            # Per-file outputs are closed even if conversion fails midway.
            if not args.concat:
                output_file.close()
finally:
    # In --concat mode the single shared output is closed once, at the end.
    if output_file is not None and not output_file.closed:
        output_file.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
When I execute the code I receive an error :