Last active
November 12, 2021 15:06
-
-
Save berinhard/7678b841919891225b86552c0978b877 to your computer and use it in GitHub Desktop.
Gist to convert Amazon Transcribe's JSON output to SRT format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
$ python json_to_srt.py <json_filepath> <output_srt>
Reference: https://aws.amazon.com/blogs/machine-learning/create-video-subtitles-with-translation-using-machine-learning/
""" | |
import sys | |
import json | |
def _storePhrase( phrases, phrase ):
    # Append a finished phrase to phrases, keeping every stored phrase timed.
    if not phrase["words"]:
        return
    if "start_time" in phrase:
        phrases.append(phrase)
    elif phrases:
        # Only punctuation was collected, so there is no timing of its own:
        # attach it to the phrase that precedes it.
        phrases[-1]["words"].extend(phrase["words"])
    # Otherwise: untimed punctuation with nothing before it cannot form a
    # subtitle cue and is left out.


def getPhrasesFromTranscript( ts ):
    """Group the items of a Transcribe result into timed subtitle phrases.

    Args:
        ts: Parsed transcript JSON. ``ts['results']['items']`` is read; each
            item needs a ``"type"`` and ``['alternatives'][0]["content"]``,
            and items of type ``"pronunciation"`` also need ``"start_time"``
            and ``"end_time"`` values that ``float()`` accepts.

    Returns:
        A list of dicts, one per phrase, with the keys ``'words'`` (the
        phrase text as a single string), ``'start_time'`` and ``'end_time'``
        (both formatted by ``getTimeCode``).
    """
    items = ts['results']['items']
    # Number of items per phrase; punctuation items count as well as words.
    items_per_phrase = 10

    phrases = []
    phrase = {'words': []}
    print("==> Creating phrases from transcript...")
    for item in items:
        # Punctuation carries no timing information, so only spoken words
        # move the phrase boundaries.
        if item["type"] == "pronunciation":
            if "start_time" not in phrase:
                phrase["start_time"] = getTimeCode( float(item["start_time"]) )
            # Updated on every word (including the first one), so the phrase
            # always ends with its last spoken word and a one-word phrase
            # still has an end time.
            phrase["end_time"] = getTimeCode( float(item["end_time"]) )

        # in either case, append the word to the phrase...
        phrase["words"].append(item['alternatives'][0]["content"])

        # now add the phrase to the phrases and start a new one
        if len(phrase["words"]) == items_per_phrase:
            _storePhrase(phrases, phrase)
            phrase = {'words': []}

    # Keep the trailing words when the item count is not a multiple of
    # items_per_phrase; without this the end of the transcript is lost.
    _storePhrase(phrases, phrase)

    for p in phrases:
        text = ' '.join(p['words'])
        # Joining put a space before each punctuation mark; remove it.
        p['words'] = text.replace(' ,', ',').replace(' ?', '?').replace(' .', '.').replace(' !', '!')
    return phrases
def getTimeCode( seconds ):
    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The timestamp string, e.g. ``61.5`` -> ``"00:01:01,500"``.
    """
    # Convert to whole milliseconds once and do the rest in integers:
    # splitting the float directly loses values to representation error
    # (1.001 would yield 0 ms, 61.5 would yield 0 s).
    total_millis = int(round(seconds * 1000))
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    # Carry minutes into hours so offsets past one hour stay valid.
    hours, mins = divmod(total_minutes, 60)
    return "%02d:%02d:%02d,%03d" % (hours, mins, secs, millis)
if __name__ == '__main__':
    # Command-line entry point: read the transcript JSON named by the first
    # argument and write the subtitles to the file named by the second.
    # Checked explicitly because an assert is stripped under `python -O`.
    if len(sys.argv) != 3:
        sys.exit('usage: python json_to_srt.py <json_filepath> <output_srt>')
    json_filename, output_filename = sys.argv[1:]

    # Read and write as UTF-8 rather than the platform default encoding, so
    # non-ASCII transcript text survives on every system.
    with open(json_filename, encoding='utf-8') as fd:
        data = json.load(fd)

    phrases = getPhrasesFromTranscript(data)

    with open(output_filename, 'w', encoding='utf-8') as fd:
        # SRT cues are numbered sequentially starting at 1.
        for i, content in enumerate(phrases, start=1):
            fd.write(f'{i}\n')
            fd.write(f'{content["start_time"]} --> {content["end_time"]}\n')
            fd.write(f'{content["words"]}\n\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment