berinhard · November 12, 2021 15:06
diff --git a/json_to_srt.py b/json_to_srt.py
 """
 $ python json_to_srt.py <json_filepath> <output_srt>

 Reference: https://aws.amazon.com/blogs/machine-learning/create-video-subtitles-with-translation-using-machine-learning/
 """
 import sys
 import json


 def getPhrasesFromTranscript( ts ):
     """Split a Transcribe result into subtitle phrases of up to 10 items.

     Walks ``ts['results']['items']`` in order and groups every 10
     consecutive items into one phrase. Punctuation items count towards the
     10, exactly like spoken words.

     Args:
         ts: Parsed transcript JSON. Every item must provide ``type`` and
             ``alternatives[0]['content']``; items whose type is
             ``"pronunciation"`` must also provide ``start_time`` and
             ``end_time`` (anything ``float()`` accepts).

     Returns:
         A list of dicts with the keys ``words`` (the phrase text, with the
         space in front of ``,``, ``?``, ``.`` and ``!`` removed),
         ``start_time`` and ``end_time`` (strings built by ``getTimeCode``).
         The last phrase may hold fewer than 10 items.
     """
     items = ts['results']['items']
     phrases = []
     phrase = {'words': []}

     print("==> Creating phrases from transcript...")

     def flush(current):
         # Store a finished phrase. A phrase without any spoken word has no
         # timing, so its punctuation is attached to the previous phrase
         # instead of producing a cue that cannot be written; if there is no
         # previous phrase it is dropped.
         if not current['words']:
             return
         if 'start_time' in current:
             phrases.append(current)
         elif phrases:
             phrases[-1]['words'].extend(current['words'])

     for item in items:
         # Punctuation carries no timing information, so only spoken words
         # move the phrase boundaries: the first one fixes start_time and
         # every one (including the first) updates end_time, which keeps a
         # single-word phrase from ending up without an end_time.
         if item["type"] == "pronunciation":
             if "start_time" not in phrase:
                 phrase["start_time"] = getTimeCode( float(item["start_time"]) )
             phrase["end_time"] = getTimeCode( float(item["end_time"]) )

         phrase["words"].append(item['alternatives'][0]["content"])

         if len(phrase["words"]) == 10:
             flush(phrase)
             phrase = {'words': []}

     # Keep the trailing items that did not fill a whole phrase; without this
     # the end of the transcript would be missing from the subtitles.
     flush(phrase)

     for p in phrases:
         text = ' '.join(p['words'])
         p['words'] = text.replace(' ,', ',').replace(' ?', '?').replace(' .', '.').replace(' !', '!')
     return phrases


 def getTimeCode( seconds ):
     """Convert a number of seconds into an SRT timestamp ``HH:MM:SS,mmm``.

     Args:
         seconds: Non-negative offset in seconds (int or float).

     Returns:
         The formatted timestamp, e.g. ``"01:01:01,250"`` for ``3661.25``.
     """
     # Work in whole milliseconds: taking the fractional part of the float
     # and truncating it loses a millisecond for values such as 1.001.
     total_ms = int(round(seconds * 1000))
     total_seconds, millis = divmod(total_ms, 1000)
     total_minutes, secs = divmod(total_seconds, 60)
     # Carry minutes into the hour field so that offsets of one hour or more
     # do not produce a minutes value above 59.
     hours, mins = divmod(total_minutes, 60)
     return "%02d:%02d:%02d,%03d" % (hours, mins, secs, millis)


 if __name__ == '__main__':
     # Command-line entry point: read a transcript JSON file and write the
     # phrases built from it as an SRT subtitle file.

     # Check the arguments explicitly; an assert is removed under
     # `python -O` and gives the user no hint about the expected usage.
     if len(sys.argv) != 3:
         sys.exit(f'usage: python {sys.argv[0]} <json_filepath> <output_srt>')
     json_filename, output_filename = sys.argv[1:]

     # Read and write as UTF-8 so that non-ASCII transcripts do not depend on
     # the platform's default encoding.
     with open(json_filename, encoding='utf-8') as fd:
         data = json.load(fd)

     phrases = getPhrasesFromTranscript(data)

     with open(output_filename, 'w', encoding='utf-8') as fd:
         # SRT cues are numbered sequentially starting at 1, not 0.
         for i, content in enumerate(phrases, start=1):
             fd.write(f'{i}\n')
             fd.write(f'{content["start_time"]} --> {content["end_time"]}\n')
             fd.write(f'{content["words"]}\n\n')
	"""
	$ python json_to_srt.py <json_filepath> <output_srt>

	Reference: https://aws.amazon.com/blogs/machine-learning/create-video-subtitles-with-translation-using-machine-learning/
	"""
	import sys
	import json


	def getPhrasesFromTranscript( ts ):
	    """Split a Transcribe result into subtitle phrases of up to 10 items.

	    Walks ``ts['results']['items']`` in order and groups every 10
	    consecutive items into one phrase. Punctuation items count towards the
	    10, exactly like spoken words.

	    Args:
	        ts: Parsed transcript JSON. Every item must provide ``type`` and
	            ``alternatives[0]['content']``; items whose type is
	            ``"pronunciation"`` must also provide ``start_time`` and
	            ``end_time`` (anything ``float()`` accepts).

	    Returns:
	        A list of dicts with the keys ``words`` (the phrase text, with the
	        space in front of ``,``, ``?``, ``.`` and ``!`` removed),
	        ``start_time`` and ``end_time`` (strings built by ``getTimeCode``).
	        The last phrase may hold fewer than 10 items.
	    """
	    items = ts['results']['items']
	    phrases = []
	    phrase = {'words': []}

	    print("==> Creating phrases from transcript...")

	    def flush(current):
	        # Store a finished phrase. A phrase without any spoken word has no
	        # timing, so its punctuation is attached to the previous phrase
	        # instead of producing a cue that cannot be written; if there is no
	        # previous phrase it is dropped.
	        if not current['words']:
	            return
	        if 'start_time' in current:
	            phrases.append(current)
	        elif phrases:
	            phrases[-1]['words'].extend(current['words'])

	    for item in items:
	        # Punctuation carries no timing information, so only spoken words
	        # move the phrase boundaries: the first one fixes start_time and
	        # every one (including the first) updates end_time, which keeps a
	        # single-word phrase from ending up without an end_time.
	        if item["type"] == "pronunciation":
	            if "start_time" not in phrase:
	                phrase["start_time"] = getTimeCode( float(item["start_time"]) )
	            phrase["end_time"] = getTimeCode( float(item["end_time"]) )

	        phrase["words"].append(item['alternatives'][0]["content"])

	        if len(phrase["words"]) == 10:
	            flush(phrase)
	            phrase = {'words': []}

	    # Keep the trailing items that did not fill a whole phrase; without this
	    # the end of the transcript would be missing from the subtitles.
	    flush(phrase)

	    for p in phrases:
	        text = ' '.join(p['words'])
	        p['words'] = text.replace(' ,', ',').replace(' ?', '?').replace(' .', '.').replace(' !', '!')
	    return phrases


	def getTimeCode( seconds ):
	    """Convert a number of seconds into an SRT timestamp ``HH:MM:SS,mmm``.

	    Args:
	        seconds: Non-negative offset in seconds (int or float).

	    Returns:
	        The formatted timestamp, e.g. ``"01:01:01,250"`` for ``3661.25``.
	    """
	    # Work in whole milliseconds: taking the fractional part of the float
	    # and truncating it loses a millisecond for values such as 1.001.
	    total_ms = int(round(seconds * 1000))
	    total_seconds, millis = divmod(total_ms, 1000)
	    total_minutes, secs = divmod(total_seconds, 60)
	    # Carry minutes into the hour field so that offsets of one hour or more
	    # do not produce a minutes value above 59.
	    hours, mins = divmod(total_minutes, 60)
	    return "%02d:%02d:%02d,%03d" % (hours, mins, secs, millis)


	if __name__ == '__main__':
	    # Command-line entry point: read a transcript JSON file and write the
	    # phrases built from it as an SRT subtitle file.

	    # Check the arguments explicitly; an assert is removed under
	    # `python -O` and gives the user no hint about the expected usage.
	    if len(sys.argv) != 3:
	        sys.exit(f'usage: python {sys.argv[0]} <json_filepath> <output_srt>')
	    json_filename, output_filename = sys.argv[1:]

	    # Read and write as UTF-8 so that non-ASCII transcripts do not depend on
	    # the platform's default encoding.
	    with open(json_filename, encoding='utf-8') as fd:
	        data = json.load(fd)

	    phrases = getPhrasesFromTranscript(data)

	    with open(output_filename, 'w', encoding='utf-8') as fd:
	        # SRT cues are numbered sequentially starting at 1, not 0.
	        for i, content in enumerate(phrases, start=1):
	            fd.write(f'{i}\n')
	            fd.write(f'{content["start_time"]} --> {content["end_time"]}\n')
	            fd.write(f'{content["words"]}\n\n')