MattSegal · August 1, 2020 08:40
diff --git a/tts.py b/tts.py
 """
 Synthesizes speech (MP3 audio) for a reading's transcript using AWS Polly.

 Example usage:

     from reader.services.translate.polly import text_to_speech_mp3
     reading = Reading.objects.last()
     with open('./test.mp3', 'wb') as f:
         text_to_speech_mp3(reading, f)
 """
 from io import BytesIO

 import boto3
 from pydub import AudioSegment

 from .transcript import get_sample_parts, get_transcript_parts

 # Shared AWS Polly client, created once when this module is imported and
 # bound to the ap-southeast-2 region.
 polly = boto3.Session(region_name='ap-southeast-2').client('polly')

 # Polly voice passed as `VoiceId` on every synthesize_speech request.
 VOICE_NAME = 'Matthew'
 # Maximum number of characters sent per request; paragraphs longer than
 # this are split into slices of at most this size by get_mp3_parts.
 MAX_REQUEST_LENGTH = 2500

 def text_to_speech_mp3(reading, out_file):
    """
    Render the transcript of `reading` as a single MP3 written to `out_file`.

    The transcript parts come from get_transcript_parts, are synthesized
    by get_mp3_parts, and are then joined and exported by
    write_reading_audio.

    reading: whatever get_transcript_parts accepts
        (presumably a Reading model instance - confirm against caller).
    out_file: export target handed unchanged to write_reading_audio,
        e.g. a file opened in 'wb' mode.
    """
    write_reading_audio(
        get_mp3_parts(get_transcript_parts(reading)),
        out_file,
    )


 def write_reading_audio(mp3_parts, out_file):
    """
    Concatenate MP3 parts into one audio track and export it as MP3.

    A 0.5 second pause is appended after every part, including the last.

    mp3_parts: iterable of MP3 sources readable by AudioSegment.from_mp3,
        such as the BytesIO objects produced by get_mp3_parts.
    out_file: target passed to AudioSegment.export (path or writable
        binary file object).

    Raises ValueError when `mp3_parts` is empty, because there is no audio
    to export.
    """
    # Materialise first so emptiness can be checked even for generators.
    parts = list(mp3_parts)
    if not parts:
        raise ValueError('Cannot write reading audio: no MP3 parts given')

    half_sec_pause = AudioSegment.silent(duration=500)
    speech_audio = None
    for mp3_part in parts:
        audio_segment = AudioSegment.from_mp3(mp3_part)
        # Compare against None explicitly: an AudioSegment's truthiness
        # depends on its length, not on whether it has been assigned.
        if speech_audio is None:
            speech_audio = audio_segment
        else:
            speech_audio += audio_segment

        # Add a 0.5s pause after every part
        speech_audio += half_sec_pause

    speech_audio.export(out_file, format='mp3')


 def get_mp3_parts(paragraphs):
    """
    Synthesize every paragraph and return the resulting MP3 parts in order.

    Each paragraph is cut into slices of at most MAX_REQUEST_LENGTH
    characters by batch, and each slice is synthesized with
    text_to_mp3_bytes. The result is one flat list, so a long paragraph
    contributes several consecutive parts.
    """
    # TODO - multi thread this to get better performance
    return [
        text_to_mp3_bytes(chunk)
        for paragraph in paragraphs
        for chunk in batch(paragraph, MAX_REQUEST_LENGTH)
    ]


 def text_to_mp3_bytes(text):
    """
    Synthesize `text` with Polly and return the MP3 data as a BytesIO.

    text: string of at most 5000 characters.
        NOTE(review): callers in this module batch text to
        MAX_REQUEST_LENGTH; confirm that 5000 matches Polly's documented
        per-request limit.

    Raises ValueError when `text` is longer than 5000 characters.
    """
    from contextlib import closing

    # Explicit check rather than `assert`, which is stripped under -O.
    if len(text) > 5000:
        raise ValueError(
            'Text is too long for one request: %d characters (max 5000)'
            % len(text)
        )

    response = polly.synthesize_speech(
        VoiceId=VOICE_NAME,
        OutputFormat='mp3',
        Text=text,
    )
    # Close the streaming body once it is fully read so the underlying
    # HTTP connection is released.
    with closing(response['AudioStream']) as stream:
        return BytesIO(stream.read())


 def batch(iterable, n=1):
    """
    Yield consecutive slices of `iterable`, each at most `n` items long.

    `iterable` must support len() and slicing (e.g. str or list). The
    final slice is shorter when the length is not a multiple of `n`.
    """
    size = len(iterable)
    starts = range(0, size, n)
    yield from (
        iterable[start : min(start + n, size)]
        for start in starts
    )
	"""
	Synthesizes speech (MP3 audio) for a reading's transcript using AWS Polly.

	Example usage:

	    from reader.services.translate.polly import text_to_speech_mp3
	    reading = Reading.objects.last()
	    with open('./test.mp3', 'wb') as f:
	        text_to_speech_mp3(reading, f)
	"""
	from io import BytesIO

	import boto3
	from pydub import AudioSegment

	from .transcript import get_sample_parts, get_transcript_parts

	# Shared AWS Polly client, created once when this module is imported and
	# bound to the ap-southeast-2 region.
	polly = boto3.Session(region_name='ap-southeast-2').client('polly')

	# Polly voice passed as `VoiceId` on every synthesize_speech request.
	VOICE_NAME = 'Matthew'
	# Maximum number of characters sent per request; paragraphs longer than
	# this are split into slices of at most this size by get_mp3_parts.
	MAX_REQUEST_LENGTH = 2500

	def text_to_speech_mp3(reading, out_file):
	    """
	    Render the transcript of `reading` as a single MP3 written to `out_file`.

	    The transcript parts come from get_transcript_parts, are synthesized
	    by get_mp3_parts, and are then joined and exported by
	    write_reading_audio.

	    reading: whatever get_transcript_parts accepts
	        (presumably a Reading model instance - confirm against caller).
	    out_file: export target handed unchanged to write_reading_audio,
	        e.g. a file opened in 'wb' mode.
	    """
	    write_reading_audio(
	        get_mp3_parts(get_transcript_parts(reading)),
	        out_file,
	    )


	def write_reading_audio(mp3_parts, out_file):
	    """
	    Concatenate MP3 parts into one audio track and export it as MP3.

	    A 0.5 second pause is appended after every part, including the last.

	    mp3_parts: iterable of MP3 sources readable by AudioSegment.from_mp3,
	        such as the BytesIO objects produced by get_mp3_parts.
	    out_file: target passed to AudioSegment.export (path or writable
	        binary file object).

	    Raises ValueError when `mp3_parts` is empty, because there is no audio
	    to export.
	    """
	    # Materialise first so emptiness can be checked even for generators.
	    parts = list(mp3_parts)
	    if not parts:
	        raise ValueError('Cannot write reading audio: no MP3 parts given')

	    half_sec_pause = AudioSegment.silent(duration=500)
	    speech_audio = None
	    for mp3_part in parts:
	        audio_segment = AudioSegment.from_mp3(mp3_part)
	        # Compare against None explicitly: an AudioSegment's truthiness
	        # depends on its length, not on whether it has been assigned.
	        if speech_audio is None:
	            speech_audio = audio_segment
	        else:
	            speech_audio += audio_segment

	        # Add a 0.5s pause after every part
	        speech_audio += half_sec_pause

	    speech_audio.export(out_file, format='mp3')


	def get_mp3_parts(paragraphs):
	    """
	    Synthesize every paragraph and return the resulting MP3 parts in order.

	    Each paragraph is cut into slices of at most MAX_REQUEST_LENGTH
	    characters by batch, and each slice is synthesized with
	    text_to_mp3_bytes. The result is one flat list, so a long paragraph
	    contributes several consecutive parts.
	    """
	    # TODO - multi thread this to get better performance
	    return [
	        text_to_mp3_bytes(chunk)
	        for paragraph in paragraphs
	        for chunk in batch(paragraph, MAX_REQUEST_LENGTH)
	    ]


	def text_to_mp3_bytes(text):
	    """
	    Synthesize `text` with Polly and return the MP3 data as a BytesIO.

	    text: string of at most 5000 characters.
	        NOTE(review): callers in this module batch text to
	        MAX_REQUEST_LENGTH; confirm that 5000 matches Polly's documented
	        per-request limit.

	    Raises ValueError when `text` is longer than 5000 characters.
	    """
	    from contextlib import closing

	    # Explicit check rather than `assert`, which is stripped under -O.
	    if len(text) > 5000:
	        raise ValueError(
	            'Text is too long for one request: %d characters (max 5000)'
	            % len(text)
	        )

	    response = polly.synthesize_speech(
	        VoiceId=VOICE_NAME,
	        OutputFormat='mp3',
	        Text=text,
	    )
	    # Close the streaming body once it is fully read so the underlying
	    # HTTP connection is released.
	    with closing(response['AudioStream']) as stream:
	        return BytesIO(stream.read())


	def batch(iterable, n=1):
	    """
	    Yield consecutive slices of `iterable`, each at most `n` items long.

	    `iterable` must support len() and slicing (e.g. str or list). The
	    final slice is shorter when the length is not a multiple of `n`.
	    """
	    size = len(iterable)
	    starts = range(0, size, n)
	    yield from (
	        iterable[start : min(start + n, size)]
	        for start in starts
	    )