Skip to content

Instantly share code, notes, and snippets.

@MattSegal
Created August 1, 2020 08:40
Show Gist options
  • Save MattSegal/8b7ac768300101b49708e151c3f28e7d to your computer and use it in GitHub Desktop.
Save MattSegal/8b7ac768300101b49708e151c3f28e7d to your computer and use it in GitHub Desktop.
"""
Synthesizes speech (MP3 audio) from the transcript of a reading using Amazon Polly.

Example usage:

    from reader.services.translate.polly import text_to_speech_mp3

    reading = Reading.objects.last()
    with open('./test.mp3', 'wb') as f:
        text_to_speech_mp3(reading, f)
"""
from io import BytesIO
import boto3
from pydub import AudioSegment
from .transcript import get_sample_parts, get_transcript_parts
# Amazon Polly client, created once at import time and pinned to the
# ap-southeast-2 region.
polly = boto3.Session(region_name='ap-southeast-2').client('polly')
# Polly voice passed as VoiceId on every synthesis request.
VOICE_NAME = 'Matthew'
# Maximum number of characters sent to Polly in one request; longer
# paragraphs are split into slices of at most this size.
MAX_REQUEST_LENGTH = 2500
def text_to_speech_mp3(reading, out_file):
    """
    Render a reading's transcript as speech and write it to ``out_file`` as MP3.

    The transcript parts of ``reading`` are synthesised into MP3 parts, which
    are then joined and exported to ``out_file`` (a writable binary file
    object, as in the module-level usage example).
    """
    # Pipeline: transcript text -> per-part MP3 buffers -> one exported MP3.
    write_reading_audio(
        get_mp3_parts(get_transcript_parts(reading)),
        out_file,
    )
def write_reading_audio(mp3_parts, out_file):
    """
    Join MP3 parts into a single track and export it to ``out_file`` as MP3.

    Each part is decoded with ``AudioSegment.from_mp3`` and appended in order,
    followed by half a second of silence.

    Args:
        mp3_parts: iterable of MP3 sources accepted by
            ``AudioSegment.from_mp3`` (this module passes ``BytesIO`` buffers).
        out_file: destination handed to ``AudioSegment.export``.

    Raises:
        ValueError: if ``mp3_parts`` is empty, since there is nothing to export.
    """
    # Materialise first so emptiness can be detected even for generators.
    mp3_parts = list(mp3_parts)
    if not mp3_parts:
        raise ValueError('Cannot write reading audio: no MP3 parts were provided')

    half_sec_pause = AudioSegment.silent(duration=500)
    speech_audio = None
    for mp3_part in mp3_parts:
        audio_segment = AudioSegment.from_mp3(mp3_part)
        # Compare against None explicitly instead of relying on the
        # truthiness of an AudioSegment.
        if speech_audio is None:
            speech_audio = audio_segment
        else:
            speech_audio += audio_segment

        # Add a 0.5s pause after every part (including the last one).
        speech_audio += half_sec_pause

    speech_audio.export(out_file, format='mp3')
def get_mp3_parts(paragraphs):
    """
    Synthesise every paragraph into one or more MP3 buffers.

    Each paragraph is split into slices of at most ``MAX_REQUEST_LENGTH``
    characters, and each slice is sent to ``text_to_mp3_bytes``.

    Args:
        paragraphs: iterable of paragraph strings.

    Returns:
        A list with the result of ``text_to_mp3_bytes`` for every slice, in
        paragraph order. Empty input gives an empty list.
    """
    # TODO - multi thread this to get better performance
    parts = []
    for paragraph in paragraphs:
        parts.extend(
            text_to_mp3_bytes(text_batch)
            for text_batch in batch(paragraph, MAX_REQUEST_LENGTH)
        )
    return parts
def text_to_mp3_bytes(text):
    """
    Synthesise ``text`` with Polly and return the MP3 audio in a ``BytesIO``.

    Args:
        text: the text to speak; at most 5000 characters.

    Returns:
        A ``BytesIO`` holding the full contents of the response's
        ``AudioStream``.

    Raises:
        ValueError: if ``text`` is longer than 5000 characters.
    """
    # NOTE(review): Polly's documented per-request limit appears to be lower
    # than 5000 (3000 billed characters) - confirm against the API reference.
    max_length = 5000
    # Raise explicitly instead of asserting: assert statements are stripped
    # when Python runs with -O, which would silently drop this check.
    if len(text) > max_length:
        raise ValueError(
            f'Text is too long to synthesize: {len(text)} characters '
            f'(maximum is {max_length})'
        )

    response = polly.synthesize_speech(
        VoiceId=VOICE_NAME,
        OutputFormat='mp3',
        Text=text,
    )
    stream = response['AudioStream']
    try:
        return BytesIO(stream.read())
    finally:
        # Close the response body once it has been read.
        stream.close()
def batch(iterable, n=1):
    """
    Yield consecutive slices of ``iterable``, each at most ``n`` items long.

    Args:
        iterable: a sliceable sequence with a length (e.g. ``str`` or ``list``).
        n: maximum size of each slice; must be at least 1.

    Yields:
        ``iterable[i:i + n]`` for ``i = 0, n, 2n, ...``; the final slice may be
        shorter. An empty sequence yields nothing.

    Raises:
        ValueError: if ``n`` is less than 1. Because this is a generator, the
            error surfaces when iteration starts.
    """
    if n < 1:
        # A negative step would silently yield nothing and drop all the data.
        raise ValueError(f'batch size must be at least 1, got {n}')
    # Slicing clamps at the end of the sequence, so no explicit bound is needed.
    for start in range(0, len(iterable), n):
        yield iterable[start:start + n]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment