Created
August 1, 2020 08:40
-
-
Save MattSegal/8b7ac768300101b49708e151c3f28e7d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Synthesizes speech from the input string of text. | |
from reader.services.translate.polly import text_to_speech_mp3 | |
reading = Reading.objects.last() | |
with open('./test.mp3', 'wb') as f: | |
text_to_speech_mp3(reading, f) | |
""" | |
from io import BytesIO | |
import boto3 | |
from pydub import AudioSegment | |
from .transcript import get_sample_parts, get_transcript_parts | |
# Polly client shared by every synthesis request in this module; it is created
# once at import time from a session pinned to the ap-southeast-2 region.
polly = boto3.Session(
    region_name='ap-southeast-2',
).client('polly')

# Voice passed as VoiceId on every synthesize_speech call.
VOICE_NAME = 'Matthew'

# Largest slice of paragraph text sent to Polly in a single request.
MAX_REQUEST_LENGTH = 2500
def text_to_speech_mp3(reading, out_file):
    """
    Convert a reading to speech and write the resulting MP3 to out_file.

    The reading is turned into transcript parts by get_transcript_parts, each
    part is synthesized to MP3 by get_mp3_parts, and the pieces are stitched
    together and exported by write_reading_audio.

    Args:
        reading: the object handed to get_transcript_parts.
        out_file: export destination, passed through to write_reading_audio
            (the module docstring shows a file opened in 'wb' mode).
    """
    write_reading_audio(
        get_mp3_parts(get_transcript_parts(reading)),
        out_file,
    )
def write_reading_audio(mp3_parts, out_file):
    """
    Join MP3 parts into one audio track and export it as MP3.

    Every part is decoded with AudioSegment.from_mp3 and followed by half a
    second of silence, so consecutive parts do not run into each other.

    Args:
        mp3_parts: iterable of MP3 sources accepted by AudioSegment.from_mp3
            (get_mp3_parts supplies BytesIO buffers).
        out_file: destination handed to AudioSegment.export.

    Raises:
        ValueError: if mp3_parts yields no parts, since there would be no
            audio to export.
    """
    audio_segments = [AudioSegment.from_mp3(mp3_part) for mp3_part in mp3_parts]
    if not audio_segments:
        # Previously this case fell through to `None.export(...)` and failed
        # with an unhelpful AttributeError.
        raise ValueError('mp3_parts is empty: there is no audio to export')

    half_sec_pause = AudioSegment.silent(duration=500)

    # A 0.5s pause follows every part. A paragraph that was split into
    # several request-sized batches therefore gets a pause after each batch.
    speech_audio = audio_segments[0] + half_sec_pause
    for audio_segment in audio_segments[1:]:
        speech_audio += audio_segment
        speech_audio += half_sec_pause

    speech_audio.export(out_file, format='mp3')
def get_mp3_parts(paragraphs):
    """
    Synthesize every paragraph to MP3 and return the parts in reading order.

    A paragraph longer than MAX_REQUEST_LENGTH is cut into consecutive
    slices of at most that many characters, and each slice becomes its own
    request and its own entry in the result.

    Args:
        paragraphs: iterable of paragraph texts.

    Returns:
        A list with one text_to_mp3_bytes result per slice, in order.
    """
    # TODO - multi thread this to get better performance
    parts = []
    for paragraph in paragraphs:
        parts.extend(
            text_to_mp3_bytes(text_batch)
            for text_batch in batch(paragraph, MAX_REQUEST_LENGTH)
        )
    return parts
def text_to_mp3_bytes(text):
    """
    Synthesize a single piece of text with Polly and return the MP3 data.

    Args:
        text: the text to speak; at most 5000 characters.

    Returns:
        A BytesIO holding the complete MP3 audio returned by Polly.

    Raises:
        ValueError: if text is longer than 5000 characters.
    """
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    # NOTE(review): Polly's documented per-request limit may be lower than
    # this guard - confirm against the SynthesizeSpeech documentation.
    if len(text) > 5000:
        raise ValueError(
            'text is too long for a single request: '
            '%d characters (maximum 5000)' % len(text)
        )

    response = polly.synthesize_speech(
        VoiceId=VOICE_NAME,
        OutputFormat='mp3',
        Text=text,
    )

    # Read the streamed body fully, then close it so the underlying
    # connection is released even if the read fails.
    audio_stream = response['AudioStream']
    try:
        return BytesIO(audio_stream.read())
    finally:
        audio_stream.close()
def batch(iterable, n=1):
    """
    Yield consecutive slices of a sequence, each at most n items long.

    The input must support len() and slicing (str, list, tuple, ...). The
    final slice holds whatever remains and may be shorter than n.
    """
    total = len(iterable)
    # Slicing stops at the end of the sequence, so the last chunk needs no
    # explicit upper bound.
    yield from (iterable[start:start + n] for start in range(0, total, n))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment