Created
July 25, 2023 14:50
-
-
Save ddrscott/8d7f88727b9d9bd6a416cb5b9fb1b507 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Text-to-speech demo: synthesize a sentence with SpeechBrain's pretrained
# Tacotron2 (text -> mel spectrogram) and HiFi-GAN (mel spectrogram -> waveform),
# then play the result inline in a Jupyter/IPython session.
import torchaudio  # not used below; kept from the original script
from IPython.display import Audio
from speechbrain.pretrained import HIFIGAN
from speechbrain.pretrained import Tacotron2

# Playback rate handed to the audio widget. The original script hard-codes this
# value; presumably it matches the output rate of the LJSpeech-trained models.
SAMPLE_RATE = 22050

# Initialize TTS (Tacotron2) and vocoder (HiFi-GAN).
# `savedir` is the local directory where the downloaded model files are cached.
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
)

# Run the TTS model (text -> mel spectrogram).
mel_output, mel_length, alignment = tacotron2.encode_text(
    "This is an open-source toolkit for the development of speech technologies."
)

# Run the vocoder (spectrogram -> waveform).
waveforms = hifi_gan.decode_batch(mel_output)

# Drop the singleton dimensions and hand IPython a plain NumPy array, which is
# the input type `Audio` documents; detach/cpu make this safe regardless of the
# device the models ran on. As the final expression of a notebook cell, this
# renders an audio player.
Audio(waveforms.squeeze().detach().cpu().numpy(), rate=SAMPLE_RATE)
Author
ddrscott
commented
Jul 25, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment