ddrscott · July 25, 2023 14:50 · ddrscott · Jul 25, 2023
diff --git a/tts-speech.py b/tts-speech.py
 import torchaudio
 from speechbrain.pretrained import Tacotron2
 from speechbrain.pretrained import HIFIGAN

 # Initialize the TTS model (Tacotron2) and the vocoder (HiFi-GAN), each from
 # its own pretrained source and with its own savedir.
 tacotron2 = Tacotron2.from_hparams(
     source="speechbrain/tts-tacotron2-ljspeech",
     savedir="tmpdir_tts",
 )
 hifi_gan = HIFIGAN.from_hparams(
     source="speechbrain/tts-hifigan-ljspeech",
     savedir="tmpdir_vocoder",
 )

 # Run the TTS step: encode_text returns three values; only mel_output is
 # consumed further down.
 prompt = "This is an open-source toolkit for the development of speech technologies."
 encoded = tacotron2.encode_text(prompt)
 mel_output, mel_length, alignment = encoded

 # Run the vocoder (spectrogram-to-waveform).
 waveforms = hifi_gan.decode_batch(mel_output)

 # Build the audio object from the squeezed waveforms. Kept as the final bare
 # expression -- presumably so a notebook cell displays it (confirm usage).
 from IPython.display import Audio
 playback_rate = 22050
 Audio(waveforms.squeeze(), rate=playback_rate)
	import torchaudio
	from speechbrain.pretrained import Tacotron2
	from speechbrain.pretrained import HIFIGAN

	# Initialize the TTS model (Tacotron2) and the vocoder (HiFi-GAN), each from
	# its own pretrained source and with its own savedir.
	tacotron2 = Tacotron2.from_hparams(
	    source="speechbrain/tts-tacotron2-ljspeech",
	    savedir="tmpdir_tts",
	)
	hifi_gan = HIFIGAN.from_hparams(
	    source="speechbrain/tts-hifigan-ljspeech",
	    savedir="tmpdir_vocoder",
	)

	# Run the TTS step: encode_text returns three values; only mel_output is
	# consumed further down.
	prompt = "This is an open-source toolkit for the development of speech technologies."
	encoded = tacotron2.encode_text(prompt)
	mel_output, mel_length, alignment = encoded

	# Run the vocoder (spectrogram-to-waveform).
	waveforms = hifi_gan.decode_batch(mel_output)

	# Build the audio object from the squeezed waveforms. Kept as the final bare
	# expression -- presumably so a notebook cell displays it (confirm usage).
	from IPython.display import Audio
	playback_rate = 22050
	Audio(waveforms.squeeze(), rate=playback_rate)