Azure text-to-speech demo
#!/usr/bin/env python3
# coding: utf-8
"""
Speech synthesis samples for the Microsoft Cognitive Services Speech SDK
https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/speech_synthesis_sample.py
"""
import os

import azure.cognitiveservices.speech as speechsdk

# Set up the subscription info for the Speech Service:
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'),
                                       region=os.environ.get('SPEECH_REGION'))
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)

# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts#prebuilt-neural-voices
speech_config.speech_synthesis_voice_name = 'zh-CN-YunfengNeural'


def speech_synthesis_to_file(text, file_name="outputaudio.wav"):
    """Performs speech synthesis and saves the audio to a file."""
    # Creates a speech synthesizer using the file as audio output.
    file_config = speechsdk.audio.AudioOutputConfig(filename=file_name)
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs
    # os.path.splitext() returns a (root, ext) tuple; compare against the extension part only.
    ext = os.path.splitext(file_name)[1]
    if ext == ".mp3":
        speech_config.set_speech_synthesis_output_format(
            speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=file_config)
    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}], and the audio was saved to [{}]".format(text, file_name))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
# Chinese sample text to be read by the zh-CN voice: a short blurb introducing DHLCF,
# a dynamic hypergraph collaborative filtering model from Alibaba (CIKM 2022), and its
# reported NDCG@10 gains on the Yelp, Gowalla and LastFM-2K datasets.
text = '''众所周知,超图结构有助于图中节点的高阶关系建模,并且有利于建立节点的多种关系,受到众多研究者青睐。然而,与普通图网络类似,超图中静态的启发式拓扑结构与现实中动态演变的图节点关系相悖,限制了超图的学习效果。
针对上述问题,我们来看一篇阿里发表在CIKM2022上发表的文章,动态超图协同过滤。文中提出了一种可微的轻量级多层超图学习器,它可以在训练过程中在不同的层动态地学习超图结构。
论文所提模型DHLCF在Yelp, Gowalla和LastFM-2K数据集上针对NDCG@10上分别取得了14.91%、14.67%和25.67%的改进。
'''
filename = 'tts_demo.wav' | |
speech_synthesis_to_file(text, filename)
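Note that the audio_config created near the top of the script targets the default speaker but is never used by speech_synthesis_to_file. A minimal sketch of how it could be wired up to play the synthesized speech directly through the speaker, assuming the same speech_config and the standard SpeechSynthesizer API (speech_synthesis_to_speaker is a name introduced here for illustration):

def speech_synthesis_to_speaker(text):
    """Plays synthesized speech through the default speaker (sketch)."""
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized to speaker for text [{}]".format(text))

# Example usage:
# speech_synthesis_to_speaker(text)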