Last active
September 10, 2024 19:50
-
-
Save xjohjrdy/11b13c9d9f3a2d84b1f22148c429b163 to your computer and use it in GitHub Desktop.
返回的音频没有文件头,所以我随便加了一个文件头,但显示的音频时长会有问题。如果播放器不能正常播放,可以使用 Chrome 播放。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding:utf-8 -*- | |
import sys | |
reload(sys) | |
sys.setdefaultencoding('utf-8') | |
from ws4py.client.threadedclient import WebSocketClient | |
import binascii | |
class WSClient(WebSocketClient):
    """WebSocket client that streams a speech-synthesis response into a file.

    On connect it sends a ``speech.config`` message followed by an SSML
    request wrapping ``text``. Every incoming message containing the
    ``Path:audio`` marker has the bytes after that marker appended to the
    output file. A message containing ``turn.end`` closes the connection
    and the file.

    NOTE(review): the constructor writes a RIFF/WAVE header whose size
    fields are all zero, while the config message requests
    ``audio-24khz-48kbitrate-mono-mp3``. The header is kept for backward
    compatibility; confirm whether it is still wanted.

    :param url: WebSocket endpoint to connect to.
    :param text: Text spliced verbatim into the SSML ``<prosody>`` element.
    :param filename: Path of the output file; it is truncated on open.
    """

    # Payloads are compared as bytes so the same code works whether the
    # native ``str`` type is bytes (Python 2) or text (Python 3).
    _TURN_END_MARKER = b'turn.end'
    _AUDIO_MARKER = b'Path:audio\r\n'

    def __init__(self, url, text, filename):
        self.fp = open(filename, 'wb')
        try:
            self.fp.write(binascii.unhexlify('524946460000000057415645666d74201000000001000200803e000000fa0000040010006461746100000000'))
            self.text = text
            super(WSClient, self).__init__(url)
        except Exception:
            # Do not leak the file handle when construction fails.
            self.fp.close()
            raise

    def opened(self):
        """Send the synthesis configuration, then the SSML request."""
        self.send('Content-Type:application/json; charset=utf-8\r\n\r\nPath:speech.config\r\n\r\n{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"true"},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}\r\n')
        # NOTE(review): self.text is inserted without XML escaping, so text
        # containing '<' or '&' yields malformed SSML. Left as-is in case
        # callers pass SSML markup on purpose.
        self.send("X-RequestId:fe83fbefb15c7739fe674d9f3e81d38f\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)'><prosody pitch='+0Hz' rate ='+0%' volume='+0%'>"+self.text+"</prosody></voice></speak>\r\n")

    def received_message(self, m):
        """Append audio payloads to the file; stop when the turn ends.

        :param m: Incoming message; only its ``data`` attribute is read.
        """
        payload = m.data
        if self._TURN_END_MARKER in payload:
            self.close()
            self.fp.close()
        elif self._AUDIO_MARKER in payload:
            # Split once only: the binary audio after the marker may itself
            # contain the marker bytes and must not be truncated there.
            self.fp.write(payload.split(self._AUDIO_MARKER, 1)[1])
        # Any other message is ignored.

    def closed(self, code, reason=None):
        """Close the output file when the connection ends for any reason.

        Covers the case where the connection drops before a ``turn.end``
        message arrives. Closing an already-closed file is a no-op.
        """
        self.fp.close()
def _main():
    """Synthesize the built-in sample text and save it to /tmp/test.wav."""
    endpoint = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4'
    sample_text = '浙江温州,浙江温州,最大皮革厂,江南皮革厂倒闭了!老板黄鹤吃喝嫖赌,欠下了3.5个亿,带着他的小姨子跑了。我们没有办法,拿着钱包抵工资。原价都是三百多、二百多、一百多的钱包,通通二十块,通通二十块!黄鹤你不是人,我们辛辛苦苦给你干了大半年,你不发工资,你还我血汗钱,还我血汗钱!'
    output_path = '/tmp/test.wav'

    # Connect, then block in the client's loop until the connection ends.
    client = WSClient(endpoint, sample_text, output_path)
    client.connect()
    client.run_forever()


if __name__ == '__main__':
    _main()
line 5, in <module>
from ws4py.client.threadedclient import WebSocketClient
ModuleNotFoundError: No module named 'ws4py'
sudo pip install ws4py
保存为mp3行了
请问大家,开启 wordBoundaryEnabled 后,在哪里可以获取词边界位置信息?
如何重用一个 wss connection
每次wss connect 都要花费不少时间。
能否用同一个 wss 做多次的tts 请求
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
请问如何解决的呢,我的也是16khz 32kbps的