Last active
April 22, 2025 18:53
-
-
Save pathikrit/bc0ecae2189f3f167a298465d3740e55 to your computer and use it in GitHub Desktop.
Cloud TTS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import json | |
import base64 | |
def speak(text: str, voice: str = "Joanna") -> dict:
    """Synthesize ``text`` with Amazon Polly and return audio plus word timings.

    Polly is called twice with the same text and voice: once with
    ``OutputFormat='json'`` to obtain word-level speech marks, and once with
    ``OutputFormat='mp3'`` to obtain the audio itself.

    :param text: Text to synthesize.
    :param voice: Polly ``VoiceId`` to speak with.
    :return: Dict with ``"speech_marks"`` (list of parsed JSON objects, one per
        non-empty line of the speech-mark stream) and ``"b64_audio_uri"``
        (a ``data:audio/mpeg;base64,...`` URI wrapping the MP3 bytes).
    """
    # Function-scope import so this block does not depend on a file-level change.
    from contextlib import closing

    polly_client = boto3.client('polly')

    def synthesize_speech(speech_marks: bool):
        """Run one Polly request; return speech marks or a base64 audio URI."""
        request = {
            "VoiceId": voice,
            "OutputFormat": 'json' if speech_marks else 'mp3',
            "Text": text,
            "Engine": 'generative',
        }
        # Only send SpeechMarkTypes when marks are wanted, instead of passing
        # an empty list alongside the MP3 output format.
        if speech_marks:
            request["SpeechMarkTypes"] = ['word']
        response = polly_client.synthesize_speech(**request)

        # The response body is a streaming HTTP body; close it once consumed
        # so the underlying connection is released.
        with closing(response['AudioStream']) as stream:
            if speech_marks:
                # Speech marks arrive as newline-delimited JSON objects.
                return [json.loads(line.decode('utf-8')) for line in stream.iter_lines() if line]
            audio_bytes = stream.read()
            return f"data:audio/mpeg;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"

    return {
        "speech_marks": synthesize_speech(speech_marks=True),
        "b64_audio_uri": synthesize_speech(speech_marks=False),
    }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.cloud import texttospeech_v1beta1 as tts | |
from google.api_core.client_options import ClientOptions | |
def speak(
    text: str,
    chirp3_voice: str = "Zephyr",
    speed: float = 0.8,
    lang: str = "en-US"
) -> dict:
    """Synthesize ``text`` with a Google Cloud Chirp 3 HD voice.

    :param text: Plain text to synthesize.
    :param chirp3_voice: https://cloud.google.com/text-to-speech/docs/chirp3-hd
    :param speed: Value passed as ``speaking_rate`` in the audio config.
    :param lang: Language code; also used as the prefix of the voice name
        (``<lang>-Chirp3-HD-<chirp3_voice>``).
    :return: Dict with ``"speech_marks"`` (list of
        ``{"mark_name", "time_seconds"}`` dicts built from the response
        timepoints) and ``"b64_audio_uri"`` (a ``data:audio/mpeg;base64,...``
        URI wrapping the MP3 bytes).
    """
    # This file's import block only pulls in the google packages, so `os` and
    # `base64` are imported here to avoid a NameError at call time.
    import base64
    import os

    # The API key is read from the GEMINI_API_KEY environment variable.
    client = tts.TextToSpeechClient(
        client_options=ClientOptions(api_key=os.getenv("GEMINI_API_KEY"))
    )
    request = tts.SynthesizeSpeechRequest(
        input=tts.SynthesisInput(text=text),
        voice=tts.VoiceSelectionParams(
            language_code=lang,
            name="-".join([lang, "Chirp3", "HD", chirp3_voice]),
        ),
        audio_config=tts.AudioConfig(
            audio_encoding=tts.AudioEncoding.MP3,
            speaking_rate=speed,
        ),
        # NOTE(review): SSML_MARK timepoints presumably correspond to <mark>
        # tags in SSML input; this request sends plain text, so
        # `response.timepoints` may come back empty -- confirm.
        enable_time_pointing=[tts.SynthesizeSpeechRequest.TimepointType.SSML_MARK],
    )
    response = client.synthesize_speech(request)
    return {
        "speech_marks": [
            {"mark_name": tp.mark_name, "time_seconds": tp.time_seconds}
            for tp in response.timepoints
        ],
        "b64_audio_uri": f"data:audio/mpeg;base64,{base64.b64encode(response.audio_content).decode('utf-8')}",
    }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment