the-code-rider · November 2, 2025 13:55
diff --git a/cartesia-tts.py b/cartesia-tts.py
 import os
 import requests
 from dotenv import load_dotenv
 import numpy as np
 import soundfile as sf

 # Load environment variables from the .env file so the os.getenv() lookups
 # in get_headers() can read CARTESIA_API_TOKEN and CARTESIA_VERSION.
 load_dotenv()

 # Endpoint URL that synthesize_speech() sends its TTS POST request to.
 CARTESIA_TTS_URL = "https://api.cartesia.ai/tts/bytes"


 def get_headers() -> dict:
    """Build the HTTP headers for a Cartesia API request.

    Two environment variables are read each time the function is called:

    * ``CARTESIA_VERSION`` is sent as ``Cartesia-Version`` and falls back to
      ``"2024-01-01"`` when unset.
    * ``CARTESIA_API_TOKEN`` is sent as a bearer token and is required.

    Returns:
        A dict mapping header names to their string values.

    Raises:
        RuntimeError: If ``CARTESIA_API_TOKEN`` is unset or empty.
    """
    api_token = os.getenv("CARTESIA_API_TOKEN")
    if not api_token:
        # Fail fast: formatting a missing token into the f-string would
        # produce the literal header value "Bearer None".
        raise RuntimeError(
            "CARTESIA_API_TOKEN is not set; define it in the environment or .env file"
        )
    return {
        "Cartesia-Version": os.getenv("CARTESIA_VERSION", "2024-01-01"),
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }


 def build_payload(model_id: str, transcript: str, voice_id: str, mode: str = "id"):
    """Assemble the JSON-serialisable body for a TTS request.

    Only the model, the text and the voice come from the caller; every other
    field is a fixed value.

    Args:
        model_id: Value stored under ``"model_id"``.
        transcript: Text stored under ``"transcript"``.
        voice_id: Value stored under ``"voice" -> "id"``.
        mode: Value stored under ``"voice" -> "mode"`` (defaults to ``"id"``).

    Returns:
        A new dict holding the request payload.
    """
    voice_spec = {"mode": mode, "id": voice_id}
    generation_settings = {"volume": 1, "speed": 1, "emotion": "neutral"}
    # Same encoding and sample rate that pcm_f32le_raw_to_wav() decodes by default.
    audio_format = {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 8000}

    # Keys are inserted in the same order as the serialized request body.
    payload = {"model_id": model_id, "transcript": transcript}
    payload["voice"] = voice_spec
    payload["language"] = "en"
    payload["generation_config"] = generation_settings
    payload["output_format"] = audio_format
    payload["save"] = False
    payload["speed"] = "normal"
    return payload


 def synthesize_speech(
    model_id: str,
    transcript: str,
    voice_id: str,
    mode: str = "id",
    timeout: float = 60.0,
 ):
    """POST a TTS request to ``CARTESIA_TTS_URL`` and return the response body.

    Args:
        model_id: Model identifier forwarded to ``build_payload``.
        transcript: Text to synthesize, forwarded to ``build_payload``.
        voice_id: Voice identifier forwarded to ``build_payload``.
        mode: Voice mode forwarded to ``build_payload`` (defaults to ``"id"``).
        timeout: Value passed to ``requests.post(timeout=...)``. Without a
            timeout the call can block indefinitely on an unresponsive server.

    Returns:
        The raw response content as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    headers = get_headers()
    payload = build_payload(model_id, transcript, voice_id, mode)

    response = requests.post(
        CARTESIA_TTS_URL,
        json=payload,
        headers=headers,
        timeout=timeout,
    )
    # Turn 4xx/5xx answers into exceptions instead of returning an error body.
    response.raise_for_status()

    return response.content


 def pcm_f32le_raw_to_wav(raw: bytes, path: str, sample_rate: int = 8000, channels: int = 1) -> None:
    """Decode raw float32 little-endian samples and write them to ``path``.

    Args:
        raw: Buffer interpreted as little-endian 32-bit floats.
        path: Destination file handed to ``soundfile.write``.
        sample_rate: Sample rate passed to ``soundfile.write`` (default 8000).
        channels: Number of interleaved channels; values above 1 reshape the
            samples into ``(frames, channels)``.
    """
    samples = np.frombuffer(raw, dtype="<f4")
    # A single channel stays one-dimensional; more channels become (N, channels).
    frames = samples.reshape(-1, channels) if channels > 1 else samples
    # subtype="FLOAT" stores the samples as floats rather than integer PCM.
    sf.write(path, frames, samplerate=sample_rate, subtype="FLOAT")


 if __name__ == "__main__":
    # Demo run: synthesize one fixed sentence and store it next to the script.
    demo_text = "You need to wrap the raw samples into a WAV. Two ways:"
    wav_path = 'speech4.wav'

    pcm_bytes = synthesize_speech(
        model_id="sonic-3",
        transcript=demo_text,
        voice_id="cc00e582-ed66-4004-8336-0175b85c85f6",
    )
    # Uses the converter's default sample rate and channel count.
    pcm_f32le_raw_to_wav(pcm_bytes, wav_path)
	import os
	import requests
	from dotenv import load_dotenv
	import numpy as np
	import soundfile as sf

	# Load environment variables from the .env file so the os.getenv() lookups
	# in get_headers() can read CARTESIA_API_TOKEN and CARTESIA_VERSION.
	load_dotenv()

	# Endpoint URL that synthesize_speech() sends its TTS POST request to.
	CARTESIA_TTS_URL = "https://api.cartesia.ai/tts/bytes"


	def get_headers() -> dict:
	    """Build the HTTP headers for a Cartesia API request.

	    Two environment variables are read each time the function is called:

	    * ``CARTESIA_VERSION`` is sent as ``Cartesia-Version`` and falls back to
	      ``"2024-01-01"`` when unset.
	    * ``CARTESIA_API_TOKEN`` is sent as a bearer token and is required.

	    Returns:
	        A dict mapping header names to their string values.

	    Raises:
	        RuntimeError: If ``CARTESIA_API_TOKEN`` is unset or empty.
	    """
	    api_token = os.getenv("CARTESIA_API_TOKEN")
	    if not api_token:
	        # Fail fast: formatting a missing token into the f-string would
	        # produce the literal header value "Bearer None".
	        raise RuntimeError(
	            "CARTESIA_API_TOKEN is not set; define it in the environment or .env file"
	        )
	    return {
	        "Cartesia-Version": os.getenv("CARTESIA_VERSION", "2024-01-01"),
	        "Authorization": f"Bearer {api_token}",
	        "Content-Type": "application/json",
	    }


	def build_payload(model_id: str, transcript: str, voice_id: str, mode: str = "id"):
	    """Build the JSON payload for a TTS request.

	    Only the model, the text and the voice come from the caller; every other
	    field is a fixed value.

	    Args:
	        model_id: Value stored under ``"model_id"``.
	        transcript: Text stored under ``"transcript"``.
	        voice_id: Value stored under ``"voice" -> "id"``.
	        mode: Value stored under ``"voice" -> "mode"`` (defaults to ``"id"``).

	    Returns:
	        A new dict holding the request payload.
	    """
	    return {
	        "model_id": model_id,
	        "transcript": transcript,
	        "voice": {
	            "mode": mode,
	            "id": voice_id,
	        },
	        "language": "en",
	        "generation_config": {
	            "volume": 1,
	            "speed": 1,
	            "emotion": "neutral",
	        },
	        # Same encoding and sample rate that pcm_f32le_raw_to_wav() decodes
	        # by default.
	        "output_format": {
	            "container": "raw",
	            "encoding": "pcm_f32le",
	            "sample_rate": 8000,
	        },
	        "save": False,
	        "speed": "normal",
	    }


	def synthesize_speech(
	    model_id: str,
	    transcript: str,
	    voice_id: str,
	    mode: str = "id",
	    timeout: float = 60.0,
	):
	    """POST a TTS request to ``CARTESIA_TTS_URL`` and return the response body.

	    Args:
	        model_id: Model identifier forwarded to ``build_payload``.
	        transcript: Text to synthesize, forwarded to ``build_payload``.
	        voice_id: Voice identifier forwarded to ``build_payload``.
	        mode: Voice mode forwarded to ``build_payload`` (defaults to ``"id"``).
	        timeout: Value passed to ``requests.post(timeout=...)``. Without a
	            timeout the call can block indefinitely on an unresponsive server.

	    Returns:
	        The raw response content as ``bytes``.

	    Raises:
	        requests.HTTPError: If the server answers with an error status code.
	        requests.Timeout: If the request exceeds ``timeout``.
	    """
	    headers = get_headers()
	    payload = build_payload(model_id, transcript, voice_id, mode)

	    response = requests.post(
	        CARTESIA_TTS_URL,
	        json=payload,
	        headers=headers,
	        timeout=timeout,
	    )
	    # Turn 4xx/5xx answers into exceptions instead of returning an error body.
	    response.raise_for_status()

	    return response.content


	def pcm_f32le_raw_to_wav(raw: bytes, path: str, sample_rate: int = 8000, channels: int = 1) -> None:
	    """Decode raw float32 little-endian samples and write them to ``path``.

	    Args:
	        raw: Buffer interpreted as little-endian 32-bit floats.
	        path: Destination file handed to ``soundfile.write``.
	        sample_rate: Sample rate passed to ``soundfile.write`` (default 8000).
	        channels: Number of interleaved channels; values above 1 reshape the
	            samples into ``(frames, channels)``.
	    """
	    # Interpret raw bytes as float32 little-endian and shape (N, channels)
	    audio = np.frombuffer(raw, dtype="<f4")
	    if channels > 1:
	        audio = audio.reshape(-1, channels)
	    # subtype="FLOAT" stores the samples as floats rather than integer PCM.
	    sf.write(path, audio, samplerate=sample_rate, subtype="FLOAT")


	if __name__ == "__main__":
	    # Example usage: synthesize one fixed sentence and save it as speech4.wav.
	    model = "sonic-3"
	    voice = "cc00e582-ed66-4004-8336-0175b85c85f6"
	    text = "You need to wrap the raw samples into a WAV. Two ways:"
	    output_path = 'speech4.wav'

	    result = synthesize_speech(model, text, voice)
	    # Uses the converter's default sample rate and channel count.
	    pcm_f32le_raw_to_wav(result, output_path)
No results found