Skip to content

Instantly share code, notes, and snippets.

@THeK3nger
Last active October 20, 2024 11:38
Show Gist options
  • Save THeK3nger/882a31f52bb002dac155ad95529c3680 to your computer and use it in GitHub Desktop.
Save THeK3nger/882a31f52bb002dac155ad95529c3680 to your computer and use it in GitHub Desktop.
Unity script for using ElevenLabs TTS service
/**
* An example script on how to use ElevenLabs APIs in a Unity script.
*
* More info at https://www.davideaversa.it/blog/elevenlabs-text-to-speech-unity-script/
*/
using System;
using System.Collections;
using System.Text;
using Newtonsoft.Json;
using UnityEngine;
using UnityEngine.Events;
using UnityEngine.Networking;
/// <summary>
/// Sends text to the ElevenLabs text-to-speech endpoint and broadcasts the
/// resulting <see cref="AudioClip"/> through <see cref="AudioReceived"/>.
/// </summary>
public class ElevenlabsAPI : MonoBehaviour {
    [SerializeField]
    private string _voiceId;
    [SerializeField]
    private string _apiKey;
    // Base URL only; the "/v1/text-to-speech/..." path is appended in DoRequest.
    [SerializeField]
    private string _apiUrl = "https://api.elevenlabs.io";

    // If true, the audio will be streamed instead of downloaded.
    // Unfortunately, Unity has some problems with streaming audio,
    // but the option is left here in case you want to try it.
    public bool Streaming;

    // Forwarded verbatim as the "optimize_streaming_latency" query parameter.
    [Range(0, 4)]
    public int LatencyOptimization;

    // This event is used to broadcast the received AudioClip.
    public UnityEvent<AudioClip> AudioReceived;

    /// <summary>
    /// Stores the API key and voice id.
    /// </summary>
    /// <remarks>
    /// NOTE(review): Unity normally creates MonoBehaviours itself (AddComponent /
    /// scene load) and fills the serialized fields from the inspector; this
    /// constructor is kept only so existing callers keep compiling.
    /// </remarks>
    public ElevenlabsAPI(string apiKey, string voiceId) {
        _apiKey = apiKey;
        _voiceId = voiceId;
    }

    /// <summary>
    /// Starts an asynchronous text-to-speech request for <paramref name="text"/>.
    /// The result is delivered through <see cref="AudioReceived"/>.
    /// </summary>
    /// <param name="text">The text to synthesize. Empty text is ignored.</param>
    public void GetAudio(string text) {
        if (string.IsNullOrWhiteSpace(text)) {
            Debug.LogWarning("ElevenlabsAPI: GetAudio called with empty text; nothing to synthesize.");
            return;
        }
        // A missing voice id or key only produces an opaque HTTP error from the
        // server, so fail early with a message that points at the real cause.
        if (string.IsNullOrWhiteSpace(_voiceId) || string.IsNullOrWhiteSpace(_apiKey)) {
            Debug.LogError("ElevenlabsAPI: the voice id and the API key must both be set before requesting audio.");
            return;
        }
        StartCoroutine(DoRequest(text));
    }

    /// <summary>
    /// Coroutine that posts the request, waits for the response and raises
    /// <see cref="AudioReceived"/> on success. Errors are logged.
    /// </summary>
    IEnumerator DoRequest(string message) {
        // TODO: The voice settings could be easily exposed in the Unity inspector.
        var postData = new TextToSpeechRequest {
            text = message,
            model_id = "eleven_monolingual_v1",
            voice_settings = new VoiceSettings {
                stability = 0,
                similarity_boost = 0,
                style = 0.5f,
                use_speaker_boost = true
            }
        };
        var json = JsonConvert.SerializeObject(postData);

        var baseUrl = (_apiUrl ?? string.Empty).TrimEnd('/');
        var stream = Streaming ? "/stream" : "";
        var url = $"{baseUrl}/v1/text-to-speech/{_voiceId}{stream}?optimize_streaming_latency={LatencyOptimization}";

        // Build the request by hand instead of UnityWebRequest.Post(url, json):
        // Post() attaches its own handlers, which would be replaced (and left
        // undisposed) below. The using block also guarantees disposal on the
        // error path.
        using (var request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST)) {
            // UTF-8, not ASCII: ASCII encoding replaces every non-ASCII character
            // in the text with '?'.
            request.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(json));

            var downloadHandler = new DownloadHandlerAudioClip(url, AudioType.MPEG);
            if (Streaming) {
                downloadHandler.streamAudio = true;
            }
            request.downloadHandler = downloadHandler;

            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("xi-api-key", _apiKey);
            request.SetRequestHeader("Accept", "audio/mpeg");

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success) {
                Debug.LogError("Error downloading audio: " + request.error);
                yield break;
            }

            AudioClip audioClip = downloadHandler.audioClip;
            AudioReceived?.Invoke(audioClip);
        }
    }

    /// <summary>JSON payload of the text-to-speech request.</summary>
    [Serializable]
    public class TextToSpeechRequest {
        public string text;
        public string model_id; // eleven_monolingual_v1
        public VoiceSettings voice_settings;
    }

    /// <summary>Voice tuning values serialized into the request payload.</summary>
    [Serializable]
    public class VoiceSettings {
        // NOTE(review): declared as int, so only whole numbers can be sent;
        // confirm against the API whether fractional values are expected.
        public int stability; // 0
        public int similarity_boost; // 0
        public float style; // 0.5
        public bool use_speaker_boost; // true
    }
}
using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Networking;
using Newtonsoft.Json;
/**
* Optional utility class to fetch all the available Voices in the ElevenLabs account.
* This may be useful if you want to allow the player to customize the voice at runtime.
*/
public class ElevenVoices : MonoBehaviour
{
    // Filled by DoRequest with the id and name of every voice returned by the API.
    public List<VoiceExposed> Voices = new List<VoiceExposed>();

    [SerializeField]
    private string _apiKey;
    // Base URL only; the "/v1/voices" path is appended in DoRequest.
    [SerializeField]
    private string _apiUrl = "https://api.elevenlabs.io";

    void Start()
    {
        // In this example we populate the Voices list on Start. But you can do that on demand
        // if you prefer. After all, we do not need to do this every time we start the game.
        StartCoroutine(DoRequest());
    }

    /// <summary>
    /// Coroutine that fetches the voice list and appends one
    /// <see cref="VoiceExposed"/> entry per voice to <see cref="Voices"/>.
    /// Failures are logged and leave the list untouched.
    /// </summary>
    IEnumerator DoRequest()
    {
        var baseUrl = (_apiUrl ?? string.Empty).TrimEnd('/');
        var url = $"{baseUrl}/v1/voices";
        using (UnityWebRequest request = UnityWebRequest.Get(url))
        {
            request.SetRequestHeader("xi-api-key", _apiKey);
            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("Error fetching voices: " + request.error);
                yield break;
            }

            var jsonResponse = request.downloadHandler.text;
            var response = JsonUtility.FromJson<ApiResponse>(jsonResponse);

            // Guard against an empty body or a payload without a "voices" array;
            // iterating a null list would throw inside the coroutine.
            if (response == null || response.voices == null)
            {
                Debug.LogError("Error fetching voices: the response did not contain a voice list.");
                yield break;
            }

            foreach (var voice in response.voices)
            {
                Voices.Add(new VoiceExposed
                {
                    voice_id = voice.voice_id,
                    name = voice.name
                });
            }
        }
    }

    /// <summary>Reduced view of a voice (id and name only) exposed through <see cref="Voices"/>.</summary>
    [Serializable]
    public class VoiceExposed
    {
        public string voice_id;
        public string name;
    }

    /// <summary>One entry of the "voices" array in the API response.</summary>
    [Serializable]
    public class Voice
    {
        public string voice_id;
        public string name;
        public List<Sample> samples;
        // Define other properties as needed
    }

    /// <summary>One entry of a voice's "samples" array.</summary>
    [Serializable]
    public class Sample
    {
        public string sample_id;
        public string file_name;
        public string mime_type;
        public int size_bytes;
        public string hash;
        // Define other properties as needed
    }

    /// <summary>Root object of the voices response.</summary>
    [Serializable]
    public class ApiResponse
    {
        public List<Voice> voices;
    }
}
@firdodev
Copy link

how to use it

@THeK3nger
Copy link
Author

I forgot to link to my original post 😅

In short, though:

  1. You attach this script to an empty game object and fill in the _voiceId and _apiKey fields (the data is in your ElevenLabs account).
  2. Then, from any other script, you can invoke the GetAudio(string) method and receive the result through the AudioReceived event.

For example, this is a minimal client script where I have a text input field and a button. When I click the button, I send the text to ElevenLabs and play the result.

/// <summary>
/// Minimal client: sends the input field's text to the TTS script when the
/// button is clicked and plays the clip that comes back.
/// </summary>
public class TestElevenLabsUI : MonoBehaviour
{
    public Button sendButton;
    public InputField inputField;
    public ElevenlabsAPI tts;

    void Start()
    {
        // Add the PlayClip handler to the ElevenlabsAPI script
        tts.AudioReceived.AddListener(PlayClip);

        // Add the Button's onClick handler (a named method, so it can be removed again)
        sendButton.onClick.AddListener(SendText);
    }

    void OnDestroy()
    {
        // Unsubscribe so the TTS script and the button do not keep calling into
        // this object after it has been destroyed.
        if (tts != null)
        {
            tts.AudioReceived.RemoveListener(PlayClip);
        }
        if (sendButton != null)
        {
            sendButton.onClick.RemoveListener(SendText);
        }
    }

    // Sends the current input text to the TTS script and clears the field.
    void SendText()
    {
        tts.GetAudio(inputField.text);
        inputField.text = "";
    }

    /// <summary>Plays <paramref name="clip"/> at the main camera's position.</summary>
    public void PlayClip(AudioClip clip)
    {
        if (clip == null)
        {
            return;
        }

        // Camera.main is null when no camera is tagged "MainCamera";
        // fall back to this object's own position in that case.
        var mainCamera = Camera.main;
        var position = mainCamera != null ? mainCamera.transform.position : transform.position;
        AudioSource.PlayClipAtPoint(clip, position);
    }
}

@rafe4
Copy link

rafe4 commented Feb 27, 2024

Hey! Ive been trying to use this but getting 400 bad request, i have another library setup with elevenLabs that is working so i know its not the API key thats wrong, the url seems right too, any clue whats causing the porblem? Many thanks!

@THeK3nger
Copy link
Author

Hi @rafe4! Yes, a 400 Bad Request error makes me think the problem is in the payload. If you add a Debug.Log(json) after line 60, you should be able to see whether the serialized JSON makes sense. Off the top of my head, I don't know what may be wrong: maybe the message contains invalid characters?

@stefanoditore
Copy link

hello, it work perfectly. I just have some question, I need that the tts work with streaming. Do you have idea about how to implement streaming in unity?

@rafe4
Copy link

rafe4 commented Mar 4, 2024

JSON

Hey really appreciate you getting back to me! I figurd it out, it was to do with how i was setting up the voices, thanks anyway and thanks for sharing this script!

@THeK3nger
Copy link
Author

THeK3nger commented Mar 5, 2024

hello, it work perfectly. I just have some question, I need that the tts work with streaming. Do you have idea about how to implement streaming in unity?

From the research I did at the time, it looked like an issue with Unity itself. WWW.GetAudioClip simply cannot stream, or it waits until 90% of the file is downloaded (nullifying any streaming advantage).

A solution would be to use a different library for the streaming and playback of the audio clip, such as https://github.com/naudio/NAudio . But I never experimented with that (luckily, for us, streaming was not required). I wish you good luck. :)

PS: This is a simple Unity package using NAudio I have found online. You can look at the source and see how they do it. https://github.com/AstralSkies/RadioUnityStream/

@stefanoditore
Copy link

stefanoditore commented Mar 5, 2024 via email

@FarahSibtain
Copy link

Thank you for sharing. This was exactly what I wanted

@JanetGilbert
Copy link

Hey! Ive been trying to use this but getting 400 bad request, i have another library setup with elevenLabs that is working so i know its not the API key thats wrong, the url seems right too, any clue whats causing the porblem? Many thanks!

I got this at first: you have to remember to set the voice id: Here's the list of valid options https://elevenlabs.io/docs/voices/premade-voices

@YolgavekGovinda
Copy link

THANK YOU! I have been trying to get this working for days now and your scripts worked like a charm. I see a couple of bad request posts. For me, the fix was that whichever GameObject has the ElevenlabsAPI script attached must have API URL set to just https://api.elevenlabs.io; the /v1/text-to-speech path is appended by the rest of the script. I made the mistake of using the full URL.

@THeK3nger
Copy link
Author

Hey! Ive been trying to use this but getting 400 bad request, i have another library setup with elevenLabs that is working so i know its not the API key thats wrong, the url seems right too, any clue whats causing the porblem? Many thanks!

I got this at first: you have to remember to set the voice id: Here's the list of valid options https://elevenlabs.io/docs/voices/premade-voices

I also added an optional utility class to fetch the Voices from Unity itself. Check the ElevenVoices.cs file.

@farahfarizi24
Copy link

I'm adapting this code for another project, and received an error 400 on the protocol. This appears on the runtime only when running a built project. Do you have any idea what could have gone wrong?

@itsabhiram
Copy link

I'm getting this error:
Error downloading audio: Cannot connect to destination host

Can someone please help?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment