Skip to content

Instantly share code, notes, and snippets.

@hiepxanh
Last active January 27, 2025 03:41
Show Gist options
  • Save hiepxanh/a4a011a0fa0d72acbe167fa23e65e54e to your computer and use it in GitHub Desktop.
Save hiepxanh/a4a011a0fa0d72acbe167fa23e65e54e to your computer and use it in GitHub Desktop.
Add this file as `public/scripts/extensions/tts/kokoro-deepinfra.js`. You also need to register it in `public/scripts/extensions/tts/index.js`: add `import { KokoroTtsProvider } from './kokoro.js';` (the import path must match the file name you saved this file under), then add `Kokoro: KokoroTtsProvider` to `const ttsProviders = { ... }`.
import { getRequestHeaders } from '../../../script.js';
import { callGenericPopup, POPUP_RESULT, POPUP_TYPE } from '../../popup.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { KokoroTtsProvider };
/**
 * TTS provider that POSTs text to a configurable HTTP endpoint and expects a
 * JSON reply whose `audio` field carries base64-encoded audio. The decoded
 * audio is handed back wrapped in a `Response` object.
 *
 * The settings UI reuses the `openai_compatible_*` element ids.
 */
class KokoroTtsProvider {
    /** Active settings; assigned by loadSettings(). */
    settings;
    /** Cache of voice objects built from `settings.available_voices`. */
    voices = [];
    // Not used inside this class; presumably read by the TTS extension host.
    separator = ' . ';
    /** Audio element used only for voice previews. */
    audioElement = document.createElement('audio');

    /** Pristine defaults. loadSettings() copies these and never mutates them. */
    defaultSettings = {
        voiceMap: {},
        model: 'kokoro', // Default to your model
        speed: 1,
        available_voices: ['af_sky', 'af_bella', 'af', 'af_nicole', 'af_sarah', 'af_sky+af_bella', 'af_sky+af_nicole', 'af_sky+af_nicole+af_bella', 'bf_emma', 'bf_isabella', 'af_sky+af_nicole+af_bella+bf_isabella', 'bf_isabella+af_sky+af_nicole+af_bella+bf_isabella', 'bf_isabella+bf_isabella+bf_isabella+af_sky+af_nicole+af_bella+bf_isabella'], // Voices supported by your model
        provider_endpoint: 'http://localhost:8880/v1/audio/speech', // Your Python API endpoint
    };

    /**
     * Markup for the provider's settings panel, pre-filled with the defaults.
     * The voices input allows 1000 characters because the default voice list
     * alone is longer than 250.
     * @returns {string} HTML for the settings controls.
     */
    get settingsHtml() {
        let html = `
<label for="openai_compatible_tts_endpoint">Provider Endpoint:</label>
<div class="flex-container alignItemsCenter">
<div class="flex1">
<input id="openai_compatible_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
</div>
<div id="openai_compatible_tts_key" class="menu_button menu_button_icon">
<i class="fa-solid fa-key"></i>
<span>API Key</span>
</div>
</div>
<label for="openai_compatible_model">Model:</label>
<input id="openai_compatible_model" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.model}"/>
<label for="openai_compatible_tts_voices">Available Voices (comma separated):</label>
<input id="openai_compatible_tts_voices" type="text" class="text_pole" maxlength="1000" value="${this.defaultSettings.available_voices.join()}"/>
<label for="openai_compatible_tts_speed">Speed: <span id="openai_compatible_tts_speed_output"></span></label>
<input type="range" id="openai_compatible_tts_speed" value="1" min="0.25" max="4" step="0.05">`;
        return html;
    }

    /**
     * Merges saved settings over a copy of the defaults, pushes the result
     * into the settings controls and wires their `input` handlers.
     * @param {object} settings - Saved settings; only keys present in
     *   `defaultSettings` are accepted.
     * @throws {string} When `settings` contains an unknown key (thrown as a
     *   string, unchanged from the original behaviour).
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that later edits never leak into defaultSettings.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
            available_voices: [...this.defaultSettings.available_voices],
        };

        // Only accept keys defined in defaultSettings
        for (const key of Object.keys(settings)) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // The voice list may have changed; drop any cached voice objects.
        this.voices = [];

        $('#openai_compatible_tts_endpoint').val(this.settings.provider_endpoint);
        $('#openai_compatible_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        // Show the loaded model, not the default one.
        $('#openai_compatible_model').val(this.settings.model);
        $('#openai_compatible_model').on('input', () => { this.onSettingsChange(); });
        $('#openai_compatible_tts_voices').val(this.settings.available_voices.join());
        $('#openai_compatible_tts_voices').on('input', () => { this.onSettingsChange(); });
        $('#openai_compatible_tts_speed').val(this.settings.speed);
        $('#openai_compatible_tts_speed').on('input', () => {
            this.onSettingsChange();
        });
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);

        await this.checkReady();
        console.debug('OpenAI Compatible TTS: Settings loaded');
    }

    /**
     * Reads the current control values back into `this.settings` and saves them.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.provider_endpoint = String($('#openai_compatible_tts_endpoint').val());
        this.settings.model = String($('#openai_compatible_model').val());
        // Trim entries and drop empties so "a, b," yields ['a', 'b'] rather
        // than names with stray spaces that can never match in getVoice().
        this.settings.available_voices = String($('#openai_compatible_tts_voices').val())
            .split(',')
            .map(voice => voice.trim())
            .filter(voice => voice.length > 0);
        this.settings.speed = Number($('#openai_compatible_tts_speed').val());
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);

        // Invalidate the cache so getVoice() sees the edited voice list.
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Readiness check; only builds the voice list from the current settings.
     */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh hook; intentionally does nothing. */
    async onRefreshClick() {
        return;
    }

    /**
     * Looks up a voice object by name, filling the cache on first use.
     * @param {string} voiceName - Name of the voice to find.
     * @returns {Promise<{name: string, voice_id: string, lang: string}>}
     * @throws {string} When no voice has that name (thrown as a string,
     *   unchanged from the original behaviour).
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(oaicVoice => oaicVoice.name === voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generates speech for `text` with the given voice.
     * @param {string} text - Text to synthesize.
     * @param {string} voiceId - Voice identifier sent to the endpoint.
     * @returns {Promise<Response>} Response whose body is the decoded audio.
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /**
     * Builds voice objects from `settings.available_voices`; no network call.
     * @returns {Promise<Array<{name: string, voice_id: string, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        return this.settings.available_voices.map(v => {
            return { name: v, voice_id: v, lang: 'en-US' };
        });
    }

    /**
     * Generates a preview sentence with the given voice and plays it through
     * the provider's audio element.
     * @param {string} voiceId - Voice identifier to preview.
     * @throws {Error} When generation fails.
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        // Attach the cleanup handler before starting playback.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.src = url;
        try {
            await this.audioElement.play();
        } catch (error) {
            // play() rejects when playback is blocked or interrupted; release
            // the object URL instead of leaking it and report the reason.
            URL.revokeObjectURL(url);
            console.warn('TTS preview playback failed:', error);
        }
    }

    /**
     * POSTs the text to `settings.provider_endpoint` and converts the JSON
     * reply's base64 `audio` field into a binary audio `Response`.
     *
     * The request body sends `model`, `voice`, `text`, `response_format`
     * ('wav') and `speed`. The `audio` value may be a data URI, may use the
     * URL-safe base64 alphabet, and may lack padding.
     *
     * @param {string} inputText - Text to synthesize.
     * @param {string} voiceId - Voice identifier sent as `voice`.
     * @returns {Promise<Response>} A status-200 response wrapping the audio.
     * @throws {Error} On a non-OK HTTP status, a missing or non-string
     *   `audio` field, or undecodable base64.
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await fetch(this.settings.provider_endpoint, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: this.settings.model,
                voice: voiceId,
                text: inputText,
                response_format: 'wav',
                speed: this.settings.speed,
            }),
        });

        if (!response.ok) {
            const responseText = await response.text();
            console.error('Response Text:', responseText);
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${responseText}`);
        }

        const result = await response.json();
        console.log('Result:', result);

        // Reject anything that is not a non-empty string up front, so the
        // string operations below (and in the catch) cannot throw TypeError.
        if (typeof result.audio !== 'string' || result.audio.length === 0) {
            throw new Error('Audio field is missing or null in the response');
        }

        try {
            // Clean up the base64 string
            let base64String = result.audio;
            if (base64String.includes(',')) {
                // Data URI: keep only the payload after the first comma.
                base64String = base64String.split(',')[1];
            }
            // Strip whitespace first so the padding below is computed on the
            // real payload length, then map the URL-safe alphabet to standard.
            base64String = base64String
                .replace(/\s+/g, '')
                .replace(/-/g, '+')
                .replace(/_/g, '/');
            while (base64String.length % 4) {
                base64String += '=';
            }

            // Convert base64 to raw binary data
            const binaryString = atob(base64String);
            const bytes = Uint8Array.from(binaryString, ch => ch.charCodeAt(0));

            // Create blob with proper MIME type
            const mimeType = result.response_format === 'mp3' ? 'audio/mpeg' : 'audio/wav';
            const audioBlob = new Blob([bytes], { type: mimeType });

            // Create a Response object from the Blob
            return new Response(audioBlob, {
                status: 200,
                headers: {
                    'Content-Type': mimeType,
                },
            });
        } catch (error) {
            console.error('Base64 decoding error:', error);
            console.error('Base64 string preview:', result.audio.substring(0, 100) + '...');
            throw new Error('Failed to decode base64 audio data: ' + error.message);
        }
    }
}
@hiepxanh
Copy link
Author

image

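You also need to register the provider in public/scripts/extensions/tts/index.js: add "import { KokoroTtsProvider } from './kokoro.js';" (the import path must match the file name you saved the provider under), then add "Kokoro: KokoroTtsProvider" to const ttsProviders = { ... }.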

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment