Last active
January 27, 2025 03:41
-
-
Save hiepxanh/a4a011a0fa0d72acbe167fa23e65e54e to your computer and use it in GitHub Desktop.
Add to `public/scripts/extensions/tts/kokoro-deepinfra.js`. You also need to declare it in `public/scripts/extensions/tts/index.js`: add `import { KokoroTtsProvider } from './kokoro.js';` and then register it in `const ttsProviders = { Kokoro: KokoroTtsProvider, ... }`.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { getRequestHeaders } from '../../../script.js'; | |
import { callGenericPopup, POPUP_RESULT, POPUP_TYPE } from '../../popup.js'; | |
import { getPreviewString, saveTtsProviderSettings } from './index.js'; | |
export { KokoroTtsProvider }; | |
/**
 * TTS provider that talks to a local OpenAI-style speech endpoint serving the
 * Kokoro model. The backend is NOT strictly OpenAI-compatible: it returns a
 * JSON body with a base64 `audio` field rather than raw audio bytes
 * (presumably a custom Python wrapper — confirm against the server code).
 */
class KokoroTtsProvider {
    // Active provider settings; populated by loadSettings().
    settings;
    // Cached voice objects ({ name, voice_id, lang }) derived from available_voices.
    voices = [];
    // Separator inserted between narration chunks by the TTS pipeline.
    separator = ' . ';
    // Reused <audio> element for voice previews.
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        model: 'kokoro', // Default to your model
        speed: 1,
        available_voices: ['af_sky', 'af_bella', 'af', 'af_nicole', 'af_sarah', 'af_sky+af_bella', 'af_sky+af_nicole', 'af_sky+af_nicole+af_bella', 'bf_emma', 'bf_isabella', 'af_sky+af_nicole+af_bella+bf_isabella', 'bf_isabella+af_sky+af_nicole+af_bella+bf_isabella', 'bf_isabella+bf_isabella+bf_isabella+af_sky+af_nicole+af_bella+bf_isabella'], // Voices supported by your model
        provider_endpoint: 'http://localhost:8880/v1/audio/speech', // Your Python API endpoint
    };

    /**
     * HTML fragment rendered into the TTS extension settings panel.
     * @returns {string} Settings markup with defaults pre-filled.
     */
    get settingsHtml() {
        return `
        <label for="openai_compatible_tts_endpoint">Provider Endpoint:</label>
        <div class="flex-container alignItemsCenter">
            <div class="flex1">
                <input id="openai_compatible_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
            </div>
            <div id="openai_compatible_tts_key" class="menu_button menu_button_icon">
                <i class="fa-solid fa-key"></i>
                <span>API Key</span>
            </div>
        </div>
        <label for="openai_compatible_model">Model:</label>
        <input id="openai_compatible_model" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.model}"/>
        <label for="openai_compatible_tts_voices">Available Voices (comma separated):</label>
        <input id="openai_compatible_tts_voices" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.available_voices.join()}"/>
        <label for="openai_compatible_tts_speed">Speed: <span id="openai_compatible_tts_speed_output"></span></label>
        <input type="range" id="openai_compatible_tts_speed" value="1" min="0.25" max="4" step="0.05">`;
    }

    /**
     * Populate provider state and the settings UI from persisted settings.
     * @param {Object} settings - Previously saved provider settings (may be empty).
     * @throws {Error} If a key not present in defaultSettings is supplied.
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }
        // Deep-copy the defaults: the original assigned `this.settings = this.defaultSettings`,
        // which aliased the objects, so user edits silently corrupted defaultSettings.
        this.settings = structuredClone(this.defaultSettings);
        // Only accept keys defined in defaultSettings.
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
        }
        $('#openai_compatible_tts_endpoint').val(this.settings.provider_endpoint);
        $('#openai_compatible_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        // Bug fix: show the saved model, not the default (original read defaultSettings.model).
        $('#openai_compatible_model').val(this.settings.model);
        $('#openai_compatible_model').on('input', () => { this.onSettingsChange(); });
        $('#openai_compatible_tts_voices').val(this.settings.available_voices.join());
        $('#openai_compatible_tts_voices').on('input', () => { this.onSettingsChange(); });
        $('#openai_compatible_tts_speed').val(this.settings.speed);
        $('#openai_compatible_tts_speed').on('input', () => {
            this.onSettingsChange();
        });
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);
        await this.checkReady();
        console.debug('OpenAI Compatible TTS: Settings loaded');
    }

    /** Pull current values from the UI into settings and persist them. */
    onSettingsChange() {
        this.settings.provider_endpoint = String($('#openai_compatible_tts_endpoint').val());
        this.settings.model = String($('#openai_compatible_model').val());
        this.settings.available_voices = String($('#openai_compatible_tts_voices').val()).split(',');
        this.settings.speed = Number($('#openai_compatible_tts_speed').val());
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);
        saveTtsProviderSettings();
    }

    /** Warm the voice cache; called once after settings load. */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** No remote voice list to refresh — the voice set is static config. */
    async onRefreshClick() {
        return;
    }

    /**
     * Look up a voice object by display name.
     * @param {string} voiceName
     * @returns {Promise<{name: string, voice_id: string, lang: string}>}
     * @throws {Error} If no configured voice matches.
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(voice => voice.name === voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generate speech audio for a chunk of text.
     * @param {string} text
     * @param {string} voiceId
     * @returns {Promise<Response>} A Response whose body is the decoded audio.
     */
    async generateTts(text, voiceId) {
        return this.fetchTtsGeneration(text, voiceId);
    }

    /**
     * Build voice objects from the configured voice names (no network call).
     * @returns {Promise<Array<{name: string, voice_id: string, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        return this.settings.available_voices.map(v => {
            return { name: v, voice_id: v, lang: 'en-US' };
        });
    }

    /**
     * Play a short preview sample for the given voice.
     * @param {string} voiceId
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        // Release the object URL once playback finishes to avoid leaking blobs.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    /**
     * POST the generation request and decode the backend's base64 JSON payload
     * into a synthetic Response carrying raw audio bytes.
     * @param {string} inputText
     * @param {string} voiceId
     * @returns {Promise<Response>}
     * @throws {Error} On HTTP failure, a missing `audio` field, or undecodable base64.
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await fetch(this.settings.provider_endpoint, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            // NOTE(review): the OpenAI speech API calls this field `input`, not
            // `text` — this backend is assumed to expect `text`; confirm server-side.
            body: JSON.stringify({
                model: this.settings.model,
                voice: voiceId,
                text: inputText,
                response_format: 'wav',
                speed: this.settings.speed,
            }),
        });
        if (!response.ok) {
            const responseText = await response.text();
            console.error('Response Text:', responseText);
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${responseText}`);
        }
        const result = await response.json();
        console.log('Result:', result);
        if (!result.audio) {
            throw new Error('Audio field is missing or null in the response');
        }
        try {
            // Clean up the base64 string: strip a data-URI prefix, translate
            // URL-safe alphabet to standard, and restore '=' padding.
            let base64String = result.audio;
            if (base64String.includes(',')) {
                base64String = base64String.split(',')[1];
            }
            base64String = base64String.replace(/-/g, '+').replace(/_/g, '/');
            while (base64String.length % 4) {
                base64String += '=';
            }
            // Convert base64 to raw binary data.
            const binaryString = atob(base64String);
            const bytes = new Uint8Array(binaryString.length);
            for (let i = 0; i < binaryString.length; i++) {
                bytes[i] = binaryString.charCodeAt(i);
            }
            // Create blob with proper MIME type.
            const mimeType = result.response_format === 'mp3' ? 'audio/mpeg' : 'audio/wav';
            const audioBlob = new Blob([bytes], { type: mimeType });
            // Wrap the decoded audio in a Response so callers can use .blob()/.ok.
            return new Response(audioBlob, {
                status: 200,
                headers: {
                    'Content-Type': mimeType,
                },
            });
        } catch (error) {
            console.error('Base64 decoding error:', error);
            console.error('Base64 string preview:', result.audio.substring(0, 100) + '...');
            throw new Error('Failed to decode base64 audio data: ' + error.message);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You also need to declare it in
public/scripts/extensions/tts/index.js
: add "import { KokoroTtsProvider } from './kokoro.js';"
then register it in const ttsProviders = { Kokoro: KokoroTtsProvider, ... }