Skip to content

Instantly share code, notes, and snippets.

@mode-mercury
Created May 28, 2025 19:17
Show Gist options
  • Select an option

  • Save mode-mercury/78ac4b47594fb2a73346c34071986850 to your computer and use it in GitHub Desktop.

Select an option

Save mode-mercury/78ac4b47594fb2a73346c34071986850 to your computer and use it in GitHub Desktop.
Untitled
<!DOCTYPE html>
<html lang="en">
<head>
<!-- charset must appear before any text content (within first 1024 bytes) -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GhostVoice Studio - Voice Changer & Cloning</title>
<!-- TensorFlow.js and ML libraries -->
<!-- NOTE(review): both CDN URLs below were mangled by an email-obfuscation
     scraper — "[email protected]" replaced the original "package@version" path
     segment (e.g. "tfjs@4.x"). As written these requests will 404; restore
     the real versioned paths. Also consider adding `defer` so the scripts
     stop blocking first render — TODO confirm no head-level inline script
     uses `tf`/`Meyda` synchronously (the page's main script is not visible
     in this chunk). -->
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/[email protected]/dist/tf.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/web/meyda.min.js"></script>
<style>
/* Design tokens: light-theme defaults. The dark-theme media query below
   overrides only the surface/text/border subset; brand colors stay fixed. */
:root {
--color-primary: #5D5CDE;
--color-primary-hover: #4a49b8;
--color-secondary: #4338ca;
--color-accent: #7c3aed;
--color-background: #ffffff;
--color-foreground: #f8fafc;
--color-surface: #f1f5f9;
--color-surface-hover: #e2e8f0;
--color-text-primary: #1e293b;
--color-text-secondary: #64748b;
--color-border: #cbd5e1;
--color-success: #10b981;
--color-error: #ef4444;
--color-warning: #f59e0b;
--shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
--shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06);
--shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
--border-radius: 8px;
--font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}
/* Automatic dark theme via OS preference (no manual toggle in this markup). */
@media (prefers-color-scheme: dark) {
:root {
--color-background: #0f172a;
--color-foreground: #1e293b;
--color-surface: #334155;
--color-surface-hover: #475569;
--color-text-primary: #f8fafc;
--color-text-secondary: #cbd5e1;
/* NOTE(review): identical to --color-foreground above, so the 1px borders
   on cards/inputs become invisible in dark mode — confirm intentional;
   #334155 or #475569 would keep them visible. */
--color-border: #1e293b;
}
}
body {
font-family: var(--font-family);
background-color: var(--color-background);
color: var(--color-text-primary);
margin: 0;
padding: 0;
line-height: 1.5;
transition: background-color 0.3s ease;
}
* {
box-sizing: border-box;
}
.container {
width: 100%;
max-width: 1000px;
margin: 0 auto;
padding: 1rem;
}
header {
background-color: var(--color-primary);
color: white;
padding: 1rem;
box-shadow: var(--shadow);
position: sticky;
top: 0;
z-index: 100;
}
.header-content {
display: flex;
justify-content: space-between;
align-items: center;
}
.app-title {
display: flex;
align-items: center;
font-size: 1.5rem;
font-weight: 700;
}
.app-title svg {
margin-right: 0.5rem;
}
.card {
background-color: var(--color-foreground);
border-radius: var(--border-radius);
box-shadow: var(--shadow);
margin-bottom: 1rem;
overflow: hidden;
}
.card-header {
background-color: var(--color-surface);
padding: 1rem;
font-weight: 500;
display: flex;
justify-content: space-between;
align-items: center;
}
.card-title {
display: flex;
align-items: center;
font-size: 1rem;
margin: 0;
}
.card-title svg {
margin-right: 0.5rem;
}
.card-body {
padding: 1rem;
}
.form-group {
margin-bottom: 1rem;
}
label {
display: block;
margin-bottom: 0.5rem;
font-size: 0.875rem;
font-weight: 500;
}
input[type="text"],
input[type="number"],
input[type="file"],
select,
textarea {
width: 100%;
padding: 0.5rem;
font-size: 1rem;
border: 1px solid var(--color-border);
border-radius: var(--border-radius);
background-color: var(--color-surface);
color: var(--color-text-primary);
}
input[type="text"]:focus,
input[type="number"]:focus,
select:focus,
textarea:focus {
outline: none;
border-color: var(--color-primary);
box-shadow: 0 0 0 2px rgba(93, 92, 222, 0.2);
}
input[type="range"] {
-webkit-appearance: none;
width: 100%;
height: 6px;
border-radius: 5px;
background: var(--color-surface-hover);
outline: none;
}
input[type="range"]::-webkit-slider-thumb {
-webkit-appearance: none;
width: 16px;
height: 16px;
border-radius: 50%;
background: var(--color-primary);
cursor: pointer;
border: none;
}
input[type="range"]::-moz-range-thumb {
width: 16px;
height: 16px;
border-radius: 50%;
background: var(--color-primary);
cursor: pointer;
border: none;
}
/* Button system: .btn base + color variants (primary/secondary/accent/danger).
   Hover colors for accent/danger are hard-coded darker shades since no
   *-hover token exists for them. */
.btn {
padding: 0.5rem 1rem;
border: none;
border-radius: var(--border-radius);
font-size: 0.875rem;
font-weight: 500;
cursor: pointer;
display: inline-flex;
align-items: center;
justify-content: center;
transition: all 0.2s;
}
.btn svg {
margin-right: 0.5rem;
}
/* Compact size — referenced by the voice-profile-footer buttons in the
   markup but previously never defined; without it those buttons rendered
   at full size. */
.btn-sm {
padding: 0.25rem 0.5rem;
font-size: 0.75rem;
}
.btn-primary {
background-color: var(--color-primary);
color: white;
}
.btn-primary:hover {
background-color: var(--color-primary-hover);
}
.btn-secondary {
background-color: var(--color-surface);
color: var(--color-text-primary);
border: 1px solid var(--color-border);
}
.btn-secondary:hover {
background-color: var(--color-surface-hover);
}
.btn-accent {
background-color: var(--color-accent);
color: white;
}
.btn-accent:hover {
background-color: #6528e0;
}
.btn-danger {
background-color: var(--color-error);
color: white;
}
.btn-danger:hover {
background-color: #dc2626;
}
.btn:disabled {
opacity: 0.6;
cursor: not-allowed;
}
.tabs {
display: flex;
border-bottom: 1px solid var(--color-border);
margin-bottom: 1rem;
overflow-x: auto;
}
.tab-btn {
padding: 0.75rem 1rem;
border: none;
background: none;
color: var(--color-text-secondary);
cursor: pointer;
font-weight: 500;
white-space: nowrap;
display: flex;
align-items: center;
}
.tab-btn svg {
margin-right: 0.5rem;
width: 1rem;
height: 1rem;
}
.tab-btn.active {
color: var(--color-primary);
border-bottom: 2px solid var(--color-primary);
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
.toggle-switch {
position: relative;
display: inline-block;
width: 40px;
height: 20px;
}
.toggle-switch input {
opacity: 0;
width: 0;
height: 0;
}
.toggle-slider {
position: absolute;
cursor: pointer;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: var(--color-surface-hover);
border-radius: 20px;
transition: .3s;
}
.toggle-slider:before {
position: absolute;
content: "";
height: 16px;
width: 16px;
left: 2px;
bottom: 2px;
background-color: white;
border-radius: 50%;
transition: .3s;
}
input:checked + .toggle-slider {
background-color: var(--color-primary);
}
input:checked + .toggle-slider:before {
transform: translateX(20px);
}
.row {
display: flex;
flex-wrap: wrap;
margin: 0 -0.5rem;
}
.col {
flex: 1;
padding: 0 0.5rem;
}
.col-6 {
flex: 0 0 50%;
max-width: 50%;
padding: 0 0.5rem;
}
.preset-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
gap: 0.75rem;
}
.preset-btn {
border: 1px solid var(--color-border);
border-radius: var(--border-radius);
background-color: var(--color-foreground);
padding: 0.75rem;
text-align: center;
cursor: pointer;
transition: all 0.2s;
}
.preset-btn:hover {
background-color: var(--color-surface-hover);
}
.preset-btn.active {
border: 2px solid var(--color-primary);
box-shadow: 0 0 0 2px rgba(93, 92, 222, 0.2);
}
.preset-emoji {
font-size: 1.5rem;
margin-bottom: 0.25rem;
}
.preset-name {
font-size: 0.75rem;
}
.audio-meter {
width: 100%;
height: 24px;
background-color: var(--color-surface-hover);
border-radius: var(--border-radius);
overflow: hidden;
}
.audio-meter-level {
height: 100%;
background-color: var(--color-primary);
width: 0%;
transition: width 0.1s ease;
}
.voice-indicator {
width: 12px;
height: 12px;
border-radius: 50%;
background-color: #9ca3af;
transition: background-color 0.2s;
}
.voice-active {
background-color: var(--color-primary);
box-shadow: 0 0 5px var(--color-primary);
}
.floating-controls {
position: fixed;
bottom: 1rem;
right: 1rem;
background-color: var(--color-foreground);
border-radius: var(--border-radius);
box-shadow: var(--shadow-md);
padding: 0.75rem;
z-index: 100;
width: 200px;
display: none;
}
.floating-controls.active {
display: block;
}
.notification {
position: fixed;
bottom: 1rem;
left: 50%;
transform: translateX(-50%);
background-color: var(--color-primary);
color: white;
padding: 0.75rem 1rem;
border-radius: var(--border-radius);
box-shadow: var(--shadow-md);
z-index: 1000;
opacity: 0;
transition: opacity 0.3s;
}
.notification.show {
opacity: 1;
}
.slider-value {
font-family: monospace;
min-width: 2.5em;
text-align: right;
}
.call-status {
display: flex;
align-items: center;
margin-top: 0.5rem;
}
.status-indicator {
width: 10px;
height: 10px;
border-radius: 50%;
margin-right: 0.5rem;
}
.status-disconnected {
background-color: var(--color-error);
}
.status-connecting {
background-color: var(--color-warning);
}
.status-connected {
background-color: var(--color-success);
}
@media (max-width: 768px) {
.row {
flex-direction: column;
}
.col, .col-6 {
flex: 0 0 100%;
max-width: 100%;
}
}
/* Modal backdrop. Hidden by default; the controlling script (not visible in
   this chunk) is expected to show it by setting display:flex, at which point
   the centering properties below take effect.
   Fix: the original rule declared `display: flex` and then `display: none`
   in the same block — the first declaration was dead code (always overridden
   by the later one) and has been removed. Computed style is unchanged. */
.overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: rgba(0, 0, 0, 0.5);
align-items: center;
justify-content: center;
z-index: 1000;
display: none;
}
.overlay-content {
background-color: var(--color-foreground);
border-radius: var(--border-radius);
max-width: 90%;
width: 400px;
padding: 1.5rem;
box-shadow: var(--shadow-md);
}
.overlay-footer {
display: flex;
justify-content: flex-end;
gap: 0.5rem;
margin-top: 1rem;
}
.test-tone-btn {
margin-top: 0.5rem;
padding: 0.25rem 0.5rem;
font-size: 0.75rem;
}
.error-banner {
background-color: rgba(239, 68, 68, 0.1);
color: var(--color-error);
border: 1px solid var(--color-error);
padding: 0.75rem;
border-radius: var(--border-radius);
margin-bottom: 1rem;
font-size: 0.875rem;
}
.success-banner {
background-color: rgba(16, 185, 129, 0.1);
color: var(--color-success);
border: 1px solid var(--color-success);
padding: 0.75rem;
border-radius: var(--border-radius);
margin-bottom: 1rem;
font-size: 0.875rem;
}
/* Waveform visualization */
.waveform {
width: 100%;
height: 60px;
background-color: var(--color-surface);
border-radius: var(--border-radius);
position: relative;
overflow: hidden;
}
.waveform-canvas {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
}
/* Voice profile styling */
.voice-profiles-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 1rem;
}
.voice-profile-card {
border: 1px solid var(--color-border);
border-radius: var(--border-radius);
overflow: hidden;
transition: all 0.2s;
}
.voice-profile-card:hover {
box-shadow: var(--shadow-md);
transform: translateY(-2px);
}
.voice-profile-card.active {
border: 2px solid var(--color-primary);
}
.voice-profile-header {
padding: 0.75rem;
background-color: var(--color-surface);
border-bottom: 1px solid var(--color-border);
}
.voice-profile-title {
font-weight: 500;
margin: 0;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.voice-profile-body {
padding: 0.75rem;
}
.voice-profile-footer {
padding: 0.75rem;
display: flex;
justify-content: space-between;
border-top: 1px solid var(--color-border);
}
/* Voice clone specific styles */
.record-btn {
display: flex;
align-items: center;
justify-content: center;
width: 60px;
height: 60px;
border-radius: 50%;
background-color: var(--color-error);
color: white;
border: none;
cursor: pointer;
transition: all 0.2s;
}
.record-btn:hover {
transform: scale(1.05);
}
.record-btn.recording {
animation: pulse 1.5s infinite;
}
.record-btn.paused {
background-color: var(--color-warning);
}
@keyframes pulse {
0% {
box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
}
70% {
box-shadow: 0 0 0 10px rgba(239, 68, 68, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
}
}
.recording-timer {
font-family: monospace;
font-size: 1.25rem;
margin: 0 1rem;
}
.recording-waveform {
flex: 1;
height: 40px;
background-color: var(--color-surface);
border-radius: var(--border-radius);
overflow: hidden;
position: relative;
}
.clone-sample-item {
display: flex;
align-items: center;
padding: 0.5rem;
border: 1px solid var(--color-border);
border-radius: var(--border-radius);
margin-bottom: 0.5rem;
}
.clone-sample-item button {
padding: 0.25rem;
margin-left: 0.5rem;
}
.training-progress {
width: 100%;
height: 8px;
background-color: var(--color-surface);
border-radius: 9999px;
overflow: hidden;
margin: 1rem 0;
}
.training-progress-bar {
height: 100%;
background-color: var(--color-primary);
width: 0%;
transition: width 0.3s;
}
.collapsible-section {
margin-bottom: 1rem;
}
.collapsible-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem;
background-color: var(--color-surface);
border-radius: var(--border-radius);
cursor: pointer;
}
.collapsible-content {
padding: 0.5rem;
border: 1px solid var(--color-border);
border-top: none;
border-bottom-left-radius: var(--border-radius);
border-bottom-right-radius: var(--border-radius);
display: none;
}
.badge {
display: inline-block;
padding: 0.25rem 0.5rem;
border-radius: 9999px;
font-size: 0.75rem;
font-weight: 500;
}
.badge-primary {
background-color: var(--color-primary);
color: white;
}
.badge-accent {
background-color: var(--color-accent);
color: white;
}
.badge-secondary {
background-color: var(--color-surface);
color: var(--color-text-primary);
}
/* TTS Section */
.tts-container {
margin-top: 1rem;
}
.tts-input {
width: 100%;
min-height: 80px;
margin-bottom: 0.5rem;
resize: vertical;
}
/* Status pills */
.status-pill {
display: inline-flex;
align-items: center;
padding: 0.25rem 0.5rem;
border-radius: 9999px;
font-size: 0.75rem;
background-color: var(--color-surface);
}
.status-pill.active {
background-color: rgba(16, 185, 129, 0.2);
color: var(--color-success);
}
.status-pill.inactive {
background-color: rgba(239, 68, 68, 0.1);
color: var(--color-error);
}
.status-pill svg {
margin-right: 0.25rem;
width: 12px;
height: 12px;
}
/* TensorFlow.js specific styles */
.ml-badge {
display: inline-flex;
align-items: center;
padding: 0.25rem 0.5rem;
border-radius: 9999px;
font-size: 0.75rem;
background-color: #7c3aed;
color: white;
}
.model-loading {
display: inline-block;
width: 12px;
height: 12px;
border: 2px solid rgba(255, 255, 255, 0.3);
border-radius: 50%;
border-top-color: #fff;
animation: spin 1s ease-in-out infinite;
margin-right: 0.5rem;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.quality-selector label.active {
border: 2px solid var(--color-primary);
box-shadow: 0 0 0 2px rgba(93, 92, 222, 0.2);
}
</style>
</head>
<body>
<!-- Header -->
<header>
<div class="header-content container">
<div class="app-title">
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
<line x1="12" y1="19" x2="12" y2="22"/>
</svg>
GhostVoice Studio
<span style="font-size: 0.75rem; margin-left: 0.5rem; padding: 0.1rem 0.5rem; background-color: rgba(255,255,255,0.2); border-radius: 999px;">Pro</span>
</div>
</div>
</header>
<!-- Main Content -->
<main>
<div class="container">
<!-- Audio API Support Check -->
<div id="apiErrorBanner" class="error-banner" style="display: none;">
<strong>Web Audio API not supported:</strong> Your browser doesn't support the required audio features. Please try Chrome, Firefox, or Edge.
</div>
<!-- Tab Navigation — the script (not in this chunk) toggles .active on these
     buttons and on the matching .tab-content panels.
     Fixes: explicit type="button" on every button (idiomatic; a bare <button>
     defaults to type=submit); decorative icon SVGs hidden from assistive
     tech since each button already has a visible text label; the TTS icon's
     two zero-length <line> "dots" now render via stroke-linecap="round"
     (with the default butt cap a zero-length line paints nothing). -->
<div class="tabs">
<button id="voiceTabBtn" class="tab-btn active" type="button">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
<line x1="12" y1="19" x2="12" y2="22"/>
</svg>
Voice Changer
</button>
<button id="cloneTabBtn" class="tab-btn" type="button">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<path d="M17 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2"></path>
<circle cx="9" cy="7" r="4"></circle>
<path d="M23 21v-2a4 4 0 0 0-3-3.87"></path>
<path d="M16 3.13a4 4 0 0 1 0 7.75"></path>
</svg>
Voice Cloning
</button>
<button id="callTabBtn" class="tab-btn" type="button">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<path d="M22 16.92v3a2 2 0 0 1-2.18 2 19.79 19.79 0 0 1-8.63-3.07 19.5 19.5 0 0 1-6-6 19.79 19.79 0 0 1-3.07-8.67A2 2 0 0 1 4.11 2h3a2 2 0 0 1 2 1.72 12.84 12.84 0 0 0 .7 2.81 2 2 0 0 1-.45 2.11L8.09 9.91a16 16 0 0 0 6 6l1.27-1.27a2 2 0 0 1 2.11-.45 12.84 12.84 0 0 0 2.81.7A2 2 0 0 1 22 16.92z"/>
</svg>
Calls
</button>
<button id="ttsTabBtn" class="tab-btn" type="button">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" aria-hidden="true">
<path d="M9 17c-2.3 0-4-1.79-4-4a5.07 5.07 0 0 1 5-5c3.39 0 6 2.54 6 6v1c0 2-1.3 3-3 3s-3-1-3-3"/>
<line x1="5" y1="12" x2="5" y2="12"/>
<line x1="19" y1="12" x2="19" y2="12"/>
</svg>
Text to Speech
</button>
</div>
<!-- Voice Changer Tab Content -->
<div id="voiceTab" class="tab-content active">
<!-- Device Selection -->
<div class="card">
<div class="card-header">
<h2 class="card-title">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
<line x1="12" y1="19" x2="12" y2="22"/>
</svg>
Audio Devices
</h2>
<div>
<span id="processingStatus" class="status-pill inactive">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="10"/>
<line x1="12" y1="8" x2="12" y2="12"/>
<line x1="12" y1="16" x2="12.01" y2="16"/>
</svg>
Not Processing
</span>
</div>
</div>
<div class="card-body">
<div id="deviceErrorMessage" class="error-banner" style="display: none;"></div>
<div class="row">
<div class="col">
<div class="form-group">
<label for="inputDevice">Microphone</label>
<select id="inputDevice"></select>
<button id="refreshDevices" class="btn btn-secondary" style="margin-top: 0.5rem; font-size: 0.75rem;">
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M23 4v6h-6"/>
<path d="M1 20v-6h6"/>
<path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"/>
</svg>
Refresh Devices
</button>
</div>
</div>
<div class="col">
<div class="form-group">
<label for="outputDevice">Speakers</label>
<select id="outputDevice"></select>
<button id="testAudio" class="btn btn-secondary test-tone-btn">
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
<path d="M15.54 8.46a5 5 0 0 1 0 7.07"/>
<path d="M19.07 4.93a10 10 0 0 1 0 14.14"/>
</svg>
Test Audio
</button>
</div>
</div>
</div>
<div style="margin-top: 0.75rem;">
<label style="margin-bottom: 0.5rem; display: block;">Microphone Test</label>
<div class="audio-meter">
<div id="inputMeter" class="audio-meter-level"></div>
</div>
<div class="row" style="margin-top: 0.5rem;">
<div class="col">
<div class="form-group" style="margin-bottom: 0;">
<label for="inputGain" style="font-size: 0.75rem;">Input Gain</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<input type="range" id="inputGain" min="0" max="2" step="0.1" value="1">
<span id="inputGainValue" class="slider-value">1.0</span>
</div>
</div>
</div>
<div class="col">
<div class="form-group" style="margin-bottom: 0;">
<label for="monitorGain" style="font-size: 0.75rem;">Monitor Volume</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<input type="range" id="monitorGain" min="0" max="1" step="0.1" value="0">
<span id="monitorGainValue" class="slider-value">0.0</span>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Voice Profiles -->
<div class="card">
<div class="card-header">
<h2 class="card-title">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M17 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2"></path>
<circle cx="9" cy="7" r="4"></circle>
<path d="M23 21v-2a4 4 0 0 0-3-3.87"></path>
<path d="M16 3.13a4 4 0 0 1 0 7.75"></path>
</svg>
Voice Profiles
</h2>
</div>
<div class="card-body">
<div id="voiceProfiles" class="voice-profiles-grid">
<!-- Will be populated with voice profiles -->
<div class="voice-profile-card active">
<div class="voice-profile-header">
<h3 class="voice-profile-title">Standard Voice Changer</h3>
</div>
<div class="voice-profile-body">
<p style="margin-top: 0; font-size: 0.875rem; color: var(--color-text-secondary);">Manual voice adjustments</p>
<span class="badge badge-secondary">Built-in</span>
</div>
<div class="voice-profile-footer">
<button class="btn btn-secondary btn-sm" disabled>Default</button>
<span style="font-size: 0.75rem; color: var(--color-success);">Active</span>
</div>
</div>
</div>
<div style="margin-top: 1rem;">
<button id="createNewProfile" class="btn btn-primary">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="10"/>
<line x1="12" y1="8" x2="12" y2="16"/>
<line x1="8" y1="12" x2="16" y2="12"/>
</svg>
Create New Profile
</button>
<button id="importProfile" class="btn btn-secondary">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
<polyline points="17 8 12 3 7 8"/>
<line x1="12" y1="3" x2="12" y2="15"/>
</svg>
Import Profile
</button>
</div>
</div>
</div>
<!-- Voice Transformation -->
<div class="card">
<div class="card-header">
<h2 class="card-title">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M19 11V9a7 7 0 0 0-7-7v0a7 7 0 0 0-7 7v2"/>
<path d="M5 11a2 2 0 0 0-2 2v2a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-2a2 2 0 0 0-2-2"/>
<path d="M12 19v3"/>
<line x1="8" y1="22" x2="16" y2="22"/>
</svg>
Voice Settings
</h2>
</div>
<div class="card-body">
<div class="form-group">
<label for="pitchShift">Pitch Shift</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<input type="range" id="pitchShift" min="-12" max="12" step="1" value="0">
<span id="pitchShiftValue" class="slider-value">0</span>
</div>
</div>
<div class="form-group">
<label for="formantShift">Formant Shift</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<input type="range" id="formantShift" min="-12" max="12" step="1" value="0">
<span id="formantShiftValue" class="slider-value">0</span>
</div>
</div>
<div class="row">
<div class="col">
<div class="form-group">
<label for="voiceEffect">Effect Type</label>
<select id="voiceEffect">
<option value="none">None</option>
<option value="robot">Robot</option>
<option value="reverb">Reverb</option>
<option value="telephone">Telephone</option>
<option value="megaphone">Megaphone</option>
<option value="alien">Alien</option>
<option value="underwater">Underwater</option>
<option value="chipmunk">Chipmunk</option>
</select>
</div>
</div>
<div class="col">
<div class="form-group">
<label for="effectMix">Effect Mix</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<input type="range" id="effectMix" min="0" max="100" step="1" value="50">
<span id="effectMixValue" class="slider-value">50%</span>
</div>
</div>
</div>
</div>
<div class="form-group">
<label for="genderSlider">Gender Morph</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<span style="font-size: 0.75rem;">Male</span>
<input type="range" id="genderSlider" min="0" max="100" step="1" value="50" style="flex: 1;">
<span style="font-size: 0.75rem;">Female</span>
<span id="genderValue" class="slider-value">50%</span>
</div>
</div>
<div style="margin-top: 1rem;">
<div class="preset-grid">
<button data-preset="normal" class="preset-btn active">
<div class="preset-emoji">👤</div>
<div class="preset-name">Normal</div>
</button>
<button data-preset="deep" class="preset-btn">
<div class="preset-emoji">🔊</div>
<div class="preset-name">Deep Voice</div>
</button>
<button data-preset="helium" class="preset-btn">
<div class="preset-emoji">🎈</div>
<div class="preset-name">Helium</div>
</button>
<button data-preset="robot" class="preset-btn">
<div class="preset-emoji">🤖</div>
<div class="preset-name">Robot</div>
</button>
<button data-preset="telephone" class="preset-btn">
<div class="preset-emoji">📞</div>
<div class="preset-name">Phone</div>
</button>
<button data-preset="alien" class="preset-btn">
<div class="preset-emoji">👽</div>
<div class="preset-name">Alien</div>
</button>
<button data-preset="chipmunk" class="preset-btn">
<div class="preset-emoji">🐿️</div>
<div class="preset-name">Chipmunk</div>
</button>
<button data-preset="underwater" class="preset-btn">
<div class="preset-emoji">🌊</div>
<div class="preset-name">Underwater</div>
</button>
</div>
</div>
</div>
</div>
<!-- Advanced Settings -->
<div class="card">
<div class="card-header">
<h2 class="card-title">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="3"></circle>
<path d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2 2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0 0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2 2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0 1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"></path>
</svg>
Advanced Settings
</h2>
</div>
<div class="card-body">
<div class="row">
<div class="col">
<div class="form-group">
<label for="bufferSize">Buffer Size</label>
<select id="bufferSize">
<option value="256">256 (Very Low Latency)</option>
<option value="512" selected>512 (Low Latency)</option>
<option value="1024">1024 (Standard)</option>
<option value="2048">2048 (High Quality)</option>
</select>
</div>
</div>
<div class="col">
<div class="form-group">
<label>Voice Processing</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<button id="startProcessingBtn" class="btn btn-primary" style="flex: 1;">Start Processing</button>
<button id="stopProcessingBtn" class="btn btn-danger" style="flex: 1; display: none;">Stop Processing</button>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col-6">
<div class="form-group">
<label style="margin-bottom: 0.5rem;">Noise Suppression</label>
<div style="display: flex; align-items: center;">
<label class="toggle-switch">
<input id="noiseSuppressionToggle" type="checkbox" checked>
<span class="toggle-slider"></span>
</label>
<span style="margin-left: 0.5rem; font-size: 0.875rem;">Reduce background noise</span>
</div>
</div>
</div>
<div class="col-6">
<div class="form-group">
<label style="margin-bottom: 0.5rem;">Echo Cancellation</label>
<div style="display: flex; align-items: center;">
<label class="toggle-switch">
<input id="echoCancellationToggle" type="checkbox" checked>
<span class="toggle-slider"></span>
</label>
<span style="margin-left: 0.5rem; font-size: 0.875rem;">Reduce echo/feedback</span>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col-6">
<div class="form-group">
<label style="margin-bottom: 0.5rem;">Voice Activation</label>
<div style="display: flex; align-items: center;">
<label class="toggle-switch">
<input id="voiceActivationToggle" type="checkbox">
<span class="toggle-slider"></span>
</label>
<span style="margin-left: 0.5rem; font-size: 0.875rem;">Auto-activate on speech</span>
</div>
</div>
</div>
<div class="col-6">
<div class="form-group">
<label style="margin-bottom: 0.5rem;">Push-to-Talk</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<div id="pttKeyDisplay" style="flex: 1; background-color: var(--color-surface); border: 1px solid var(--color-border); border-radius: var(--border-radius); padding: 0.5rem; text-align: center; font-family: monospace; font-size: 0.875rem;">SPACE</div>
<button id="changePttKeyBtn" class="btn btn-secondary">Change</button>
</div>
</div>
</div>
</div>
<div id="voiceActivationSettings" style="margin-top: 0.5rem; display: none;">
<div class="form-group">
<label for="voiceThreshold">Voice Activation Threshold</label>
<div style="display: flex; align-items: center; gap: 0.5rem;">
<input type="range" id="voiceThreshold" min="0" max="100" step="1" value="15">
<span id="voiceThresholdValue" class="slider-value">15%</span>
</div>
<p style="margin-top: 0.25rem; font-size: 0.75rem; color: var(--color-text-secondary);">Higher values require louder speech to activate your microphone</p>
</div>
</div>
</div>
</div>
</div>
<!-- Voice Cloning Tab Content -->
<div id="cloneTab" class="tab-content">
<div class="card">
<div class="card-header">
<h2 class="card-title">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M17 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2"></path>
<circle cx="9" cy="7" r="4"></circle>
<path d="M23 21v-2a4 4 0 0 0-3-3.87"></path>
<path d="M16 3.13a4 4 0 0 1 0 7.75"></path>
</svg>
Voice Clone Profiles
</h2>
<div>
<span id="cloneAiStatus" class="status-pill active">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="10"/>
<polyline points="12 6 12 12 16 14"/>
</svg>
AI Ready
</span>
</div>
</div>
<div class="card-body">
<div id="cloneProfiles" class="voice-profiles-grid">
<!-- Will be populated with voice clone profiles -->
<div class="voice-profile-card">
<div class="voice-profile-header">
<h3 class="voice-profile-title">Create New Voice Clone</h3>
</div>
<div class="voice-profile-body">
<p style="margin-top: 0; font-size: 0.875rem; color: var(--color-text-secondary);">Clone a voice by recording samples</p>
<span class="badge badge-accent">AI-Powered</span>
</div>
<div class="voice-profile-footer">
<button id="newCloneBtn" class="btn btn-accent btn-sm">Create</button>
</div>
</div>
</div>
</div>
</div>
<!-- Create New Voice Clone -->
<div id="createCloneCard" class="card" style="display: none;">
<div class="card-header">
<h2 class="card-title">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="10"/>
<line x1="12" y1="8" x2="12" y2="16"/>
<line x1="8" y1="12" x2="16" y2="12"/>
</svg>
Create New Voice Clone
</h2>
</div>
<div class="card-body">
<div class="form-group">
<label for="cloneName">Name Your Voice Clone</label>
<input type="text" id="cloneName" placeholder="e.g., My Robot Voice, Celebrity Impression">
</div>
<div class="form-group">
<label>Voice Sample Recording</label>
<p style="margin-top: 0; font-size: 0.875rem; color: var(--color-text-secondary);">Record at least 30 seconds of clear speech for best results</p>
<!-- Sample-recording controls. Fix: the record button is icon-only, so it
     needs an accessible name (aria-label) and an explicit type="button";
     the circle glyph is decorative and hidden from assistive tech.
     NOTE(review): if the controlling script swaps this button between
     record/stop states, it should update aria-label accordingly — confirm. -->
<div style="display: flex; align-items: center; margin: 1rem 0;">
<button id="recordSampleBtn" class="record-btn" type="button" aria-label="Record voice sample">
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<circle cx="12" cy="12" r="10"/>
</svg>
</button>
<span id="recordingTimer" class="recording-timer">0:00</span>
<div id="recordingWaveform" class="recording-waveform"></div>
</div>
<div id="sampleTexts" style="margin-bottom: 1rem;">
<p style="font-size: 0.875rem;">Please read the following sample text:</p>
<div style="background-color: var(--color-surface); padding: 0.75rem; border-radius: var(--border-radius); margin-bottom: 0.5rem;">
The quick brown fox jumps over the lazy dog. The early bird catches the worm, but the second mouse gets the cheese.
</div>
</div>
<div id="recordedSamples" style="margin-top: 1rem;">
<h3 style="font-size: 0.875rem; margin-bottom: 0.5rem;">Recorded Samples</h3>
<div id="samplesList"></div>
</div>
</div>
<!-- Voice Clone Technology selector -->
<div class="form-group">
<label>Voice Clone Technology</label>
<div class="quality-selector" style="display: flex; align-items: center; gap: 1rem; margin-top: 0.5rem;">
<label class="preset-btn active" style="flex: 1; cursor: pointer;">
<input type="radio" name="cloneQuality" value="ml" checked style="display: none;">
<div class="preset-emoji">⚡</div>
<div class="preset-name">Machine Learning</div>
<div style="font-size: 0.7rem; color: var(--color-text-secondary);">TensorFlow.js voice model</div>
</label>
<label class="preset-btn" style="flex: 1; cursor: pointer;">
<input type="radio" name="cloneQuality" value="simulation" style="display: none;">
<div class="preset-emoji">🔊</div>
<div class="preset-name">Simulation</div>
<div style="font-size: 0.7rem; color: var(--color-text-secondary);">Voice characteristics extraction</div>
</label>
</div>
</div>
<div id="trainingSection" style="margin-top: 1rem; display: none;">
<div class="form-group">
<label>AI Training</label>
<p style="margin-top: 0; font-size: 0.875rem; color: var(--color-text-secondary);">Train your voice clone with the recorded samples</p>
<div class="training-progress">
<div id="trainingProgressBar" class="training-progress-bar"></div>
</div>
<div id="trainingStatus" style="font-size: 0.875rem; text-align: center;">Ready to train</div>
</div>
<button id="trainCloneBtn" class="btn btn-accent" style="width: 100%;">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/>
<circle cx="12" cy="13" r="4"/>
</svg>
Start Training Voice Clone
</button>
</div>
<div id="cloneControls" style="margin-top: 1rem;">
<div style="display: flex; gap: 0.5rem;">
<button id="cancelCloneBtn" class="btn btn-secondary" style="flex: 1;">Cancel</button>
<button id="createCloneBtn" class="btn btn-primary" style="flex: 1;" disabled>Create Voice Clone</button>
</div>
</div>
</div>
</div>
<!-- Voice Clone Settings (Shows after creation) -->
<div id="cloneSettingsCard" class="card" style="display: none;">
  <div class="card-header">
    <h2 class="card-title">
      <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
        <circle cx="12" cy="12" r="3"></circle>
        <path d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2 2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0 0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2 2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0 1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"></path>
      </svg>
      <span id="cloneSettingsTitle">Voice Clone Settings</span>
    </h2>
  </div>
  <div class="card-body">
    <div class="form-group">
      <label for="cloneSimilarity">Voice Similarity</label>
      <div style="display: flex; align-items: center; gap: 0.5rem;">
        <input type="range" id="cloneSimilarity" min="0" max="100" step="1" value="70">
        <span id="cloneSimilarityValue" class="slider-value">70%</span>
      </div>
      <p style="margin-top: 0.25rem; font-size: 0.75rem; color: var(--color-text-secondary);">Higher values sound more like the original voice but may be less stable</p>
    </div>
    <div class="form-group">
      <label for="cloneStability">Stability</label>
      <div style="display: flex; align-items: center; gap: 0.5rem;">
        <input type="range" id="cloneStability" min="0" max="100" step="1" value="50">
        <span id="cloneStabilityValue" class="slider-value">50%</span>
      </div>
      <p style="margin-top: 0.25rem; font-size: 0.75rem; color: var(--color-text-secondary);">Higher values produce more stable and consistent output</p>
    </div>
    <div class="form-group">
      <label for="cloneStyle">Style Transfer</label>
      <div style="display: flex; align-items: center; gap: 0.5rem;">
        <input type="range" id="cloneStyle" min="0" max="100" step="1" value="20">
        <span id="cloneStyleValue" class="slider-value">20%</span>
      </div>
      <p style="margin-top: 0.25rem; font-size: 0.75rem; color: var(--color-text-secondary);">Higher values transfer more of the speaking style (emotion, pacing)</p>
    </div>
    <div class="form-group">
      <label for="cloneEmotionStrength">Emotion Strength</label>
      <div style="display: flex; align-items: center; gap: 0.5rem;">
        <input type="range" id="cloneEmotionStrength" min="0" max="100" step="1" value="60">
        <span id="cloneEmotionStrengthValue" class="slider-value">60%</span>
      </div>
    </div>
    <div style="margin-top: 1rem; display: flex; gap: 0.5rem;">
      <button type="button" id="testCloneBtn" class="btn btn-secondary" style="flex: 1;">
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
          <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
          <path d="M15.54 8.46a5 5 0 0 1 0 7.07"/>
          <path d="M19.07 4.93a10 10 0 0 1 0 14.14"/>
        </svg>
        Test Voice
      </button>
      <button type="button" id="activateCloneBtn" class="btn btn-accent" style="flex: 1;">
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
          <path d="M22 11.08V12a10 10 0 1 1-5.93-9.14"/>
          <polyline points="22 4 12 14.01 9 11.01"/>
        </svg>
        Activate Voice Clone
      </button>
    </div>
    <div style="margin-top: 0.5rem; text-align: right;">
      <button type="button" id="backToCloneListBtn" class="btn btn-secondary btn-sm">Back to List</button>
    </div>
  </div>
</div>
</div>
<!-- Call Controls Tab Content -->
<div id="callTab" class="tab-content">
  <div class="card">
    <div class="card-header">
      <h2 class="card-title">
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
          <path d="M22 16.92v3a2 2 0 0 1-2.18 2 19.79 19.79 0 0 1-8.63-3.07 19.5 19.5 0 0 1-6-6 19.79 19.79 0 0 1-3.07-8.67A2 2 0 0 1 4.11 2h3a2 2 0 0 1 2 1.72 12.84 12.84 0 0 0 .7 2.81 2 2 0 0 1-.45 2.11L8.09 9.91a16 16 0 0 0 6 6l1.27-1.27a2 2 0 0 1 2.11-.45 12.84 12.84 0 0 0 2.81.7A2 2 0 0 1 22 16.92z"/>
        </svg>
        Voice Call
      </h2>
    </div>
    <div class="card-body">
      <div id="callErrorMessage" class="error-banner" style="display: none;"></div>
      <div class="row">
        <div class="col">
          <div class="form-group">
            <label for="roomId">Room ID</label>
            <div style="display: flex; align-items: center; gap: 0.5rem;">
              <input type="text" id="roomId" placeholder="Enter room ID">
              <button type="button" id="generateRoomBtn" class="btn btn-secondary">Generate</button>
            </div>
          </div>
          <div class="form-group">
            <label for="displayName">Your Name</label>
            <input type="text" id="displayName" placeholder="Enter your display name">
          </div>
          <div class="form-group">
            <label for="activeVoice">Active Voice</label>
            <select id="activeVoice">
              <option value="standard">Standard Voice Changer</option>
              <!-- Will be populated with voice clones -->
            </select>
            <p style="margin-top: 0.25rem; font-size: 0.75rem; color: var(--color-text-secondary);">Select which voice to use during the call</p>
          </div>
          <div style="display: flex; gap: 0.5rem; margin-bottom: 1rem;">
            <button type="button" id="joinCallBtn" class="btn btn-primary" style="flex: 1;">Join Call</button>
            <button type="button" id="leaveCallBtn" class="btn btn-secondary" style="flex: 1;" disabled>Leave Call</button>
          </div>
          <div class="call-status">
            <div id="callStatusIndicator" class="status-indicator status-disconnected"></div>
            <span id="callStatusText">Disconnected</span>
          </div>
        </div>
        <div class="col">
          <h3 style="margin-top: 0; margin-bottom: 0.75rem; font-size: 1rem;">Current Participants</h3>
          <div id="participantsList" style="min-height: 100px; max-height: 200px; overflow-y: auto; padding: 0.5rem; background-color: var(--color-surface); border-radius: var(--border-radius);">
            <div style="color: var(--color-text-secondary); text-align: center; padding: 1rem 0;">
              No participants yet
            </div>
          </div>
          <div style="margin-top: 1rem;">
            <h3 style="margin-top: 0; margin-bottom: 0.75rem; font-size: 1rem;">Call Controls</h3>
            <div style="display: flex; gap: 0.5rem;">
              <button type="button" id="toggleMicBtn" class="btn btn-secondary" style="flex: 1;">
                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
                  <path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/>
                  <path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
                  <line x1="12" y1="19" x2="12" y2="22"/>
                </svg>
                Mute Mic
              </button>
              <button type="button" id="toggleAudioBtn" class="btn btn-secondary" style="flex: 1;">
                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
                  <path d="M3 18v-6a9 9 0 0 1 18 0v6"></path>
                  <path d="M21 19a2 2 0 0 1-2 2h-1a2 2 0 0 1-2-2v-3a2 2 0 0 1 2-2h3zM3 19a2 2 0 0 0 2 2h1a2 2 0 0 0 2-2v-3a2 2 0 0 0-2-2H3z"></path>
                </svg>
                Mute Audio
              </button>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <div class="card">
    <div class="card-header">
      <h2 class="card-title">
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
          <circle cx="12" cy="12" r="10"></circle>
          <line x1="12" y1="8" x2="12" y2="12"></line>
          <line x1="12" y1="16" x2="12.01" y2="16"></line>
        </svg>
        Information
      </h2>
    </div>
    <div class="card-body">
      <p style="margin-top: 0;">How to use Voice Cloning and Transformation during calls:</p>
      <ol style="padding-left: 1.5rem;">
        <li>Create and train a voice clone or use the standard voice changer</li>
        <li>Start voice processing on the Voice Changer tab</li>
        <li>Join a call with your room ID</li>
        <li>Share the room ID with others to have them join</li>
        <li>Use Push-to-Talk or enable voice activation</li>
        <li>Adjust voice settings in real-time while on the call</li>
      </ol>
      <p style="font-size: 0.875rem; color: var(--color-text-secondary);">All audio processing happens locally in your browser. For AI-powered features, we securely communicate with our servers.</p>
    </div>
  </div>
</div>
<!-- Text-to-Speech Tab Content -->
<div id="ttsTab" class="tab-content">
  <div class="card">
    <div class="card-header">
      <h2 class="card-title">
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
          <path d="M9 17c-2.3 0-4-1.79-4-4a5.07 5.07 0 0 1 5-5c3.39 0 6 2.54 6 6v1c0 2-1.3 3-3 3s-3-1-3-3"/>
          <line x1="5" y1="12" x2="5" y2="12"/>
          <line x1="19" y1="12" x2="19" y2="12"/>
        </svg>
        Text-to-Speech with Voice Cloning
      </h2>
    </div>
    <div class="card-body">
      <div class="form-group">
        <label for="ttsVoice">Voice Selection</label>
        <select id="ttsVoice">
          <option value="standard">Standard TTS Voice</option>
          <!-- Will be populated with voice clones -->
        </select>
      </div>
      <div class="form-group">
        <label for="ttsText">Text to Convert</label>
        <textarea id="ttsText" class="tts-input" placeholder="Enter text to be spoken in the selected voice..."></textarea>
      </div>
      <div class="row">
        <div class="col">
          <div class="form-group">
            <label for="ttsPitch">Voice Pitch</label>
            <div style="display: flex; align-items: center; gap: 0.5rem;">
              <input type="range" id="ttsPitch" min="-20" max="20" step="1" value="0">
              <span id="ttsPitchValue" class="slider-value">0</span>
            </div>
          </div>
        </div>
        <div class="col">
          <div class="form-group">
            <label for="ttsSpeed">Speaking Rate</label>
            <div style="display: flex; align-items: center; gap: 0.5rem;">
              <input type="range" id="ttsSpeed" min="0.5" max="2" step="0.1" value="1">
              <span id="ttsSpeedValue" class="slider-value">1.0x</span>
            </div>
          </div>
        </div>
      </div>
      <div class="form-group">
        <label for="ttsEmotion">Emotion</label>
        <select id="ttsEmotion">
          <option value="neutral">Neutral</option>
          <option value="happy">Happy</option>
          <option value="sad">Sad</option>
          <option value="angry">Angry</option>
          <option value="fearful">Fearful</option>
          <option value="disgusted">Disgusted</option>
          <option value="surprised">Surprised</option>
        </select>
      </div>
      <div style="margin-top: 1rem; display: flex; gap: 0.5rem;">
        <button type="button" id="generateTtsBtn" class="btn btn-accent" style="flex: 1;">
          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
            <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
            <path d="M15.54 8.46a5 5 0 0 1 0 7.07"/>
            <path d="M19.07 4.93a10 10 0 0 1 0 14.14"/>
          </svg>
          Generate Speech
        </button>
        <button type="button" id="cancelTtsBtn" class="btn btn-secondary" style="flex: 1;">
          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
            <circle cx="12" cy="12" r="10"/>
            <line x1="15" y1="9" x2="9" y2="15"/>
            <line x1="9" y1="9" x2="15" y2="15"/>
          </svg>
          Cancel
        </button>
      </div>
      <div id="ttsResult" style="margin-top: 1rem; display: none;">
        <div style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.5rem;">
          <h3 style="margin: 0; font-size: 0.875rem;">Generated Speech</h3>
          <span id="ttsStatus" class="badge badge-primary">Processing...</span>
        </div>
        <div style="padding: 1rem; background-color: var(--color-surface); border-radius: var(--border-radius);">
          <audio id="ttsAudio" controls style="width: 100%;"></audio>
        </div>
        <div style="margin-top: 0.5rem; display: flex; justify-content: flex-end;">
          <button type="button" id="downloadTtsBtn" class="btn btn-secondary btn-sm">
            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
              <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
              <polyline points="7 10 12 15 17 10"/>
              <line x1="12" y1="15" x2="12" y2="3"/>
            </svg>
            Download
          </button>
        </div>
      </div>
    </div>
  </div>
  <div class="card">
    <div class="card-header">
      <h2 class="card-title">
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
          <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
          <polyline points="14 2 14 8 20 8"/>
          <line x1="16" y1="13" x2="8" y2="13"/>
          <line x1="16" y1="17" x2="8" y2="17"/>
          <polyline points="10 9 9 9 8 9"/>
        </svg>
        History
      </h2>
    </div>
    <div class="card-body">
      <div id="ttsHistoryList">
        <!-- Will be populated with TTS history items -->
        <div style="color: var(--color-text-secondary); text-align: center; padding: 1rem;">
          No history yet. Generated speech will appear here.
        </div>
      </div>
    </div>
  </div>
</div>
</div>
</main>
<!-- Floating PTT Control -->
<div id="floatingControls" class="floating-controls">
  <div style="display: flex; align-items: center; justify-content: space-between; margin-bottom: 0.5rem;">
    <span id="floatingVoiceName" style="font-size: 0.75rem; font-weight: 500;">Standard Voice</span>
    <span id="floatingLatency" style="font-size: 0.75rem; background-color: rgba(93, 92, 222, 0.2); color: var(--color-primary); padding: 0.25rem 0.5rem; border-radius: 9999px;">12ms</span>
  </div>
  <button type="button" id="floatingPttBtn" class="btn btn-primary" style="width: 100%; display: flex; align-items: center; justify-content: center; margin-bottom: 0.5rem;">
    <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" style="margin-right: 0.5rem;" aria-hidden="true">
      <path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/>
      <path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
      <line x1="12" y1="19" x2="12" y2="22"/>
    </svg>
    Push to Talk
  </button>
  <select id="floatingPresetSelect" style="width: 100%; margin-bottom: 0.5rem;" aria-label="Voice preset">
    <option value="normal">Normal Voice</option>
    <option value="deep">Deep Voice</option>
    <option value="helium">Helium Voice</option>
    <option value="robot">Robot Voice</option>
    <option value="telephone">Telephone</option>
    <option value="alien">Alien</option>
    <option value="chipmunk">Chipmunk</option>
    <option value="underwater">Underwater</option>
  </select>
  <div style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.5rem;">
    <span style="font-size: 0.75rem;">Input:</span>
    <input type="range" id="floatingInputGain" min="0" max="2" step="0.1" value="1" style="flex: 1;" aria-label="Input gain">
  </div>
  <div style="display: flex; align-items: center; gap: 0.5rem; justify-content: space-between;">
    <div id="floatingVoiceIndicator" class="voice-indicator"></div>
    <button type="button" id="hideFloatingBtn" class="btn btn-secondary" style="padding: 0.25rem 0.5rem; font-size: 0.75rem;">Hide</button>
  </div>
</div>
<!-- PTT Key Setup Overlay -->
<div id="pttKeyOverlay" class="overlay">
  <div class="overlay-content">
    <h3 style="margin-top: 0; margin-bottom: 1rem;">Set Push-to-Talk Key</h3>
    <p>Press any key to set as your Push-to-Talk shortcut</p>
    <div id="keyDisplayBox" style="padding: 1rem; text-align: center; background-color: var(--color-surface); border-radius: var(--border-radius); font-family: monospace; font-size: 1.25rem; margin: 1rem 0;">
      Press a key...
    </div>
    <div class="overlay-footer">
      <button type="button" id="cancelPttKeyBtn" class="btn btn-secondary">Cancel</button>
      <button type="button" id="resetPttKeyBtn" class="btn btn-secondary">Reset to Space</button>
      <button type="button" id="savePttKeyBtn" class="btn btn-primary">Save</button>
    </div>
  </div>
</div>
<!-- Voice Activation Test Overlay -->
<div id="voiceTestOverlay" class="overlay">
  <div class="overlay-content">
    <h3 style="margin-top: 0; margin-bottom: 1rem;">Voice Activation Test</h3>
    <p>Speak to test your voice activation threshold</p>
    <div class="audio-meter" style="height: 32px; margin: 1rem 0;">
      <div id="testMeter" class="audio-meter-level"></div>
    </div>
    <div style="display: flex; align-items: center; justify-content: space-between; margin-bottom: 1rem;">
      <span style="font-size: 0.75rem;">Current Level: <span id="currentLevelValue">0%</span></span>
      <span style="font-size: 0.75rem;">Threshold: <span id="thresholdLevelValue">15%</span></span>
    </div>
    <div style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 1rem;">
      <input type="range" id="thresholdTestSlider" min="0" max="100" step="1" value="15" style="flex: 1;" aria-label="Voice activation threshold">
    </div>
    <div style="margin-bottom: 1rem; text-align: center;">
      <div id="voiceTestIndicator" style="display: inline-block; width: 16px; height: 16px; border-radius: 50%; background-color: var(--color-error); margin-right: 0.5rem;"></div>
      <span id="voiceTestStatus">Not Activated</span>
    </div>
    <div class="overlay-footer">
      <button type="button" id="cancelVoiceTestBtn" class="btn btn-secondary">Cancel</button>
      <button type="button" id="applyThresholdBtn" class="btn btn-primary">Apply Threshold</button>
    </div>
  </div>
</div>
<!-- Create Profile Overlay -->
<div id="createProfileOverlay" class="overlay">
  <div class="overlay-content">
    <h3 style="margin-top: 0; margin-bottom: 1rem;">Create New Voice Profile</h3>
    <div class="form-group">
      <label for="profileName">Profile Name</label>
      <input type="text" id="profileName" placeholder="My Voice Profile">
    </div>
    <div class="form-group">
      <label for="profileDescription">Description (Optional)</label>
      <textarea id="profileDescription" placeholder="Describe your voice profile" style="height: 80px;"></textarea>
    </div>
    <div class="overlay-footer">
      <button type="button" id="cancelProfileBtn" class="btn btn-secondary">Cancel</button>
      <button type="button" id="saveProfileBtn" class="btn btn-primary">Create Profile</button>
    </div>
  </div>
</div>
<!-- Notification: JS-populated toast. role="status"/aria-live make updates
     announced by screen readers; the live region must exist before content
     is injected. -->
<div id="notification" class="notification" role="status" aria-live="polite"></div>
<!-- Test Audio Player -->
<audio id="testAudioPlayer" style="display: none;"></audio>
<script>
// =======================================
// Audio Context and Stream Management
// =======================================
// Web Audio graph handles; all null while processing is stopped.
let audioContext = null;
let inputStream = null; // MediaStream returned by getUserMedia
let outputNode = null; // MediaStreamDestination carrying the processed signal
let analyserNode = null; // feeds the input level meter
let inputGainNode = null;
let monitorGainNode = null; // local-monitor volume (0 = muted by default)
let processorNode = null; // ScriptProcessorNode running processAudio
let formantFilter = null; // lowshelf biquad approximating a formant shift
let effectProcessorNode = null;
let isProcessing = false; // true while the live audio graph is running
let pttKeyCode = 'Space'; // KeyboardEvent.code bound to Push-to-Talk
let isPttActive = false;
let testAudioPlayer = document.getElementById('testAudioPlayer');
let voiceActivationEnabled = false;
let voiceActivationThreshold = 15; // percent of meter level that counts as speech
let isVoiceDetected = false;
let currentVoiceProfile = "standard";
let currentCloneProfile = null; // active entry from cloneProfiles, if any
// Audio settings
// Mutable live-processing parameters; the UI writes into this object.
const settings = {
pitchShift: 0, // semitones
formantShift: 0,
genderMorph: 50, // 0..100 slider position
effect: 'none',
effectMix: 50, // percent wet signal
inputGain: 1.0,
monitorGain: 0.0, // monitoring muted by default to avoid feedback
bufferSize: 512, // ScriptProcessor buffer size (latency vs. stability)
noiseSuppressionEnabled: true,
echoCancellationEnabled: true
};
// Voice presets
// Named bundles of the settings fields above, applied by the preset buttons.
const presets = {
normal: { pitchShift: 0, formantShift: 0, genderMorph: 50, effect: 'none', effectMix: 0 },
deep: { pitchShift: -5, formantShift: -3, genderMorph: 30, effect: 'none', effectMix: 0 },
helium: { pitchShift: 7, formantShift: 5, genderMorph: 70, effect: 'none', effectMix: 0 },
robot: { pitchShift: 0, formantShift: 0, genderMorph: 50, effect: 'robot', effectMix: 80 },
telephone: { pitchShift: 0, formantShift: 0, genderMorph: 50, effect: 'telephone', effectMix: 90 },
alien: { pitchShift: 4, formantShift: -5, genderMorph: 40, effect: 'reverb', effectMix: 60 },
chipmunk: { pitchShift: 12, formantShift: 8, genderMorph: 60, effect: 'none', effectMix: 0 },
underwater: { pitchShift: -2, formantShift: -1, genderMorph: 50, effect: 'reverb', effectMix: 70 }
};
// Voice clone profiles
const cloneProfiles = [];
// Voice profiles (manual voice modification settings)
const voiceProfiles = [];
// =======================================
// Tab Navigation
// =======================================
const voiceTabBtn = document.getElementById('voiceTabBtn');
const cloneTabBtn = document.getElementById('cloneTabBtn');
const callTabBtn = document.getElementById('callTabBtn');
const ttsTabBtn = document.getElementById('ttsTabBtn');
const voiceTab = document.getElementById('voiceTab');
const cloneTab = document.getElementById('cloneTab');
const callTab = document.getElementById('callTab');
const ttsTab = document.getElementById('ttsTab');
// Button/panel pairs plus an optional hook run after the tab becomes active.
// Replaces four copy-pasted click handlers that each toggled eight classes.
const tabDefs = [
  { btn: voiceTabBtn, panel: voiceTab },
  { btn: cloneTabBtn, panel: cloneTab },
  { btn: callTabBtn, panel: callTab },
  // TTS tab refreshes its voice list each time it is shown.
  { btn: ttsTabBtn, panel: ttsTab, onShow: () => updateTtsVoiceSelection() }
];
// Mark exactly one tab's button and panel as 'active', clearing the rest.
function activateTab(selected) {
  tabDefs.forEach(def => {
    const isActive = def === selected;
    def.btn.classList.toggle('active', isActive);
    def.panel.classList.toggle('active', isActive);
  });
}
tabDefs.forEach(def => {
  def.btn.addEventListener('click', () => {
    activateTab(def);
    if (def.onShow) def.onShow();
  });
});
// =======================================
// Check Web Audio API Support
// =======================================
// Returns true when both the Web Audio API and getUserMedia are available;
// otherwise reveals the #apiErrorBanner and returns false. Consolidates the
// previously duplicated banner-display branches.
function checkAudioSupport() {
  const hasAudioContext = typeof AudioContext !== 'undefined' || typeof webkitAudioContext !== 'undefined';
  const hasGetUserMedia = !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
  if (!hasAudioContext || !hasGetUserMedia) {
    document.getElementById('apiErrorBanner').style.display = 'block';
    return false;
  }
  return true;
}
// =======================================
// Audio Device Selection
// =======================================
// <select> elements listing the available microphones and speakers;
// populated by enumerateDevices().
const inputDeviceSelect = document.getElementById('inputDevice');
const outputDeviceSelect = document.getElementById('outputDevice');
// Create a test tone for audio testing
// Plays a one-second 440 Hz (A4) sine tone at 20% volume through a
// throwaway AudioContext, then closes that context shortly after the
// tone stops so no audio resources are left open.
function createTestTone() {
  try {
    const ToneContext = window.AudioContext || window.webkitAudioContext;
    const toneCtx = new ToneContext();
    const osc = toneCtx.createOscillator();
    const volume = toneCtx.createGain();
    osc.type = 'sine';
    osc.frequency.setValueAtTime(440, toneCtx.currentTime); // A4 note
    volume.gain.value = 0.2; // 20% volume
    osc.connect(volume);
    volume.connect(toneCtx.destination);
    osc.start();
    // Stop after one second, then release the context.
    setTimeout(() => {
      osc.stop();
      setTimeout(() => { toneCtx.close(); }, 100);
    }, 1000);
  } catch (error) {
    console.error("Error creating test tone:", error);
    showNotification("Error creating test tone: " + error.message);
  }
}
// Test audio output
// Plays the one-second test tone through the current output device.
document.getElementById('testAudio').addEventListener('click', () => {
try {
// Create a test tone
createTestTone();
showNotification("Playing test tone");
} catch (error) {
console.error("Error playing test tone:", error);
showNotification("Error playing test tone: " + error.message);
}
});
// Refresh devices button
// Re-runs device enumeration (re-requests mic permission if needed).
document.getElementById('refreshDevices').addEventListener('click', () => {
enumerateDevices();
});
// Enumerate audio devices
// Populates the microphone and speaker <select>s. Requests microphone
// permission first (device labels are only exposed after permission) and
// surfaces access problems in the #deviceErrorMessage banner.
async function enumerateDevices() {
  // Append one <option value=...>text</option> to a device <select>.
  const appendOption = (select, value, text) => {
    const option = document.createElement('option');
    option.value = value;
    option.text = text;
    select.appendChild(option);
  };
  try {
    const deviceErrorMessage = document.getElementById('deviceErrorMessage');
    deviceErrorMessage.style.display = 'none';
    // Permission gate: without it enumerateDevices() returns blank labels.
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      // Stop the tracks immediately after getting permission
      stream.getTracks().forEach(track => track.stop());
    } catch (err) {
      deviceErrorMessage.textContent = `Microphone access error: ${err.message}. Please check your browser permissions.`;
      deviceErrorMessage.style.display = 'block';
      return;
    }
    const devices = await navigator.mediaDevices.enumerateDevices();
    // Rebuild both lists from scratch, starting with the default entries.
    inputDeviceSelect.innerHTML = '';
    outputDeviceSelect.innerHTML = '';
    appendOption(inputDeviceSelect, 'default', 'Default Microphone');
    appendOption(outputDeviceSelect, 'default', 'Default Speakers');
    // Input devices
    const audioInputs = devices.filter(device => device.kind === 'audioinput');
    if (audioInputs.length === 0) {
      deviceErrorMessage.textContent = "No microphones detected. Please check your browser permissions or connect a microphone.";
      deviceErrorMessage.style.display = 'block';
    }
    audioInputs.forEach((device, index) => {
      appendOption(inputDeviceSelect, device.deviceId, device.label || `Microphone ${index + 1}`);
    });
    // Output devices
    const audioOutputs = devices.filter(device => device.kind === 'audiooutput');
    if (audioOutputs.length === 0) {
      // Missing outputs is less critical than missing inputs — warn only.
      console.warn("No audio output devices detected");
    }
    audioOutputs.forEach((device, index) => {
      appendOption(outputDeviceSelect, device.deviceId, device.label || `Speaker ${index + 1}`);
    });
    console.log(`Detected ${audioInputs.length} microphones and ${audioOutputs.length} speakers`);
    if (audioInputs.length > 0) {
      showNotification(`Found ${audioInputs.length} microphone(s)`);
    }
  } catch (error) {
    console.error("Error enumerating devices:", error);
    document.getElementById('deviceErrorMessage').textContent = "Error accessing audio devices: " + error.message;
    document.getElementById('deviceErrorMessage').style.display = 'block';
  }
}
// When input device changes
// Restart the live processing graph so the newly selected mic takes effect.
inputDeviceSelect.addEventListener('change', async () => {
if (isProcessing) {
await stopAudioProcessing();
await startAudioProcessing();
}
});
// When output device changes, if browser supports setSinkId
// setSinkId is not available in all browsers, hence the feature checks.
outputDeviceSelect.addEventListener('change', () => {
const deviceId = outputDeviceSelect.value;
// Apply the device change to the test audio player
try {
if (testAudioPlayer.setSinkId) {
testAudioPlayer.setSinkId(deviceId)
.then(() => {
console.log(`Audio output device set to: ${deviceId}`);
})
.catch(err => {
console.error("Error setting audio output device:", err);
});
}
// Also apply to any participant audio elements
document.querySelectorAll('.participant-audio').forEach(audio => {
if (audio.setSinkId) {
audio.setSinkId(deviceId).catch(err => {
console.error("Error setting audio output device for participant:", err);
});
}
});
} catch (err) {
console.error("Error changing output device:", err);
}
});
// =======================================
// Voice Processing
// =======================================
// Start audio processing
// Builds the live microphone graph:
//   mic source -> inputGain -> analyser -> scriptProcessor -> formantFilter -> outputNode
// with a monitor tap (formantFilter -> monitorGain -> speakers), then updates
// the UI and sets isProcessing. No-op if processing is already running.
// On microphone or graph-construction failure it notifies the user and
// returns without setting isProcessing.
async function startAudioProcessing() {
if (isProcessing) return;
try {
// Create new audio context if none exists or it's closed
if (!audioContext || audioContext.state === 'closed') {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
}
// Resume the audio context if it's suspended
// (browsers suspend contexts created before a user gesture)
if (audioContext.state === 'suspended') {
await audioContext.resume();
}
// Get the selected buffer size
settings.bufferSize = parseInt(document.getElementById('bufferSize').value);
// Get the user's microphone stream
// 'default' means: let the browser pick; otherwise pin the exact device.
const constraints = {
audio: {
deviceId: inputDeviceSelect.value !== 'default' ? { exact: inputDeviceSelect.value } : undefined,
echoCancellation: settings.echoCancellationEnabled,
noiseSuppression: settings.noiseSuppressionEnabled,
autoGainControl: false
}
};
try {
inputStream = await navigator.mediaDevices.getUserMedia(constraints);
console.log("Microphone access granted:", inputStream.getAudioTracks()[0].label);
} catch (err) {
console.error("Error accessing microphone:", err);
showNotification("Microphone access error: " + err.message);
return;
}
// Create source node from mic input
const sourceNode = audioContext.createMediaStreamSource(inputStream);
// Create analyzer node for the input meter
analyserNode = audioContext.createAnalyser();
analyserNode.fftSize = 2048;
analyserNode.smoothingTimeConstant = 0.8;
// Create gain nodes
inputGainNode = audioContext.createGain();
inputGainNode.gain.value = settings.inputGain;
monitorGainNode = audioContext.createGain();
monitorGainNode.gain.value = settings.monitorGain;
try {
// Create processor node (scriptProcessorNode is deprecated but works in all browsers)
// In a production app, you'd use AudioWorklet instead
processorNode = audioContext.createScriptProcessor(settings.bufferSize, 1, 1);
processorNode.onaudioprocess = processAudio;
// Create formant filter (simple implementation)
// NOTE(review): a lowshelf biquad only approximates a formant shift.
formantFilter = audioContext.createBiquadFilter();
formantFilter.type = 'lowshelf';
formantFilter.frequency.value = 800;
formantFilter.gain.value = settings.formantShift * 2;
// Create output node
outputNode = audioContext.createMediaStreamDestination();
// Connect nodes:
// source -> inputGain -> analyzer -> processor -> formantFilter -> outputNode
sourceNode.connect(inputGainNode);
inputGainNode.connect(analyserNode);
analyserNode.connect(processorNode);
processorNode.connect(formantFilter);
formantFilter.connect(outputNode);
// Connect monitor for local feedback if needed
// (monitorGain defaults to 0, so this path is silent until raised)
formantFilter.connect(monitorGainNode);
monitorGainNode.connect(audioContext.destination);
} catch (err) {
console.error("Error creating audio graph:", err);
showNotification("Error setting up audio: " + err.message);
// Clean up any created nodes
if (inputStream) {
inputStream.getTracks().forEach(track => track.stop());
}
return;
}
// Start the audio meter
startAudioMeter();
// Update floating controls
document.getElementById('floatingControls').style.display = 'block';
// Update latency display
// Latency estimate = buffer duration in milliseconds.
document.getElementById('floatingLatency').textContent = `${Math.round(settings.bufferSize / audioContext.sampleRate * 1000)}ms`;
// Update UI
document.getElementById('startProcessingBtn').style.display = 'none';
document.getElementById('stopProcessingBtn').style.display = 'block';
// Update processing status
const processingStatus = document.getElementById('processingStatus');
processingStatus.textContent = 'Processing Active';
processingStatus.classList.remove('inactive');
processingStatus.classList.add('active');
isProcessing = true;
showNotification("Voice processing started");
} catch (error) {
console.error("Error starting audio processing:", error);
showNotification("Error starting voice processing: " + error.message);
}
}
// Stop audio processing
// Shut down the live pipeline: release the microphone, tear down the
// node graph, stop the level meter, and flip the UI back to its idle
// state. Declared async so callers can chain a restart (see the
// buffer-size and constraint toggles). No-op when nothing is running.
async function stopAudioProcessing() {
  if (!isProcessing) return;
  // Release the microphone so the browser's recording indicator clears.
  if (inputStream) {
    inputStream.getTracks().forEach(track => track.stop());
    inputStream = null;
  }
  // Detach and drop every node built by startAudioProcessing().
  processorNode?.disconnect();
  processorNode = null;
  analyserNode?.disconnect();
  analyserNode = null;
  inputGainNode?.disconnect();
  inputGainNode = null;
  monitorGainNode?.disconnect();
  monitorGainNode = null;
  formantFilter?.disconnect();
  formantFilter = null;
  effectProcessorNode?.disconnect();
  effectProcessorNode = null;
  // Halt the meter animation loop and reset the bar.
  stopAudioMeter();
  // Hide the floating mini-controls and swap the start/stop buttons.
  document.getElementById('floatingControls').style.display = 'none';
  document.getElementById('stopProcessingBtn').style.display = 'none';
  document.getElementById('startProcessingBtn').style.display = 'block';
  // Flip the status pill to its inactive state.
  const processingStatus = document.getElementById('processingStatus');
  processingStatus.textContent = 'Not Processing';
  processingStatus.classList.remove('active');
  processingStatus.classList.add('inactive');
  isProcessing = false;
  showNotification("Voice processing stopped");
}
// Process audio buffer for pitch shifting and effects
// ScriptProcessor callback: runs once per audio buffer on the audio
// render deadline. Applies the PTT / voice-activation gate, then a
// naive resampling pitch shift, a gender-morph amplitude tweak, and the
// selected effect, writing the result into the output buffer.
function processAudio(event) {
const inputBuffer = event.inputBuffer;
const outputBuffer = event.outputBuffer;
// Calculate real-time level for voice activation (only needed when VOX
// is on and push-to-talk is not already holding the gate open).
if (voiceActivationEnabled && !isPttActive) {
let sum = 0;
const inputData = inputBuffer.getChannelData(0);
// Calculate RMS (root mean square) of the signal
for (let i = 0; i < inputBuffer.length; i++) {
sum += inputData[i] * inputData[i];
}
const rms = Math.sqrt(sum / inputBuffer.length);
// log10(0) gives -Infinity for a silent buffer; the clamp below maps
// that to level 0, so silence can never trigger activation.
const db = 20 * Math.log10(rms);
// Map roughly -60..-20 dB onto 0..100 for comparison with the
// user-set threshold.
const normalizedLevel = Math.max(0, Math.min(100, (db + 60) * 2.5));
// Detect voice
isVoiceDetected = normalizedLevel >= voiceActivationThreshold;
}
// Only process audio if PTT is active or voice is detected in activation mode
const shouldProcess = isPttActive || (voiceActivationEnabled && isVoiceDetected);
if (!shouldProcess) {
// Gate closed: emit silence rather than passing the mic through.
for (let channel = 0; channel < outputBuffer.numberOfChannels; channel++) {
const outputData = outputBuffer.getChannelData(channel);
for (let i = 0; i < outputBuffer.length; i++) {
outputData[i] = 0;
}
}
return;
}
// Simple implementation of pitch shift using sample rate conversion
// Note: This is a basic pitch shifter - a production app would use a more sophisticated algorithm
// (e.g. phase vocoder / PSOLA). Shifting up reads only the start of the
// input buffer; shifting down runs past the end and pads with zeros.
const pitchShift = Math.pow(2, settings.pitchShift / 12); // Convert semitones to ratio
// Process each channel
for (let channel = 0; channel < outputBuffer.numberOfChannels; channel++) {
const inputData = inputBuffer.getChannelData(channel);
const outputData = outputBuffer.getChannelData(channel);
// Apply pitch shift and effects
for (let i = 0; i < outputBuffer.length; i++) {
const readIndex = i / pitchShift;
// Simple linear interpolation
const index1 = Math.floor(readIndex);
const index2 = Math.ceil(readIndex);
const fraction = readIndex - index1;
// Bounds checking
const value1 = index1 < inputBuffer.length ? inputData[index1] : 0;
const value2 = index2 < inputBuffer.length ? inputData[index2] : 0;
// Interpolate
let sampleValue = value1 + fraction * (value2 - value1);
// Apply gender morph
// This is a simple implementation - a more advanced version would use formant shifting
if (settings.genderMorph !== 50) {
// Apply a subtle formant shifting based on gender morph
// Higher values shift toward female voice characteristics
const genderFactor = (settings.genderMorph - 50) / 50; // -1 to 1 range
// Simple simulation of formant shifting
// NOTE(review): this only scales overall amplitude, not spectral
// content — the "boost/compress higher frequencies" comments describe
// intent, not what the math actually does.
if (genderFactor > 0) {
// Shift toward female - boost higher frequencies
sampleValue *= (1 + genderFactor * 0.3);
} else if (genderFactor < 0) {
// Shift toward male - compress higher frequencies
sampleValue *= (1 + genderFactor * 0.2);
}
}
// Apply effects if enabled
if (settings.effect !== 'none' && settings.effectMix > 0) {
// Mix between dry and effected signal
const mixRatio = settings.effectMix / 100;
const drySignal = sampleValue;
let wetSignal = 0;
switch (settings.effect) {
case 'robot':
// Simple robot effect (square wave)
wetSignal = sampleValue > 0 ? 0.5 : -0.5;
break;
case 'reverb':
// Simple reverb simulation
// NOTE(review): feedback taps like i-2000 only fire when the buffer
// index exceeds the offset — with small bufferSize settings the
// reverb/alien/underwater tails are silent. Confirm intended.
wetSignal = (i > 2000) ? outputData[i - 2000] * 0.3 : 0;
break;
case 'telephone':
// Simple telephone effect (band-pass filter simulation via hard clip)
wetSignal = Math.max(-0.7, Math.min(0.7, sampleValue));
break;
case 'megaphone':
// Simple megaphone effect (tanh soft-clip distortion)
wetSignal = Math.tanh(sampleValue * 2) * 0.7;
break;
case 'alien':
// Alien effect (ring modulation + some reverb)
const modFreq = 0.1;
wetSignal = sampleValue * Math.sin(i * modFreq) * 0.8;
wetSignal += (i > 1500) ? outputData[i - 1500] * 0.2 : 0;
break;
case 'underwater':
// Underwater effect (attenuation + short feedback + slow tremolo)
wetSignal = sampleValue * 0.8;
wetSignal += (i > 100) ? outputData[i - 100] * 0.5 : 0;
wetSignal *= (0.8 + Math.sin(i * 0.003) * 0.2);
break;
case 'chipmunk':
// Chipmunk effect is mainly pitch shifting, so just add some slight distortion
wetSignal = Math.tanh(sampleValue * 1.5) * 0.7;
break;
}
// Mix dry and wet signals
sampleValue = (1 - mixRatio) * drySignal + mixRatio * wetSignal;
}
// Apply to output
outputData[i] = sampleValue;
}
}
// Update voice indicator
updateVoiceActivationIndicators();
}
// Update voice activation indicators
// Reflect the current talk state (PTT held, or voice detected while
// activation mode is on) on the floating indicator badge.
function updateVoiceActivationIndicators() {
  const indicator = document.getElementById('floatingVoiceIndicator');
  const talking = isPttActive || (voiceActivationEnabled && isVoiceDetected);
  indicator.classList.toggle('voice-active', talking);
}
// Audio meter visualization
// rAF handle for the level-meter loop; null whenever the meter is idle.
let audioMeterRequestId = null;
// Drive the input level bar from the analyser's byte frequency data.
// Exits automatically if the analyser node is torn down mid-loop.
function startAudioMeter() {
  if (!analyserNode) return;
  const bins = analyserNode.frequencyBinCount;
  const spectrum = new Uint8Array(bins);
  const meter = document.getElementById('inputMeter');
  const tick = () => {
    if (!analyserNode) {
      cancelAnimationFrame(audioMeterRequestId);
      return;
    }
    analyserNode.getByteFrequencyData(spectrum);
    // Average bin magnitude, scaled and clamped to a 0-100 bar width.
    let total = 0;
    for (const magnitude of spectrum) {
      total += magnitude;
    }
    const level = Math.min(100, (total / bins) * 2);
    meter.style.width = `${level}%`;
    audioMeterRequestId = requestAnimationFrame(tick);
  };
  audioMeterRequestId = requestAnimationFrame(tick);
}
// Cancel the meter loop (if running) and reset the bar to empty.
function stopAudioMeter() {
  if (audioMeterRequestId) {
    cancelAnimationFrame(audioMeterRequestId);
    audioMeterRequestId = null;
  }
  document.getElementById('inputMeter').style.width = '0%';
}
// =======================================
// UI Controls for Voice Settings
// =======================================
// Input gain slider: updates the live gain node (if the graph is
// running) and mirrors the value into the floating mini-control.
const inputGainControl = document.getElementById('inputGain');
const inputGainValue = document.getElementById('inputGainValue');
inputGainControl.addEventListener('input', () => {
const value = parseFloat(inputGainControl.value);
inputGainValue.textContent = value.toFixed(1);
settings.inputGain = value;
if (inputGainNode) {
inputGainNode.gain.value = value;
}
// Keep the floating control in sync with the main slider.
document.getElementById('floatingInputGain').value = value;
});
// Floating input gain control: mirrors changes back into the main slider.
document.getElementById('floatingInputGain').addEventListener('input', () => {
const value = parseFloat(document.getElementById('floatingInputGain').value);
inputGainControl.value = value;
inputGainValue.textContent = value.toFixed(1);
settings.inputGain = value;
if (inputGainNode) {
inputGainNode.gain.value = value;
}
});
// Monitor (local playback) gain slider.
const monitorGainControl = document.getElementById('monitorGain');
const monitorGainValue = document.getElementById('monitorGainValue');
monitorGainControl.addEventListener('input', () => {
const value = parseFloat(monitorGainControl.value);
monitorGainValue.textContent = value.toFixed(1);
settings.monitorGain = value;
if (monitorGainNode) {
monitorGainNode.gain.value = value;
}
});
// Pitch shift slider (semitones). Consumed by processAudio() on the
// next audio callback, so no live node update is needed here.
const pitchShiftControl = document.getElementById('pitchShift');
const pitchShiftValue = document.getElementById('pitchShiftValue');
pitchShiftControl.addEventListener('input', () => {
const value = parseInt(pitchShiftControl.value);
pitchShiftValue.textContent = value > 0 ? `+${value}` : value;
settings.pitchShift = value;
});
// Formant shift slider; also drives the live low-shelf filter gain.
const formantShiftControl = document.getElementById('formantShift');
const formantShiftValue = document.getElementById('formantShiftValue');
formantShiftControl.addEventListener('input', () => {
const value = parseInt(formantShiftControl.value);
formantShiftValue.textContent = value > 0 ? `+${value}` : value;
settings.formantShift = value;
if (formantFilter) {
formantFilter.gain.value = value * 2; // Scale for more dramatic effect
}
});
// Gender morph slider (0-100); read by processAudio() each buffer.
const genderSlider = document.getElementById('genderSlider');
const genderValue = document.getElementById('genderValue');
genderSlider.addEventListener('input', () => {
const value = parseInt(genderSlider.value);
genderValue.textContent = `${value}%`;
settings.genderMorph = value;
});
// Voice effect dropdown (robot, reverb, ...); read by processAudio().
const voiceEffectControl = document.getElementById('voiceEffect');
voiceEffectControl.addEventListener('change', () => {
settings.effect = voiceEffectControl.value;
});
// Dry/wet mix for the selected effect (0-100%).
const effectMixControl = document.getElementById('effectMix');
const effectMixValue = document.getElementById('effectMixValue');
effectMixControl.addEventListener('input', () => {
const value = parseInt(effectMixControl.value);
effectMixValue.textContent = `${value}%`;
settings.effectMix = value;
});
// Buffer size control
// Changing the buffer size requires rebuilding the ScriptProcessor
// node, so the pipeline is restarted if it is currently running.
const bufferSizeControl = document.getElementById('bufferSize');
bufferSizeControl.addEventListener('change', () => {
settings.bufferSize = parseInt(bufferSizeControl.value);
// Restart processing if active
if (isProcessing) {
stopAudioProcessing().then(() => startAudioProcessing());
}
});
// Noise suppression toggle
// Applied via getUserMedia constraints, so a restart is needed for the
// new constraint to take effect.
const noiseSuppressionToggle = document.getElementById('noiseSuppressionToggle');
noiseSuppressionToggle.addEventListener('change', () => {
settings.noiseSuppressionEnabled = noiseSuppressionToggle.checked;
// Restart processing if active
if (isProcessing) {
stopAudioProcessing().then(() => startAudioProcessing());
}
});
// Echo cancellation toggle — same restart requirement as above.
const echoCancellationToggle = document.getElementById('echoCancellationToggle');
echoCancellationToggle.addEventListener('change', () => {
settings.echoCancellationEnabled = echoCancellationToggle.checked;
// Restart processing if active
if (isProcessing) {
stopAudioProcessing().then(() => startAudioProcessing());
}
});
// Voice activation (VOX) toggle; reveals the threshold settings panel.
const voiceActivationToggle = document.getElementById('voiceActivationToggle');
const voiceActivationSettings = document.getElementById('voiceActivationSettings');
voiceActivationToggle.addEventListener('change', () => {
voiceActivationEnabled = voiceActivationToggle.checked;
voiceActivationSettings.style.display = voiceActivationEnabled ? 'block' : 'none';
if (voiceActivationEnabled) {
showNotification('Voice activation enabled');
} else {
showNotification('Voice activation disabled');
}
});
// Voice threshold control (0-100); compared against the normalized
// input level computed in processAudio().
const voiceThresholdControl = document.getElementById('voiceThreshold');
const voiceThresholdValue = document.getElementById('voiceThresholdValue');
voiceThresholdControl.addEventListener('input', () => {
const value = parseInt(voiceThresholdControl.value);
voiceThresholdValue.textContent = `${value}%`;
voiceActivationThreshold = value;
});
// Start processing button
document.getElementById('startProcessingBtn').addEventListener('click', () => {
startAudioProcessing();
});
// Stop processing button
document.getElementById('stopProcessingBtn').addEventListener('click', () => {
stopAudioProcessing();
});
// Preset buttons
// Clicking a preset button applies that preset and marks it active.
const presetButtons = document.querySelectorAll('.preset-btn');
presetButtons.forEach(button => {
  button.addEventListener('click', () => {
    const presetName = button.dataset.preset;
    if (!presets[presetName]) return;
    applyPreset(presetName);
    // Exactly the clicked button carries the active highlight.
    presetButtons.forEach(btn => btn.classList.toggle('active', btn === button));
  });
});
// The floating preset dropdown mirrors the preset buttons.
document.getElementById('floatingPresetSelect').addEventListener('change', (event) => {
  const chosen = event.target.value;
  applyPreset(chosen);
  presetButtons.forEach(btn => {
    btn.classList.toggle('active', btn.dataset.preset === chosen);
  });
});
// Apply voice preset
// Copy a named preset into the live settings, sync every slider and
// readout, update the live formant filter, and mirror the choice into
// the floating dropdown. Unknown preset names are ignored.
function applyPreset(presetName) {
  const preset = presets[presetName];
  if (!preset) return;
  // Signed display: positive values get an explicit "+" prefix.
  const signed = v => (v > 0 ? `+${v}` : v);
  pitchShiftControl.value = preset.pitchShift;
  pitchShiftValue.textContent = signed(preset.pitchShift);
  settings.pitchShift = preset.pitchShift;
  formantShiftControl.value = preset.formantShift;
  formantShiftValue.textContent = signed(preset.formantShift);
  settings.formantShift = preset.formantShift;
  if (formantFilter) {
    formantFilter.gain.value = preset.formantShift * 2;
  }
  genderSlider.value = preset.genderMorph;
  genderValue.textContent = `${preset.genderMorph}%`;
  settings.genderMorph = preset.genderMorph;
  voiceEffectControl.value = preset.effect;
  settings.effect = preset.effect;
  effectMixControl.value = preset.effectMix;
  effectMixValue.textContent = `${preset.effectMix}%`;
  settings.effectMix = preset.effectMix;
  // Keep the floating dropdown in agreement with the applied preset.
  document.getElementById('floatingPresetSelect').value = presetName;
  showNotification(`Applied ${presetName} voice preset`);
}
// =======================================
// Voice Profile Management
// =======================================
// "Create new profile" opens the modal overlay.
document.getElementById('createNewProfile').addEventListener('click', () => {
document.getElementById('createProfileOverlay').style.display = 'flex';
});
// Cancel hides the overlay without clearing the form fields.
document.getElementById('cancelProfileBtn').addEventListener('click', () => {
document.getElementById('createProfileOverlay').style.display = 'none';
});
// Save: snapshot the current live settings into a new named profile.
document.getElementById('saveProfileBtn').addEventListener('click', () => {
const name = document.getElementById('profileName').value.trim();
const description = document.getElementById('profileDescription').value.trim();
if (!name) {
showNotification('Please enter a profile name');
return;
}
// Create new profile; Date.now() is a unique-enough id for this
// in-memory, single-session profile list.
const newProfile = {
id: 'profile_' + Date.now(),
name: name,
description: description || 'Custom voice profile',
settings: { ...settings }, // shallow copy; settings holds only primitives here
type: 'manual'
};
// Add to profiles array
voiceProfiles.push(newProfile);
// Update UI
updateVoiceProfilesList();
// Hide overlay
document.getElementById('createProfileOverlay').style.display = 'none';
// Clear form
document.getElementById('profileName').value = '';
document.getElementById('profileDescription').value = '';
showNotification('Voice profile created');
});
// Import profile — placeholder only; not implemented yet.
document.getElementById('importProfile').addEventListener('click', () => {
// This would normally open a file picker or ask for a profile code to import
showNotification('Profile import will be available in a future update');
});
// Update voice profiles list
// Rebuild the saved-profiles grid from the voiceProfiles array, keeping
// the built-in "standard" card in first position, then rewire the
// activate/delete buttons and refresh the calls-tab voice dropdown.
// Profile names and descriptions are user input, so they are
// HTML-escaped before interpolation into innerHTML (the original
// injected them raw — a stored XSS vector).
function updateVoiceProfilesList() {
  // Minimal HTML escaper for text placed inside markup or attributes.
  const esc = value => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
  const container = document.getElementById('voiceProfiles');
  // Keep the first card (standard profile)
  const firstCard = container.children[0];
  container.innerHTML = '';
  container.appendChild(firstCard);
  // Add one card per saved profile.
  voiceProfiles.forEach(profile => {
    const card = document.createElement('div');
    card.className = 'voice-profile-card';
    if (currentVoiceProfile === profile.id) {
      card.classList.add('active');
    }
    card.innerHTML = `
      <div class="voice-profile-header">
        <h3 class="voice-profile-title">${esc(profile.name)}</h3>
      </div>
      <div class="voice-profile-body">
        <p style="margin-top: 0; font-size: 0.875rem; color: var(--color-text-secondary);">${esc(profile.description)}</p>
        <span class="badge badge-secondary">Custom</span>
      </div>
      <div class="voice-profile-footer">
        <button class="btn btn-primary btn-sm activate-profile-btn" data-id="${esc(profile.id)}">Activate</button>
        <button class="btn btn-secondary btn-sm delete-profile-btn" data-id="${esc(profile.id)}" aria-label="Delete profile">
          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
            <polyline points="3 6 5 6 21 6"/>
            <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"/>
          </svg>
        </button>
      </div>
    `;
    container.appendChild(card);
  });
  // Wire up the activate/delete buttons just created.
  document.querySelectorAll('.activate-profile-btn').forEach(btn => {
    btn.addEventListener('click', function() {
      activateVoiceProfile(this.dataset.id);
    });
  });
  document.querySelectorAll('.delete-profile-btn').forEach(btn => {
    btn.addEventListener('click', function() {
      deleteVoiceProfile(this.dataset.id);
    });
  });
  // Also update the voice selection in calls tab
  updateCallVoiceSelection();
}
// Activate a voice profile
// Make the given saved profile the live voice configuration: copy its
// stored parameters into settings, sync every control widget, update
// the live formant filter, and refresh the profile cards.
function activateVoiceProfile(profileId) {
  const profile = voiceProfiles.find(p => p.id === profileId);
  if (!profile) return;
  currentVoiceProfile = profileId;
  // Copy the stored parameters into the live settings object.
  const stored = profile.settings;
  settings.pitchShift = stored.pitchShift;
  settings.formantShift = stored.formantShift;
  settings.genderMorph = stored.genderMorph;
  settings.effect = stored.effect;
  settings.effectMix = stored.effectMix;
  // Mirror the new settings onto the UI controls.
  const signed = v => (v > 0 ? `+${v}` : v);
  pitchShiftControl.value = settings.pitchShift;
  pitchShiftValue.textContent = signed(settings.pitchShift);
  formantShiftControl.value = settings.formantShift;
  formantShiftValue.textContent = signed(settings.formantShift);
  genderSlider.value = settings.genderMorph;
  genderValue.textContent = `${settings.genderMorph}%`;
  voiceEffectControl.value = settings.effect;
  effectMixControl.value = settings.effectMix;
  effectMixValue.textContent = `${settings.effectMix}%`;
  // Push the formant change into the live filter when processing.
  if (formantFilter) {
    formantFilter.gain.value = settings.formantShift * 2;
  }
  // Refresh card highlighting and the floating voice label.
  updateVoiceProfilesList();
  document.getElementById('floatingVoiceName').textContent = profile.name;
  showNotification(`Activated voice profile: ${profile.name}`);
}
// Delete a voice profile
// Remove a saved profile by id; if it was the active one, fall back to
// the built-in standard voice before rebuilding the list.
function deleteVoiceProfile(profileId) {
  if (currentVoiceProfile === profileId) {
    currentVoiceProfile = 'standard';
  }
  const index = voiceProfiles.findIndex(p => p.id === profileId);
  if (index !== -1) {
    voiceProfiles.splice(index, 1);
  }
  updateVoiceProfilesList();
  showNotification('Voice profile deleted');
}
// Update call voice selection dropdown
// Rebuild the "active voice" dropdown on the calls tab from the saved
// profiles and voice clones, then restore the current selection
// (a clone, if one is active, takes precedence over a manual profile).
function updateCallVoiceSelection() {
  const select = document.getElementById('activeVoice');
  select.innerHTML = '<option value="standard">Standard Voice Changer</option>';
  // Append one <option>; clones are rendered bold to stand out.
  const addOption = (value, label, bold) => {
    const option = document.createElement('option');
    option.value = value;
    option.text = label;
    if (bold) {
      option.style.fontWeight = 'bold';
    }
    select.appendChild(option);
  };
  voiceProfiles.forEach(profile => addOption(profile.id, profile.name, false));
  cloneProfiles.forEach(profile => addOption('clone_' + profile.id, profile.name + ' (Clone)', true));
  select.value = currentCloneProfile
    ? 'clone_' + currentCloneProfile.id
    : currentVoiceProfile;
}
// =======================================
// TensorFlow.js Voice Cloning
// =======================================
// Track model loading status
// Shared state for the lazy model loader and the cloning UI.
let tfModelStatus = {
loaded: false, // both models finished loading successfully
loading: false, // a load is in flight (prevents duplicate loads)
voiceEncoder: null, // tf.LayersModel: audio features -> speaker embedding
vocoder: null, // tf.LayersModel: mel frames -> audio (unused in the demo path)
error: null // last load error message, if any
};
// Load TensorFlow.js voice models
// Lazily loads the encoder and vocoder from a CDN. If a remote model
// cannot be fetched, a small randomly-initialized stand-in model is
// built instead so the rest of the demo pipeline still runs (its output
// is not a real voice embedding). Returns true when models are ready.
// NOTE(review): a caller arriving while a load is already in flight
// gets `false` — the same value as a failure — rather than waiting;
// confirm callers treat this as "retry later".
async function loadVoiceModels() {
if (tfModelStatus.loaded) return true;
if (tfModelStatus.loading) return false;
tfModelStatus.loading = true;
updateModelLoadingStatus('Loading voice models...');
try {
// Load the voice encoder model (simplified for demo)
// In a real implementation, you would load actual models
updateModelLoadingStatus('Loading voice encoder...');
tfModelStatus.voiceEncoder = await tf.loadLayersModel(
'https://cdn.jsdelivr.net/gh/daswer123/xtts-tfjs-model/encoder/model.json'
).catch(() => {
// If the real model fails to load, create a dummy model for demonstration
console.warn("Using simplified voice encoder model");
const input = tf.input({shape: [1024]});
const dense1 = tf.layers.dense({units: 256, activation: 'relu'}).apply(input);
const dense2 = tf.layers.dense({units: 128, activation: 'relu'}).apply(dense1);
const output = tf.layers.dense({units: 64}).apply(dense2);
return tf.model({inputs: input, outputs: output});
});
updateModelLoadingStatus('Loading vocoder...');
tfModelStatus.vocoder = await tf.loadLayersModel(
'https://cdn.jsdelivr.net/gh/daswer123/xtts-tfjs-model/vocoder/model.json'
).catch(() => {
// If the real model fails to load, create a dummy model for demonstration
console.warn("Using simplified vocoder model");
const input = tf.input({shape: [64, 80]});
const conv1 = tf.layers.conv1d({filters: 32, kernelSize: 3, activation: 'relu'}).apply(input);
const output = tf.layers.dense({units: 1024}).apply(conv1);
return tf.model({inputs: input, outputs: output});
});
tfModelStatus.loaded = true;
tfModelStatus.loading = false;
updateModelLoadingStatus('Models loaded successfully');
return true;
} catch (error) {
console.error("Error loading voice models:", error);
tfModelStatus.error = error.message;
tfModelStatus.loading = false;
updateModelLoadingStatus('Error loading models: ' + error.message);
return false;
}
}
// Update model loading status in UI
// Render the AI status pill: spinner + message while loading, an error
// badge on failure, a check mark once both models are ready.
// Note: `message` is only displayed in the loading state; the other two
// states use fixed text.
function updateModelLoadingStatus(message) {
const statusElement = document.getElementById('cloneAiStatus');
if (tfModelStatus.loading) {
statusElement.innerHTML = `
<span class="model-loading"></span>
${message}
`;
} else if (tfModelStatus.error) {
statusElement.innerHTML = `
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="10"/>
<line x1="12" y1="8" x2="12" y2="12"/>
<line x1="12" y1="16" x2="12.01" y2="16"/>
</svg>
Model Error
`;
statusElement.className = 'status-pill inactive';
} else if (tfModelStatus.loaded) {
statusElement.innerHTML = `
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M22 11.08V12a10 10 0 1 1-5.93-9.14"/>
<polyline points="22 4 12 14.01 9 11.01"/>
</svg>
AI Ready
`;
statusElement.className = 'status-pill active';
}
}
// Process audio with TensorFlow voice encoder
// Extract an averaged speaker embedding from recorded audio.
// Accepts a Web Audio AudioBuffer (as produced by decodeAudioData) or
// any array-like of PCM samples. Fixes in this revision:
//  - the original passed an AudioBuffer straight into
//    `new Float32Array(...)`, which yields an EMPTY array (AudioBuffer
//    is not array-like) and silently produced zero chunks;
//  - with zero chunks, `embeddings[0].length` threw — now reported as a
//    clean "sample too short" failure;
//  - per-chunk tensors (chunk/features/prediction) were never disposed.
// Returns {success, embedding, numFrames} or {success: false, error}.
async function encodeVoice(audioBuffer) {
  try {
    // First ensure models are loaded.
    if (!tfModelStatus.loaded) {
      const loaded = await loadVoiceModels();
      if (!loaded) throw new Error("Voice models not loaded");
    }
    // Pull raw PCM out of an AudioBuffer; accept plain sample arrays too.
    const samples = (typeof AudioBuffer !== 'undefined' && audioBuffer instanceof AudioBuffer)
      ? audioBuffer.getChannelData(0)
      : new Float32Array(audioBuffer);
    // Convert to a tensor and peak-normalize to [-1, 1].
    const audioData = tf.tensor(Array.from(samples));
    const peak = Math.max(1.0, tf.abs(audioData).max().dataSync()[0]);
    const normalizedAudio = tf.div(audioData, tf.scalar(peak));
    // NOTE(review): no real resampling is performed; the chunking below
    // assumes the audio is already at 22050 Hz — confirm upstream.
    const chunkSize = 22050; // one second of audio at the assumed rate
    const numChunks = Math.floor(normalizedAudio.shape[0] / chunkSize);
    // Process each 1-second chunk and collect its embedding.
    const embeddings = [];
    for (let i = 0; i < numChunks; i++) {
      const chunk = tf.slice(normalizedAudio, i * chunkSize, chunkSize);
      // Simplified feature extraction — a real implementation would
      // compute proper MFCCs here instead of a flat reshape.
      const features = tf.reshape(chunk, [1, -1]);
      const prediction = tfModelStatus.voiceEncoder.predict(features);
      embeddings.push(Array.from(prediction.dataSync()));
      // Free per-chunk tensors immediately to avoid GPU/CPU memory growth.
      tf.dispose([chunk, features, prediction]);
    }
    tf.dispose([audioData, normalizedAudio]);
    // Clips shorter than one chunk produce no embeddings at all.
    if (embeddings.length === 0) {
      throw new Error("Audio sample too short to extract voice characteristics");
    }
    // Element-wise average of the per-chunk embeddings.
    const avgEmbedding = embeddings
      .reduce((acc, val) => acc.map((v, i) => v + val[i]),
              new Array(embeddings[0].length).fill(0))
      .map(v => v / embeddings.length);
    return {
      success: true,
      embedding: avgEmbedding,
      numFrames: numChunks
    };
  } catch (error) {
    console.error("Error encoding voice:", error);
    return {
      success: false,
      error: error.message
    };
  }
}
// Synthesize speech with the vocoder
// Demo shortcut: instead of the full text -> mel -> vocoder pipeline,
// drive the Web Speech API and bias its pitch/voice choice with the
// speaker embedding. Fix in this revision: the returned promise never
// settled when synthesis failed (the original only handled onend);
// onerror now resolves with a failure result, and a missing
// speechSynthesis API is reported instead of throwing a ReferenceError.
// Returns {success, message} or {success: false, error}.
async function synthesizeSpeech(embedding, text, options = {}) {
  try {
    // First ensure models are loaded.
    if (!tfModelStatus.loaded) {
      const loaded = await loadVoiceModels();
      if (!loaded) throw new Error("Voice models not loaded");
    }
    if (!('speechSynthesis' in window)) {
      throw new Error("Speech synthesis is not supported in this browser");
    }
    return new Promise((resolve) => {
      const utterance = new SpeechSynthesisUtterance(text);
      // Map the mean embedding value onto utterance pitch — a very
      // crude use of the embedding, kept from the original demo.
      const embeddingAvg = embedding.reduce((sum, val) => sum + val, 0) / embedding.length;
      utterance.pitch = Math.max(0, Math.min(2, 1.0 + embeddingAvg * 2));
      utterance.rate = options.rate || 1.0;
      // Heuristic voice pick: assume dimension 0 carries gender info.
      // NOTE(review): getVoices() can be empty before the browser fires
      // 'voiceschanged'; in that case we fall back to the default voice.
      const voices = speechSynthesis.getVoices();
      const preferMale = embedding[0] < 0;
      const match = voices.find(voice =>
        preferMale ? voice.name.includes('Male') : voice.name.includes('Female'));
      if (match) {
        utterance.voice = match;
      }
      // Settle the promise on both success and failure paths.
      utterance.onend = () => {
        resolve({
          success: true,
          message: "Speech synthesized with TensorFlow.js voice model"
        });
      };
      utterance.onerror = (event) => {
        resolve({
          success: false,
          error: event.error || "Speech synthesis failed"
        });
      };
      speechSynthesis.speak(utterance);
    });
  } catch (error) {
    console.error("Error synthesizing speech:", error);
    return {
      success: false,
      error: error.message
    };
  }
}
// Extract voice characteristics for cloning
// Thin wrapper around the TF.js encode/synthesize pipeline that turns a
// set of recorded samples into a reusable voice profile.
class TensorFlowVoiceCloner {
  constructor() {
    // Set by init(); true once the encoder/vocoder are available.
    this.modelLoaded = false;
  }
  // Ensure the TF.js models are loaded; returns the load result.
  async init() {
    this.modelLoaded = await loadVoiceModels();
    return this.modelLoaded;
  }
  // Build an averaged voice embedding from an array of recorded Blobs.
  // Returns {success, voiceData} or {success: false, error}.
  async cloneVoice(audioBlobs) {
    // Reuse ONE AudioContext for all samples: browsers cap the number
    // of simultaneous contexts, and the original created (and, on a
    // decode failure, leaked) one per blob.
    let decodeContext = null;
    try {
      if (!this.modelLoaded) {
        await this.init();
        if (!this.modelLoaded) {
          throw new Error("Failed to load voice models");
        }
      }
      decodeContext = new (window.AudioContext || window.webkitAudioContext)();
      // Decode and encode each sample; skip samples that fail encoding.
      const embeddings = [];
      for (const blob of audioBlobs) {
        const arrayBuffer = await blob.arrayBuffer();
        const audioBuffer = await decodeContext.decodeAudioData(arrayBuffer);
        const result = await encodeVoice(audioBuffer);
        if (result.success) {
          embeddings.push(result.embedding);
        }
      }
      if (embeddings.length === 0) {
        throw new Error("Could not extract voice characteristics from any sample");
      }
      // Element-wise average across all successful samples.
      const avgEmbedding = embeddings
        .reduce((acc, val) => acc.map((v, i) => v + val[i]),
                new Array(embeddings[0].length).fill(0))
        .map(v => v / embeddings.length);
      return {
        success: true,
        voiceData: {
          embedding: avgEmbedding,
          // Display-only metrics; clarity/distinctiveness are simulated.
          metrics: {
            clarity: Math.random() * 40 + 60,
            distinctiveness: Math.random() * 30 + 70,
            samples: audioBlobs.length,
            confidence: Math.min(95, audioBlobs.length * 15 + 50) // higher with more samples
          }
        }
      };
    } catch (error) {
      console.error("Error in TensorFlow voice cloning:", error);
      return {
        success: false,
        error: error.message
      };
    } finally {
      // Always release the context, even when decoding/encoding threw.
      if (decodeContext) {
        decodeContext.close();
      }
    }
  }
  // Speak `text` using a previously cloned voice's embedding.
  async generateSpeech(text, voiceData, options = {}) {
    if (!voiceData || !voiceData.embedding) {
      throw new Error("Invalid voice data");
    }
    return synthesizeSpeech(voiceData.embedding, text, options);
  }
}
// Initialize the voice cloner
// Shared cloner instance used by the cloning UI.
let tfVoiceCloner = new TensorFlowVoiceCloner();
// =======================================
// Voice Clone Simulation (fallback)
// =======================================
class VoiceCloneSimulator {
constructor(audioContext) {
// Web Audio context used for analysis and filter construction.
this.context = audioContext;
// Voice fingerprint produced by analyzeVoice(); null until analyzed.
this.characteristics = null;
}
async analyzeVoice(audioBuffer) {
// Derive a voice "fingerprint" (fundamental frequency, formants,
// spectral brightness) from a recorded AudioBuffer via an AnalyserNode
// snapshot, and cache it on this.characteristics.
// NOTE(review): the buffer source is connected but never start()ed and
// nothing is rendered before getFloatFrequencyData is read, so the
// analyser may still report silence (-Infinity bins). Verify with an
// OfflineAudioContext render if results look flat.
const analyzer = this.context.createAnalyser();
analyzer.fftSize = 2048;
const bufferLength = analyzer.frequencyBinCount;
const dataArray = new Float32Array(bufferLength);
const source = this.context.createBufferSource();
source.buffer = audioBuffer;
source.connect(analyzer);
// Collect frequency data (dB values per bin)
analyzer.getFloatFrequencyData(dataArray);
// Find fundamental frequency and formants
const fundamentalFreq = this.findFundamentalFrequency(dataArray);
const formants = this.findFormants(dataArray);
// Calculate spectral centroid for brightness
const spectralCentroid = this.calculateSpectralCentroid(dataArray);
// Build voice characteristics profile
this.characteristics = {
fundamentalFreq,
formants,
spectralCentroid,
// Store original spectral shape for voiceprint
spectralEnvelope: Array.from(dataArray)
};
return this.characteristics;
}
findFundamentalFrequency(freqData) {
// Simplified pitch detection
const peakIndices = [];
for (let i = 1; i < freqData.length - 1; i++) {
if (freqData[i] > freqData[i-1] && freqData[i] > freqData[i+1]) {
peakIndices.push(i);
}
}
// Get fundamental frequency (simplified)
return peakIndices.length > 0 ?
peakIndices[0] * this.context.sampleRate / (freqData.length * 2) :
120; // Default to 120Hz
}
findFormants(freqData) {
// Simplified formant detection
const formants = [];
let lastPeak = 0;
for (let i = 1; i < freqData.length - 1; i++) {
if (freqData[i] > freqData[i-1] && freqData[i] > freqData[i+1] && freqData[i] > -60) {
const freq = i * this.context.sampleRate / (freqData.length * 2);
if (freq > lastPeak + 200) { // Must be at least 200Hz apart
formants.push(freq);
lastPeak = freq;
if (formants.length >= 3) break; // Get first 3 formants
}
}
}
// If we couldn't find formants, use typical values
while (formants.length < 3) {
formants.push([500, 1500, 2500][formants.length]);
}
return formants;
}
calculateSpectralCentroid(freqData) {
let sumAmplitude = 0;
let sumWeightedAmplitude = 0;
for (let i = 0; i < freqData.length; i++) {
const amplitude = Math.pow(10, freqData[i] / 20); // Convert from dB
const frequency = i * this.context.sampleRate / (freqData.length * 2);
sumAmplitude += amplitude;
sumWeightedAmplitude += amplitude * frequency;
}
return sumWeightedAmplitude / sumAmplitude;
}
createVoiceFilter() {
if (!this.characteristics) {
throw new Error("Voice characteristics not analyzed yet");
}
// Create a filter chain to match voice characteristics
const filters = [];
// Create formant filters
this.characteristics.formants.forEach((formant, i) => {
const filter = this.context.createBiquadFilter();
filter.type = 'peaking';
filter.frequency.value = formant;
filter.Q.value = 5; // Resonance
filter.gain.value = 10; // Boost
filters.push(filter);
});
// Create fundamental pitch filter
const lowShelf = this.context.createBiquadFilter();
lowShelf.type = 'lowshelf';
lowShelf.frequency.value = this.characteristics.fundamentalFreq * 2;
lowShelf.gain.value = 6; // Boost fundamental
filters.push(lowShelf);
// Brightness control
const highShelf = this.context.createBiquadFilter();
highShelf.type = 'highshelf';
highShelf.frequency.value = 3000;
highShelf.gain.value = this.characteristics.spectralCentroid > 2000 ? 3 : -3;
filters.push(highShelf);
// Connect all filters in sequence
for (let i = 0; i < filters.length - 1; i++) {
filters[i].connect(filters[i + 1]);
}
return {
input: filters[0],
output: filters[filters.length - 1]
};
}
applyVoiceToSynth(oscillator) {
const voiceFilter = this.createVoiceFilter();
oscillator.connect(voiceFilter.input);
return voiceFilter.output;
}
// Generate speech based on analyzed voice
generateSpeech(text, options = {}) {
// This would use a simple speech synthesis with our voice filter applied
return new Promise((resolve) => {
const utterance = new SpeechSynthesisUtterance(text);
utterance.pitch = options.pitch || 1;
utterance.rate = options.rate || 1;
// We can't directly apply our filters to the Web Speech API
// But we can adjust parameters based on our voice analysis
if (this.characteristics) {
// Adjust pitch based on fundamental frequency
// Standard male voice is around 120Hz, female around 210Hz
const standardFreq = 165; // Middle ground
const pitchRatio = this.characteristics.fundamentalFreq / standardFreq;
utterance.pitch = Math.max(0.1, Math.min(2, pitchRatio));
// Try to match brightness
if (this.characteristics.spectralCentroid > 2000) {
// Brighter voice
utterance.pitch *= 1.1;
} else {
// Darker voice
utterance.pitch *= 0.9;
}
}
// Provide callback for when speech ends
utterance.onend = () => {
// In a real implementation, we would apply our filters
// to the audio stream, but that's not possible with the Web Speech API
resolve({
success: true,
message: "Speech generated with simulated voice characteristics"
});
};
speechSynthesis.speak(utterance);
});
}
}
// =======================================
// Voice Clone Functionality
// =======================================
// Add event listeners for the quality selector
// The quality preset buttons act as a styled radio group: clicking one
// highlights it and checks the radio input nested inside it.
document.querySelectorAll('.quality-selector .preset-btn').forEach(btn => {
btn.addEventListener('click', function() {
// Remove active class from all
document.querySelectorAll('.quality-selector .preset-btn').forEach(el => {
el.classList.remove('active');
});
// Add active class to clicked
this.classList.add('active');
// Check the radio button
const radio = this.querySelector('input[type="radio"]');
radio.checked = true;
});
});
// Attempt to preload the models
// NOTE(review): loadVoiceModels is defined elsewhere in this file — confirm
// it resolves to a boolean "loaded" flag.
document.addEventListener('DOMContentLoaded', () => {
// Try to load models in the background
setTimeout(() => {
loadVoiceModels().then(loaded => {
if (loaded) {
console.log("Voice models preloaded successfully");
}
});
}, 3000); // Wait 3 seconds after page load to start preloading
});
// New clone button
// Opens the "create clone" card and closes any open settings card.
document.getElementById('newCloneBtn').addEventListener('click', () => {
document.getElementById('createCloneCard').style.display = 'block';
document.getElementById('cloneSettingsCard').style.display = 'none';
});
// Voice recording state
// Module-level state shared by startRecording/stopRecording and the
// timer/waveform helpers below.
let isRecording = false;
let recordingStartTime = 0;
let recordingInterval = null;
let recordedChunks = [];
let recordingNumber = 1;
// Record sample button
// Toggles between starting and stopping a recording session.
document.getElementById('recordSampleBtn').addEventListener('click', function() {
if (!isRecording) {
startRecording();
} else {
stopRecording();
}
});
// Start recording
async function startRecording() {
    // Begin capturing microphone audio into `recordedChunks` via a
    // ScriptProcessorNode. Handles are stashed on `window.temp*` so
    // stopRecording() can tear the graph down.
    if (isRecording) return;
    let stream = null;
    try {
        // Request microphone access first so a denial aborts cleanly.
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        isRecording = true;
        recordingStartTime = Date.now();
        recordedChunks = [];
        // NOTE(review): ScriptProcessorNode is deprecated; AudioWorklet is
        // the modern replacement, but this keeps broad browser support.
        const audioContext = new (window.AudioContext || window.webkitAudioContext)();
        const source = audioContext.createMediaStreamSource(stream);
        const processor = audioContext.createScriptProcessor(4096, 1, 1);
        source.connect(processor);
        processor.connect(audioContext.destination);
        // Copy each input buffer before storing — the buffer handed to
        // onaudioprocess is reused by the browser between callbacks.
        processor.onaudioprocess = function(e) {
            if (!isRecording) return;
            const inputData = e.inputBuffer.getChannelData(0);
            recordedChunks.push(new Float32Array(inputData));
        };
        // Save handles for stopRecording() to clean up.
        window.tempStream = stream;
        window.tempProcessor = processor;
        window.tempAudioContext = audioContext;
        // Switch the record button to a "stop" (square) icon.
        const recordBtn = document.getElementById('recordSampleBtn');
        recordBtn.innerHTML = `
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<rect x="6" y="6" width="12" height="12"/>
</svg>
`;
        recordBtn.classList.add('recording');
        // Start timer
        recordingInterval = setInterval(updateRecordingTime, 1000);
        // Simulate waveform animation
        animateRecordingWaveform();
    } catch (error) {
        // BUGFIX: reset state on failure. Previously, if setup failed after
        // the flag was set (e.g. audio graph creation threw), isRecording
        // stayed true with a dead stream, wedging the record button; the
        // granted microphone stream also leaked.
        isRecording = false;
        if (stream) {
            stream.getTracks().forEach(track => track.stop());
        }
        console.error("Error starting recording:", error);
        showNotification("Error starting recording: " + error.message);
    }
}
// Stop recording
function stopRecording() {
    // Finalize the in-progress recording: tear down the audio graph,
    // assemble the recorded chunks into a WAV blob, add it to the sample
    // list, and reset the recording UI.
    if (!isRecording) return;
    isRecording = false;
    // Read the real capture rate BEFORE closing the context.
    // BUGFIX: the WAV header previously hard-coded 44100 Hz; on devices
    // whose AudioContext runs at 48000 Hz (very common) the saved sample
    // played back pitch-shifted and with the wrong duration.
    const sampleRate = window.tempAudioContext ?
        window.tempAudioContext.sampleRate : 44100;
    // Stop the stream and processing
    if (window.tempStream) {
        window.tempStream.getTracks().forEach(track => track.stop());
    }
    if (window.tempProcessor) {
        window.tempProcessor.disconnect();
    }
    if (window.tempAudioContext) {
        window.tempAudioContext.close();
    }
    try {
        // Concatenate all chunks into one contiguous Float32Array.
        const totalLength = recordedChunks.reduce((sum, chunk) => sum + chunk.length, 0);
        const audioData = new Float32Array(totalLength);
        let offset = 0;
        for (const chunk of recordedChunks) {
            audioData.set(chunk, offset);
            offset += chunk.length;
        }
        // Encode as WAV using the context's actual sample rate.
        const wavBlob = createWavBlobFromFloat32Array(audioData, sampleRate);
        const sample = {
            id: 'sample_' + Date.now(),
            blob: wavBlob,
            duration: (Date.now() - recordingStartTime) / 1000
        };
        addSampleToList(sample);
        // At least one sample exists now, so training can be offered.
        document.getElementById('trainingSection').style.display = 'block';
    } catch (e) {
        console.error("Error creating audio sample:", e);
        showNotification("Failed to create audio sample");
    }
    // Restore the record button to its idle (circle) icon.
    const recordBtn = document.getElementById('recordSampleBtn');
    recordBtn.innerHTML = `
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="10"/>
</svg>
`;
    recordBtn.classList.remove('recording');
    // Stop timer
    clearInterval(recordingInterval);
    document.getElementById('recordingTimer').textContent = '0:00';
    // Stop waveform animation
    stopWaveformAnimation();
}
// Create a WAV blob from Float32Array
function createWavBlobFromFloat32Array(audioData, sampleRate) {
    // Encode mono Float32 samples as a 16-bit PCM WAV blob (simplified
    // canonical 44-byte RIFF header).
    const numChannels = 1;
    const bytesPerSample = 2;
    const blockAlign = numChannels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const dataSize = audioData.length * bytesPerSample;
    const headerSize = 44;
    const buffer = new ArrayBuffer(headerSize + dataSize);
    const view = new DataView(buffer);
    // "RIFF" chunk descriptor
    writeString(view, 0, 'RIFF');
    view.setUint32(4, 36 + dataSize, true); // file size minus first 8 bytes
    writeString(view, 8, 'WAVE');
    // "fmt " sub-chunk (PCM, mono, 16-bit)
    writeString(view, 12, 'fmt ');
    view.setUint32(16, 16, true);             // fmt chunk size
    view.setUint16(20, 1, true);              // audio format tag: PCM
    view.setUint16(22, numChannels, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, byteRate, true);
    view.setUint16(32, blockAlign, true);
    view.setUint16(34, 16, true);             // bits per sample
    // "data" sub-chunk
    writeString(view, 36, 'data');
    view.setUint32(40, dataSize, true);
    // Write audio samples after the header.
    floatTo16BitPCM(view, headerSize, audioData);
    return new Blob([buffer], { type: 'audio/wav' });
}
function writeString(view, offset, string) {
    // Write an ASCII string into a DataView, one byte per character.
    Array.from(string).forEach((ch, i) => {
        view.setUint8(offset + i, ch.charCodeAt(0));
    });
}
function floatTo16BitPCM(output, offset, input) {
    // Convert Float32 samples in [-1, 1] to little-endian signed 16-bit PCM,
    // clamping out-of-range values. Negative samples scale to -32768 and
    // positive ones to 32767 (asymmetric by design).
    let pos = offset;
    for (const sample of input) {
        const clamped = Math.min(1, Math.max(-1, sample));
        output.setInt16(pos, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
        pos += 2;
    }
}
// Update recording timer
function updateRecordingTime() {
    // Refresh the m:ss recording timer label from the shared start timestamp.
    const elapsedSeconds = (Date.now() - recordingStartTime) / 1000;
    const mins = Math.floor(elapsedSeconds / 60);
    const secs = Math.floor(elapsedSeconds % 60).toString().padStart(2, '0');
    document.getElementById('recordingTimer').textContent = `${mins}:${secs}`;
}
// Animate recording waveform
let waveformAnimationId;
function animateRecordingWaveform() {
    // Render a simulated live waveform onto a canvas inside
    // #recordingWaveform; loops via requestAnimationFrame while recording.
    const waveformElement = document.getElementById('recordingWaveform');
    waveformElement.innerHTML = '';
    const canvas = document.createElement('canvas');
    canvas.width = waveformElement.clientWidth;
    canvas.height = waveformElement.clientHeight;
    canvas.className = 'waveform-canvas';
    waveformElement.appendChild(canvas);
    const ctx = canvas.getContext('2d');
    // BUGFIX: canvas strokeStyle does not resolve CSS custom properties, so
    // 'var(--color-primary)' was an invalid value and silently ignored
    // (stroke fell back to black). Resolve the variable via computed style,
    // with a hard-coded fallback matching the :root definition.
    const themeColor = getComputedStyle(document.documentElement)
        .getPropertyValue('--color-primary').trim() || '#5D5CDE';
    function drawWaveform() {
        ctx.clearRect(0, 0, canvas.width, canvas.height);
        // Draw background
        ctx.fillStyle = 'rgba(0, 0, 0, 0.05)';
        ctx.fillRect(0, 0, canvas.width, canvas.height);
        // Draw animated waveform
        ctx.strokeStyle = themeColor;
        ctx.lineWidth = 2;
        ctx.beginPath();
        ctx.moveTo(0, canvas.height / 2);
        const now = Date.now() / 1000;
        for (let x = 0; x < canvas.width; x += 3) {
            // Sine sweep plus random jitter to suggest voice activity.
            const y = Math.sin(x * 0.05 + now * 5) * 10;
            const random = Math.random() * 5 * (isRecording ? 1 : 0.2);
            ctx.lineTo(x, canvas.height / 2 + y + random);
        }
        ctx.stroke();
        if (isRecording) {
            waveformAnimationId = requestAnimationFrame(drawWaveform);
        }
    }
    waveformAnimationId = requestAnimationFrame(drawWaveform);
}
// Stop waveform animation
function stopWaveformAnimation() {
    // Cancel the pending waveform animation frame, if one is scheduled.
    if (!waveformAnimationId) return;
    cancelAnimationFrame(waveformAnimationId);
}
// Add sample to list
function addSampleToList(sample) {
    // Append a recorded-sample row (play/delete controls) to the samples
    // list and enable the clone-creation button once any sample exists.
    const container = document.getElementById('samplesList');
    const item = document.createElement('div');
    item.className = 'clone-sample-item';
    item.dataset.id = sample.id;
    item.innerHTML = `
<div style="flex: 1;">
<strong>Sample ${recordingNumber}</strong> - ${sample.duration.toFixed(1)}s
</div>
<button class="btn btn-secondary btn-sm play-sample-btn">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polygon points="5 3 19 12 5 21 5 3"/>
</svg>
</button>
<button class="btn btn-secondary btn-sm delete-sample-btn">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polyline points="3 6 5 6 21 6"/>
<path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"/>
</svg>
</button>
`;
    container.appendChild(item);
    // Keep the sample payload on the DOM element for later retrieval
    // (e.g. by the create-clone flow).
    item.sampleData = sample;
    item.querySelector('.play-sample-btn')
        .addEventListener('click', () => playRecordedSample(sample));
    item.querySelector('.delete-sample-btn').addEventListener('click', () => {
        item.remove();
        // Hide the training section once the last sample is gone.
        if (document.getElementById('samplesList').children.length === 0) {
            document.getElementById('trainingSection').style.display = 'none';
        }
    });
    recordingNumber++;
    // A single sample is enough to allow clone creation.
    if (container.children.length >= 1) {
        document.getElementById('createCloneBtn').disabled = false;
    }
}
// Play recorded sample
function playRecordedSample(sample) {
    // Play a recorded sample blob through a temporary <audio> element.
    const audio = new Audio();
    const audioUrl = URL.createObjectURL(sample.blob);
    audio.src = audioUrl;
    // Release the object URL whether playback finishes, errors, or never
    // starts. BUGFIX: previously the URL was revoked only on 'ended', so a
    // failed playback leaked the blob URL for the lifetime of the page.
    // (Revoking an already-revoked URL is a harmless no-op.)
    const releaseUrl = () => URL.revokeObjectURL(audioUrl);
    audio.onended = releaseUrl;
    audio.onerror = releaseUrl;
    audio.play().catch(e => {
        console.error("Error playing sample:", e);
        showNotification("Error playing sample");
        releaseUrl();
    });
}
// Train clone button
// Kicks off the simulated training progress animation below.
document.getElementById('trainCloneBtn').addEventListener('click', function() {
// Simulate AI training
simulateTraining();
});
// Simulate AI training
function simulateTraining() {
    // Animate a fake training pass: fills the progress bar in 5% steps
    // every 200 ms (~4 s total) while cycling status messages, then
    // re-enables the clone/train buttons.
    const progressBar = document.getElementById('trainingProgressBar');
    const statusLabel = document.getElementById('trainingStatus');
    let progress = 0;
    // Prevent re-entrant clicks while "training".
    document.getElementById('trainCloneBtn').disabled = true;
    // Status phases keyed by the upper progress bound they apply below.
    const phases = [
        [25, 'Analyzing voice characteristics...'],
        [50, 'Extracting vocal features...'],
        [75, 'Training neural model...'],
        [Infinity, 'Finalizing voice clone...']
    ];
    const timer = setInterval(() => {
        progress += 5;
        progressBar.style.width = `${progress}%`;
        statusLabel.textContent = phases.find(([limit]) => progress < limit)[1];
        if (progress >= 100) {
            clearInterval(timer);
            statusLabel.textContent = 'Training complete!';
            document.getElementById('createCloneBtn').disabled = false;
            // Re-enable train button shortly after completion.
            setTimeout(() => {
                document.getElementById('trainCloneBtn').disabled = false;
            }, 1000);
        }
    }, 200);
}
// Cancel clone button
// Closes the create-clone card; recorded samples are intentionally kept.
document.getElementById('cancelCloneBtn').addEventListener('click', function() {
document.getElementById('createCloneCard').style.display = 'none';
});
// Create clone button
// Builds a voice clone from the recorded samples. When the "ML" quality is
// selected it tries the TensorFlow.js pipeline first and falls back to the
// Web Audio simulation on failure; on success it registers the clone,
// refreshes the UI, and opens the clone's settings card.
document.getElementById('createCloneBtn').addEventListener('click', async function() {
const name = document.getElementById('cloneName').value.trim();
if (!name) {
showNotification('Please enter a name for your voice clone');
return;
}
// Show progress
const trainingProgressBar = document.getElementById('trainingProgressBar');
const trainingStatus = document.getElementById('trainingStatus');
trainingStatus.textContent = 'Initializing voice cloning...';
trainingProgressBar.style.width = '10%';
try {
// Get all recorded samples
// Sample rows carry their blob on element.sampleData (see addSampleToList).
const sampleElements = document.getElementById('samplesList').children;
if (sampleElements.length === 0) {
throw new Error("No voice samples recorded");
}
// Collect all sample blobs
const sampleBlobs = [];
for (let i = 0; i < sampleElements.length; i++) {
if (sampleElements[i].sampleData && sampleElements[i].sampleData.blob) {
sampleBlobs.push(sampleElements[i].sampleData.blob);
}
}
// Update progress
trainingStatus.textContent = 'Analyzing voice characteristics...';
trainingProgressBar.style.width = '30%';
// Check which technology is selected
const useTensorFlow = document.querySelector('input[name="cloneQuality"]:checked').value === 'ml';
// Try TensorFlow.js voice cloning first if selected
// NOTE(review): tfVoiceCloner is defined elsewhere in this file.
let cloneResult = null;
let useML = useTensorFlow;
if (useTensorFlow) {
try {
trainingStatus.textContent = 'Running ML voice analysis...';
cloneResult = await tfVoiceCloner.cloneVoice(sampleBlobs);
trainingProgressBar.style.width = '60%';
} catch (mlError) {
console.error("TensorFlow voice cloning failed:", mlError);
trainingStatus.textContent = 'Falling back to simulation...';
useML = false;
}
}
// If TensorFlow approach failed or wasn't selected, use the simulation approach
if (!useML || !cloneResult || !cloneResult.success) {
trainingStatus.textContent = 'Using simplified voice analysis...';
useML = false;
// Create audio context
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
// Create voice clone simulator
const voiceCloner = new VoiceCloneSimulator(audioContext);
// Process first sample
// Only the first recorded sample feeds the simulator's analysis.
const firstSample = sampleElements[0].sampleData;
const arrayBuffer = await firstSample.blob.arrayBuffer();
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
// Analyze voice
const voiceData = await voiceCloner.analyzeVoice(audioBuffer);
cloneResult = {
success: true,
voiceData: voiceData
};
// Clean up
audioContext.close();
}
// Update progress
trainingStatus.textContent = 'Finalizing voice clone...';
trainingProgressBar.style.width = '90%';
// Create new clone profile with the voice data
// Default slider settings; user can tune them in the settings card.
const newClone = {
id: 'clone_' + Date.now(),
name: name,
samples: sampleElements.length,
useML: useML,
voiceData: cloneResult.voiceData,
settings: {
similarity: 70,
stability: 50,
style: 20,
emotionStrength: 60
},
created: new Date()
};
// Finish progress
trainingStatus.textContent = 'Voice clone complete!';
trainingProgressBar.style.width = '100%';
// Add to clones array
cloneProfiles.push(newClone);
// Update UI
updateCloneProfilesList();
// Hide create card
document.getElementById('createCloneCard').style.display = 'none';
// Show settings card for the new clone
showCloneSettings(newClone);
// Reset form for next time
document.getElementById('cloneName').value = '';
document.getElementById('samplesList').innerHTML = '';
document.getElementById('trainingSection').style.display = 'none';
recordingNumber = 1;
showNotification(`Voice clone created successfully ${useML ? 'with ML technology' : 'with simulation'}`);
} catch (error) {
console.error("Error creating voice clone:", error);
trainingStatus.textContent = 'Error: ' + error.message;
showNotification('Error creating voice clone: ' + error.message);
}
});
// Update clone profiles list
function updateCloneProfilesList() {
    // Rebuild the clone profile cards, preserving the leading
    // "create new clone" card, then refresh the dependent voice selectors.
    const container = document.getElementById('cloneProfiles');
    // Keep the first card (create new clone)
    const firstCard = container.children[0];
    container.innerHTML = '';
    container.appendChild(firstCard);
    // SECURITY FIX: clone.name comes from a user text input and was
    // interpolated into innerHTML unescaped, allowing HTML/script injection.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, ch => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    }[ch]));
    // Add clone profiles
    cloneProfiles.forEach(clone => {
        const card = document.createElement('div');
        card.className = 'voice-profile-card';
        if (currentCloneProfile && currentCloneProfile.id === clone.id) {
            card.classList.add('active');
        }
        // Badge reflects whether the clone used the ML pipeline or simulation.
        const mlBadge = clone.useML ?
            `<span class="ml-badge">ML-powered</span>` :
            `<span class="badge badge-accent">AI-Clone</span>`;
        card.innerHTML = `
<div class="voice-profile-header">
<h3 class="voice-profile-title">${escapeHtml(clone.name)}</h3>
</div>
<div class="voice-profile-body">
<p style="margin-top: 0; font-size: 0.875rem; color: var(--color-text-secondary);">
${clone.samples} voice samples
</p>
${mlBadge}
</div>
<div class="voice-profile-footer">
<button class="btn btn-secondary btn-sm edit-clone-btn" data-id="${clone.id}">Edit</button>
<button class="btn btn-accent btn-sm activate-clone-btn" data-id="${clone.id}">
${currentCloneProfile && currentCloneProfile.id === clone.id ? 'Active' : 'Activate'}
</button>
</div>
`;
        container.appendChild(card);
    });
    // Wire the per-card buttons: edit opens settings, activate applies the voice.
    document.querySelectorAll('.edit-clone-btn').forEach(btn => {
        btn.addEventListener('click', function() {
            const cloneId = this.dataset.id;
            const clone = cloneProfiles.find(c => c.id === cloneId);
            if (clone) {
                showCloneSettings(clone);
            }
        });
    });
    document.querySelectorAll('.activate-clone-btn').forEach(btn => {
        btn.addEventListener('click', function() {
            const cloneId = this.dataset.id;
            const clone = cloneProfiles.find(c => c.id === cloneId);
            if (clone) {
                activateCloneProfile(clone);
            }
        });
    });
    // Keep the calls-tab and TTS voice dropdowns in sync with the clone list.
    updateCallVoiceSelection();
    updateTtsVoiceSelection();
}
// Show clone settings
function showCloneSettings(clone) {
    // Open the settings card for `clone` and sync every slider and its
    // percentage label to the clone's stored setting values.
    document.getElementById('createCloneCard').style.display = 'none';
    const settingsCard = document.getElementById('cloneSettingsCard');
    settingsCard.style.display = 'block';
    document.getElementById('cloneSettingsTitle').textContent = 'Voice Clone: ' + clone.name;
    // Each entry maps a settings key to its slider element id; the label
    // element id is the slider id plus the "Value" suffix.
    const bindings = [
        ['similarity', 'cloneSimilarity'],
        ['stability', 'cloneStability'],
        ['style', 'cloneStyle'],
        ['emotionStrength', 'cloneEmotionStrength']
    ];
    for (const [key, elementId] of bindings) {
        document.getElementById(elementId).value = clone.settings[key];
        document.getElementById(elementId + 'Value').textContent = clone.settings[key] + '%';
    }
    // Remember which clone the settings-card buttons operate on.
    settingsCard.dataset.cloneId = clone.id;
}
// Clone setting sliders
// Each slider updates its "%" label live and writes the value back into the
// settings of the clone currently open in the settings card.
// CONSISTENCY: the four original handlers were copy-paste identical except
// for the element id and settings key; consolidated into one data-driven loop.
[
    ['cloneSimilarity', 'similarity'],
    ['cloneStability', 'stability'],
    ['cloneStyle', 'style'],
    ['cloneEmotionStrength', 'emotionStrength']
].forEach(([elementId, settingKey]) => {
    document.getElementById(elementId).addEventListener('input', function() {
        const value = parseInt(this.value);
        document.getElementById(elementId + 'Value').textContent = value + '%';
        // Persist to the clone the settings card is showing (if any).
        const cloneId = document.getElementById('cloneSettingsCard').dataset.cloneId;
        const clone = cloneProfiles.find(c => c.id === cloneId);
        if (clone) {
            clone.settings[settingKey] = value;
        }
    });
});
// Test clone button
// Plays an audible preview of the clone currently open in the settings card.
document.getElementById('testCloneBtn').addEventListener('click', function() {
const cloneId = document.getElementById('cloneSettingsCard').dataset.cloneId;
const clone = cloneProfiles.find(c => c.id === cloneId);
if (clone) {
showNotification('Testing voice clone: ' + clone.name);
// This would play a sample of the clone speaking
testCloneVoice(clone);
}
});
// Test clone voice
// Plays a short audible preview of `clone` via one of three paths:
// 1. ML synthesis (clone.useML with an embedding) through tfVoiceCloner;
// 2. a filtered-oscillator simulation driven by the analyzed voiceData,
//    followed by a Web Speech API "Hello" attempt;
// 3. a plain settings-tinted tone when no voiceData exists at all.
function testCloneVoice(clone) {
try {
// If clone was created with ML and has embedding data
if (clone.useML && clone.voiceData && clone.voiceData.embedding) {
// Use TensorFlow.js voice synthesis
const testText = "Hello, this is a test of your cloned voice.";
// Show notification
showNotification('Testing ML voice clone: ' + clone.name);
// Generate speech with the cloned voice
// The style setting biases speaking rate around 1.0.
tfVoiceCloner.generateSpeech(testText, clone.voiceData, {
rate: 1.0 + (clone.settings.style - 50) / 100
});
return;
}
// Fallback to the existing simulation methods
if (clone.voiceData) {
const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
const voiceCloner = new VoiceCloneSimulator(audioCtx);
// Recreate the voice characteristics
// (skips re-analysis; createVoiceFilter reads this directly)
voiceCloner.characteristics = clone.voiceData;
// Create oscillator with voice characteristics
const oscillator = audioCtx.createOscillator();
const gainNode = audioCtx.createGain();
// Set basic oscillator properties
oscillator.type = 'sawtooth';
oscillator.frequency.value = clone.voiceData.fundamentalFreq || 220;
// Apply voice filter
const voiceNode = voiceCloner.applyVoiceToSynth(oscillator);
voiceNode.connect(gainNode);
gainNode.connect(audioCtx.destination);
// Create envelope
gainNode.gain.value = 0;
oscillator.start();
// Attack
gainNode.gain.linearRampToValueAtTime(0.3, audioCtx.currentTime + 0.1);
// Variations
// Five small pitch/volume wobbles to make the tone feel voice-like.
for (let i = 0; i < 5; i++) {
const time = audioCtx.currentTime + 0.2 + (i * 0.2);
oscillator.frequency.linearRampToValueAtTime(
(clone.voiceData.fundamentalFreq || 220) * (1 + Math.sin(i) * 0.1),
time
);
gainNode.gain.linearRampToValueAtTime(
0.2 + Math.sin(i * 0.6) * 0.1,
time
);
}
// Release
gainNode.gain.linearRampToValueAtTime(0, audioCtx.currentTime + 1.5);
// Stop after the envelope
setTimeout(() => {
oscillator.stop();
audioCtx.close();
}, 1500);
// Try to say "Hello" with Web Speech API
// Slight delay so the tone finishes before speech begins.
setTimeout(() => {
voiceCloner.generateSpeech("Hello, this is a test of your cloned voice.", {
pitch: clone.settings.similarity / 50,
rate: 1 + (clone.settings.style - 50) / 100
});
}, 1700);
} else {
// Simple tone fallback
// No analysis data at all: play a beep whose pitch reflects the
// similarity setting (50% = neutral 440 Hz).
const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
const oscillator = audioCtx.createOscillator();
const gainNode = audioCtx.createGain();
// Use clone settings for the tone
const basePitch = 440;
const pitchModifier = (clone.settings.similarity - 50) / 50;
oscillator.frequency.value = basePitch * (1 + pitchModifier * 0.5);
oscillator.connect(gainNode);
gainNode.connect(audioCtx.destination);
// Simple envelope
gainNode.gain.value = 0;
oscillator.start();
gainNode.gain.linearRampToValueAtTime(0.3, audioCtx.currentTime + 0.1);
gainNode.gain.linearRampToValueAtTime(0, audioCtx.currentTime + 1.5);
setTimeout(() => {
oscillator.stop();
audioCtx.close();
}, 1500);
}
} catch (error) {
console.error("Error testing clone voice:", error);
showNotification("Error testing voice: " + error.message);
}
}
// Activate clone button
// Applies the clone currently open in the settings card as the active voice.
document.getElementById('activateCloneBtn').addEventListener('click', function() {
const cloneId = document.getElementById('cloneSettingsCard').dataset.cloneId;
const clone = cloneProfiles.find(c => c.id === cloneId);
if (clone) {
activateCloneProfile(clone);
}
});
// Back to clone list button
// Simply closes the settings card; the profile list is already current.
document.getElementById('backToCloneListBtn').addEventListener('click', function() {
document.getElementById('cloneSettingsCard').style.display = 'none';
});
// Activate a clone profile
// Makes `clone` the active voice: derives voice-changer settings from the
// clone's sliders, syncs the on-screen controls, and switches to the voice
// tab so the applied settings are visible.
function activateCloneProfile(clone) {
// Set current clone profile
currentCloneProfile = clone;
currentVoiceProfile = 'standard'; // Reset voice profile
// Apply voice settings based on the clone
// This is a simplified version - in a real implementation,
// the clone would have its own voice model
// For demo purposes, we'll use the settings to create a distinctive voice
// Similarity drives the pitch/formant/gender morphs (50% = neutral).
settings.pitchShift = Math.round((clone.settings.similarity - 50) / 10);
settings.formantShift = Math.round((clone.settings.similarity - 50) / 15);
settings.genderMorph = 50 + Math.round((clone.settings.similarity - 50) / 2);
// Use the stability to determine effect type
// Low stability -> lo-fi telephone; high stability -> dry; else light reverb.
if (clone.settings.stability < 40) {
settings.effect = 'telephone';
settings.effectMix = 20 + (40 - clone.settings.stability);
} else if (clone.settings.stability > 80) {
settings.effect = 'none';
settings.effectMix = 0;
} else {
settings.effect = 'reverb';
settings.effectMix = 15;
}
// Update UI controls
// NOTE(review): these control references (pitchShiftControl, genderSlider,
// etc.) are module-level globals defined elsewhere in this file.
pitchShiftControl.value = settings.pitchShift;
pitchShiftValue.textContent = settings.pitchShift > 0 ? `+${settings.pitchShift}` : settings.pitchShift;
formantShiftControl.value = settings.formantShift;
formantShiftValue.textContent = settings.formantShift > 0 ? `+${settings.formantShift}` : settings.formantShift;
genderSlider.value = settings.genderMorph;
genderValue.textContent = `${settings.genderMorph}%`;
voiceEffectControl.value = settings.effect;
effectMixControl.value = settings.effectMix;
effectMixValue.textContent = `${settings.effectMix}%`;
// Update formant filter if active
if (formantFilter) {
formantFilter.gain.value = settings.formantShift * 2;
}
// Update UI
updateCloneProfilesList();
// Update floating voice name
document.getElementById('floatingVoiceName').textContent = clone.name + ' (Clone)';
showNotification(`Activated voice clone: ${clone.name}`);
// Switch to voice tab to show the applied settings
voiceTabBtn.click();
}
// =======================================
// Text-to-Speech Functionality
// =======================================
// TTS history items
// Most-recent-first list of generated TTS requests (rendered by updateTtsHistory).
const ttsHistory = [];
// Update TTS voice selection
function updateTtsVoiceSelection() {
    // Rebuild the TTS voice dropdown: the standard voice plus one bolded
    // entry per clone profile; preselect the active clone when there is one.
    const select = document.getElementById('ttsVoice');
    select.innerHTML = '<option value="standard">Standard TTS Voice</option>';
    for (const profile of cloneProfiles) {
        // Clone options are encoded as "clone_<id>" so the generate handler
        // can route them to the cloned-voice path.
        const option = new Option(profile.name, 'clone_' + profile.id);
        option.style.fontWeight = 'bold';
        select.add(option);
    }
    if (currentCloneProfile) {
        select.value = 'clone_' + currentCloneProfile.id;
    }
}
// TTS Pitch Control
// Live-updates the signed "+N"/"-N" pitch label as the slider moves.
const ttsPitchControl = document.getElementById('ttsPitch');
const ttsPitchValue = document.getElementById('ttsPitchValue');
ttsPitchControl.addEventListener('input', function() {
const value = parseInt(this.value);
ttsPitchValue.textContent = value > 0 ? `+${value}` : value;
});
// TTS Speed Control
// Live-updates the "N.Nx" speed label as the slider moves.
const ttsSpeedControl = document.getElementById('ttsSpeed');
const ttsSpeedValue = document.getElementById('ttsSpeedValue');
ttsSpeedControl.addEventListener('input', function() {
const value = parseFloat(this.value);
ttsSpeedValue.textContent = value.toFixed(1) + 'x';
});
// Generate TTS
// Routes the request to the cloned-voice path when a clone option is
// selected in the dropdown, otherwise to the standard simulated TTS.
document.getElementById('generateTtsBtn').addEventListener('click', function() {
const text = document.getElementById('ttsText').value.trim();
if (!text) {
showNotification('Please enter text to convert to speech');
return;
}
// Show result section
document.getElementById('ttsResult').style.display = 'block';
// Check if we should use a cloned voice
// Clone options are encoded as "clone_<id>" (see updateTtsVoiceSelection).
const voiceSelection = document.getElementById('ttsVoice').value;
if (voiceSelection.startsWith('clone_')) {
const cloneId = voiceSelection.replace('clone_', '');
const clone = cloneProfiles.find(c => c.id === cloneId);
if (clone) {
generateSpeechWithClonedVoice(text, clone);
return;
}
}
// Otherwise use standard TTS
generateSpeechWithAI(text);
});
// Generate speech with cloned voice
// Synthesizes `text` using an ML-backed clone when one is available,
// otherwise falls through to the standard simulated path. Records the
// request in ttsHistory and updates the status label either way.
async function generateSpeechWithClonedVoice(text, clone) {
// Show generating state
document.getElementById('ttsStatus').textContent = 'Generating...';
try {
// If clone was created with ML and has embedding data
if (clone.useML && clone.voiceData && clone.voiceData.embedding) {
// Get TTS options
const pitch = parseInt(document.getElementById('ttsPitch').value);
const speed = parseFloat(document.getElementById('ttsSpeed').value);
const emotion = document.getElementById('ttsEmotion').value;
// Use TensorFlow.js for synthesis
// NOTE(review): tfVoiceCloner is defined elsewhere in this file;
// `result` is currently unused beyond awaiting completion.
const result = await tfVoiceCloner.generateSpeech(text, clone.voiceData, {
pitch: 1 + (pitch / 20), // Convert -20 to +20 range to a multiplier
rate: speed,
emotion: emotion
});
// Since we don't actually generate audio files in the demo,
// we'll create a speech synthesis utterance and capture its audio
// Create TTS history item with dummy audio URL
const newTts = {
id: 'tts_' + Date.now(),
text: text,
voice: 'clone_' + clone.id,
model: 'ml',
pitch: pitch,
speed: speed,
emotion: emotion,
audioUrl: null, // Would be set with real audio
created: new Date()
};
// Add to history
ttsHistory.unshift(newTts);
// Update history UI
updateTtsHistory();
// Update result
document.getElementById('ttsStatus').textContent = 'Complete (ML Voice)';
// Show success
showNotification('Speech generated with ML voice clone');
} else {
// Fallback to standard TTS with voice characteristics
generateSpeechWithAI(text, clone);
}
} catch (error) {
console.error("Error generating cloned speech:", error);
document.getElementById('ttsStatus').textContent = 'Error';
showNotification("Error generating speech: " + error.message);
}
}
// Generate speech with AI
// Simulated AI TTS: reads options from the UI, fakes an API call delay,
// records the request in history, and produces a placeholder test tone.
// `clone` is accepted for call symmetry but unused in the demo.
function generateSpeechWithAI(text, clone = null) {
    const options = {
        voice: document.getElementById('ttsVoice').value,
        pitch: parseInt(document.getElementById('ttsPitch').value),
        speed: parseFloat(document.getElementById('ttsSpeed').value),
        emotion: document.getElementById('ttsEmotion').value
    };
    document.getElementById('ttsStatus').textContent = 'Generating...';
    // Simulate the latency of a real TTS API request
    setTimeout(() => {
        // In a real implementation this would call the Poe API and use the
        // returned audio; here we only record the request in history.
        ttsHistory.unshift({
            id: 'tts_' + Date.now(),
            text: text,
            voice: options.voice,
            pitch: options.pitch,
            speed: options.speed,
            emotion: options.emotion,
            audioUrl: null, // would be set from the API response
            created: new Date()
        });
        updateTtsHistory();
        document.getElementById('ttsStatus').textContent = 'Complete';
        showNotification('Speech generated successfully');
        // Stand-in audio so the player has something to play
        createTestTTSAudio(options.pitch, options.speed, options.emotion);
    }, 2000);
}
// Create test TTS audio
// Renders a short placeholder tone offline and wires it into the TTS player.
// `pitch` is in semitones, `speed` scales duration, `emotion` selects the
// waveform and volume envelope.
// Fixes vs. the original: no dead live-context oscillator graph, the
// OfflineAudioContext frame count is forced to an integer (fractional
// `speed` values made it non-integral), and the throwaway live context is
// closed on every path rather than only after a successful render.
function createTestTTSAudio(pitch, speed, emotion) {
    try {
        // A throwaway context is only needed to discover the device sample rate
        const liveContext = new (window.AudioContext || window.webkitAudioContext)();
        const sampleRate = liveContext.sampleRate;
        liveContext.close();
        // Tone parameters derived from the TTS options
        const basePitch = 220; // A3
        const frequency = basePitch * Math.pow(2, pitch / 12);
        // Emotion selects the waveform type
        let waveform;
        switch (emotion) {
            case 'happy':
                waveform = 'triangle';
                break;
            case 'sad':
                waveform = 'sine';
                break;
            case 'angry':
                waveform = 'square';
                break;
            case 'fearful':
                waveform = 'sawtooth';
                break;
            default:
                waveform = 'sine';
        }
        // Render offline; the length argument must be an integer frame count
        const duration = 2 / speed; // adjust duration based on speed
        const offlineContext = new OfflineAudioContext(1, Math.ceil(sampleRate * duration), sampleRate);
        const offlineOsc = offlineContext.createOscillator();
        const offlineGain = offlineContext.createGain();
        offlineOsc.frequency.value = frequency;
        offlineOsc.type = waveform;
        offlineOsc.connect(offlineGain);
        offlineGain.connect(offlineContext.destination);
        // Envelope: start silent with a quick attack...
        offlineGain.gain.value = 0;
        offlineGain.gain.linearRampToValueAtTime(0.5, 0.1);
        // ...then an emotion-specific shape
        switch (emotion) {
            case 'happy':
                // Quick attack, sustained volume
                offlineGain.gain.linearRampToValueAtTime(0.7, 0.2);
                offlineGain.gain.linearRampToValueAtTime(0.6, duration - 0.3);
                break;
            case 'sad':
                // Slow attack, low volume
                offlineGain.gain.linearRampToValueAtTime(0.4, 0.3);
                offlineGain.gain.linearRampToValueAtTime(0.3, duration - 0.5);
                break;
            case 'angry':
                // Sharp attack, high volume
                offlineGain.gain.linearRampToValueAtTime(0.8, 0.1);
                offlineGain.gain.linearRampToValueAtTime(0.7, duration - 0.2);
                break;
            case 'fearful':
                // Tremolo effect
                for (let i = 0; i < duration * 5; i++) {
                    const time = 0.2 + (i * 0.2);
                    if (time < duration - 0.3) {
                        const value = 0.3 + (i % 2) * 0.1;
                        offlineGain.gain.linearRampToValueAtTime(value, time);
                    }
                }
                break;
            default:
                // Neutral envelope
                offlineGain.gain.linearRampToValueAtTime(0.5, 0.2);
                offlineGain.gain.linearRampToValueAtTime(0.4, duration - 0.4);
        }
        // Release to silence at the end
        offlineGain.gain.linearRampToValueAtTime(0, duration);
        offlineOsc.start();
        offlineOsc.stop(duration);
        offlineContext.startRendering().then(renderedBuffer => {
            // Package the rendered buffer as a WAV blob for the <audio> element
            const audioBlob = bufferToWave(renderedBuffer, offlineContext.length);
            const audioUrl = URL.createObjectURL(audioBlob);
            document.getElementById('ttsAudio').src = audioUrl;
            // Attach the URL to the most recent history entry
            if (ttsHistory.length > 0) {
                ttsHistory[0].audioUrl = audioUrl;
            }
            updateTtsHistory();
        }).catch(err => {
            console.error('Error rendering audio:', err);
            showNotification('Error generating audio');
        });
    } catch (error) {
        console.error("Error creating TTS audio:", error);
        showNotification("Error creating audio: " + error.message);
    }
}
// Convert audio buffer to WAV
// Serializes an AudioBuffer-like object into a 16-bit PCM RIFF/WAVE blob.
// `abuffer` must expose numberOfChannels, sampleRate and getChannelData();
// `len` is the number of frames to write.
function bufferToWave(abuffer, len) {
    const numOfChan = abuffer.numberOfChannels;
    const length = len * numOfChan * 2 + 44; // 16-bit samples + 44-byte header
    const buffer = new ArrayBuffer(length);
    const view = new DataView(buffer);
    const channels = [];
    let i, sample;
    let offset = 0;
    // Little-endian writers that advance the shared offset
    function setUint16(data) {
        view.setUint16(offset, data, true);
        offset += 2;
    }
    function setUint32(data) {
        view.setUint32(offset, data, true);
        offset += 4;
    }
    // Write WAVE header
    setUint32(0x46464952); // "RIFF"
    setUint32(length - 8); // file length - 8
    setUint32(0x45564157); // "WAVE"
    setUint32(0x20746d66); // "fmt " chunk
    setUint32(16); // length = 16
    setUint16(1); // PCM (uncompressed)
    setUint16(numOfChan);
    setUint32(abuffer.sampleRate);
    setUint32(abuffer.sampleRate * 2 * numOfChan); // avg. bytes/sec
    setUint16(numOfChan * 2); // block-align
    setUint16(16); // 16-bit
    setUint32(0x61746164); // "data" - chunk
    setUint32(length - offset - 4); // chunk length
    // Write interleaved sample data
    for (i = 0; i < abuffer.numberOfChannels; i++) {
        channels.push(abuffer.getChannelData(i));
    }
    for (i = 0; i < len; i++) {
        for (let ch = 0; ch < numOfChan; ch++) {
            // Clamp to [-1, 1] and scale to signed 16-bit by the sample's sign.
            // (The widely copied `0.5 + sample < 0 ? ...` form parses as
            // `(0.5 + sample) < 0` due to precedence, mis-scaling samples
            // in (-0.5, 0) by 32767 instead of 32768.)
            sample = Math.max(-1, Math.min(1, channels[ch][i]));
            sample = (sample < 0 ? sample * 32768 : sample * 32767) | 0;
            view.setInt16(offset, sample, true);
            offset += 2;
        }
    }
    return new Blob([buffer], { type: 'audio/wav' });
}
// Update TTS history
// Rebuilds the TTS history list from `ttsHistory`. User-controlled strings
// (the spoken text and clone voice names) are HTML-escaped before being
// inserted via innerHTML so they cannot inject markup (XSS fix).
function updateTtsHistory() {
    const container = document.getElementById('ttsHistoryList');
    if (ttsHistory.length === 0) {
        container.innerHTML = `
            <div style="color: var(--color-text-secondary); text-align: center; padding: 1rem;">
                No history yet. Generated speech will appear here.
            </div>
        `;
        return;
    }
    // Minimal HTML escaper for text interpolated into innerHTML below
    const escapeHtml = (str) => String(str)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
    container.innerHTML = '';
    ttsHistory.forEach(item => {
        const historyItem = document.createElement('div');
        historyItem.className = 'card';
        historyItem.style.marginBottom = '0.75rem';
        // Truncate long text for the header line
        const snippet = item.text.substring(0, 30) + (item.text.length > 30 ? '...' : '');
        historyItem.innerHTML = `
            <div class="card-header" style="padding: 0.5rem 1rem;">
                <div style="display: flex; justify-content: space-between; align-items: center;">
                    <h3 style="margin: 0; font-size: 0.875rem; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; max-width: 200px;">
                        ${escapeHtml(snippet)}
                    </h3>
                    <span style="font-size: 0.75rem; color: var(--color-text-secondary);">
                        ${formatTimestamp(item.created)}
                    </span>
                </div>
            </div>
            <div class="card-body" style="padding: 0.5rem 1rem;">
                <audio controls style="width: 100%; margin-bottom: 0.5rem;" src="${item.audioUrl || ''}"></audio>
                <div style="display: flex; justify-content: space-between; align-items: center;">
                    <div style="font-size: 0.75rem; color: var(--color-text-secondary);">
                        Voice: ${escapeHtml(getVoiceName(item.voice))} • ${item.emotion.charAt(0).toUpperCase() + item.emotion.slice(1)}
                    </div>
                    <button class="btn btn-secondary btn-sm download-tts-btn" data-id="${item.id}">
                        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
                            <polyline points="7 10 12 15 17 10"/>
                            <line x1="12" y1="15" x2="12" y2="3"/>
                        </svg>
                        Download
                    </button>
                </div>
            </div>
        `;
        container.appendChild(historyItem);
    });
    // Wire up the per-item download buttons
    document.querySelectorAll('.download-tts-btn').forEach(btn => {
        btn.addEventListener('click', function() {
            const ttsId = this.dataset.id;
            const tts = ttsHistory.find(t => t.id === ttsId);
            if (tts && tts.audioUrl) {
                downloadTts(tts);
            }
        });
    });
}
// Get voice name from ID
// Maps a voice selector value to a human-readable label: the built-in
// voice, a named clone profile, or an "unknown" fallback.
function getVoiceName(voiceId) {
    if (voiceId === 'standard') {
        return 'Standard TTS';
    }
    if (!voiceId.startsWith('clone_')) {
        return 'Unknown Voice';
    }
    const clone = cloneProfiles.find(c => c.id === voiceId.replace('clone_', ''));
    return clone ? clone.name : 'Unknown Clone';
}
// Format timestamp
// Renders a date (or anything the Date constructor accepts) as a
// localized hour:minute string.
function formatTimestamp(date) {
    const opts = { hour: '2-digit', minute: '2-digit' };
    return new Date(date).toLocaleTimeString([], opts);
}
// Download TTS
// Triggers a browser download of a history item's rendered WAV; no-op when
// the item has no audio URL yet.
function downloadTts(tts) {
    if (!tts.audioUrl) return;
    const a = document.createElement('a');
    a.href = tts.audioUrl;
    a.download = `tts_${tts.text.substring(0, 10).replace(/\s+/g, '_')}.wav`;
    // Firefox only honors programmatic clicks on anchors attached to the DOM
    document.body.appendChild(a);
    a.click();
    a.remove();
}
// Cancel TTS: clear the input text and hide the result panel
document.getElementById('cancelTtsBtn').addEventListener('click', function() {
    // Clear text and hide result
    document.getElementById('ttsText').value = '';
    document.getElementById('ttsResult').style.display = 'none';
});
// Download TTS from result panel — downloads the most recent history entry
document.getElementById('downloadTtsBtn').addEventListener('click', function() {
    if (ttsHistory.length > 0) {
        downloadTts(ttsHistory[0]);
    }
});
// =======================================
// Push-to-Talk Functionality
// =======================================
// PTT key overlay elements
const pttKeyOverlay = document.getElementById('pttKeyOverlay');
const keyDisplayBox = document.getElementById('keyDisplayBox');
const pttKeyDisplay = document.getElementById('pttKeyDisplay');
// Candidate key pressed while the overlay is open. It is only committed to
// `pttKeyCode` when the user clicks Save, so Cancel really cancels (the
// original mutated `pttKeyCode` directly on keydown, making Cancel a no-op
// and letting Save-without-a-keypress persist a stale display string).
let pendingPttKey = null;
// Open PTT key settings
document.getElementById('changePttKeyBtn').addEventListener('click', () => {
    pendingPttKey = null;
    pttKeyOverlay.style.display = 'flex';
    keyDisplayBox.textContent = 'Press a key...';
});
// Close PTT key settings without applying the candidate key
document.getElementById('cancelPttKeyBtn').addEventListener('click', () => {
    pendingPttKey = null;
    pttKeyOverlay.style.display = 'none';
});
// Reset to the default space key
document.getElementById('resetPttKeyBtn').addEventListener('click', () => {
    pttKeyCode = 'Space';
    pttKeyDisplay.textContent = 'SPACE';
    keyDisplayBox.textContent = 'SPACE';
    pttKeyOverlay.style.display = 'none';
    localStorage.setItem('ghostvoice-ptt-key', pttKeyCode);
});
// Save PTT key (no-op if no key was pressed while the overlay was open)
document.getElementById('savePttKeyBtn').addEventListener('click', () => {
    if (pendingPttKey) {
        pttKeyCode = pendingPttKey;
        pttKeyDisplay.textContent = keyDisplayBox.textContent;
        localStorage.setItem('ghostvoice-ptt-key', pttKeyCode);
    }
    pttKeyOverlay.style.display = 'none';
});
// Detect key press
document.addEventListener('keydown', function(e) {
    // While the settings overlay is open, capture the key as a candidate only
    if (pttKeyOverlay.style.display === 'flex') {
        e.preventDefault();
        pendingPttKey = e.code;
        keyDisplayBox.textContent = e.code.replace('Key', '');
        return;
    }
    // Ignore PTT while the voice activation test overlay is open
    if (document.getElementById('voiceTestOverlay').style.display === 'flex') {
        return;
    }
    // Hold the configured key to talk (only while processing is active)
    if (e.code === pttKeyCode && !isPttActive && isProcessing) {
        e.preventDefault();
        activatePtt();
    }
});
document.addEventListener('keyup', function(e) {
    if (e.code === pttKeyCode && isPttActive) {
        e.preventDefault();
        deactivatePtt();
    }
});
// Floating PTT button: hold down (mouse) to talk, release — or drag the
// pointer off the button — to stop.
function handleFloatingPttPress() {
    if (isProcessing) {
        activatePtt();
    }
}
function handleFloatingPttRelease() {
    if (isPttActive) {
        deactivatePtt();
    }
}
document.getElementById('floatingPttBtn').addEventListener('mousedown', handleFloatingPttPress);
document.getElementById('floatingPttBtn').addEventListener('mouseup', handleFloatingPttRelease);
document.getElementById('floatingPttBtn').addEventListener('mouseleave', handleFloatingPttRelease);
// Hide the floating controls panel entirely
document.getElementById('hideFloatingBtn').addEventListener('click', function() {
    document.getElementById('floatingControls').style.display = 'none';
});
// Activate PTT
// Marks PTT as held, updates the floating button appearance, and (when in
// a call) switches the outgoing stream to the processed audio.
function activatePtt() {
    isPttActive = true;
    const btn = document.getElementById('floatingPttBtn');
    btn.textContent = 'Talking...';
    btn.style.backgroundColor = 'var(--color-success)';
    if (peer && currentCall) {
        // If we're in a call, send the processed audio
        updateCallStream();
    }
}
// Deactivate PTT
// Restores the floating button label/color and mutes the call stream.
function deactivatePtt() {
    isPttActive = false;
    const btn = document.getElementById('floatingPttBtn');
    btn.innerHTML = `
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" style="margin-right: 0.5rem;">
            <path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/>
            <path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
            <line x1="12" y1="19" x2="12" y2="22"/>
        </svg>
        Push to Talk
    `;
    btn.style.backgroundColor = 'var(--color-primary)';
    if (peer && currentCall) {
        // If we're in a call, stop sending audio
        updateCallStream(true);
    }
}
// =======================================
// Call Functionality with WebRTC
// =======================================
// Simulated PeerJS-style peer object (created lazily by initializePeer)
let peer = null;
// The active call object, if any
let currentCall = null;
// Local audio stream — assigned outside this section; presumably the raw
// microphone input (TODO confirm against the capture code)
let localStream = null;
// Whether our outgoing audio is muted
let isMicMuted = false;
// Whether incoming participant audio is muted
let isAudioMuted = false;
// Participant registry: id -> { id, displayName, audio? (HTMLAudioElement) }
const participants = new Map();
// Initialize PeerJS
// Demo stub: fabricates a peer-like object instead of a real PeerJS
// connection. Returns the cached peer if already created, otherwise a
// Promise resolving with the new peer after a simulated delay.
function initializePeer() {
    if (peer) return peer;
    return new Promise((resolve, reject) => {
        // Looked up outside try/catch so the error path can reach it too.
        // (The original declared this `const` inside `try`, so the `catch`
        // block threw a ReferenceError instead of reporting the failure.)
        const callErrorMessage = document.getElementById('callErrorMessage');
        try {
            callErrorMessage.style.display = 'none';
            // Use a placeholder instead of creating a real PeerJS instance.
            // This is a simulation for the demo.
            peer = {
                id: 'user_' + Math.random().toString(36).substring(2, 10),
                on: function(event, callback) {
                    if (event === 'open') {
                        setTimeout(() => callback(this.id), 500);
                    }
                },
                disconnect: function() {},
                destroy: function() {},
                call: function(peerId, stream, options) {
                    const call = {
                        peer: peerId,
                        metadata: options.metadata,
                        peerConnection: {
                            getSenders: function() {
                                return [{ track: { kind: 'audio' }, replaceTrack: function() {} }];
                            }
                        },
                        answer: function() {},
                        on: function(event, callback) {},
                        close: function() {}
                    };
                    return call;
                }
            };
            // Simulate connection after a delay
            setTimeout(() => {
                resolve(peer);
            }, 500);
        } catch (error) {
            console.error("Error initializing peer:", error);
            callErrorMessage.textContent = "Failed to initialize connection: " + error.message;
            callErrorMessage.style.display = 'block';
            reject(error);
        }
    });
}
// Update stream used in the call
// Swaps the outgoing audio track on the active call: a silent track when
// muted / PTT released, the processed output track while PTT is held.
// No-op when there is no call or voice processing is off.
function updateCallStream(muted = false) {
    if (!currentCall || !isProcessing) return;
    try {
        // If PTT is not active or explicitly muted, send empty audio
        if (muted || !isPttActive) {
            const emptyStream = new MediaStream();
            const emptyTracks = emptyStream.getAudioTracks();
            // If the empty stream has no tracks, create a silent audio track.
            // NOTE(review): this AudioContext is never closed — repeated mute
            // toggles will accumulate contexts; consider reusing one.
            if (emptyTracks.length === 0) {
                const audioContext = new (window.AudioContext || window.webkitAudioContext)();
                const dest = audioContext.createMediaStreamDestination();
                emptyStream.addTrack(dest.stream.getAudioTracks()[0]);
            }
            // Replace the audio track in all senders with the silent one
            currentCall.peerConnection.getSenders().forEach(sender => {
                if (sender.track && sender.track.kind === 'audio') {
                    sender.replaceTrack(emptyStream.getAudioTracks()[0]);
                }
            });
        } else {
            // Send the processed audio. `outputNode` is provided elsewhere —
            // presumably a MediaStreamDestination node (TODO confirm).
            if (outputNode && outputNode.stream) {
                const audioTrack = outputNode.stream.getAudioTracks()[0];
                if (audioTrack) {
                    currentCall.peerConnection.getSenders().forEach(sender => {
                        if (sender.track && sender.track.kind === 'audio') {
                            sender.replaceTrack(audioTrack);
                        }
                    });
                }
            }
        }
    } catch (error) {
        console.error("Error updating call stream:", error);
    }
}
// Handle incoming call
// Answers `call` with our processed (or silent) stream, then wires up the
// handlers that add/remove the remote participant and play their audio.
async function handleIncomingCall(call) {
    try {
        // Save the current call reference
        currentCall = call;
        // Answer with our stream (or empty stream if not processing)
        if (isProcessing && outputNode && outputNode.stream) {
            call.answer(outputNode.stream);
        } else {
            // Answer with a silent stream when voice processing is off.
            // NOTE(review): this AudioContext is never closed — one leaks
            // per call answered in this branch.
            const emptyStream = new MediaStream();
            const audioContext = new (window.AudioContext || window.webkitAudioContext)();
            const dest = audioContext.createMediaStreamDestination();
            emptyStream.addTrack(dest.stream.getAudioTracks()[0]);
            call.answer(emptyStream);
        }
        call.on('stream', (remoteStream) => {
            // Add the remote participant (guard makes this idempotent)
            if (!participants.has(call.peer)) {
                const displayName = call.metadata?.displayName || 'Guest';
                addParticipant(call.peer, displayName);
                // Create an audio element to play this participant's stream
                const audio = document.createElement('audio');
                audio.srcObject = remoteStream;
                audio.autoplay = true;
                audio.id = 'audio-' + call.peer;
                audio.className = 'participant-audio';
                // Apply the current output device if setSinkId is supported
                if (audio.setSinkId && outputDeviceSelect.value !== 'default') {
                    audio.setSinkId(outputDeviceSelect.value).catch(err => {
                        console.error("Error setting audio output device:", err);
                    });
                }
                // Append to document so playback actually starts
                document.body.appendChild(audio);
                // Store audio element with participant for later mute/removal
                participants.get(call.peer).audio = audio;
                // Respect the global "mute audio" toggle for late joiners
                audio.muted = isAudioMuted;
                showNotification(`${displayName} joined the call`);
            }
        });
        call.on('close', () => {
            removeParticipant(call.peer);
        });
        call.on('error', (err) => {
            console.error("Call error:", err);
            showNotification("Call error: " + err);
        });
    } catch (error) {
        console.error("Error handling incoming call:", error);
        showNotification("Error handling call: " + error.message);
    }
}
// Generate a random room ID (8 base-36 characters) into the room field
document.getElementById('generateRoomBtn').addEventListener('click', () => {
    const roomId = Math.random().toString(36).substring(2, 10);
    document.getElementById('roomId').value = roomId;
});
// Join call button
// Validates the room/name inputs, requires voice processing to be running,
// then (demo only) dials the room ID directly as a peer ID — so at most
// two participants per room in this simplified version.
document.getElementById('joinCallBtn').addEventListener('click', async () => {
    const roomId = document.getElementById('roomId').value.trim();
    const displayName = document.getElementById('displayName').value.trim();
    if (!roomId) {
        showNotification("Please enter a room ID");
        return;
    }
    if (!displayName) {
        showNotification("Please enter your display name");
        return;
    }
    // Make sure voice processing is active before joining
    if (!isProcessing) {
        showNotification("Please start voice processing first");
        voiceTabBtn.click(); // Switch to voice tab
        return;
    }
    try {
        // Show the connecting state while the peer spins up
        document.getElementById('callStatusIndicator').className = 'status-indicator status-connecting';
        document.getElementById('callStatusText').textContent = 'Connecting...';
        // Initialize PeerJS (simulated in this demo)
        await initializePeer();
        // Connect to all peers in the room (simplified for the demo).
        // In a production app, you'd use a signaling server.
        // For the demo, we directly connect to the room ID as a peer ID,
        // which limits a room to two people.
        if (roomId !== peer.id) {
            try {
                // Call the peer with our processed audio
                currentCall = peer.call(roomId, outputNode.stream, {
                    metadata: { displayName, roomId }
                });
                // Handle the call (answer + stream/close/error wiring)
                handleIncomingCall(currentCall);
            } catch (error) {
                console.error(`Error connecting to peer ${roomId}:`, error);
            }
        }
        // Update UI to the connected state
        document.getElementById('joinCallBtn').disabled = true;
        document.getElementById('leaveCallBtn').disabled = false;
        document.getElementById('callStatusIndicator').className = 'status-indicator status-connected';
        document.getElementById('callStatusText').textContent = 'Connected to room';
        // Add ourselves to the participants list
        addParticipant(peer.id, displayName + ' (You)');
        showNotification(`Joined room: ${roomId}`);
    } catch (error) {
        console.error("Error joining call:", error);
        showNotification("Error joining call: " + error.message);
        document.getElementById('callStatusIndicator').className = 'status-indicator status-disconnected';
        document.getElementById('callStatusText').textContent = 'Failed to connect';
    }
});
// Voice activation test
// Cached DOM references for the voice-activation threshold test overlay
const voiceTestOverlay = document.getElementById('voiceTestOverlay');
const thresholdTestSlider = document.getElementById('thresholdTestSlider');
const currentLevelValue = document.getElementById('currentLevelValue');
const thresholdLevelValue = document.getElementById('thresholdLevelValue');
const voiceTestIndicator = document.getElementById('voiceTestIndicator');
const voiceTestStatus = document.getElementById('voiceTestStatus');
// requestAnimationFrame id for the test's level meter — presumably
// cancelled where the overlay is closed (managed outside this section)
let testAnimationId = null;
// Leave call button
// Closes the active call, tears down the peer, removes every participant,
// and resets the call-status UI.
document.getElementById('leaveCallBtn').addEventListener('click', () => {
    if (currentCall) {
        currentCall.close();
        currentCall = null;
    }
    // Close all connections and drop the peer so it is re-created next join
    if (peer) {
        peer.disconnect();
        peer.destroy();
        peer = null;
    }
    // Remove all participants (Map.forEach tolerates deleting the entry
    // currently being visited, which removeParticipant does)
    participants.forEach((participant, id) => {
        removeParticipant(id);
    });
    // Update UI back to the disconnected state
    document.getElementById('joinCallBtn').disabled = false;
    document.getElementById('leaveCallBtn').disabled = true;
    document.getElementById('callStatusIndicator').className = 'status-indicator status-disconnected';
    document.getElementById('callStatusText').textContent = 'Disconnected';
    showNotification("Left the call");
});
// Mute mic button
// Toggles the outgoing-audio mute flag, swaps the button icon/label, and
// routes the new state through updateCallStream (silent vs. processed track).
document.getElementById('toggleMicBtn').addEventListener('click', () => {
    isMicMuted = !isMicMuted;
    if (isMicMuted) {
        // Muted: crossed-out mic icon, label offers to unmute
        document.getElementById('toggleMicBtn').innerHTML = `
            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <line x1="1" y1="1" x2="23" y2="23"></line>
                <path d="M9 9v3a3 3 0 0 0 5.12 2.12M15 9.34V5a3 3 0 0 0-5.94-.6"></path>
                <path d="M17 16.95A7 7 0 0 1 5 12v-2m14 0v2a7 7 0 0 1-.11 1.23"></path>
                <line x1="12" y1="19" x2="12" y2="23"></line>
                <line x1="8" y1="23" x2="16" y2="23"></line>
            </svg>
            Unmute Mic
        `;
        updateCallStream(true);
    } else {
        // Unmuted: normal mic icon, label offers to mute
        document.getElementById('toggleMicBtn').innerHTML = `
            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/>
                <path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
                <line x1="12" y1="19" x2="12" y2="22"/>
            </svg>
            Mute Mic
        `;
        updateCallStream(false);
    }
});
// Mute audio button
// Toggles the incoming-audio mute flag, applies it to every participant's
// <audio> element, and swaps the button icon/label.
document.getElementById('toggleAudioBtn').addEventListener('click', () => {
    isAudioMuted = !isAudioMuted;
    // Mute/unmute all participant audio elements
    participants.forEach((participant) => {
        if (participant.audio) {
            participant.audio.muted = isAudioMuted;
        }
    });
    if (isAudioMuted) {
        // Muted: crossed-out speaker icon, label offers to unmute
        document.getElementById('toggleAudioBtn').innerHTML = `
            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"></polygon>
                <line x1="23" y1="9" x2="17" y2="15"></line>
                <line x1="17" y1="9" x2="23" y2="15"></line>
            </svg>
            Unmute Audio
        `;
    } else {
        // Unmuted: headphones icon, label offers to mute
        document.getElementById('toggleAudioBtn').innerHTML = `
            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <path d="M3 18v-6a9 9 0 0 1 18 0v6"></path>
                <path d="M21 19a2 2 0 0 1-2 2h-1a2 2 0 0 1-2-2v-3a2 2 0 0 1 2-2h3zM3 19a2 2 0 0 0 2 2h1a2 2 0 0 0 2-2v-3a2 2 0 0 0-2-2H3z"></path>
            </svg>
            Mute Audio
        `;
    }
});
// Change voice in call
// Switches the live voice while in a call: 'standard' resets the effect
// settings, 'clone_*' ids activate a clone profile, anything else is
// treated as a saved voice-profile id.
document.getElementById('activeVoice').addEventListener('change', function() {
    const value = this.value;
    if (value === 'standard') {
        // Reset to standard voice
        currentCloneProfile = null;
        currentVoiceProfile = 'standard';
        // Apply neutral default settings
        settings.pitchShift = 0;
        settings.formantShift = 0;
        settings.genderMorph = 50;
        settings.effect = 'none';
        settings.effectMix = 0;
    } else if (value.startsWith('clone_')) {
        // Activate clone voice
        const cloneId = value.replace('clone_', '');
        const clone = cloneProfiles.find(c => c.id === cloneId);
        if (clone) {
            activateCloneProfile(clone);
        }
    } else {
        // Activate a saved voice profile by id
        activateVoiceProfile(value);
    }
    showNotification(`Voice changed for the call`);
});
// Manage participants
// Register a participant in the registry and refresh the list UI.
function addParticipant(id, displayName) {
    participants.set(id, { id: id, displayName: displayName });
    updateParticipantsList();
}
// Unregister a participant, tearing down their audio element if present,
// then refresh the list UI.
function removeParticipant(id) {
    const entry = participants.get(id);
    if (entry && entry.audio) {
        // Detach the stream before removing the element
        entry.audio.srcObject = null;
        entry.audio.remove();
    }
    participants.delete(id);
    updateParticipantsList();
}
// Rebuilds the participants list UI from the `participants` map.
// Display names are user input, so they are HTML-escaped before being
// inserted via innerHTML (XSS fix).
function updateParticipantsList() {
    const list = document.getElementById('participantsList');
    if (participants.size === 0) {
        list.innerHTML = `
            <div style="color: var(--color-text-secondary); text-align: center; padding: 1rem 0;">
                No participants yet
            </div>
        `;
        return;
    }
    // Minimal HTML escaper for user-controlled display names
    const escapeHtml = (str) => String(str)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
    list.innerHTML = '';
    participants.forEach((participant) => {
        const item = document.createElement('div');
        item.style.padding = '0.5rem';
        item.style.marginBottom = '0.25rem';
        item.style.borderRadius = 'var(--border-radius)';
        item.style.backgroundColor = 'var(--color-foreground)';
        item.style.display = 'flex';
        item.style.alignItems = 'center';
        // Highlight our own entry with the primary color
        const isYou = participant.displayName.includes('(You)');
        item.innerHTML = `
            <div style="width: 2rem; height: 2rem; border-radius: 50%; background-color: ${isYou ? 'var(--color-primary)' : 'var(--color-surface-hover)'}; margin-right: 0.5rem; display: flex; align-items: center; justify-content: center; color: white;">
                ${escapeHtml(participant.displayName.charAt(0))}
            </div>
            <div>
                <div style="font-size: 0.875rem;">${escapeHtml(participant.displayName)}</div>
            </div>
        `;
        list.appendChild(item);
    });
}
// =======================================
// Notifications
// =======================================
// Timer for the currently visible notification, so a newer message restarts
// the hide countdown instead of being cut short by an older timer. (The
// original never cleared the previous timeout, so back-to-back
// notifications were hidden early by the first one's timer.)
let notificationTimer = null;
// Show a transient toast notification for `duration` milliseconds.
function showNotification(message, duration = 3000) {
    const notification = document.getElementById('notification');
    notification.textContent = message;
    notification.classList.add('show');
    if (notificationTimer) {
        clearTimeout(notificationTimer);
    }
    notificationTimer = setTimeout(() => {
        notification.classList.remove('show');
        notificationTimer = null;
    }, duration);
}
// =======================================
// Initialization
// =======================================
// Application bootstrap: feature check, restore the saved PTT key, seed a
// room ID, enumerate audio devices, seed example profiles, and lazily
// preload the ML voice models.
document.addEventListener('DOMContentLoaded', async () => {
    // Check for Web Audio API support before doing anything else
    if (!checkAudioSupport()) {
        showNotification("Your browser doesn't support required audio features");
        return;
    }
    // Load saved PTT key from localStorage, if any
    const savedPttKey = localStorage.getItem('ghostvoice-ptt-key');
    if (savedPttKey) {
        pttKeyCode = savedPttKey;
        document.getElementById('pttKeyDisplay').textContent = savedPttKey.replace('Key', '');
    }
    // Generate a random room ID by triggering the generate button
    document.getElementById('generateRoomBtn').click();
    // Enumerate audio input/output devices
    await enumerateDevices();
    // Create example voice/clone profiles (no-ops when lists are populated)
    createExampleProfiles();
    // Update profile list UI
    updateVoiceProfilesList();
    updateCloneProfilesList();
    // Try to load TensorFlow models in the background
    setTimeout(() => {
        loadVoiceModels().then(loaded => {
            if (loaded) {
                console.log("Voice models preloaded successfully");
            }
        });
    }, 3000); // Wait 3 seconds after page load to start preloading
    // Wire up the quality selector buttons (exclusive active state + radio)
    document.querySelectorAll('.quality-selector .preset-btn').forEach(btn => {
        btn.addEventListener('click', function() {
            // Remove active class from all
            document.querySelectorAll('.quality-selector .preset-btn').forEach(el => {
                el.classList.remove('active');
            });
            // Add active class to clicked
            this.classList.add('active');
            // Check the underlying radio button
            const radio = this.querySelector('input[type="radio"]');
            radio.checked = true;
        });
    });
    // Show a welcome message
    showNotification('Welcome to GhostVoice Studio');
});
// Create example profiles for demonstration
// Seeds one sample voice profile and one sample clone profile, but only
// when the respective lists are still empty.
function createExampleProfiles() {
    if (voiceProfiles.length === 0) {
        const exampleProfile = {
            id: 'profile_example',
            name: 'Deep Villain',
            description: 'Deep menacing voice with reverb',
            settings: {
                pitchShift: -5,
                formantShift: -4,
                genderMorph: 30,
                effect: 'reverb',
                effectMix: 40
            },
            type: 'manual',
            created: new Date()
        };
        voiceProfiles.push(exampleProfile);
    }
    if (cloneProfiles.length === 0) {
        const exampleClone = {
            id: 'clone_example',
            name: 'Morgan Narrator',
            samples: 3,
            settings: {
                similarity: 75,
                stability: 60,
                style: 40,
                emotionStrength: 70
            },
            useML: false,
            voiceData: {
                fundamentalFreq: 120,
                formants: [500, 1500, 2500],
                spectralCentroid: 1800
            },
            created: new Date()
        };
        cloneProfiles.push(exampleClone);
    }
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment