Converts your speech into text, and then back into speech in mostly real time using the Web Speech API. (Chrome only.)
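Two variants follow. The first sends each recognised phrase to a local OpenTTS server for synthesis and plays back the returned audio; it assumes a server at http://localhost:5500/ (OpenTTS can typically be started with `docker run -it -p 5500:5500 synesthesiam/opentts:en`, though check the OpenTTS docs for the current image). The second uses the browser's built-in speechSynthesis voices instead, so it needs no server.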
<html>
<head>
  <style>
    * { font-family: Arial, Helvetica, sans-serif; }
    div {
      background: #00000008;
      margin: 10px;
      min-height: 1em;
      padding: 10px;
      border-radius: 5px;
    }
  </style>
</head>
<body>
  OpenTTS URL
  <input type="url" id="api" name="api" value="http://localhost:5500/">
  Use Coqui (slow):
  <input type="checkbox" id="coqui" checked />
  <button id="play">Begin</button><br>
  <hr>
  Status: <div id="status"></div>
  <hr>
  Recognised: <div id="text"></div>
  Queue: <ul id="queue"></ul>
  Speech: <div id="speech"></div>
  <audio id="audio" controls autoplay></audio>
</body>
<script>
// STT: https://www.google.com/intl/en/chrome/demos/speech.html
// TTS: https://github.com/mdn/dom-examples/tree/main/web-speech-api/speak-easy-synthesis
const lang = 'en-AU';
const voice = 'larynx:southern_english_female-glow_tts'; // 'Google UK English Female';
const coquiVoice = 'coqui-tts:en_vctk';
const coquiSpeaker = 'p259';

const playButton = document.getElementById('play');
const resultBox = document.getElementById('text');
const recognitionStatusBox = document.getElementById('status');
const speechBox = document.getElementById('speech');
const queueList = document.getElementById('queue');
const coquiCheckbox = document.getElementById('coqui');
const audio = document.getElementById('audio');
const apiInput = document.getElementById('api');

let isSpeaking = false;
let isListening = false;
let startTimestamp = 0; // set when listening begins; used to classify 'not-allowed' errors
let recognitionResults = '';
let recognition = new webkitSpeechRecognition();
let synthDownloadQueue = [];

recognition.lang = lang;
recognition.continuous = true;
recognition.interimResults = true;
recognition.maxAlternatives = 1;
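// continuous + interimResults keep the session alive across pauses and
// surface partial transcripts while the user is still talking; the handlers
// below drive the status display and restart recognition if it stops early.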
recognition.onstart = function() {
  recognitionStatusBox.innerText = 'Recognition started. Waiting...';
};
recognition.onerror = function(event) {
  if (event.error == 'no-speech') {
    recognitionStatusBox.innerText = 'No speech available';
  }
  if (event.error == 'audio-capture') {
    recognitionStatusBox.innerText = 'No microphone available';
  }
  if (event.error == 'not-allowed') {
    // A failure within 100 ms of start means access was already blocked;
    // otherwise the user denied the permission prompt.
    if (event.timeStamp - startTimestamp < 100) {
      recognitionStatusBox.innerText = 'Web Speech API Blocked';
    } else {
      recognitionStatusBox.innerText = 'Web Speech API Denied';
    }
  }
};
recognition.onend = function(e) {
  recognitionStatusBox.innerText = 'Recognition ended.';
  if (isListening) {
    console.warn('recognition has ended early', e);
    recognition.start();
  }
};
recognition.onresult = function(event) {
  let interim = '';
  for (let i = event.resultIndex; i < event.results.length; ++i) {
    if (event.results[i].isFinal) {
      recognitionResults = event.results[i][0].transcript;
      console.log('speaking', event.results[i]);
      if (event.results[i].length > 1) console.warn('DEBUG: additional alternatives', event.results[i]);
      speak(recognitionResults);
    } else {
      interim += event.results[i][0].transcript;
    }
  }
  resultBox.innerHTML = linebreak(recognitionResults);
  recognitionStatusBox.innerHTML = linebreak(interim);
};
function stopListening() {
  console.log('stopped listening');
  isListening = false;
  recognition.stop();
  playButton.innerText = 'Begin';
}
function startListen() {
  if (isListening)
    stopListening();
  isListening = true;
  startTimestamp = performance.now(); // reference point for the 'not-allowed' check
  recognition.start();
  resultBox.innerHTML = '';
  playButton.innerText = 'End';
}
function speak(words) {
  if (words === "")
    return false;
  /** Downloads the synth for the words and returns a URL for the blob */
  const download = async (words, attempts = 3) => {
    try {
      const url = new URL('/api/tts', apiInput.value);
      if (coquiCheckbox.checked) {
        url.searchParams.append('voice', coquiVoice);
        url.searchParams.append('speakerId', coquiSpeaker);
      } else {
        url.searchParams.append('voice', voice);
      }
      url.searchParams.append('lang', 'en');
      url.searchParams.append('text', words);
      url.searchParams.append('vocoder', 'medium'); // quality
      url.searchParams.append('ssml', false);       // SSML support
      console.log('requesting ', url.toString());
      const response = await fetch(url);
      if (!response.ok) {
        console.error('failed to synth', response);
        return;
      }
      const blob = await response.blob();
      return URL.createObjectURL(blob);
    } catch (e) {
      if (attempts <= 0) {
        console.error('DOWNLOAD ABORTED', e);
        return; // caller must handle an undefined URL
      }
      console.warn('failed to download clip, trying again in some time', e);
      return new Promise((resolve) => {
        setTimeout(() => download(words, attempts - 1).then(r => resolve(r)), 250);
      });
    }
  };
  synthDownloadQueue.push({ words, synth: download(words) });
  updateDownloadQueue();
  if (!isSpeaking) {
    startSpeaking();
  }
}
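// Downloads are kicked off eagerly the moment a phrase is queued, so the
// next clip is usually ready by the time the current one finishes playing;
// this overlap is what keeps the loop "mostly real time". With the defaults
// above, a queued Coqui request would look roughly like (text value is an
// illustrative placeholder):
//   http://localhost:5500/api/tts?voice=coqui-tts%3Aen_vctk&speakerId=p259&lang=en&text=hello+world&vocoder=medium&ssml=false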
async function startSpeaking() {
  const play = (src) => new Promise((resolve) => {
    audio.src = src;
    audio.onended = resolve;
    audio.onerror = resolve; // don't stall the queue on a playback error
    audio.play();
  });
  isSpeaking = true;
  while (synthDownloadQueue.length > 0) {
    const download = synthDownloadQueue.shift();
    updateDownloadQueue();
    speechBox.innerText = download.words + "... (downloading)";
    const url = await download.synth;
    if (!url) { // download failed after all retries; skip this clip
      speechBox.innerText = '';
      continue;
    }
    speechBox.innerText = download.words;
    console.log('playing', url);
    await play(url);
    URL.revokeObjectURL(url); // release the blob once played
    speechBox.innerText = '';
  }
  isSpeaking = false;
}
function updateDownloadQueue() {
  let html = '';
  for (const download of synthDownloadQueue)
    html = `<li><div>${download.words}</div></li>${html}`; // newest entry first
  queueList.innerHTML = html;
}
playButton.addEventListener('click', () => {
  if (isListening) {
    stopListening();
  } else {
    startListen();
  }
});
function capitalize(s) {
  const first_char = /\S/;
  return s.replace(first_char, function(m) { return m.toUpperCase(); });
}
function linebreak(s) {
  const two_line = /\n\n/g;
  const one_line = /\n/g;
  return s.replace(two_line, '<p></p>').replace(one_line, '<br>');
}
document.addEventListener('DOMContentLoaded', () => {
  //startListen();
});
</script>
</html>
The second file is the speechSynthesis-only variant; no OpenTTS server is required.
<html>
<head>
  <style>
    * { font-family: Arial, Helvetica, sans-serif; }
    div {
      background: #00000008;
      margin: 10px;
      min-height: 1em;
      padding: 10px;
      border-radius: 5px;
    }
  </style>
</head>
<body>
  <div>
    <input type="range" id="pitch" name="pitch" min="0" max="2" value="1" step="0.1">
    <label for="pitch">Pitch</label>
  </div>
  <div>
    <input type="range" id="rate" name="rate" min="0" max="2" value="1" step="0.1">
    <label for="rate">Rate</label>
  </div>
  <button id="play">Begin</button><br>
  <hr>
  Status: <div id="status"></div>
  <hr>
  Recognised: <div id="text"></div>
  Queue: <ul id="queue"></ul>
  Speech: <div id="speech"></div>
</body>
<script>
// STT: https://www.google.com/intl/en/chrome/demos/speech.html
// TTS: https://github.com/mdn/dom-examples/tree/main/web-speech-api/speak-easy-synthesis
const lang = 'en-US';
const voice = 'Microsoft Catherine - English (Australia)'; // 'Google UK English Female';
const synth = window.speechSynthesis;

const playButton = document.getElementById('play');
const resultBox = document.getElementById('text');
const recognitionStatusBox = document.getElementById('status');
const speechBox = document.getElementById('speech');
const queueList = document.getElementById('queue');
const pitchRange = document.getElementById('pitch');
const rateRange = document.getElementById('rate');

let voices = [];
function populateVoices() {
  voices = synth.getVoices().sort(function (a, b) {
    const aname = a.name.toUpperCase();
    const bname = b.name.toUpperCase();
    if (aname < bname) {
      return -1;
    } else if (aname == bname) {
      return 0;
    } else {
      return +1;
    }
  });
  console.log('voices', voices);
}
synth.addEventListener('voiceschanged', () => populateVoices());
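// Chrome loads its voice list asynchronously, so getVoices() can return an
// empty array on first call; voiceschanged fires once the list is actually
// populated (say() below also retries populateVoices() as a fallback).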
let isSynthing = false;
let isListening = false;
let startTimestamp = 0; // set when listening begins; used to classify 'not-allowed' errors
let recognitionResults = '';
let recognition = new webkitSpeechRecognition();
let synthQueue = [];

recognition.lang = lang;
recognition.continuous = true;
recognition.interimResults = true;
recognition.maxAlternatives = 1;
recognition.onstart = function() {
  recognitionStatusBox.innerText = 'Recognition started. Waiting...';
};
recognition.onerror = function(event) {
  if (event.error == 'no-speech') {
    recognitionStatusBox.innerText = 'No speech available';
  }
  if (event.error == 'audio-capture') {
    recognitionStatusBox.innerText = 'No microphone available';
  }
  if (event.error == 'not-allowed') {
    // A failure within 100 ms of start means access was already blocked;
    // otherwise the user denied the permission prompt.
    if (event.timeStamp - startTimestamp < 100) {
      recognitionStatusBox.innerText = 'Web Speech API Blocked';
    } else {
      recognitionStatusBox.innerText = 'Web Speech API Denied';
    }
  }
};
recognition.onend = function(e) {
  recognitionStatusBox.innerText = 'Recognition ended.';
  if (isListening) {
    console.warn('recognition has ended early', e);
    recognition.start();
  }
};
recognition.onresult = function(event) {
  let interim = '';
  for (let i = event.resultIndex; i < event.results.length; ++i) {
    if (event.results[i].isFinal) {
      recognitionResults = event.results[i][0].transcript;
      console.log('speaking', event.results[i]);
      if (event.results[i].length > 1) console.warn('DEBUG: additional alternatives', event.results[i]);
      speak(recognitionResults);
    } else {
      interim += event.results[i][0].transcript;
    }
  }
  resultBox.innerHTML = linebreak(recognitionResults);
  recognitionStatusBox.innerHTML = linebreak(interim);
};
function stopListening() {
  console.log('stopped listening');
  isListening = false;
  recognition.stop();
  playButton.innerText = 'Begin';
}
function startListen() {
  if (isListening)
    stopListening();
  isListening = true;
  startTimestamp = performance.now(); // reference point for the 'not-allowed' check
  recognition.start();
  resultBox.innerHTML = '';
  playButton.innerText = 'End';
}
function speak(words) {
  if (words === "")
    return false;
  synthQueue.push(words);
  updateQueue();
  if (!isSynthing) {
    startSynthQueue();
  }
}
async function startSynthQueue() {
  const say = (words) => new Promise((resolve, reject) => {
    const utterThis = new SpeechSynthesisUtterance(words);
    utterThis.onend = function(event) {
      console.log("SpeechSynthesisUtterance.onend");
      recognitionStatusBox.innerText = "Synth Ended";
      resolve(words);
    };
    utterThis.onerror = function(event) {
      console.error("SpeechSynthesisUtterance.onerror", event);
      recognitionStatusBox.innerText = "Synth Errored";
      reject(event);
    };
    if (voices.length == 0)
      populateVoices();
    for (let i = 0; i < voices.length; i++) {
      if (voices[i].name === voice) {
        utterThis.voice = voices[i];
        break;
      }
    }
    utterThis.pitch = parseFloat(pitchRange.value);
    utterThis.rate = parseFloat(rateRange.value);
    synth.speak(utterThis);
    speechBox.innerText = words;
  });
  isSynthing = true;
  while (synthQueue.length > 0) {
    const words = synthQueue.shift();
    updateQueue();
    try {
      await say(words);
    } catch (e) {
      console.warn('skipping failed utterance', e); // keep draining the queue on error
    }
  }
  isSynthing = false;
  speechBox.innerText = '';
}
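// say() wraps the utterance's end/error events in a Promise so the while
// loop above plays each queued phrase strictly in order, one at a time.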
function updateQueue() {
  let html = '';
  for (const w of synthQueue)
    html = `<li><div>${w}</div></li>${html}`; // newest entry first
  queueList.innerHTML = html;
}
playButton.addEventListener('click', () => {
  if (isListening) {
    stopListening();
  } else {
    startListen();
  }
});
function capitalize(s) {
  const first_char = /\S/;
  return s.replace(first_char, function(m) { return m.toUpperCase(); });
}
function linebreak(s) {
  const two_line = /\n\n/g;
  const one_line = /\n/g;
  return s.replace(two_line, '<p></p>').replace(one_line, '<br>');
}
document.addEventListener('DOMContentLoaded', () => {
  //startListen();
});
</script>
</html>