Created
October 26, 2023 11:03
-
-
Save mbrock/51a853f1375c9f6d6c42b1c413d3232f to your computer and use it in GitHub Desktop.
swa.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// swa.sh - a tool, for naught
// Copyright (C) 2023 Mikael Brockman
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
/**
 * Generate a random six-character word over the z-base-32 alphabet.
 *
 * One 32-bit value is drawn from the Web Crypto random source. Its upper
 * 30 bits are cut into six 5-bit groups, most significant first, and each
 * group selects one character of the alphabet. The two lowest bits are
 * discarded.
 *
 * @returns {string} Six characters taken from the alphabet below.
 */
function zb32word() {
  const base = "ybndrfg8ejkmcpqxot1uwisza345h769"
  const array = new Uint32Array(1)
  // Use the `crypto` global rather than `window.crypto` so the function
  // also works in environments that have no `window` object.
  crypto.getRandomValues(array)
  const bits = array[0]
  let word = ""
  for (let shift = 27; shift >= 2; shift -= 5) {
    word += base[(bits >>> shift) & 0x1f]
  }
  return word
}
/**
 * Build a random identifier by joining two words from `zb32word`.
 *
 * @returns {string} The two generated words, concatenated in call order.
 */
function gensym() {
  const head = zb32word()
  const tail = zb32word()
  return head + tail
}
/**
 * Adapts a push-style producer to the async iterator protocol.
 *
 * The `setup` callback receives `{ next, stop, fail }` and calls them to
 * push a value, end the stream, or end it with an error. Values pushed
 * while no consumer is waiting are buffered and handed out in order; a
 * `stop` or `fail` that arrives while nobody is waiting is remembered and
 * delivered after the buffer has been drained.
 *
 * Concurrent `next()` calls made while the buffer is empty share a single
 * pending promise, so they all observe the same result.
 */
class Stream {
  /**
   * @param {function({next: function(*): void, stop: function(): void,
   *   fail: function(*): void}): void} setup Called synchronously with the
   *   producer-side controls.
   */
  constructor(setup) {
    this.buffer = []
    this.promise = null
    this.resolve = null
    this.reject = null
    this.finished = false
    // `{ error }` wrapper for a failure nobody was waiting for; the wrapper
    // keeps falsy error values distinguishable from "no failure".
    this.failure = null

    const next = value => {
      if (this.finished) {
        return
      }
      const waiting = this.takeWaiting()
      if (waiting) {
        waiting.resolve({ value, done: false })
      } else {
        this.buffer.push(value)
      }
    }
    const stop = () => this.finish()
    const fail = error => this.finish({ error })
    setup({ next, stop, fail })
  }

  /**
   * Detach and return the settle functions of the pending `next()` promise.
   *
   * @returns {{resolve: Function, reject: Function}|null} `null` when no
   *   consumer is waiting.
   */
  takeWaiting() {
    if (!this.promise) {
      return null
    }
    const waiting = { resolve: this.resolve, reject: this.reject }
    this.promise = null
    this.resolve = null
    this.reject = null
    return waiting
  }

  /**
   * Mark the stream as ended. Only the first call has any effect.
   *
   * @param {{error: *}|null} failure Set when the stream ends with an error.
   */
  finish(failure = null) {
    if (this.finished) {
      return
    }
    this.finished = true
    const waiting = this.takeWaiting()
    if (!waiting) {
      // Nobody is waiting: keep the outcome for the next `next()` call.
      this.failure = failure
    } else if (failure) {
      waiting.reject(failure.error)
    } else {
      waiting.resolve({ value: undefined, done: true })
    }
  }

  /**
   * @returns {Promise<{value: *, done: boolean}>} The next buffered or
   *   future value, or `done: true` once the stream has ended. Rejects once
   *   with the error passed to `fail`/`throw`.
   */
  async next() {
    if (this.buffer.length > 0) {
      return { value: this.buffer.shift(), done: false }
    }
    if (this.failure) {
      const { error } = this.failure
      this.failure = null
      throw error
    }
    if (this.finished) {
      return { value: undefined, done: true }
    }
    if (!this.promise) {
      this.promise = new Promise((resolve, reject) => {
        this.resolve = resolve
        this.reject = reject
      })
    }
    return this.promise
  }

  /**
   * End the stream from the consumer side (e.g. `break` in `for await`).
   * Buffered values and any stored failure are discarded.
   *
   * @param {*} [value] Echoed back in the returned result.
   * @returns {Promise<{value: *, done: boolean}>}
   */
  return(value) {
    this.buffer = []
    this.finish()
    this.failure = null
    return Promise.resolve({ value, done: true })
  }

  /**
   * End the stream with an error: a waiting `next()` is rejected, otherwise
   * the error is delivered by the next `next()` call.
   *
   * @param {*} error
   * @returns {Promise<{value: undefined, done: boolean}>}
   */
  throw(error) {
    this.finish({ error })
    return Promise.resolve({ value: undefined, done: true })
  }

  [Symbol.asyncIterator]() {
    return this
  }

  /**
   * Interleave several async iterators, yielding each value as soon as any
   * source produces one, until every source is done.
   *
   * @param {Array<{next: function(): Promise}>} iterators
   */
  static async *merge(iterators) {
    // Keyed by source index so that dropping a finished source never
    // disturbs the bookkeeping of the others.
    const pending = new Map()
    const pull = source => {
      pending.set(
        source,
        iterators[source].next().then(result => ({ result, source }))
      )
    }
    iterators.forEach((_, source) => pull(source))
    while (pending.size > 0) {
      const { result, source } = await Promise.race(pending.values())
      if (result.done) {
        pending.delete(source)
      } else {
        yield result.value
        pull(source)
      }
    }
  }
}
/**
 * Base class for custom elements that render into an open shadow root.
 *
 * The constructor attaches the shadow root and fills it from an HTML
 * template string; `$`/`$$` query inside that shadow root and `tag` builds
 * new elements.
 */
class BaseComponent extends HTMLElement {
  /**
   * @param {string} templateContent HTML markup for the initial shadow DOM.
   */
  constructor(templateContent) {
    super()
    this.attachShadow({ mode: "open" })
    this.appendTemplate(templateContent)
  }

  /**
   * @param {string} selector
   * @returns {Element|null} First match inside the shadow root.
   */
  $(selector) {
    return this.shadowRoot.querySelector(selector)
  }

  /**
   * @param {string} selector
   * @returns {NodeList} All matches inside the shadow root.
   */
  $$(selector) {
    return this.shadowRoot.querySelectorAll(selector)
  }

  /**
   * Parse `templateContent` as HTML and append a copy to the shadow root.
   *
   * @param {string} templateContent
   */
  appendTemplate(templateContent) {
    const template = document.createElement("template")
    template.innerHTML = templateContent
    this.shadowRoot.appendChild(template.content.cloneNode(true))
  }

  /**
   * Create an element with the given attributes and children.
   *
   * @param {string} tagName
   * @param {Object<string, *>} [attributes] Set with `setAttribute`.
   * @param {Iterable<string|Node>} [children] Strings become text nodes;
   *   any DOM node (element, text, fragment, SVG element, ...) is appended
   *   as is.
   * @returns {HTMLElement}
   * @throws {Error} When a child is neither a string nor a DOM node.
   */
  tag(tagName, attributes = {}, children = []) {
    const element = document.createElement(tagName)
    for (const [key, value] of Object.entries(attributes)) {
      element.setAttribute(key, value)
    }
    for (const child of children) {
      if (typeof child === "string") {
        element.appendChild(document.createTextNode(child))
      } else if (child instanceof Node) {
        element.appendChild(child)
      } else {
        throw new Error("Invalid child type")
      }
    }
    return element
  }
}
/**
 * Start continuous speech recognition and expose its output as a `Stream`.
 *
 * Events pushed onto the stream:
 * - `{ type: "Result", timestamp }` once per recognition result event,
 * - `{ type: "FinalTranscript" | "InterimTranscript", transcript, grade,
 *   timestamp, id }` for every result from `event.resultIndex` onwards
 *   (`grade` is only set for final results),
 * - `{ type: "NoSpeech", timestamp }` and `{ type: "NetworkDown",
 *   timestamp }` for the corresponding recognition errors.
 *
 * Any other recognition error fails the stream. Recognition is restarted
 * whenever it ends, except after such a fatal error.
 *
 * @param {{language?: string}} [options] `language` is assigned to the
 *   recognizer's `lang`; defaults to "en-US".
 * @returns {Stream}
 * @throws {TypeError} When the browser exposes neither `SpeechRecognition`
 *   nor `webkitSpeechRecognition`.
 */
function speechRecognitionEventStream({ language = "en-US" } = {}) {
  return new Stream(({ next, fail }) => {
    const Recognition =
      window.SpeechRecognition || window.webkitSpeechRecognition
    if (!Recognition) {
      throw new TypeError("SpeechRecognition is not supported in this browser")
    }
    const recognition = new Recognition()
    // Set once the stream has been failed, so `onend` stops restarting.
    let failed = false

    recognition.interimResults = true
    recognition.continuous = true
    recognition.lang = language

    recognition.onresult = event => {
      const timestamp = new Date().toISOString()
      next({ type: "Result", timestamp })
      for (const result of Array.from(event.results).slice(event.resultIndex)) {
        const best = result[0]
        next({
          type: result.isFinal ? "FinalTranscript" : "InterimTranscript",
          transcript: best.transcript,
          grade: result.isFinal ? confidenceGrade(best.confidence) : undefined,
          timestamp,
          id: gensym(),
        })
      }
    }

    recognition.onerror = error => {
      const timestamp = new Date().toISOString()
      if (error.error === "no-speech") {
        next({ type: "NoSpeech", timestamp })
      } else if (error.error === "network") {
        next({ type: "NetworkDown", timestamp })
      } else {
        failed = true
        fail(error)
      }
    }

    recognition.onend = () => {
      // Restarting after a fatal error (for example a denied microphone
      // permission) would loop: start -> error -> end -> start ...
      if (!failed) {
        recognition.start()
      }
    }

    recognition.start()
  })
}
/**
 * Thin wrapper around `getUserMedia` + `MediaRecorder` that collects the
 * recorded chunks in memory and hands them out as a single `Blob`.
 */
class AudioRecorder {
  constructor() {
    this.mediaRecorder = null
    this.chunks = []
    this.stream = null
    this.startTime = null
  }

  /**
   * Request the microphone and create the recorder. Does nothing when a
   * media stream has already been acquired.
   */
  async setup() {
    if (this.stream) {
      return
    }
    this.stream = await navigator.mediaDevices.getUserMedia({ audio: true })
    this.mediaRecorder = new MediaRecorder(this.stream)
    this.mediaRecorder.ondataavailable = e => {
      this.chunks.push(e.data)
    }
  }

  /**
   * Begin recording, requesting data every 100 ms, unless the recorder is
   * already active.
   */
  async start() {
    await this.setup()
    if (this.mediaRecorder.state === "inactive") {
      this.mediaRecorder.start(100)
      this.startTime = Date.now()
    }
  }

  /**
   * @returns {Blob} Everything recorded so far; the chunks are kept.
   */
  dump() {
    // Label the blob with the format the recorder reports; fall back to
    // the previously hard-coded type when none is available.
    const recordedType = this.mediaRecorder ? this.mediaRecorder.mimeType : ""
    return new Blob(this.chunks, {
      type: recordedType || "audio/webm; codecs=opus",
    })
  }

  /**
   * Return the current recording and clear the chunk list.
   *
   * @returns {Blob}
   */
  takeRecording() {
    const blob = this.dump()
    this.chunks = []
    return blob
  }

  /**
   * Stop recording.
   *
   * @returns {Promise<Blob>} The recording made since the last stop. When
   *   there is no recorder, or it is not recording, the promise resolves
   *   immediately with whatever chunks are held, because no `stop` event
   *   would ever fire in that state.
   */
  stop() {
    const recorder = this.mediaRecorder
    if (!recorder || recorder.state === "inactive") {
      return Promise.resolve(this.takeRecording())
    }
    return new Promise(resolve => {
      recorder.onstop = () => {
        resolve(this.takeRecording())
      }
      recorder.stop()
    })
  }

  /**
   * Stop and immediately start again.
   *
   * @returns {Promise<Blob>} The recording that was just finished.
   */
  async restart() {
    console.info("restarting audio")
    const blob = await this.stop()
    await this.start()
    return blob
  }
}
/**
 * Upload an audio file to the OpenAI transcription endpoint and return the
 * parsed JSON response.
 *
 * The request asks for the "whisper-1" model with the "verbose_json"
 * response format; the file is sent under the name "audio.webm".
 *
 * @param {Object} options
 * @param {Blob} options.file Audio data to transcribe.
 * @param {string} options.token Sent as the bearer token.
 * @param {string} [options.language] Value of the "language" field.
 * @param {string} [options.prompt] Value of the "prompt" field.
 * @returns {Promise<Object>} The decoded response body.
 * @throws {Error} When the response status is not OK; the response text is
 *   logged with `console.error` first.
 */
async function transcribe({ file, token, language = "en", prompt = "" }) {
  const endpoint = "https://api.openai.com/v1/audio/transcriptions"

  const form = new FormData()
  form.append("file", file, "audio.webm")
  const textFields = [
    ["model", "whisper-1"],
    ["response_format", "verbose_json"],
    ["prompt", prompt],
    ["language", language],
  ]
  for (const [name, value] of textFields) {
    form.append(name, value)
  }

  const response = await fetch(endpoint, {
    method: "POST",
    body: form,
    headers: { Authorization: `Bearer ${token}` },
  })

  if (response.ok) {
    return await response.json()
  }
  console.error(await response.text())
  throw new Error(`HTTP error! status: ${response.status}`)
}
/**
 * Return the value stored in `localStorage` under `key`, asking the user
 * for it with `prompt` when nothing (or an empty string) is stored.
 *
 * A newly entered value is saved for later calls. When the prompt is
 * cancelled or left empty nothing is stored, so the user is asked again
 * next time instead of the string "null" being persisted as the value.
 *
 * @param {Object} options
 * @param {string} options.key Storage key.
 * @param {string} [options.message] Prompt text; defaults to `key`.
 * @returns {Promise<string|null>} The stored or entered value; `null` when
 *   the prompt was cancelled.
 */
async function demand({ key, message = key }) {
  const stored = localStorage.getItem(key)
  if (stored) {
    return stored
  }
  const entered = prompt(message)
  if (entered) {
    localStorage.setItem(key, entered)
  }
  return entered
}
/**
 * A single timeout that can be (re)armed and cancelled.
 */
class ResettableTimer {
  /**
   * @param {number} timeoutDuration Delay passed to `setTimeout`.
   * @param {Function} onTimeout Callback passed to `setTimeout`.
   */
  constructor(timeoutDuration, onTimeout) {
    Object.assign(this, { timeoutDuration, onTimeout, timeoutId: null })
  }

  /** Arm the timer; identical to `reset()`. */
  start() {
    this.reset()
  }

  /** Cancel any pending timeout and schedule a fresh one. */
  reset() {
    const { timeoutId, onTimeout, timeoutDuration } = this
    clearTimeout(timeoutId)
    this.timeoutId = setTimeout(onTimeout, timeoutDuration)
  }

  /** Cancel the pending timeout and forget its id. */
  stop() {
    const pendingId = this.timeoutId
    this.timeoutId = null
    clearTimeout(pendingId)
  }
}
/**
 * `<swash-dictaphone>`: shows live speech recognition output and replaces
 * each final browser transcript with a transcription of the recorded audio
 * obtained through `transcribe`.
 *
 * Attributes read on connect:
 * - `db`:   `localStorage` key under which the event log is stored,
 * - `lang`: recognition language (default "en-US"); the part before the
 *           first "-" is passed to `transcribe`.
 *
 * Every live event is appended to the stored log, and the log is replayed
 * (without saving or transcribing again) when the element connects.
 */
class SwashDictaphone extends BaseComponent {
  constructor() {
    super(`
      <link rel="stylesheet" href="index.css">
      <article>
        <div class="final"><p></p></div>
        <div class="interim"></div>
      </article>
      <audio controls></audio>
    `)
  }

  /**
   * Replay stored events, start recognition and recording, then process
   * live events until the recognition stream ends.
   */
  async connectedCallback() {
    this.db = this.getAttribute("db")
    this.loadAndHandleEvents()

    const language = this.getAttribute("lang") || "en-US"
    this.shortLanguage = language.split("-")[0]
    this.recognitionEventStream = speechRecognitionEventStream({
      language,
    })

    this.recorder = new AudioRecorder()
    await this.recorder.start()

    // Fires 5 s after the last live interim transcript: restarts the
    // recording, makes sure an empty trailing paragraph exists, and re-arms
    // itself.
    this.timer = new ResettableTimer(5000, async () => {
      try {
        await this.recorder.restart()
        if (!this.$(".final p:empty:last-child")) {
          this.$(".final").appendChild(this.tag("p"))
        }
        this.timer.reset()
      } catch (error) {
        console.error(error)
      }
    })

    for await (const event of this.recognitionEventStream) {
      console.log("ok", event)
      // Deliberately not awaited: a final transcript waits for a network
      // round trip and must not hold up the events that follow it.
      this.handleEvent(event, true).catch(error => console.error(error))
    }
  }

  /**
   * @returns {Array<Object>} The event log stored under `this.db`.
   */
  storedEvents() {
    return JSON.parse(localStorage.getItem(this.db) || "[]")
  }

  /** Replay the stored event log into the DOM without saving it again. */
  loadAndHandleEvents() {
    for (const event of this.storedEvents()) {
      this.handleEvent(event, false).catch(error => console.error(error))
    }
  }

  /**
   * Append `event` to the stored event log.
   *
   * @param {Object} event
   */
  saveEvent(event) {
    const events = [...this.storedEvents(), event]
    localStorage.setItem(this.db, JSON.stringify(events))
  }

  /** Delete the stored log and clear the displayed transcripts. */
  reset() {
    localStorage.removeItem(this.db)
    // Leave one empty paragraph behind: final transcripts are appended to
    // `.final p:last-of-type`, which must keep matching after a reset.
    this.$(".final").innerHTML = "<p></p>"
    this.$(".interim").textContent = ""
  }

  /**
   * Apply one event to the DOM and scroll the newest content into view.
   *
   * @param {Object} event Event as produced by the recognition stream.
   * @param {boolean} shouldSave `true` for live events (stored and, for
   *   final transcripts, sent for transcription); `false` when replaying.
   */
  async handleEvent(event, shouldSave) {
    if (shouldSave) {
      this.saveEvent(event)
    }
    await this.applyEvent(event, shouldSave)
    // scroll to bottom smoothly, centering the last line
    this.$(".final > :last-child, .interim").scrollIntoView({
      behavior: "smooth",
      block: "center",
    })
  }

  /**
   * Dispatch on the event type; event types not listed are ignored.
   *
   * @param {Object} event
   * @param {boolean} shouldSave
   */
  async applyEvent(event, shouldSave) {
    switch (event.type) {
      case "Result":
        this.$(".interim").textContent = ""
        break
      case "FinalTranscript":
        await this.showFinalTranscript(event, shouldSave)
        break
      case "InterimTranscript":
        this.$(".interim").textContent += event.transcript
        if (shouldSave) {
          this.timer.reset()
        }
        break
      default:
        break
    }
  }

  /**
   * Run a spoken command, or append the transcript to the last paragraph
   * and (for live events) replace it with a transcription of the audio.
   *
   * @param {Object} event A "FinalTranscript" event.
   * @param {boolean} shouldSave
   */
  async showFinalTranscript(event, shouldSave) {
    // A Map rather than an object literal, so that a transcript such as
    // "constructor" cannot match a property inherited from Object.prototype.
    const commands = new Map([["reset bro", () => this.reset()]])
    const command = commands.get(event.transcript.trim().toLowerCase())
    if (command) {
      await command()
      return
    }

    const paragraph = this.$(".final p:last-of-type")
    const recording = this.tag(
      "span",
      {
        "data-grade": event.grade,
        "data-id": event.id,
        "data-timestamp": event.timestamp,
        class: shouldSave ? "recording" : "",
      },
      [event.transcript]
    )
    paragraph.appendChild(recording)

    if (shouldSave) {
      await this.refineTranscript(paragraph, recording)
    }
    this.$(".interim").textContent = ""
  }

  /**
   * Transcribe the audio recorded so far and swap the result in for the
   * browser transcript. When transcription fails the error is logged, the
   * placeholder is removed and the browser transcript stays in place.
   *
   * @param {Element} paragraph Paragraph that holds `recording`.
   * @param {Element} recording Span with the browser's transcript.
   */
  async refineTranscript(paragraph, recording) {
    const target = this.tag("span", {
      class: "whisper transcription pending",
    })
    paragraph.appendChild(target)

    let transcription
    try {
      transcription = await transcribe({
        file: this.recorder.dump(),
        token: await demand({
          key: "openai-token",
          message: "Please enter your OpenAI API token",
        }),
        language: this.shortLanguage,
      })
    } catch (error) {
      console.error(error)
      target.remove()
      return
    }

    target.classList.remove("pending")
    target.classList.add("done")
    // remove all other transcriptions in the same paragraph
    for (const span of paragraph.querySelectorAll(".whisper.transcription")) {
      if (span !== target) {
        span.remove()
      }
    }
    recording.remove()
    // Example response:
    // {"task":"transcribe","language":"english","duration":2.94,"segments":[...],"text":"Hello."}
    console.info(transcription)
    target.textContent = transcription.text
  }
}

// Register the component as the <swash-dictaphone> custom element.
customElements.define("swash-dictaphone", SwashDictaphone)
/**
 * Map a recognition confidence to a letter grade.
 *
 * The grade is that of the first threshold the confidence strictly
 * exceeds: 0.95 -> "A+", 0.9 -> "A", 0.8 -> "B", 0.7 -> "C", 0.6 -> "D".
 * Anything else, including values that compare false against every
 * threshold, is graded "F".
 *
 * @param {number} confidence
 * @returns {string} One of "A+", "A", "B", "C", "D", "F".
 */
function confidenceGrade(confidence) {
  const thresholds = [
    [0.95, "A+"],
    [0.9, "A"],
    [0.8, "B"],
    [0.7, "C"],
    [0.6, "D"],
  ]
  const match = thresholds.find(([minimum]) => confidence > minimum)
  return match ? match[1] : "F"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment