mbrock · October 26, 2023 11:03
diff --git a/swash.js b/swash.js
 // swa.sh - a tool, for naught
 // Copyright (C) 2023  Mikael Brockman
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 //
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU Affero General Public License for more details.
 //
 // You should have received a copy of the GNU Affero General Public License
 // along with this program.  If not, see <http://www.gnu.org/licenses/>.

 /**
  * Produce one random six-character word in the z-base-32 alphabet.
  *
  * A single 32-bit value is drawn with `window.crypto.getRandomValues`; bits
  * 31..2 are consumed five at a time, most significant group first, and each
  * group selects one character. The two lowest bits are unused.
  *
  * @returns {string} six characters taken from the alphabet below
  */
 function zb32word() {
  const alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769"
  const cell = new Int32Array(1)
  window.crypto.getRandomValues(cell)
  const [bits] = cell

  let word = ""
  for (let shift = 27; shift >= 2; shift -= 5) {
    // `>>>` treats the signed sample as unsigned before shifting.
    word += alphabet[(bits >>> shift) & 0x1f]
  }
  return word
 }

 /**
  * Build an identifier by joining two separately generated `zb32word()` words.
  *
  * @returns {string} the first word followed by the second
  */
 function gensym() {
  const head = zb32word()
  const tail = zb32word()
  return head + tail
 }

 /**
  * Push-driven async iterator.
  *
  * The `setup` callback receives `{ next, stop, fail }` and uses them to feed
  * the stream. Values pushed while no consumer is waiting are buffered and
  * handed out in order. End-of-stream and failure are remembered, so they can
  * be signalled before anyone is waiting and are still observed by later
  * `next()` calls once the buffer has drained.
  */
 class Stream {
  /**
   * @param {function({next: function, stop: function, fail: function}): void} setup
   *   invoked synchronously with the producer callbacks
   */
  constructor(setup) {
    this.buffer = []
    this.promise = null
    this.resolve = null
    this.reject = null
    this.finished = false
    // Stored as `{ error }` so that even a falsy error value marks failure.
    this.failure = null

    const next = value => {
      if (this.finished || this.failure) {
        return
      }
      if (this.promise) {
        this.settle(this.resolve, { value, done: false })
      } else {
        this.buffer.push(value)
      }
    }

    setup({
      next,
      stop: () => this.finish(),
      fail: error => this.abort(error),
    })
  }

  // Settle the parked consumer promise and forget it, so the next read parks afresh.
  settle(settler, payload) {
    this.promise = null
    this.resolve = null
    this.reject = null
    settler(payload)
  }

  // Mark the stream as ended; a parked consumer is told immediately.
  finish() {
    if (this.finished || this.failure) {
      return
    }
    this.finished = true
    if (this.promise) {
      this.settle(this.resolve, { value: undefined, done: true })
    }
  }

  // Mark the stream as failed; a parked consumer is rejected immediately.
  abort(error) {
    if (this.finished || this.failure) {
      return
    }
    this.failure = { error }
    if (this.promise) {
      this.settle(this.reject, error)
    }
  }

  /**
   * @returns {Promise<{value: *, done: boolean}>} the next buffered value, or
   *   a promise settled by the producer; rejects once the stream has failed
   *   and the buffer is empty
   */
  async next() {
    if (this.buffer.length > 0) {
      return { value: this.buffer.shift(), done: false }
    }
    if (this.failure) {
      throw this.failure.error
    }
    if (this.finished) {
      return { value: undefined, done: true }
    }

    if (!this.promise) {
      this.promise = new Promise((resolve, reject) => {
        this.resolve = resolve
        this.reject = reject
      })
    }

    return this.promise
  }

  /** Consumer-side close (e.g. `break` in `for await`): drops buffered values and ends the stream. */
  return() {
    this.buffer.length = 0
    this.finish()
    return Promise.resolve({ done: true })
  }

  /** Consumer-side failure: later `next()` calls reject with `error`. */
  throw(error) {
    this.abort(error)
    return Promise.resolve({ done: true })
  }

  [Symbol.asyncIterator]() {
    return this
  }

  /**
   * Interleave several async iterators, yielding each value as soon as any
   * source produces one, until every source is done.
   *
   * @param {Array} iterators objects with a promise-returning `next()`
   */
  static async *merge(iterators) {
    // Keyed by source index so that dropping a finished source never shifts
    // the slot of another one.
    const pending = new Map()
    const pull = source =>
      iterators[source].next().then(result => ({ ...result, source }))

    iterators.forEach((_, source) => pending.set(source, pull(source)))

    while (pending.size > 0) {
      const { value, done, source } = await Promise.race(pending.values())

      if (done) {
        pending.delete(source)
      } else {
        yield value
        pending.set(source, pull(source))
      }
    }
  }
 }

 /**
  * Base class for custom elements that render into an open shadow root.
  * The constructor attaches the shadow root and fills it from an HTML string.
  */
 class BaseComponent extends HTMLElement {
  /** @param {string} templateContent HTML parsed into the shadow root */
  constructor(templateContent) {
    super()
    this.attachShadow({ mode: "open" })
    this.appendTemplate(templateContent)
  }

  /** First element in the shadow root matching `selector`, or null. */
  $(selector) {
    return this.shadowRoot.querySelector(selector)
  }

  /** All elements in the shadow root matching `selector`. */
  $$(selector) {
    return this.shadowRoot.querySelectorAll(selector)
  }

  /** Parse `templateContent` through a `<template>` and append a copy to the shadow root. */
  appendTemplate(templateContent) {
    const template = document.createElement("template")
    template.innerHTML = templateContent
    this.shadowRoot.appendChild(template.content.cloneNode(true))
  }

  /**
   * Create an element with attributes and children.
   *
   * @param {string} tagName
   * @param {Object} [attributes] attribute name → value; `null` and
   *   `undefined` values are skipped instead of being written as the strings
   *   "null" / "undefined"
   * @param {Array<string|Node>} [children] strings become text nodes; any DOM
   *   node is appended as is
   * @returns {HTMLElement}
   * @throws {Error} "Invalid child type" for a child that is neither a string nor a node
   */
  tag(tagName, attributes = {}, children = []) {
    const element = document.createElement(tagName)

    for (const [name, value] of Object.entries(attributes)) {
      if (value !== null && value !== undefined) {
        element.setAttribute(name, value)
      }
    }

    for (const child of children) {
      if (typeof child === "string") {
        element.appendChild(document.createTextNode(child))
      } else if (child instanceof Node) {
        // Node rather than HTMLElement so SVG elements, text nodes and
        // fragments are accepted as well.
        element.appendChild(child)
      } else {
        throw new Error("Invalid child type")
      }
    }

    return element
  }
 }

 /**
  * Wrap the browser speech recognizer in a `Stream` of plain event objects.
  *
  * Emitted events:
  * - `{ type: "Result", timestamp }` once per recognizer result callback,
  * - `{ type: "FinalTranscript" | "InterimTranscript", transcript, grade,
  *   timestamp, id }` for each result from `event.resultIndex` onwards
  *   (`grade` is set only for final results),
  * - `{ type: "NoSpeech", timestamp }` and `{ type: "NetworkDown", timestamp }`
  *   for the corresponding recognizer errors.
  *
  * Any other recognizer error fails the stream. Recognition restarts itself
  * whenever it ends, except after such a fatal error: restarting then would
  * only repeat the same failure.
  *
  * @param {Object} [options]
  * @param {string} [options.language="en-US"] value assigned to `recognition.lang`
  * @returns {Stream}
  * @throws {Error} when the browser exposes no SpeechRecognition constructor
  */
 function speechRecognitionEventStream({ language = "en-US" } = {}) {
  return new Stream(({ next, fail }) => {
    const Recognition =
      window.SpeechRecognition || window.webkitSpeechRecognition
    if (!Recognition) {
      throw new Error("Speech recognition is not supported in this browser")
    }

    const recognition = new Recognition()
    recognition.interimResults = true
    recognition.continuous = true
    recognition.lang = language

    let fatal = false

    recognition.onresult = event => {
      const timestamp = new Date().toISOString()
      next({ type: "Result", timestamp })
      Array.from(event.results)
        .slice(event.resultIndex)
        .forEach(result => {
          next({
            type: result.isFinal ? "FinalTranscript" : "InterimTranscript",
            transcript: result[0].transcript,
            grade: result.isFinal
              ? confidenceGrade(result[0].confidence)
              : undefined,
            timestamp,
            id: gensym(),
          })
        })
    }

    recognition.onerror = error => {
      if (error.error === "no-speech") {
        next({ type: "NoSpeech", timestamp: new Date().toISOString() })
      } else if (error.error === "network") {
        next({ type: "NetworkDown", timestamp: new Date().toISOString() })
      } else {
        fatal = true
        fail(error)
      }
    }

    recognition.onend = () => {
      if (!fatal) {
        recognition.start()
      }
    }

    recognition.start()
  })
 }

 /**
  * Microphone recorder that accumulates `MediaRecorder` chunks in memory.
  *
  * `start()` lazily requests the microphone, `dump()` snapshots what has been
  * recorded so far, and `stop()` / `restart()` hand back the recording and
  * clear the chunk list.
  */
 class AudioRecorder {
  constructor() {
    this.mediaRecorder = null
    this.chunks = []
    this.stream = null
    this.startTime = null
  }

  /** Request the audio stream and create the recorder on first use. */
  async setup() {
    if (!this.stream) {
      this.stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      this.mediaRecorder = new MediaRecorder(this.stream)

      this.mediaRecorder.ondataavailable = e => {
        this.chunks.push(e.data)
      }
    }
  }

  /** Begin recording with a 100 ms timeslice, unless recording is already active. */
  async start() {
    await this.setup()
    if (this.mediaRecorder.state === "inactive") {
      this.mediaRecorder.start(100)
      this.startTime = Date.now()
    }
  }

  /**
   * @returns {Blob} everything recorded so far, labelled with the recorder's
   *   own MIME type when it reports one, otherwise "audio/webm; codecs=opus"
   */
  dump() {
    const reported = this.mediaRecorder ? this.mediaRecorder.mimeType : ""
    return new Blob(this.chunks, {
      type: reported || "audio/webm; codecs=opus",
    })
  }

  /**
   * Stop recording and hand back what was recorded; the chunk list is cleared.
   * Resolves right away when there is nothing to stop (never started, or
   * already inactive), since no `stop` event would arrive in that case.
   *
   * @returns {Promise<Blob>}
   */
  stop() {
    return new Promise(resolve => {
      const flush = () => {
        const blob = this.dump()
        this.chunks = []
        resolve(blob)
      }

      if (!this.mediaRecorder || this.mediaRecorder.state === "inactive") {
        flush()
        return
      }

      this.mediaRecorder.onstop = flush
      this.mediaRecorder.stop()
    })
  }

  /** Stop, then start again. @returns {Promise<Blob>} the recording that was just closed */
  async restart() {
    console.info("restarting audio")
    const blob = await this.stop()
    await this.start()
    return blob
  }
 }

 /**
  * Upload an audio file to the OpenAI transcription endpoint.
  *
  * The request is a multipart POST using model "whisper-1" and response
  * format "verbose_json"; the file is sent under the name "audio.webm".
  *
  * @param {Object} options
  * @param {Blob} options.file audio to transcribe
  * @param {string} options.token sent as the Bearer token
  * @param {string} [options.language="en"]
  * @param {string} [options.prompt=""]
  * @returns {Promise<Object>} the parsed JSON response body
  * @throws {Error} on a non-OK response, after logging the response text
  */
 async function transcribe({ file, token, language = "en", prompt = "" }) {
  const endpoint = "https://api.openai.com/v1/audio/transcriptions"

  const body = new FormData()
  body.append("file", file, "audio.webm")
  const fields = [
    ["model", "whisper-1"],
    ["response_format", "verbose_json"],
    ["prompt", prompt],
    ["language", language],
  ]
  for (const [field, value] of fields) {
    body.append(field, value)
  }

  const response = await fetch(endpoint, {
    method: "POST",
    body,
    headers: { Authorization: `Bearer ${token}` },
  })

  if (response.ok) {
    return response.json()
  }

  console.error(await response.text())
  throw new Error(`HTTP error! status: ${response.status}`)
 }

 /**
  * Return the value remembered in `localStorage` under `key`, asking the user
  * with `prompt(message)` when nothing is stored yet.
  *
  * Only a non-empty answer is persisted. A cancelled prompt returns `null`;
  * storing that would save the string "null" and silently reuse it as the
  * remembered value from then on.
  *
  * @param {Object} options
  * @param {string} options.key storage key
  * @param {string} [options.message=key] text shown in the prompt
  * @returns {Promise<string|null>} the stored or entered value
  */
 async function demand({ key, message = key }) {
  const remembered = localStorage.getItem(key)
  if (remembered) {
    return remembered
  }

  const answer = prompt(message)
  if (answer) {
    localStorage.setItem(key, answer)
  }
  return answer
 }

 /**
  * One-shot timeout that can be re-armed.
  *
  * `reset()` cancels any scheduled run and schedules `onTimeout` again after
  * `timeoutDuration` milliseconds; `stop()` cancels without rescheduling.
  */
 class ResettableTimer {
  /**
   * @param {number} timeoutDuration delay handed to `setTimeout`
   * @param {function} onTimeout callback handed to `setTimeout`
   */
  constructor(timeoutDuration, onTimeout) {
    Object.assign(this, { timeoutDuration, onTimeout, timeoutId: null })
  }

  /** Arm the timer; same as `reset()`. */
  start() {
    this.reset()
  }

  /** Cancel the scheduled run, if any, and schedule a new one. */
  reset() {
    const { onTimeout, timeoutDuration, timeoutId } = this
    clearTimeout(timeoutId)
    this.timeoutId = setTimeout(onTimeout, timeoutDuration)
  }

  /** Cancel the scheduled run and forget its id. */
  stop() {
    const { timeoutId } = this
    clearTimeout(timeoutId)
    this.timeoutId = null
  }
 }

 /**
  * `<swash-dictaphone>` element.
  *
  * Shows live speech recognition output, records the microphone, and replaces
  * each final browser transcript with a transcription from `transcribe()` of
  * the audio recorded so far. Every live event is appended to the
  * `localStorage` entry named by the `db` attribute and replayed on connect.
  */
 class SwashDictaphone extends BaseComponent {
  constructor() {
    super(`
      <link rel="stylesheet" href="index.css">
      <article>
        <div class="final"><p></p></div>
        <div class="interim"></div>
      </article>
      <audio controls></audio>
    `)
  }

  /**
   * Replay stored events, start recognition and recording, then handle live
   * recognition events for as long as the stream produces them.
   */
  async connectedCallback() {
    this.db = this.getAttribute("db")
    this.loadAndHandleEvents()

    const language = this.getAttribute("lang") || "en-US"
    this.shortLanguage = language.split("-")[0]

    this.recognitionEventStream = speechRecognitionEventStream({
      language,
    })

    this.recorder = new AudioRecorder()
    await this.recorder.start()

    // Fires 5000 ms after the latest live interim transcript (or after its
    // own previous run): restarts the recording and opens a new paragraph
    // unless the last one is still empty.
    this.timer = new ResettableTimer(5000, async () => {
      await this.recorder.restart()
      if (!this.$(".final p:empty:last-child")) {
        this.$(".final").appendChild(this.tag("p"))
      }
      this.timer.reset()
    })

    for await (const event of this.recognitionEventStream) {
      console.log("ok", event)
      // Not awaited, so later events are handled while a transcription is
      // in flight; failures are logged instead of being left unhandled.
      this.handleEvent(event, true).catch(error => console.error(error))
    }
  }

  /** Re-render every stored event without saving it again. */
  loadAndHandleEvents() {
    const events = JSON.parse(localStorage.getItem(this.db) || "[]")
    events.forEach(event => {
      this.handleEvent(event, false).catch(error => console.error(error))
    })
  }

  /** Append `event` to the stored event list. */
  saveEvent(event) {
    const events = JSON.parse(localStorage.getItem(this.db) || "[]")
    localStorage.setItem(this.db, JSON.stringify([...events, event]))
  }

  /** Forget all stored events and clear the display. */
  reset() {
    localStorage.removeItem(this.db)
    // Keep one empty paragraph: final transcripts are appended to the last
    // <p> and would fail on a missing one.
    this.$(".final").innerHTML = "<p></p>"
    this.$(".interim").textContent = ""
  }

  /**
   * Apply one event to the display.
   *
   * @param {Object} event an event object as produced by the recognition stream
   * @param {boolean} shouldSave true for live events (persisted, transcribed,
   *   timer-driving); false when replaying stored events
   */
  async handleEvent(event, shouldSave) {
    if (shouldSave) {
      this.saveEvent(event)
    }

    switch (event.type) {
      case "Result":
        this.$(".interim").textContent = ""
        break

      case "FinalTranscript":
        await this.handleFinalTranscript(event, shouldSave)
        break

      case "InterimTranscript":
        this.$(".interim").textContent += event.transcript
        if (shouldSave) {
          this.timer.reset()
        }
        break

      default:
        // Other event types (e.g. NoSpeech) need no display update.
        break
    }

    // scroll to bottom smoothly, centering the last line
    this.$(".final > :last-child, .interim").scrollIntoView({
      behavior: "smooth",
      block: "center",
    })
  }

  /** Run a spoken command, or append the transcript and (when live) transcribe the audio. */
  async handleFinalTranscript(event, shouldSave) {
    // A Map, so transcripts such as "constructor" cannot hit inherited keys.
    const commands = new Map([["reset bro", () => this.reset()]])
    const command = commands.get(event.transcript.trim().toLowerCase())
    if (command) {
      await command()
      return
    }

    const paragraph = this.$(".final p:last-of-type")
    const recording = this.tag(
      "span",
      {
        "data-grade": event.grade,
        "data-id": event.id,
        "data-timestamp": event.timestamp,
        class: shouldSave ? "recording" : "",
      },
      [event.transcript]
    )
    paragraph.appendChild(recording)

    if (shouldSave) {
      const target = this.tag("span", {
        class: "whisper transcription pending",
      })
      paragraph.appendChild(target)

      try {
        const file = this.recorder.dump()
        const token = await demand({
          key: "openai-token",
          message: "Please enter your OpenAI API token",
        })
        if (!token) {
          throw new Error("No OpenAI API token provided")
        }

        // verbose_json response; the complete transcript is in `text`,
        // e.g. {"task":"transcribe","language":"english",...,"text":"Hello."}
        const transcription = await transcribe({
          file,
          token,
          language: this.shortLanguage,
        })

        target.classList.remove("pending")
        target.classList.add("done")

        // remove all other transcriptions in the same paragraph
        for (const span of paragraph.querySelectorAll(
          ".whisper.transcription"
        )) {
          if (span !== target) {
            span.remove()
          }
        }

        recording.remove()
        console.info(transcription)
        target.textContent = transcription.text
      } catch (error) {
        // Keep the browser transcript visible and drop the placeholder
        // rather than leaving it pending forever.
        console.error(error)
        target.remove()
      }
    }

    this.$(".interim").textContent = ""
  }
 }

 // Register the element once: define() throws when the tag name is already
 // registered, e.g. if this script is evaluated a second time.
 if (!customElements.get("swash-dictaphone")) {
  customElements.define("swash-dictaphone", SwashDictaphone)
 }

 /**
  * Map a recognition confidence to a letter grade.
  *
  * Each bound is exclusive: a value must be strictly greater than the bound
  * to earn that grade. Anything at or below 0.6, and any value that fails
  * every comparison (such as `undefined` or `NaN`), is graded "F".
  *
  * @param {number} confidence
  * @returns {string} one of "A+", "A", "B", "C", "D", "F"
  */
 function confidenceGrade(confidence) {
  const ladder = [
    [0.95, "A+"],
    [0.9, "A"],
    [0.8, "B"],
    [0.7, "C"],
    [0.6, "D"],
  ]

  for (const [bound, letter] of ladder) {
    if (confidence > bound) {
      return letter
    }
  }
  return "F"
 }
	// swa.sh - a tool, for naught
	// Copyright (C) 2023 Mikael Brockman
	//
	// This program is free software: you can redistribute it and/or modify
	// it under the terms of the GNU Affero General Public License as published by
	// the Free Software Foundation, either version 3 of the License, or
	// (at your option) any later version.
	//
	// This program is distributed in the hope that it will be useful,
	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	// GNU Affero General Public License for more details.
	//
	// You should have received a copy of the GNU Affero General Public License
	// along with this program. If not, see <http://www.gnu.org/licenses/>.

	/**
	 * Return a random six-character word over the z-base-32 alphabet.
	 *
	 * One 32-bit value is filled in by `window.crypto.getRandomValues`; six
	 * 5-bit groups are read from it at bit offsets 27, 22, 17, 12, 7 and 2,
	 * and each group indexes one character of the alphabet.
	 *
	 * @returns {string} the six selected characters, highest offset first
	 */
	function zb32word() {
	  const symbols = "ybndrfg8ejkmcpqxot1uwisza345h769"
	  const holder = new Int32Array(1)
	  window.crypto.getRandomValues(holder)
	  const sample = holder[0]

	  const offsets = [27, 22, 17, 12, 7, 2]
	  return offsets
	    .map(offset => symbols[(sample >>> offset) & 0x1f])
	    .join("")
	}

	/**
	 * Generate an identifier made of two `zb32word()` words, in call order.
	 *
	 * @returns {string} both words joined without a separator
	 */
	function gensym() {
	  return [zb32word(), zb32word()].join("")
	}

	/**
	 * Push-driven async iterator.
	 *
	 * The `setup` callback receives `{ next, stop, fail }` and uses them to feed
	 * the stream. Values pushed while no consumer is waiting are buffered and
	 * handed out in order. End-of-stream and failure are remembered, so they can
	 * be signalled before anyone is waiting and are still observed by later
	 * `next()` calls once the buffer has drained.
	 */
	class Stream {
	  /**
	   * @param {function({next: function, stop: function, fail: function}): void} setup
	   *   invoked synchronously with the producer callbacks
	   */
	  constructor(setup) {
	    this.buffer = []
	    this.promise = null
	    this.resolve = null
	    this.reject = null
	    this.finished = false
	    // Stored as `{ error }` so that even a falsy error value marks failure.
	    this.failure = null

	    const next = value => {
	      if (this.finished || this.failure) {
	        return
	      }
	      if (this.promise) {
	        this.settle(this.resolve, { value, done: false })
	      } else {
	        this.buffer.push(value)
	      }
	    }

	    setup({
	      next,
	      stop: () => this.finish(),
	      fail: error => this.abort(error),
	    })
	  }

	  // Settle the parked consumer promise and forget it, so the next read parks afresh.
	  settle(settler, payload) {
	    this.promise = null
	    this.resolve = null
	    this.reject = null
	    settler(payload)
	  }

	  // Mark the stream as ended; a parked consumer is told immediately.
	  finish() {
	    if (this.finished || this.failure) {
	      return
	    }
	    this.finished = true
	    if (this.promise) {
	      this.settle(this.resolve, { value: undefined, done: true })
	    }
	  }

	  // Mark the stream as failed; a parked consumer is rejected immediately.
	  abort(error) {
	    if (this.finished || this.failure) {
	      return
	    }
	    this.failure = { error }
	    if (this.promise) {
	      this.settle(this.reject, error)
	    }
	  }

	  /**
	   * @returns {Promise<{value: *, done: boolean}>} the next buffered value, or
	   *   a promise settled by the producer; rejects once the stream has failed
	   *   and the buffer is empty
	   */
	  async next() {
	    if (this.buffer.length > 0) {
	      return { value: this.buffer.shift(), done: false }
	    }
	    if (this.failure) {
	      throw this.failure.error
	    }
	    if (this.finished) {
	      return { value: undefined, done: true }
	    }

	    if (!this.promise) {
	      this.promise = new Promise((resolve, reject) => {
	        this.resolve = resolve
	        this.reject = reject
	      })
	    }

	    return this.promise
	  }

	  /** Consumer-side close (e.g. `break` in `for await`): drops buffered values and ends the stream. */
	  return() {
	    this.buffer.length = 0
	    this.finish()
	    return Promise.resolve({ done: true })
	  }

	  /** Consumer-side failure: later `next()` calls reject with `error`. */
	  throw(error) {
	    this.abort(error)
	    return Promise.resolve({ done: true })
	  }

	  [Symbol.asyncIterator]() {
	    return this
	  }

	  /**
	   * Interleave several async iterators, yielding each value as soon as any
	   * source produces one, until every source is done.
	   *
	   * @param {Array} iterators objects with a promise-returning `next()`
	   */
	  static async *merge(iterators) {
	    // Keyed by source index so that dropping a finished source never shifts
	    // the slot of another one.
	    const pending = new Map()
	    const pull = source =>
	      iterators[source].next().then(result => ({ ...result, source }))

	    iterators.forEach((_, source) => pending.set(source, pull(source)))

	    while (pending.size > 0) {
	      const { value, done, source } = await Promise.race(pending.values())

	      if (done) {
	        pending.delete(source)
	      } else {
	        yield value
	        pending.set(source, pull(source))
	      }
	    }
	  }
	}

	/**
	 * Base class for custom elements that render into an open shadow root.
	 * The constructor attaches the shadow root and fills it from an HTML string.
	 */
	class BaseComponent extends HTMLElement {
	  /** @param {string} templateContent HTML parsed into the shadow root */
	  constructor(templateContent) {
	    super()
	    this.attachShadow({ mode: "open" })
	    this.appendTemplate(templateContent)
	  }

	  /** First element in the shadow root matching `selector`, or null. */
	  $(selector) {
	    return this.shadowRoot.querySelector(selector)
	  }

	  /** All elements in the shadow root matching `selector`. */
	  $$(selector) {
	    return this.shadowRoot.querySelectorAll(selector)
	  }

	  /** Parse `templateContent` through a `<template>` and append a copy to the shadow root. */
	  appendTemplate(templateContent) {
	    const template = document.createElement("template")
	    template.innerHTML = templateContent
	    this.shadowRoot.appendChild(template.content.cloneNode(true))
	  }

	  /**
	   * Create an element with attributes and children.
	   *
	   * @param {string} tagName
	   * @param {Object} [attributes] attribute name → value; `null` and
	   *   `undefined` values are skipped instead of being written as the strings
	   *   "null" / "undefined"
	   * @param {Array<string|Node>} [children] strings become text nodes; any DOM
	   *   node is appended as is
	   * @returns {HTMLElement}
	   * @throws {Error} "Invalid child type" for a child that is neither a string nor a node
	   */
	  tag(tagName, attributes = {}, children = []) {
	    const element = document.createElement(tagName)

	    for (const [name, value] of Object.entries(attributes)) {
	      if (value !== null && value !== undefined) {
	        element.setAttribute(name, value)
	      }
	    }

	    for (const child of children) {
	      if (typeof child === "string") {
	        element.appendChild(document.createTextNode(child))
	      } else if (child instanceof Node) {
	        // Node rather than HTMLElement so SVG elements, text nodes and
	        // fragments are accepted as well.
	        element.appendChild(child)
	      } else {
	        throw new Error("Invalid child type")
	      }
	    }

	    return element
	  }
	}

	/**
	 * Wrap the browser speech recognizer in a `Stream` of plain event objects.
	 *
	 * Emitted events:
	 * - `{ type: "Result", timestamp }` once per recognizer result callback,
	 * - `{ type: "FinalTranscript" | "InterimTranscript", transcript, grade,
	 *   timestamp, id }` for each result from `event.resultIndex` onwards
	 *   (`grade` is set only for final results),
	 * - `{ type: "NoSpeech", timestamp }` and `{ type: "NetworkDown", timestamp }`
	 *   for the corresponding recognizer errors.
	 *
	 * Any other recognizer error fails the stream. Recognition restarts itself
	 * whenever it ends, except after such a fatal error: restarting then would
	 * only repeat the same failure.
	 *
	 * @param {Object} [options]
	 * @param {string} [options.language="en-US"] value assigned to `recognition.lang`
	 * @returns {Stream}
	 * @throws {Error} when the browser exposes no SpeechRecognition constructor
	 */
	function speechRecognitionEventStream({ language = "en-US" } = {}) {
	  return new Stream(({ next, fail }) => {
	    const Recognition =
	      window.SpeechRecognition || window.webkitSpeechRecognition
	    if (!Recognition) {
	      throw new Error("Speech recognition is not supported in this browser")
	    }

	    const recognition = new Recognition()
	    recognition.interimResults = true
	    recognition.continuous = true
	    recognition.lang = language

	    let fatal = false

	    recognition.onresult = event => {
	      const timestamp = new Date().toISOString()
	      next({ type: "Result", timestamp })
	      Array.from(event.results)
	        .slice(event.resultIndex)
	        .forEach(result => {
	          next({
	            type: result.isFinal ? "FinalTranscript" : "InterimTranscript",
	            transcript: result[0].transcript,
	            grade: result.isFinal
	              ? confidenceGrade(result[0].confidence)
	              : undefined,
	            timestamp,
	            id: gensym(),
	          })
	        })
	    }

	    recognition.onerror = error => {
	      if (error.error === "no-speech") {
	        next({ type: "NoSpeech", timestamp: new Date().toISOString() })
	      } else if (error.error === "network") {
	        next({ type: "NetworkDown", timestamp: new Date().toISOString() })
	      } else {
	        fatal = true
	        fail(error)
	      }
	    }

	    recognition.onend = () => {
	      if (!fatal) {
	        recognition.start()
	      }
	    }

	    recognition.start()
	  })
	}

	/**
	 * Microphone recorder that accumulates `MediaRecorder` chunks in memory.
	 *
	 * `start()` lazily requests the microphone, `dump()` snapshots what has been
	 * recorded so far, and `stop()` / `restart()` hand back the recording and
	 * clear the chunk list.
	 */
	class AudioRecorder {
	  constructor() {
	    this.mediaRecorder = null
	    this.chunks = []
	    this.stream = null
	    this.startTime = null
	  }

	  /** Request the audio stream and create the recorder on first use. */
	  async setup() {
	    if (!this.stream) {
	      this.stream = await navigator.mediaDevices.getUserMedia({ audio: true })
	      this.mediaRecorder = new MediaRecorder(this.stream)

	      this.mediaRecorder.ondataavailable = e => {
	        this.chunks.push(e.data)
	      }
	    }
	  }

	  /** Begin recording with a 100 ms timeslice, unless recording is already active. */
	  async start() {
	    await this.setup()
	    if (this.mediaRecorder.state === "inactive") {
	      this.mediaRecorder.start(100)
	      this.startTime = Date.now()
	    }
	  }

	  /**
	   * @returns {Blob} everything recorded so far, labelled with the recorder's
	   *   own MIME type when it reports one, otherwise "audio/webm; codecs=opus"
	   */
	  dump() {
	    const reported = this.mediaRecorder ? this.mediaRecorder.mimeType : ""
	    return new Blob(this.chunks, {
	      type: reported || "audio/webm; codecs=opus",
	    })
	  }

	  /**
	   * Stop recording and hand back what was recorded; the chunk list is cleared.
	   * Resolves right away when there is nothing to stop (never started, or
	   * already inactive), since no `stop` event would arrive in that case.
	   *
	   * @returns {Promise<Blob>}
	   */
	  stop() {
	    return new Promise(resolve => {
	      const flush = () => {
	        const blob = this.dump()
	        this.chunks = []
	        resolve(blob)
	      }

	      if (!this.mediaRecorder || this.mediaRecorder.state === "inactive") {
	        flush()
	        return
	      }

	      this.mediaRecorder.onstop = flush
	      this.mediaRecorder.stop()
	    })
	  }

	  /** Stop, then start again. @returns {Promise<Blob>} the recording that was just closed */
	  async restart() {
	    console.info("restarting audio")
	    const blob = await this.stop()
	    await this.start()
	    return blob
	  }
	}

	/**
	 * Send an audio file to the OpenAI transcription endpoint and return the
	 * parsed JSON reply.
	 *
	 * The multipart body carries the file (named "audio.webm"), the model
	 * "whisper-1", the response format "verbose_json", the prompt and the
	 * language.
	 *
	 * @param {Object} options
	 * @param {Blob} options.file audio to transcribe
	 * @param {string} options.token sent as the Bearer token
	 * @param {string} [options.language="en"]
	 * @param {string} [options.prompt=""]
	 * @returns {Promise<Object>} the parsed JSON response body
	 * @throws {Error} on a non-OK response, after logging the response text
	 */
	async function transcribe({ file, token, language = "en", prompt = "" }) {
	  const payload = new FormData()
	  payload.append("file", file, "audio.webm")
	  Object.entries({
	    model: "whisper-1",
	    response_format: "verbose_json",
	    prompt,
	    language,
	  }).forEach(([field, value]) => payload.append(field, value))

	  const request = {
	    method: "POST",
	    body: payload,
	    headers: { Authorization: `Bearer ${token}` },
	  }
	  const reply = await fetch(
	    "https://api.openai.com/v1/audio/transcriptions",
	    request
	  )

	  if (!reply.ok) {
	    const details = await reply.text()
	    console.error(details)
	    throw new Error(`HTTP error! status: ${reply.status}`)
	  }

	  const parsed = await reply.json()
	  return parsed
	}

	/**
	 * Return the value remembered in `localStorage` under `key`, asking the user
	 * with `prompt(message)` when nothing is stored yet.
	 *
	 * Only a non-empty answer is persisted. A cancelled prompt returns `null`;
	 * storing that would save the string "null" and silently reuse it as the
	 * remembered value from then on.
	 *
	 * @param {Object} options
	 * @param {string} options.key storage key
	 * @param {string} [options.message=key] text shown in the prompt
	 * @returns {Promise<string|null>} the stored or entered value
	 */
	async function demand({ key, message = key }) {
	  const remembered = localStorage.getItem(key)
	  if (remembered) {
	    return remembered
	  }

	  const answer = prompt(message)
	  if (answer) {
	    localStorage.setItem(key, answer)
	  }
	  return answer
	}

	/**
	 * Re-armable one-shot timeout around `setTimeout` / `clearTimeout`.
	 *
	 * The pending timeout id is kept in `timeoutId`, which is `null` before the
	 * first arming and after `stop()`.
	 */
	class ResettableTimer {
	  /**
	   * @param {number} timeoutDuration delay passed to `setTimeout`
	   * @param {function} onTimeout callback passed to `setTimeout`
	   */
	  constructor(timeoutDuration, onTimeout) {
	    this.timeoutDuration = timeoutDuration
	    this.onTimeout = onTimeout
	    this.timeoutId = null
	  }

	  /** Arm the timer by delegating to `reset()`. */
	  start() {
	    return void this.reset()
	  }

	  /** Drop the pending timeout, then schedule the callback afresh. */
	  reset() {
	    const previous = this.timeoutId
	    clearTimeout(previous)
	    const scheduled = setTimeout(this.onTimeout, this.timeoutDuration)
	    this.timeoutId = scheduled
	  }

	  /** Drop the pending timeout and clear the stored id. */
	  stop() {
	    const previous = this.timeoutId
	    clearTimeout(previous)
	    this.timeoutId = null
	  }
	}

	/**
	 * `<swash-dictaphone>` element.
	 *
	 * Shows live speech recognition output, records the microphone, and replaces
	 * each final browser transcript with a transcription from `transcribe()` of
	 * the audio recorded so far. Every live event is appended to the
	 * `localStorage` entry named by the `db` attribute and replayed on connect.
	 */
	class SwashDictaphone extends BaseComponent {
	  constructor() {
	    super(`
	<link rel="stylesheet" href="index.css">
	<article>
	<div class="final"><p></p></div>
	<div class="interim"></div>
	</article>
	<audio controls></audio>
	`)
	  }

	  /**
	   * Replay stored events, start recognition and recording, then handle live
	   * recognition events for as long as the stream produces them.
	   */
	  async connectedCallback() {
	    this.db = this.getAttribute("db")
	    this.loadAndHandleEvents()

	    const language = this.getAttribute("lang") || "en-US"
	    this.shortLanguage = language.split("-")[0]

	    this.recognitionEventStream = speechRecognitionEventStream({
	      language,
	    })

	    this.recorder = new AudioRecorder()
	    await this.recorder.start()

	    // Fires 5000 ms after the latest live interim transcript (or after its
	    // own previous run): restarts the recording and opens a new paragraph
	    // unless the last one is still empty.
	    this.timer = new ResettableTimer(5000, async () => {
	      await this.recorder.restart()
	      if (!this.$(".final p:empty:last-child")) {
	        this.$(".final").appendChild(this.tag("p"))
	      }
	      this.timer.reset()
	    })

	    for await (const event of this.recognitionEventStream) {
	      console.log("ok", event)
	      // Not awaited, so later events are handled while a transcription is
	      // in flight; failures are logged instead of being left unhandled.
	      this.handleEvent(event, true).catch(error => console.error(error))
	    }
	  }

	  /** Re-render every stored event without saving it again. */
	  loadAndHandleEvents() {
	    const events = JSON.parse(localStorage.getItem(this.db) || "[]")
	    events.forEach(event => {
	      this.handleEvent(event, false).catch(error => console.error(error))
	    })
	  }

	  /** Append `event` to the stored event list. */
	  saveEvent(event) {
	    const events = JSON.parse(localStorage.getItem(this.db) || "[]")
	    localStorage.setItem(this.db, JSON.stringify([...events, event]))
	  }

	  /** Forget all stored events and clear the display. */
	  reset() {
	    localStorage.removeItem(this.db)
	    // Keep one empty paragraph: final transcripts are appended to the last
	    // <p> and would fail on a missing one.
	    this.$(".final").innerHTML = "<p></p>"
	    this.$(".interim").textContent = ""
	  }

	  /**
	   * Apply one event to the display.
	   *
	   * @param {Object} event an event object as produced by the recognition stream
	   * @param {boolean} shouldSave true for live events (persisted, transcribed,
	   *   timer-driving); false when replaying stored events
	   */
	  async handleEvent(event, shouldSave) {
	    if (shouldSave) {
	      this.saveEvent(event)
	    }

	    switch (event.type) {
	      case "Result":
	        this.$(".interim").textContent = ""
	        break

	      case "FinalTranscript":
	        await this.handleFinalTranscript(event, shouldSave)
	        break

	      case "InterimTranscript":
	        this.$(".interim").textContent += event.transcript
	        if (shouldSave) {
	          this.timer.reset()
	        }
	        break

	      default:
	        // Other event types (e.g. NoSpeech) need no display update.
	        break
	    }

	    // scroll to bottom smoothly, centering the last line
	    this.$(".final > :last-child, .interim").scrollIntoView({
	      behavior: "smooth",
	      block: "center",
	    })
	  }

	  /** Run a spoken command, or append the transcript and (when live) transcribe the audio. */
	  async handleFinalTranscript(event, shouldSave) {
	    // A Map, so transcripts such as "constructor" cannot hit inherited keys.
	    const commands = new Map([["reset bro", () => this.reset()]])
	    const command = commands.get(event.transcript.trim().toLowerCase())
	    if (command) {
	      await command()
	      return
	    }

	    const paragraph = this.$(".final p:last-of-type")
	    const recording = this.tag(
	      "span",
	      {
	        "data-grade": event.grade,
	        "data-id": event.id,
	        "data-timestamp": event.timestamp,
	        class: shouldSave ? "recording" : "",
	      },
	      [event.transcript]
	    )
	    paragraph.appendChild(recording)

	    if (shouldSave) {
	      const target = this.tag("span", {
	        class: "whisper transcription pending",
	      })
	      paragraph.appendChild(target)

	      try {
	        const file = this.recorder.dump()
	        const token = await demand({
	          key: "openai-token",
	          message: "Please enter your OpenAI API token",
	        })
	        if (!token) {
	          throw new Error("No OpenAI API token provided")
	        }

	        // verbose_json response; the complete transcript is in `text`,
	        // e.g. {"task":"transcribe","language":"english",...,"text":"Hello."}
	        const transcription = await transcribe({
	          file,
	          token,
	          language: this.shortLanguage,
	        })

	        target.classList.remove("pending")
	        target.classList.add("done")

	        // remove all other transcriptions in the same paragraph
	        for (const span of paragraph.querySelectorAll(
	          ".whisper.transcription"
	        )) {
	          if (span !== target) {
	            span.remove()
	          }
	        }

	        recording.remove()
	        console.info(transcription)
	        target.textContent = transcription.text
	      } catch (error) {
	        // Keep the browser transcript visible and drop the placeholder
	        // rather than leaving it pending forever.
	        console.error(error)
	        target.remove()
	      }
	    }

	    this.$(".interim").textContent = ""
	  }
	}

	// Register the element once: define() throws when the tag name is already
	// registered, e.g. if this script is evaluated a second time.
	if (!customElements.get("swash-dictaphone")) {
	  customElements.define("swash-dictaphone", SwashDictaphone)
	}

	/**
	 * Turn a recognition confidence into a letter grade.
	 *
	 * A grade is earned only when the confidence is strictly above its bound.
	 * Values that are above no bound at all, including `undefined` and `NaN`,
	 * get "F".
	 *
	 * @param {number} confidence
	 * @returns {string} one of "A+", "A", "B", "C", "D", "F"
	 */
	function confidenceGrade(confidence) {
	  const bands = [
	    { above: 0.95, grade: "A+" },
	    { above: 0.9, grade: "A" },
	    { above: 0.8, grade: "B" },
	    { above: 0.7, grade: "C" },
	    { above: 0.6, grade: "D" },
	  ]
	  const band = bands.find(({ above }) => confidence > above)
	  return band ? band.grade : "F"
	}
No results found