Created
December 24, 2024 10:10
-
-
Save matsubo/39adc10cf5167e5cc6d05fe65215ae72 to your computer and use it in GitHub Desktop.
Realtime translation using OpenAI realtime API via WebRTC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootswatch/4.5.2/cerulean/bootstrap.min.css"> | |
<title>Realtime Translation Test</title> | |
</head> | |
<body> | |
<div class="container mt-5"> | |
<div class="form-group"> | |
<label for="modelSelect">Model</label> | |
</div> | |
<div class="form-group"> | |
<label for="inputLanguage">Input Language:</label> | |
<select id="inputLanguage" class="form-control"> | |
<option value="Japanese">Japanese</option> | |
<option value="Spanish">Spanish</option> | |
<option value="French">French</option> | |
</select> | |
</div> | |
<div class="form-group"> | |
<label for="outputLanguage">Output Language:</label> | |
<select id="outputLanguage" class="form-control"> | |
<option value="English">English</option> | |
<option value="Spanish">Spanish</option> | |
<option value="Hindi">Hindi</option> | |
<option value="Mandarin Chinese">Mandarin Chinese</option> | |
<option value="French">French</option> | |
</select> | |
</div> | |
<div class="card my-3"> | |
<div class="card-body"> | |
<h5 class="card-title">Response</h5> | |
<div class="card-text"> | |
<blockquote id="translatedText"></blockquote> | |
</div> | |
</div> | |
</div> | |
<div class="card my-3"> | |
<div class="card-body"> | |
<h5 class="card-title">Prompt via data channel</h5> | |
<div class="card-text"> | |
<blockquote id="promptText"></blockquote> | |
</div> | |
</div> | |
</div> | |
<script> | |
/**
 * Bootstraps the realtime translation demo.
 *
 * Steps, in order:
 *   1. Fetch a session object from the backend (`GET /api`) and read the
 *      ephemeral key from `client_secret.value`.
 *   2. Build the model <select> and append it to the first `.form-group`.
 *   3. Open an RTCPeerConnection, attach the first microphone track and create
 *      the "oai-events" data channel.
 *   4. Exchange SDP with the realtime endpoint using the ephemeral key.
 *   5. Send the translation prompt whenever the data channel opens or one of
 *      the selects changes.
 *
 * Rejects if either HTTP request returns a non-2xx status, or if any awaited
 * browser API (getUserMedia, createOffer, ...) rejects.
 */
async function init() {
  const tokenResponse = await fetch("/api");
  if (!tokenResponse.ok) {
    throw new Error(`Failed to obtain a session token: HTTP ${tokenResponse.status}`);
  }
  const data = await tokenResponse.json();
  const EPHEMERAL_KEY = data.client_secret.value;
  const baseUrl = "https://api.openai.com/v1/realtime";

  // The model picker is created here rather than in the markup.
  const modelSelect = document.createElement("select");
  modelSelect.id = "modelSelect";
  modelSelect.className = "form-control";
  // NOTE(review): the non-realtime entries are presumably rejected by the
  // realtime endpoint -- confirm whether they should stay in this list.
  const models = ["gpt-4o-realtime-preview-2024-12-17", "gpt-3.5-turbo", "gpt-3.5-turbo-16k"];
  models.forEach((m) => {
    const option = document.createElement("option");
    option.value = m;
    option.text = m;
    modelSelect.appendChild(option);
  });
  document.querySelector(".form-group").appendChild(modelSelect);

  const inputLanguageSelect = document.getElementById("inputLanguage");
  const outputLanguageSelect = document.getElementById("outputLanguage");
  const translatedTextElement = document.getElementById("translatedText");
  const promptTextElement = document.getElementById("promptText");

  const pc = new RTCPeerConnection();
  const audioEl = document.createElement("audio");
  audioEl.autoplay = true; // start playback as soon as the remote stream is attached
  pc.ontrack = (e) => {
    audioEl.srcObject = e.streams[0];
  };

  const ms = await navigator.mediaDevices.getUserMedia({ audio: true });
  pc.addTrack(ms.getTracks()[0]);

  const dc = pc.createDataChannel("oai-events");

  // Defined before any handler that calls it is registered, so a handler can
  // never observe the binding before initialisation.
  const sendTranslationInstructions = () => {
    if (dc.readyState !== "open") {
      return;
    }
    const promptText = `Translate from ${inputLanguageSelect.value} to ${outputLanguageSelect.value}`;
    const responseCreate = {
      type: "response.create",
      response: {
        modalities: ["text"],
        instructions: promptText,
      },
    };
    // textContent: the prompt is plain text, never markup.
    promptTextElement.textContent = promptText;
    dc.send(JSON.stringify(responseCreate));
    console.info(responseCreate);
  };

  // Initial instruction send when the data channel is open.
  dc.addEventListener("open", sendTranslationInstructions);

  dc.addEventListener("message", (e) => {
    const realtimeEvent = JSON.parse(e.data);
    console.info(e.data);
    if (realtimeEvent.type === "response.audio_transcript.delta") {
      // Append as a text node so model output is never parsed as HTML.
      translatedTextElement.append(String(realtimeEvent.delta));
    } else if (realtimeEvent.type === "response.done") {
      // The status is read from the nested `response` object when present;
      // the top-level field is kept as a fallback.
      const status = realtimeEvent.response ? realtimeEvent.response.status : realtimeEvent.status;
      if (status === "completed") {
        // Start a new line once a response has finished.
        translatedTextElement.appendChild(document.createElement("br"));
      }
    }
  });

  // NOTE(review): the model is only used in the SDP request below, so changing
  // the select after the connection is up re-sends the prompt but does not
  // switch models -- confirm this is intended.
  modelSelect.onchange = sendTranslationInstructions;
  inputLanguageSelect.onchange = sendTranslationInstructions;
  outputLanguageSelect.onchange = sendTranslationInstructions;

  const offer = await pc.createOffer();
  await pc.setLocalDescription(offer);

  // Read the select at request time so a change made while the microphone
  // prompt was showing is honoured.
  const model = modelSelect.value;
  const sdpResponse = await fetch(`${baseUrl}?model=${encodeURIComponent(model)}`, {
    method: "POST",
    body: offer.sdp,
    headers: {
      Authorization: `Bearer ${EPHEMERAL_KEY}`,
      "Content-Type": "application/sdp",
    },
  });
  if (!sdpResponse.ok) {
    throw new Error(`SDP exchange failed: HTTP ${sdpResponse.status}`);
  }
  const answer = {
    type: "answer",
    sdp: await sdpResponse.text(),
  };
  await pc.setRemoteDescription(answer);
}

// Surface start-up failures instead of leaving an unhandled rejection.
init().catch((err) => {
  console.error("Failed to start realtime translation:", err);
});
</script> | |
</body> | |
</html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { fileURLToPath } from "node:url";

import express from "express";
import { Request, Response } from "express";
import fetch from "node-fetch";
const app = express();

/**
 * GET /api
 *
 * Creates a realtime session by POSTing to the OpenAI sessions endpoint with
 * the server-side `OPENAI_API_KEY`, then relays the JSON body to the caller.
 * The browser page reads `client_secret.value` from that body.
 *
 * Responses:
 *   - 200 with the upstream JSON on success.
 *   - the upstream status with `{ error }` when OpenAI answers non-2xx.
 *   - 500 with `{ error }` when the key is missing or the request throws.
 */
app.get("/api", async (req: Request, res: Response) => {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    // Fail fast instead of sending "Bearer undefined" upstream.
    console.error("OPENAI_API_KEY is not set");
    res.status(500).send({ error: "Internal Server Error" });
    return;
  }

  try {
    const r = await fetch("https://api.openai.com/v1/realtime/sessions", {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-4o-realtime-preview-2024-12-17",
        voice: "verse",
      }),
    });

    if (!r.ok) {
      // Keep the details in the server log; the client only gets a generic message.
      console.error(`OpenAI session request failed with HTTP ${r.status}:`, await r.text());
      res.status(r.status).send({ error: "Failed to fetch data from OpenAI API" });
      return;
    }

    const data = await r.json();
    res.send(data);
  } catch (error: unknown) {
    console.error("Failed to create realtime session:", error);
    res.status(500).send({ error: "Internal Server Error" });
  }
});
/**
 * GET /
 *
 * Serves `index.html` from the `public` directory that sits one level above
 * this module. The directory URL is converted with `fileURLToPath` rather than
 * `URL#pathname`, because `pathname` stays percent-encoded and carries a
 * leading slash before Windows drive letters.
 */
app.get("/", (req: Request, res: Response) => {
  const publicDir = fileURLToPath(new URL("../public", import.meta.url));
  res.sendFile("index.html", { root: publicDir });
});
// Bind a local listener on port 3000 unless NODE_ENV is "production".
// NOTE(review): presumably the production host imports the default export and
// does its own listening -- confirm against the deployment setup.
if (process.env.NODE_ENV !== "production") {
  const port = 3000;
  const announce = (): void => {
    console.log("Server is running on port 3000");
  };
  app.listen(port, announce);
}

export default app;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "express", | |
"version": "1.0.0", | |
"description": "", | |
"main": "api/index.ts", | |
"type": "module", | |
"scripts": { | |
"test": "echo \"Error: no test specified\" && exit 1", | |
"start": "npx ts-node --loader ts-node/esm api/index.ts" | |
}, | |
"keywords": [], | |
"author": "", | |
"license": "ISC", | |
"dependencies": { | |
"dotenv": "^16.4.1", | |
"express": "^4.21.2", | |
"node-fetch": "^3.3.2", | |
"typescript": "^5.7.2" | |
}, | |
"devDependencies": { | |
"@types/dotenv": "^6.1.1", | |
"@types/express": "^5.0.0", | |
"ts-node": "^10.9.2" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment