Skip to content

Instantly share code, notes, and snippets.

@matsubo
Created December 24, 2024 10:10
Show Gist options
  • Save matsubo/39adc10cf5167e5cc6d05fe65215ae72 to your computer and use it in GitHub Desktop.
Save matsubo/39adc10cf5167e5cc6d05fe65215ae72 to your computer and use it in GitHub Desktop.
Realtime translation using OpenAI realtime API via WebRTC
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootswatch/4.5.2/cerulean/bootstrap.min.css">
<title>Realtime Translation Test</title>
</head>
<body>
<div class="container mt-5">
<div class="form-group">
<!-- init() creates <select id="modelSelect"> and appends it to this first .form-group,
     so the label must target that id rather than the input-language select. -->
<label for="modelSelect">Model</label>
</div>
<div class="form-group">
<label for="inputLanguage">Input Language:</label>
<select id="inputLanguage" class="form-control">
<option value="Japanese">Japanese</option>
<option value="Spanish">Spanish</option>
<option value="French">French</option>
</select>
</div>
<div class="form-group">
<label for="outputLanguage">Output Language:</label>
<select id="outputLanguage" class="form-control">
<option value="English">English</option>
<option value="Spanish">Spanish</option>
<option value="Hindi">Hindi</option>
<option value="Mandarin Chinese">Mandarin Chinese</option>
<option value="French">French</option>
</select>
</div>
<!-- Card showing the translated transcript streamed back over the data channel. -->
<div class="card my-3">
<div class="card-body">
<h5 class="card-title">Response</h5>
<!-- A <blockquote> may not be nested inside <p>; a <div> keeps the markup valid. -->
<div class="card-text">
<blockquote id="translatedText"></blockquote>
</div>
</div>
</div>
<!-- Card showing the most recent instruction prompt sent over the data channel. -->
<div class="card my-3">
<div class="card-body">
<h5 class="card-title">Prompt via data channel</h5>
<div class="card-text">
<blockquote id="promptText"></blockquote>
</div>
</div>
</div>
<script>
/**
 * Tells whether a parsed data-channel event reports a successfully completed response.
 *
 * The event must have type "response.done". The status is read from
 * `event.response.status` (where the OpenAI Realtime server-event reference places
 * it) and falls back to a top-level `status` field, which is where the previous
 * implementation looked.
 *
 * @param realtimeEvent Parsed event object received on the data channel.
 * @returns true only for a "response.done" event whose status is "completed".
 */
function isCompletedResponseEvent(realtimeEvent) {
  if (!realtimeEvent || realtimeEvent.type !== "response.done") {
    return false;
  }
  const nested = realtimeEvent.response;
  const status = nested && nested.status !== undefined ? nested.status : realtimeEvent.status;
  return status === "completed";
}

/**
 * Sets up the realtime translation page.
 *
 * Steps: fetch an ephemeral key from "/api", build the model selector, capture the
 * microphone, open an RTCPeerConnection with an "oai-events" data channel, exchange
 * SDP with the OpenAI realtime endpoint, and wire the language/model selectors so a
 * change re-sends the translation instructions.
 *
 * @returns A promise that resolves once the remote SDP answer has been applied.
 * @throws Error when the key request or the SDP exchange returns a non-2xx status;
 *         also rejects if microphone access or any WebRTC step fails.
 */
async function init() {
  const tokenResponse = await fetch("/api");
  if (!tokenResponse.ok) {
    throw new Error(`Failed to fetch ephemeral key: HTTP ${tokenResponse.status}`);
  }
  const data = await tokenResponse.json();
  const EPHEMERAL_KEY = data.client_secret.value;
  const baseUrl = "https://api.openai.com/v1/realtime";

  // Build the model selector and attach it to the first .form-group on the page.
  const modelSelect = document.createElement('select');
  modelSelect.id = 'modelSelect';
  modelSelect.className = 'form-control';
  const models = ["gpt-4o-realtime-preview-2024-12-17", "gpt-3.5-turbo", "gpt-3.5-turbo-16k"];
  models.forEach(m => {
    const option = document.createElement('option');
    option.value = m;
    option.text = m;
    modelSelect.appendChild(option);
  });
  document.querySelector('.form-group').appendChild(modelSelect);

  const pc = new RTCPeerConnection();
  const audioEl = document.createElement("audio");
  audioEl.autoplay = true; // remote audio starts playing as soon as a track arrives
  pc.ontrack = e => audioEl.srcObject = e.streams[0];
  const ms = await navigator.mediaDevices.getUserMedia({
    audio: true
  });
  pc.addTrack(ms.getTracks()[0]);
  const dc = pc.createDataChannel("oai-events");

  // Defined before any handler that calls it, so no handler can run while the
  // binding is still uninitialised. It is a no-op until the channel is open.
  const sendTranslationInstructions = () => {
    if (dc.readyState === "open") {
      const inputLang = document.getElementById('inputLanguage').value;
      const outputLang = document.getElementById('outputLanguage').value;
      const promptText = `Translate from ${inputLang} to ${outputLang}`;
      const responseCreate = {
        type: "response.create",
        response: {
          modalities: ["text"],
          instructions: promptText,
        },
      };
      document.getElementById('promptText').textContent = promptText;
      dc.send(JSON.stringify(responseCreate));
      console.info(responseCreate)
    }
  };

  // NOTE(review): the model is only used in the SDP request URL below; changing it
  // after the connection is established just re-sends the instructions.
  modelSelect.onchange = sendTranslationInstructions;
  document.getElementById('inputLanguage').onchange = sendTranslationInstructions;
  document.getElementById('outputLanguage').onchange = sendTranslationInstructions;

  dc.addEventListener("open", () => {
    // Initial instruction send when the data channel is open
    sendTranslationInstructions();
  });
  dc.addEventListener("message", (e) => {
    const realtimeEvent = JSON.parse(e.data);
    console.info(e.data);
    const translatedTextElement = document.getElementById('translatedText');
    if (realtimeEvent.type === "response.audio_transcript.delta") {
      // Append as a text node so model output is never interpreted as HTML.
      translatedTextElement.appendChild(document.createTextNode(realtimeEvent.delta));
      // Optional: speak the translated text (left disabled)
      // const utterance = new SpeechSynthesisUtterance(realtimeEvent.delta);
      // window.speechSynthesis.speak(utterance);
    } else if (isCompletedResponseEvent(realtimeEvent)) {
      // Add a line break when a response ends
      translatedTextElement.appendChild(document.createElement('br'));
    }
  });

  const offer = await pc.createOffer();
  await pc.setLocalDescription(offer);
  // Read the selector now so a choice made while the page was waiting on the
  // microphone prompt is honoured.
  const model = modelSelect.value;
  const sdpResponse = await fetch(`${baseUrl}?model=${encodeURIComponent(model)}`, {
    method: "POST",
    body: offer.sdp,
    headers: {
      Authorization: `Bearer ${EPHEMERAL_KEY}`,
      "Content-Type": "application/sdp"
    },
  });
  if (!sdpResponse.ok) {
    throw new Error(`SDP exchange failed: HTTP ${sdpResponse.status}`);
  }
  const answer = {
    type: "answer",
    sdp: await sdpResponse.text(),
  };
  await pc.setRemoteDescription(answer);
}
// Kick off setup; report failures instead of leaving an unhandled promise rejection.
init().catch((err) => {
  console.error("Realtime translation setup failed:", err);
});
</script>
</body>
</html>
import { fileURLToPath } from "node:url";

import express from "express";
import { Request, Response } from "express";
import fetch from "node-fetch";
const app = express();

/**
 * GET /api — asks the OpenAI realtime sessions endpoint for a new session and
 * relays the JSON body to the caller.
 *
 * Responses:
 * - 200 with the upstream JSON on success.
 * - The upstream status with `{ error: "Failed to fetch data from OpenAI API" }`
 *   when OpenAI answers with a non-2xx status.
 * - 500 with `{ error: "Internal Server Error" }` when the API key is missing or
 *   the request throws.
 */
app.get("/api", async (_req: Request, res: Response) => {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    // Fail fast rather than sending "Bearer undefined" upstream.
    console.error("OPENAI_API_KEY is not set");
    res.status(500).send({ error: "Internal Server Error" });
    return;
  }
  try {
    const r = await fetch("https://api.openai.com/v1/realtime/sessions", {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-4o-realtime-preview-2024-12-17",
        voice: "verse",
      }),
    });
    if (!r.ok) {
      console.error(`OpenAI session request failed: ${r.status} ${r.statusText}`);
      res.status(r.status).send({ error: "Failed to fetch data from OpenAI API" });
      return;
    }
    const data = await r.json();
    res.send(data);
  } catch (error: unknown) {
    // Log the cause server-side; the client only gets a generic message.
    console.error("Failed to create realtime session:", error);
    res.status(500).send({ error: "Internal Server Error" });
  }
});
/**
 * GET / — serves index.html from the `public` directory that sits one level above
 * this module.
 */
app.get("/", (_req: Request, res: Response) => {
  // fileURLToPath decodes percent-escapes and yields a platform-correct path;
  // URL#pathname would keep "%20" for spaces and a leading "/" before Windows drives.
  const publicDir = fileURLToPath(new URL("../public", import.meta.url));
  res.sendFile("index.html", { root: publicDir });
});
// Outside production, start a listener on port 3000. In production only the
// default export below is provided (presumably the host imports it — confirm).
const isProduction = process.env.NODE_ENV === "production";
if (!isProduction) {
  const port = 3000;
  const announce = (): void => {
    console.log("Server is running on port 3000");
  };
  app.listen(port, announce);
}

export default app;
{
"name": "express",
"version": "1.0.0",
"description": "Realtime translation using OpenAI realtime API via WebRTC",
"main": "api/index.ts",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "npx ts-node --loader ts-node/esm api/index.ts"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"dotenv": "^16.4.1",
"express": "^4.21.2",
"node-fetch": "^3.3.2",
"typescript": "^5.7.2"
},
"devDependencies": {
"@types/dotenv": "^6.1.1",
"@types/express": "^5.0.0",
"ts-node": "^10.9.2"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment