AI-Powered Localization - Audio Narration Generation Script
/**
 * generate-narration.js
 *
 * Generates audio narration for HTML pages using the ElevenLabs text-to-speech API.
 * Supports multiple languages via multilingual voice models.
 *
 * REQUIREMENTS:
 * - Node.js 18+
 * - npm packages: fs-extra, jsdom, @vercel/kv, dotenv
 * - ElevenLabs API key (https://elevenlabs.io)
 * - Vercel KV credentials (for fetching translations)
 *
 * SETUP:
 * 1. npm install fs-extra jsdom @vercel/kv dotenv
 * 2. Add ELEVENLABS_API_KEY to .env.local
 * 3. Run `vercel env pull .env.local` to get KV credentials
 * 4. Create a voice in ElevenLabs (or use a preset voice)
 * 5. Update VOICE_ID below with your voice ID
 *
 * USAGE:
 *   node generate-narration.js <path-to-html> [options]
 *
 * OPTIONS:
 *   --lang <code>   Generate for specific language (default: en)
 *   --all-langs     Generate for all narration-enabled languages
 *   --resume        Skip existing audio files (useful for interrupted runs)
 *
 * EXAMPLES:
 *   node generate-narration.js index.html
 *   node generate-narration.js about/index.html --lang es
 *   node generate-narration.js index.html --all-langs
 *   node generate-narration.js index.html --all-langs --resume
 *
 * OUTPUT:
 *   Audio files are saved to /audio/{lang}/{page}/p{n}.mp3
 *   (the home page saves directly to /audio/{lang}/p{n}.mp3).
 *   Examples: /audio/en/p0.mp3 (home page), /audio/es/about/p1.mp3
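 *
 * MARKUP:
 *   Tag each element to narrate with data-narration. For non-English runs,
 *   the element's data-l10n-id (or its children's) is looked up in the
 *   cached translations, with the English text as a fallback.
 *   Illustrative example (the l10n id here is made up):
 *   <p data-narration="0" data-l10n-id="intro-copy">Welcome to the site.</p>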
*/
const fs = require('fs-extra');
const path = require('path');
const { JSDOM } = require('jsdom');
const crypto = require('crypto');
const { createClient } = require('@vercel/kv');
require('dotenv').config({ path: '.env.local' });
// ============================================================================
// CONFIGURATION - Update these values for your project
// ============================================================================
// Your ElevenLabs API key (from .env.local)
const API_KEY = process.env.ELEVENLABS_API_KEY;
// Your ElevenLabs voice ID
// Find this in your ElevenLabs dashboard under "Voices"
// You can use a cloned voice or any preset voice
const VOICE_ID = 'YOUR_VOICE_ID_HERE';
// ElevenLabs API endpoint
const API_URL = `https://api.elevenlabs.io/v1/text-to-speech/${VOICE_ID}`;
// Languages with narration support
// These should match languages you've configured in your localization system
const NARRATION_ENABLED_LANGUAGES = ['en', 'es', 'zh', 'hi', 'ar', 'fr'];
// Language code to name mapping (for fetching translations from KV)
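// Note: these names are used verbatim in the KV cache key (`trans:{page}:{Name}`),
// so they must match whatever your translation step stored.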
const LANGUAGE_NAMES = {
  'en': 'English',
  'es': 'Spanish',
  'zh': 'Chinese',
  'hi': 'Hindi',
  'ar': 'Arabic',
  'fr': 'French'
};
// ============================================================================
// END CONFIGURATION
// ============================================================================
// Initialize Vercel KV client for fetching translations
let kv = null;
if (process.env.KV_REST_API_URL && process.env.KV_REST_API_TOKEN) {
  kv = createClient({
    url: process.env.KV_REST_API_URL,
    token: process.env.KV_REST_API_TOKEN,
  });
}
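// Expected .env.local entries (the variable names read above):
//   ELEVENLABS_API_KEY=...
//   KV_REST_API_URL=...       (pulled via `vercel env pull .env.local`)
//   KV_REST_API_TOKEN=...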
/**
 * Main narration generation function
 */
async function generateNarration(filePath, langCode = 'en', resumeMode = false) {
  // Validate API key
  if (!API_KEY) {
    console.error('Error: ELEVENLABS_API_KEY not found in .env.local');
    console.error('Add your API key: ELEVENLABS_API_KEY=your_key_here');
    process.exit(1);
  }
  if (VOICE_ID === 'YOUR_VOICE_ID_HERE') {
    console.error('Error: VOICE_ID not configured');
    console.error('Update VOICE_ID in this script with your ElevenLabs voice ID');
    process.exit(1);
  }
  const isEnglish = langCode === 'en';
  const langName = LANGUAGE_NAMES[langCode] || langCode;
  // For non-English, we need KV access to get translations
  if (!isEnglish && !kv) {
    console.error('Error: Vercel KV credentials not found in .env.local');
    console.error('Run: vercel env pull .env.local');
    process.exit(1);
  }
  // Validate file exists
  const fullPath = path.resolve(filePath);
  if (!fs.existsSync(fullPath)) {
    console.error(`Error: File not found: ${filePath}`);
    process.exit(1);
  }
  // Parse HTML
  let html = await fs.readFile(fullPath, 'utf8');
  const dom = new JSDOM(html);
  const document = dom.window.document;
  // Find all elements with data-narration attribute
  const elements = Array.from(document.querySelectorAll('[data-narration]'));
  if (elements.length === 0) {
    console.log('No data-narration elements found.');
    return;
  }
  const pageName = getPageName(filePath);
  // Create audio directory: /audio/{lang}/{page}/ (home page goes directly under /audio/{lang}/)
  const audioDir = path.join(
    process.cwd(),
    'audio',
    langCode,
    pageName === 'home' ? '' : pageName
  );
  await fs.ensureDir(audioDir);
  // Fetch translations if non-English
  let translations = null;
  if (!isEnglish) {
    console.log(`Fetching ${langName} translations from Vercel KV...`);
    const cacheKey = `trans:${pageName}:${langName}`;
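    // Assumed shape of the cached KV value (as written by pre-translate.js):
    //   { content: { "<data-l10n-id>": "<translated HTML>", ... } }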
    try {
      const cached = await kv.get(cacheKey);
      if (cached && cached.content) {
        translations = cached.content;
        console.log(`Found cached ${langName} translations`);
      } else {
        console.error(`Error: No ${langName} translations found for ${pageName}`);
        console.error(`Run: node pre-translate.js first`);
        process.exit(1);
      }
    } catch (error) {
      console.error('Error fetching translations:', error.message);
      process.exit(1);
    }
  }
  // Load content hashes for tracking
  const hashFile = path.join(process.cwd(), 'content-hashes.json');
  let hashes = {};
  if (fs.existsSync(hashFile)) {
    hashes = await fs.readJson(hashFile);
  }
  if (!hashes[pageName]) {
    hashes[pageName] = {};
  }
  // Select model based on language
  // eleven_monolingual_v1: English only, faster
  // eleven_multilingual_v2: All languages, supports voice cloning across languages
  const modelId = isEnglish ? 'eleven_monolingual_v1' : 'eleven_multilingual_v2';
  console.log(`Processing ${elements.length} narration elements for ${pageName} [${langCode}]...`);
  console.log(`Using model: ${modelId}`);
  // For English, renumber elements sequentially and update HTML
  if (isEnglish) {
    let htmlModified = false;
    for (let newIndex = 0; newIndex < elements.length; newIndex++) {
      const el = elements[newIndex];
      const oldIndex = el.dataset.narration;
      if (oldIndex !== String(newIndex)) {
        const oldAttr = `data-narration="${oldIndex}"`;
        const newAttr = `data-narration="${newIndex}"`;
        const elementOuterHTML = el.outerHTML;
        const updatedOuterHTML = elementOuterHTML.replace(oldAttr, newAttr);
        html = html.replace(elementOuterHTML, updatedOuterHTML);
        htmlModified = true;
        console.log(`Renumbering: p${oldIndex} → p${newIndex}`);
      }
    }
    if (htmlModified) {
      await fs.writeFile(fullPath, html, 'utf8');
      console.log(`Updated ${filePath} with renumbered data-narration attributes`);
    }
  }
  // Handle existing audio files
  let existingAudioFiles = new Set();
  try {
    const existingFiles = await fs.readdir(audioDir);
    const audioFiles = existingFiles.filter(f => f.match(/^p\d+\.mp3$/));
    if (resumeMode) {
      existingAudioFiles = new Set(audioFiles);
      console.log(`Resume mode: Found ${audioFiles.length} existing audio files to skip`);
    } else {
      // Delete existing files for clean regeneration
      for (const file of audioFiles) {
        await fs.remove(path.join(audioDir, file));
      }
      if (audioFiles.length > 0) {
        console.log(`Deleted ${audioFiles.length} existing audio files`);
      }
    }
  } catch (e) {
    // Directory might not exist yet
  }
  // Generate audio for each element
  const newHashes = {};
  for (let i = 0; i < elements.length; i++) {
    const el = elements[i];
    let text;
    if (isEnglish) {
      // Extract text from English HTML
      text = extractTextFromElement(el);
    } else {
      // Get translated text
      text = getTranslatedText(el, translations);
    }
    // Normalize text
    text = normalizeText(text);
    if (!text || text.length === 0) {
      console.warn(`Warning: p${i} has no text content, skipping...`);
      continue;
    }
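    // Hash the narration text; stored in content-hashes.json (English only, below)
    // so changed content can presumably be detected between runs.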
    const hash = crypto.createHash('sha256').update(text).digest('hex');
    const fileName = `p${i}.mp3`;
    const outputPath = path.join(audioDir, fileName);
    // Skip if file exists in resume mode
    if (resumeMode && existingAudioFiles.has(fileName)) {
      console.log(`Skipping p${i} (already exists)`);
      newHashes[i] = hash;
      continue;
    }
    console.log(`Generating p${i}... "${text.substring(0, 50)}..."`);
    try {
      const response = await fetch(API_URL, {
        method: 'POST',
        headers: {
          'Accept': 'audio/mpeg',
          'Content-Type': 'application/json',
          'xi-api-key': API_KEY
        },
        body: JSON.stringify({
          text: text,
          model_id: modelId,
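          // Roughly: stability trades consistency for expressiveness;
          // similarity_boost controls how closely output tracks the source voice.
          // Tune to taste.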
          voice_settings: {
            stability: 0.5,
            similarity_boost: 0.75
          }
        })
      });
      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`ElevenLabs API error: ${response.status} - ${errorText}`);
      }
      const buffer = Buffer.from(await response.arrayBuffer());
      await fs.writeFile(outputPath, buffer);
      newHashes[i] = hash;
      console.log(`Saved ${fileName}`);
    } catch (error) {
      console.error(`Failed to generate p${i}:`, error.message);
    }
  }
  // Update content hashes (English only)
  if (isEnglish) {
    if (hashes[pageName] && hashes[pageName]._translationHash) {
      newHashes._translationHash = hashes[pageName]._translationHash;
    }
    hashes[pageName] = newHashes;
    await fs.writeJson(hashFile, hashes, { spaces: 2 });
  }
  console.log(`Narration generation complete for ${langCode}.`);
}
/**
 * Extract readable text from an HTML element
 */
function extractTextFromElement(el) {
  const clone = el.cloneNode(true);
  // Remove elements that shouldn't be narrated
  clone.querySelectorAll('.copy-button, .line-number').forEach(node => node.remove());
  let text = clone.textContent
    .replace(/\|/g, ', ')            // Tables: pipe to comma
    .replace(/\\\((.*?)\\\)/g, '$1') // LaTeX inline
    .replace(/\\\[(.*?)\\\]/g, '$1') // LaTeX display
    .replace(/\s*\n\s*/g, ' . ')     // Newlines to pauses
    .trim();
  return text;
}
/**
 * Get translated text for an element
 */
function getTranslatedText(el, translations) {
  const l10nId = el.dataset.l10nId;
  if (l10nId && translations[l10nId]) {
    // Element has l10n-id - use its translation
    const translatedHtml = translations[l10nId];
    const tempDom = new JSDOM(`<div>${translatedHtml}</div>`);
    const tempEl = tempDom.window.document.querySelector('div');
    return tempEl.textContent.trim();
  }
  // Check if children have l10n-ids
  const childL10nElements = el.querySelectorAll('[data-l10n-id]');
  if (childL10nElements.length > 0) {
    const translatedParts = [];
    for (const child of childL10nElements) {
      const childL10nId = child.dataset.l10nId;
      if (childL10nId && translations[childL10nId]) {
        const translatedHtml = translations[childL10nId];
        const tempDom = new JSDOM(`<div>${translatedHtml}</div>`);
        const tempEl = tempDom.window.document.querySelector('div');
        translatedParts.push(tempEl.textContent.trim());
      }
    }
    if (translatedParts.length > 0) {
      return translatedParts.join(' . ');
    }
  }
  // Fallback: use English source text
  return extractTextFromElement(el);
}
/**
 * Normalize text for TTS
 */
function normalizeText(text) {
  return text
    .replace(/\|/g, ', ')
    .replace(/\\\((.*?)\\\)/g, '$1')
    .replace(/\\\[(.*?)\\\]/g, '$1')
    .replace(/\s*\n\s*/g, ' . ')
    .trim()
    .replace(/\.\s*\./g, '.')               // Remove double periods
    .replace(/^\.\s*/, '')                  // Remove leading period
    .replace(/\s+/g, ' ')                   // Collapse whitespace
    .replace(/[\u200B-\u200D\uFEFF]/g, ''); // Remove zero-width chars
}
/**
 * Extract page name from file path
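 * e.g. ./index.html -> "home", ./about/index.html -> "about", ./faq.html -> "faq"
 * (illustrative paths; resolution is relative to the directory the script runs from)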
*/
function getPageName(filePath) {
  const fullPath = path.resolve(filePath);
  const fileName = path.basename(fullPath, '.html');
  const parentDir = path.dirname(fullPath);
  const dirName = path.basename(parentDir);
  const rootDir = process.cwd();
  if (fileName === 'index') {
    if (parentDir === rootDir) return 'home';
    return dirName;
  }
  return fileName;
}
// ============================================================================
// CLI ARGUMENT PARSING
// ============================================================================
const args = process.argv.slice(2);
let targetFile = null;
let langCode = 'en';
let resumeMode = false;
let allLangsMode = false;
for (let i = 0; i < args.length; i++) {
  if (args[i] === '--lang' && args[i + 1]) {
    langCode = args[i + 1];
    i++;
  } else if (args[i] === '--resume') {
    resumeMode = true;
  } else if (args[i] === '--all-langs') {
    allLangsMode = true;
  } else if (!args[i].startsWith('--')) {
    targetFile = args[i];
  }
}
if (!targetFile) {
  console.log('Usage: node generate-narration.js <path-to-html> [options]');
  console.log('');
  console.log('Options:');
  console.log('  --lang <code>   Generate for specific language (default: en)');
  console.log('  --all-langs     Generate for all narration-enabled languages');
  console.log('  --resume        Skip existing audio files');
  console.log('');
  console.log('Examples:');
  console.log('  node generate-narration.js index.html');
  console.log('  node generate-narration.js about/index.html --lang es');
  console.log('  node generate-narration.js index.html --all-langs');
  process.exit(1);
}
// Main execution
async function main() {
  if (allLangsMode) {
    console.log(`\n=== Generating narration for ${NARRATION_ENABLED_LANGUAGES.length} languages ===\n`);
    for (const lang of NARRATION_ENABLED_LANGUAGES) {
      console.log(`\n--- ${LANGUAGE_NAMES[lang] || lang} (${lang}) ---\n`);
      await generateNarration(targetFile, lang, resumeMode);
    }
    console.log(`\n=== Complete! ===\n`);
  } else {
    await generateNarration(targetFile, langCode, resumeMode);
  }
}
main().catch(console.error);