Created
January 5, 2026 22:25
-
-
Save misterburton/5fe2e48ead4b456f98b9c701ded54314 to your computer and use it in GitHub Desktop.
AI-Powered Localization - Audio Narration Generation Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * generate-narration.js | |
| * | |
| * Generates audio narration for HTML pages using ElevenLabs text-to-speech API. | |
| * Supports multiple languages using multilingual voice models. | |
| * | |
| * REQUIREMENTS: | |
| * - Node.js 18+ | |
| * - npm packages: fs-extra, jsdom, @vercel/kv, dotenv | |
| * - ElevenLabs API key (https://elevenlabs.io) | |
| * - Vercel KV credentials (for fetching translations) | |
| * | |
| * SETUP: | |
| * 1. npm install fs-extra jsdom @vercel/kv dotenv | |
| * 2. Add ELEVENLABS_API_KEY to .env.local | |
| * 3. Run `vercel env pull .env.local` to get KV credentials | |
| * 4. Create a voice in ElevenLabs (or use a preset voice) | |
| * 5. Update VOICE_ID below with your voice ID | |
| * | |
| * USAGE: | |
| * node generate-narration.js <path-to-html> [options] | |
| * | |
| * OPTIONS: | |
| * --lang <code> Generate for specific language (default: en) | |
| * --all-langs Generate for all narration-enabled languages | |
| * --resume Skip existing audio files (useful for interrupted runs) | |
| * | |
| * EXAMPLES: | |
| * node generate-narration.js index.html | |
| * node generate-narration.js about/index.html --lang es | |
| * node generate-narration.js index.html --all-langs | |
| * node generate-narration.js index.html --all-langs --resume | |
| * | |
| * OUTPUT: | |
| * Audio files are saved to: /audio/{lang}/{page}/p{n}.mp3 | |
| * (the root index.html, i.e. the "home" page, is saved directly under /audio/{lang}/) | |
| * Example: /audio/en/p0.mp3 (home page), /audio/es/about/p1.mp3 | |
| */ | |
| const fs = require('fs-extra'); | |
| const path = require('path'); | |
| const { JSDOM } = require('jsdom'); | |
| const crypto = require('crypto'); | |
| const { createClient } = require('@vercel/kv'); | |
| require('dotenv').config({ path: '.env.local' }); | |
// ============================================================================
// CONFIGURATION - Update these values for your project
// ============================================================================

// Your ElevenLabs API key (from .env.local)
const API_KEY = process.env.ELEVENLABS_API_KEY;

// Your ElevenLabs voice ID
// Find this in your ElevenLabs dashboard under "Voices"
// You can use a cloned voice or any preset voice
// Either replace the placeholder below, or set ELEVENLABS_VOICE_ID in
// .env.local so the script does not have to be edited per project.
const VOICE_ID = process.env.ELEVENLABS_VOICE_ID || 'YOUR_VOICE_ID_HERE';

// ElevenLabs API endpoint
const API_URL = `https://api.elevenlabs.io/v1/text-to-speech/${VOICE_ID}`;

// Languages with narration support
// These should match languages you've configured in your localization system
const NARRATION_ENABLED_LANGUAGES = ['en', 'es', 'zh', 'hi', 'ar', 'fr'];

// Language code to name mapping (for fetching translations from KV)
// The name (not the code) is used when building the KV cache key.
const LANGUAGE_NAMES = {
  'en': 'English',
  'es': 'Spanish',
  'zh': 'Chinese',
  'hi': 'Hindi',
  'ar': 'Arabic',
  'fr': 'French'
};

// ============================================================================
// END CONFIGURATION
// ============================================================================
// Vercel KV client used to fetch cached translations.
// Stays null when either KV credential is missing from the environment;
// callers must check for null before using it.
const { KV_REST_API_URL: kvRestUrl, KV_REST_API_TOKEN: kvRestToken } = process.env;
let kv = null;
if (kvRestUrl && kvRestToken) {
  kv = createClient({ url: kvRestUrl, token: kvRestToken });
}
/**
 * Main narration generation function.
 *
 * Generates one MP3 per `[data-narration]` element of an HTML page:
 *  1. Validates configuration (API key, voice ID, KV client for non-English
 *     languages) and that the input file exists.
 *  2. Parses the HTML and collects the `[data-narration]` elements in
 *     document order.
 *  3. For non-English languages, loads the page's translations from Vercel KV
 *     (key `trans:{page}:{LanguageName}`).
 *  4. For English, renumbers the `data-narration` attributes sequentially and
 *     writes the HTML file back if anything changed.
 *  5. Deletes existing `p{n}.mp3` files, or remembers them in resume mode.
 *  6. Requests audio for each element from ElevenLabs and saves it as
 *     `audio/{lang}/{page}/p{n}.mp3` (the "home" page goes directly into
 *     `audio/{lang}/`). A failed element is logged and skipped.
 *  7. For English, stores the per-element content hashes in
 *     `content-hashes.json`, preserving an existing `_translationHash`.
 *
 * Configuration or input errors terminate the process with exit code 1.
 *
 * @param {string} filePath - Path to the HTML file (relative paths resolve against the cwd).
 * @param {string} [langCode='en'] - Language code to generate narration for.
 * @param {boolean} [resumeMode=false] - When true, keep existing audio files and skip them.
 * @returns {Promise<void>}
 */
async function generateNarration(filePath, langCode = 'en', resumeMode = false) {
  // Validate API key
  if (!API_KEY) {
    console.error('Error: ELEVENLABS_API_KEY not found in .env.local');
    console.error('Add your API key: ELEVENLABS_API_KEY=your_key_here');
    process.exit(1);
  }
  if (VOICE_ID === 'YOUR_VOICE_ID_HERE') {
    console.error('Error: VOICE_ID not configured');
    console.error('Update VOICE_ID in this script with your ElevenLabs voice ID');
    process.exit(1);
  }

  const isEnglish = langCode === 'en';
  const langName = LANGUAGE_NAMES[langCode] || langCode;

  // For non-English, we need KV access to get translations
  if (!isEnglish && !kv) {
    console.error('Error: Vercel KV credentials not found in .env.local');
    console.error('Run: vercel env pull .env.local');
    process.exit(1);
  }

  // Validate file exists
  const fullPath = path.resolve(filePath);
  if (!fs.existsSync(fullPath)) {
    console.error(`Error: File not found: ${filePath}`);
    process.exit(1);
  }

  // Parse HTML
  let html = await fs.readFile(fullPath, 'utf8');
  const dom = new JSDOM(html);
  const document = dom.window.document;

  // Find all elements with data-narration attribute
  const elements = Array.from(document.querySelectorAll('[data-narration]'));
  if (elements.length === 0) {
    console.log('No data-narration elements found.');
    return;
  }

  const pageName = getPageName(filePath);

  // Create audio directory: /audio/{lang}/{page}/ (home page: /audio/{lang}/)
  const audioDir = path.join(
    process.cwd(),
    'audio',
    langCode,
    pageName === 'home' ? '' : pageName
  );
  await fs.ensureDir(audioDir);

  // Fetch translations if non-English
  let translations = null;
  if (!isEnglish) {
    console.log(`Fetching ${langName} translations from Vercel KV...`);
    const cacheKey = `trans:${pageName}:${langName}`;
    try {
      const cached = await kv.get(cacheKey);
      if (cached && cached.content) {
        translations = cached.content;
        console.log(`Found cached ${langName} translations`);
      } else {
        console.error(`Error: No ${langName} translations found for ${pageName}`);
        console.error(`Run: node pre-translate.js first`);
        process.exit(1);
      }
    } catch (error) {
      console.error('Error fetching translations:', error.message);
      process.exit(1);
    }
  }

  // Load content hashes for tracking
  const hashFile = path.join(process.cwd(), 'content-hashes.json');
  let hashes = {};
  if (fs.existsSync(hashFile)) {
    hashes = await fs.readJson(hashFile);
  }
  if (!hashes[pageName]) {
    hashes[pageName] = {};
  }

  // Select model based on language
  // eleven_monolingual_v1: English only, faster
  // eleven_multilingual_v2: All languages, supports voice cloning across languages
  const modelId = isEnglish ? 'eleven_monolingual_v1' : 'eleven_multilingual_v2';
  console.log(`Processing ${elements.length} narration elements for ${pageName} [${langCode}]...`);
  console.log(`Using model: ${modelId}`);

  // For English, renumber elements sequentially and update HTML
  if (isEnglish) {
    let htmlModified = false;
    for (let newIndex = 0; newIndex < elements.length; newIndex++) {
      const el = elements[newIndex];
      const oldIndex = el.dataset.narration;
      if (oldIndex === String(newIndex)) {
        continue;
      }

      const elementOuterHTML = el.outerHTML;
      // The serialized element can differ from the raw source (attribute
      // quoting, entity encoding, ...). Do not claim a renumbering that the
      // textual replacement below would not actually perform.
      if (!html.includes(elementOuterHTML)) {
        console.warn(
          `Warning: could not locate p${oldIndex} in ${filePath}; data-narration attribute left unchanged`
        );
        continue;
      }

      const oldAttr = `data-narration="${oldIndex}"`;
      const newAttr = `data-narration="${newIndex}"`;
      const updatedOuterHTML = elementOuterHTML.replace(oldAttr, newAttr);
      // Use a replacer function: with a plain replacement string, "$$", "$&",
      // "$1", ... inside the page content (e.g. LaTeX "$$...$$") would be
      // interpreted as replacement patterns and corrupt the HTML.
      html = html.replace(elementOuterHTML, () => updatedOuterHTML);
      htmlModified = true;
      console.log(`Renumbering: p${oldIndex} → p${newIndex}`);
    }
    if (htmlModified) {
      await fs.writeFile(fullPath, html, 'utf8');
      console.log(`Updated ${filePath} with renumbered data-narration attributes`);
    }
  }

  // Handle existing audio files
  let existingAudioFiles = new Set();
  try {
    const existingFiles = await fs.readdir(audioDir);
    const audioFiles = existingFiles.filter(f => f.match(/^p\d+\.mp3$/));
    if (resumeMode) {
      existingAudioFiles = new Set(audioFiles);
      console.log(`Resume mode: Found ${audioFiles.length} existing audio files to skip`);
    } else {
      // Delete existing files for clean regeneration
      for (const file of audioFiles) {
        await fs.remove(path.join(audioDir, file));
      }
      if (audioFiles.length > 0) {
        console.log(`Deleted ${audioFiles.length} existing audio files`);
      }
    }
  } catch (e) {
    // Best effort: keep going, but make the problem visible instead of
    // silently leaving stale audio files behind.
    console.warn(`Warning: could not process existing audio files in ${audioDir}: ${e.message}`);
  }

  // Generate audio for each element
  const newHashes = {};
  let failedCount = 0;
  for (let i = 0; i < elements.length; i++) {
    const el = elements[i];

    // English text comes from the page itself, other languages from KV
    let text = isEnglish
      ? extractTextFromElement(el)
      : getTranslatedText(el, translations);

    // Normalize text
    text = normalizeText(text);
    if (!text || text.length === 0) {
      console.warn(`Warning: p${i} has no text content, skipping...`);
      continue;
    }

    const hash = crypto.createHash('sha256').update(text).digest('hex');
    const fileName = `p${i}.mp3`;
    const outputPath = path.join(audioDir, fileName);

    // Skip if file exists in resume mode
    if (resumeMode && existingAudioFiles.has(fileName)) {
      console.log(`Skipping p${i} (already exists)`);
      newHashes[i] = hash;
      continue;
    }

    console.log(`Generating p${i}... "${text.substring(0, 50)}..."`);
    try {
      const response = await fetch(API_URL, {
        method: 'POST',
        headers: {
          'Accept': 'audio/mpeg',
          'Content-Type': 'application/json',
          'xi-api-key': API_KEY
        },
        body: JSON.stringify({
          text: text,
          model_id: modelId,
          voice_settings: {
            stability: 0.5,
            similarity_boost: 0.75
          }
        })
      });
      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`ElevenLabs API error: ${response.status} - ${errorText}`);
      }
      const buffer = Buffer.from(await response.arrayBuffer());
      await fs.writeFile(outputPath, buffer);
      // Only record the hash once the audio is safely on disk
      newHashes[i] = hash;
      console.log(`Saved ${fileName}`);
    } catch (error) {
      failedCount++;
      console.error(`Failed to generate p${i}:`, error.message);
    }
  }

  // Update content hashes (English only)
  if (isEnglish) {
    // Carry over the translation hash stored alongside the narration hashes
    if (hashes[pageName] && hashes[pageName]._translationHash) {
      newHashes._translationHash = hashes[pageName]._translationHash;
    }
    hashes[pageName] = newHashes;
    await fs.writeJson(hashFile, hashes, { spaces: 2 });
  }

  if (failedCount > 0) {
    console.warn(`Warning: ${failedCount} narration element(s) failed for ${langCode}; re-run with --resume to retry them`);
  }
  console.log(`Narration generation complete for ${langCode}.`);
}
/**
 * Build the narratable plain text of an element.
 *
 * Works on a deep copy so the parsed document is left untouched. Nodes
 * matching `.copy-button` or `.line-number` are dropped, pipes become commas,
 * single-line LaTeX delimiters `\( \)` and `\[ \]` are stripped, and line
 * breaks become ` . ` pauses.
 *
 * @param {Element} el - Element to read text from.
 * @returns {string} Trimmed text.
 */
function extractTextFromElement(el) {
  const copy = el.cloneNode(true);

  // UI chrome that must not be read aloud
  for (const unwanted of Array.from(copy.querySelectorAll('.copy-button, .line-number'))) {
    unwanted.remove();
  }

  const withCommas = copy.textContent.replace(/\|/g, ', ');               // Tables: pipe to comma
  const withoutInlineMath = withCommas.replace(/\\\((.*?)\\\)/g, '$1');   // LaTeX inline
  const withoutDisplayMath = withoutInlineMath.replace(/\\\[(.*?)\\\]/g, '$1'); // LaTeX display
  const withPauses = withoutDisplayMath.replace(/\s*\n\s*/g, ' . ');      // Newlines to pauses

  return withPauses.trim();
}
/**
 * Resolve the translated plain text for a narration element.
 *
 * Lookup order:
 *  1. The element's own `data-l10n-id`, if it has a translation.
 *  2. Otherwise the translations of all descendants carrying a
 *     `data-l10n-id`, joined with ` . ` (untranslated descendants are skipped).
 *  3. Otherwise the English source text of the element.
 *
 * @param {Element} el - Narration element from the English page.
 * @param {Object<string, string>} translations - Translated HTML keyed by l10n id.
 * @returns {string} Text to narrate.
 */
function getTranslatedText(el, translations) {
  // Translations are HTML fragments: render one and keep only its text
  const fragmentToText = (fragment) => {
    const { document } = new JSDOM(`<div>${fragment}</div>`).window;
    return document.querySelector('div').textContent.trim();
  };

  const ownId = el.dataset.l10nId;
  if (ownId && translations[ownId]) {
    return fragmentToText(translations[ownId]);
  }

  // No usable translation on the element itself - collect its children's
  const childTexts = Array.from(el.querySelectorAll('[data-l10n-id]'))
    .map((child) => child.dataset.l10nId)
    .filter((childId) => childId && translations[childId])
    .map((childId) => fragmentToText(translations[childId]));

  if (childTexts.length > 0) {
    return childTexts.join(' . ');
  }

  // Fallback: use English source text
  return extractTextFromElement(el);
}
/**
 * Clean up text before it is sent to the text-to-speech API.
 *
 * Runs two ordered passes of regex substitutions with a trim in between;
 * the order matters, so the rules are applied strictly in sequence.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const applyRules = (input, rules) =>
    rules.reduce((current, [pattern, replacement]) => current.replace(pattern, replacement), input);

  const beforeTrim = [
    [/\|/g, ', '],              // Pipes to commas
    [/\\\((.*?)\\\)/g, '$1'],   // Strip LaTeX inline delimiters
    [/\\\[(.*?)\\\]/g, '$1'],   // Strip LaTeX display delimiters
    [/\s*\n\s*/g, ' . '],       // Newlines to pauses
  ];
  const afterTrim = [
    [/\.\s*\./g, '.'],                  // Remove double periods
    [/^\.\s*/, ''],                     // Remove leading period
    [/\s+/g, ' '],                      // Collapse whitespace
    [/[\u200B-\u200D\uFEFF]/g, ''],     // Remove zero-width chars
  ];

  return applyRules(applyRules(text, beforeTrim).trim(), afterTrim);
}
/**
 * Derive the page name used for audio directories and cache keys.
 *
 *  - `index.html` in the current working directory -> `home`
 *  - `index.html` in any other directory           -> that directory's name
 *  - any other `*.html` file                       -> the file name without `.html`
 *
 * @param {string} filePath - Path to the HTML file (relative paths resolve against the cwd).
 * @returns {string} Page name.
 */
function getPageName(filePath) {
  const absolutePath = path.resolve(filePath);
  const baseName = path.basename(absolutePath, '.html');

  // Non-index pages are named after the file itself
  if (baseName !== 'index') {
    return baseName;
  }

  const containingDir = path.dirname(absolutePath);
  return containingDir === process.cwd() ? 'home' : path.basename(containingDir);
}
// ============================================================================
// CLI ARGUMENT PARSING
// ============================================================================

/**
 * Print command-line usage help to stdout.
 */
function printUsage() {
  console.log('Usage: node generate-narration.js <path-to-html> [options]');
  console.log('');
  console.log('Options:');
  console.log(' --lang <code> Generate for specific language (default: en)');
  console.log(' --all-langs Generate for all narration-enabled languages');
  console.log(' --resume Skip existing audio files');
  console.log('');
  console.log('Examples:');
  console.log(' node generate-narration.js index.html');
  console.log(' node generate-narration.js about/index.html --lang es');
  console.log(' node generate-narration.js index.html --all-langs');
}

const args = process.argv.slice(2);
let targetFile = null;
let langCode = 'en';
let resumeMode = false;
let allLangsMode = false;

for (let i = 0; i < args.length; i++) {
  const arg = args[i];
  if (arg === '--lang') {
    const value = args[i + 1];
    // A missing value must not silently fall back to English: a non-resume
    // English run deletes and regenerates the existing English audio.
    // Another option (e.g. "--resume") is not a language code either.
    if (!value || value.startsWith('--')) {
      console.error('Error: --lang requires a language code (e.g. --lang es)');
      printUsage();
      process.exit(1);
    }
    langCode = value;
    i++; // the value has been consumed
  } else if (arg === '--resume') {
    resumeMode = true;
  } else if (arg === '--all-langs') {
    allLangsMode = true;
  } else if (!arg.startsWith('--')) {
    // Positional argument: the HTML file (the last one given wins)
    targetFile = arg;
  } else {
    console.warn(`Warning: ignoring unknown option ${arg}`);
  }
}

if (!targetFile) {
  printUsage();
  process.exit(1);
}
// Main execution
/**
 * Run narration generation for the file and options parsed from the command
 * line: either every language in NARRATION_ENABLED_LANGUAGES (sequentially,
 * in list order) or the single selected language.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (!allLangsMode) {
    await generateNarration(targetFile, langCode, resumeMode);
    return;
  }

  console.log(`\n=== Generating narration for ${NARRATION_ENABLED_LANGUAGES.length} languages ===\n`);
  for (const lang of NARRATION_ENABLED_LANGUAGES) {
    console.log(`\n--- ${LANGUAGE_NAMES[lang] || lang} (${lang}) ---\n`);
    await generateNarration(targetFile, lang, resumeMode);
  }
  console.log(`\n=== Complete! ===\n`);
}

main().catch((error) => {
  console.error(error);
  // Report the failure to the shell / CI instead of exiting with status 0
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment