Skip to content

Instantly share code, notes, and snippets.

@patrickisgreat
Last active August 9, 2025 18:48
Show Gist options
  • Select an option

  • Save patrickisgreat/c161e78d6880fcfa4b5dddaf8ec2cdc3 to your computer and use it in GitHub Desktop.

Select an option

Save patrickisgreat/c161e78d6880fcfa4b5dddaf8ec2cdc3 to your computer and use it in GitHub Desktop.
OpenAI TTS Number Generator
#!/usr/bin/env python3
"""
OpenAI TTS Number Generator
Generates WAV files for a range of numbers (1-8000 by default) using OpenAI's Text-to-Speech API
"""
import os
import sys
import time
import zipfile
from pathlib import Path
from openai import OpenAI
import logging
from tqdm import tqdm
import argparse
# Logging setup: every record at INFO or above is sent both to a log file in
# the current working directory and to standard output.
logging.basicConfig(
    handlers=[
        logging.FileHandler('tts_generation.log'),  # persistent record of the run
        logging.StreamHandler(sys.stdout),          # live console output
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by the generator class and main().
logger = logging.getLogger(__name__)
class TTSNumberGenerator:
    """Generate one WAV file per number using OpenAI's text-to-speech API.

    Each number is converted to English words, sent to the speech endpoint and
    saved as ``<output_dir>/<number:05d>.wav``.  Numbers that could not be
    generated are collected in ``failed_numbers`` as ``(number, error)``
    tuples so they can be retried and reported.
    """

    # Total used by generate_summary_report() when no batch has been run and
    # the caller does not supply one.
    DEFAULT_TOTAL_EXPECTED = 8000

    # Word tables for number_to_words(); the index is the numeric value
    # (``_ONES``) or the tens digit (``_TENS``).  Built once, not per call.
    _ONES = (
        "", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
    )
    _TENS = (
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
        "eighty", "ninety",
    )

    def __init__(self, api_key=None, voice="alloy", model="tts-1",
                 output_dir="number_audio_files"):
        """
        Initialize the TTS Number Generator

        Args:
            api_key: OpenAI API key (if None, will use OPENAI_API_KEY env var)
            voice: Voice to use (alloy, echo, fable, onyx, nova, shimmer)
            model: Model to use (tts-1 or tts-1-hd)
            output_dir: Directory the WAV files are written to; created
                (including parents) if it does not exist.
        """
        self.client = OpenAI(api_key=api_key)
        self.voice = voice
        self.model = model
        self.output_dir = Path(output_dir)
        self.failed_numbers = []
        # Every number passed through generate_batch(); lets the summary
        # report describe the range actually processed.
        self.requested_numbers = set()

        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Initialized TTS Generator with voice: {voice}, model: {model}")

    def _output_path(self, number):
        """Return the WAV path for *number* (file name zero-padded to 5 digits)."""
        return self.output_dir / f"{number:05d}.wav"

    @staticmethod
    def _is_rate_limit_error(exc):
        """Return True when *exc* looks like an HTTP 429 / rate-limit failure."""
        # Prefer a structured status code when the exception carries one,
        # then fall back to inspecting the message text.
        if getattr(exc, "status_code", None) == 429:
            return True
        message = str(exc).lower()
        return "rate limit" in message or "429" in message

    def number_to_words(self, num):
        """Convert an integer to English words for better TTS pronunciation.

        Args:
            num: Integer to convert.

        Returns:
            The spoken form, e.g. ``342`` -> ``"three hundred forty two"``.
            Negative values are prefixed with ``"minus"``.  Magnitudes above
            999,999 fall back to the plain decimal digits.
        """
        if num < 0:
            # Without this guard a negative value would index the word table
            # from the end and silently return the wrong word.
            return "minus " + self.number_to_words(-num)
        if num == 0:
            return "zero"
        if num < 20:
            return self._ONES[num]
        if num < 100:
            result = self._TENS[num // 10]
            if num % 10 != 0:
                result += " " + self._ONES[num % 10]
            return result
        if num < 1000:
            result = self._ONES[num // 100] + " hundred"
            remainder = num % 100
            if remainder != 0:
                result += " " + self.number_to_words(remainder)
            return result
        if num < 1000000:
            result = self.number_to_words(num // 1000) + " thousand"
            remainder = num % 1000
            if remainder != 0:
                result += " " + self.number_to_words(remainder)
            return result
        return str(num)  # Fallback for numbers > 999,999

    def generate_single_audio(self, number, retry_delay=60):
        """Generate audio for a single number with rate limit handling.

        Args:
            number: The number to synthesize.
            retry_delay: Base wait in seconds after a rate-limit error; the
                wait doubles on each further attempt (exponential backoff).

        Returns:
            ``(True, None)`` on success, otherwise ``(False, error_message)``.
        """
        max_retries = 5
        output_file = self._output_path(number)
        # Write to a side file first and rename on success, so an interrupted
        # write never leaves a truncated .wav that later runs would skip.
        partial_file = output_file.with_name(output_file.name + ".part")

        for attempt in range(1, max_retries + 1):
            try:
                # Convert number to words for better pronunciation
                text = self.number_to_words(number)
                # Generate speech
                response = self.client.audio.speech.create(
                    model=self.model,
                    voice=self.voice,
                    input=text,
                    response_format="wav"
                )
                # Save to file
                with open(partial_file, "wb") as f:
                    f.write(response.content)
                partial_file.replace(output_file)
                return True, None
            except Exception as e:
                if not self._is_rate_limit_error(e):
                    # Non-rate-limit error, don't retry
                    if partial_file.exists():
                        partial_file.unlink()
                    error_msg = f"Error generating audio for {number}: {str(e)}"
                    logger.error(error_msg)
                    return False, error_msg

                if attempt == max_retries:
                    # No point sleeping when there is no attempt left.
                    logger.warning(
                        f"Rate limit hit for {number} (attempt {attempt}/{max_retries})"
                    )
                    break

                wait_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
                logger.warning(f"Rate limit hit for {number}. Waiting {wait_time}s (attempt {attempt}/{max_retries})")
                time.sleep(wait_time)

        # All retries exhausted
        error_msg = f"Failed to generate audio for {number} after {max_retries} rate limit retries"
        logger.error(error_msg)
        return False, error_msg

    def generate_batch(self, start_num, end_num, delay_between_requests=20):
        """Generate audio files for an inclusive range with careful rate limiting.

        Numbers whose output file already exists are skipped and counted as
        successful, which makes an interrupted run resumable.

        Args:
            start_num: First number of the range (inclusive).
            end_num: Last number of the range (inclusive).
            delay_between_requests: Seconds to sleep after each API request.

        Returns:
            How many numbers of the range have an audio file (newly generated
            or already present).
        """
        logger.info(f"Generating audio for numbers {start_num} to {end_num}")
        logger.info(f"Using {delay_between_requests}s delay between requests to respect rate limits")

        numbers = range(start_num, end_num + 1)
        total_numbers = len(numbers)
        self.requested_numbers.update(numbers)
        successful = 0

        with tqdm(total=total_numbers, desc="Generating audio files") as pbar:
            for i in numbers:
                # Check if file already exists
                if self._output_path(i).exists():
                    logger.debug(f"File for {i} already exists, skipping")
                    successful += 1
                    pbar.update(1)
                    continue

                success, error = self.generate_single_audio(i)
                if success:
                    successful += 1
                    logger.debug(f"Successfully generated audio for {i}")
                else:
                    self.failed_numbers.append((i, error))
                pbar.update(1)

                # Rate limiting - wait between requests (except for last request)
                if i < end_num:
                    time.sleep(delay_between_requests)

        logger.info(f"Batch complete: {successful}/{total_numbers} successful")
        return successful

    def retry_failed(self, max_retries=3, delay_between_requests=0.5):
        """Retry failed generations.

        Makes up to ``max_retries`` passes over ``failed_numbers``; numbers
        that still fail are put back into ``failed_numbers``.

        Args:
            max_retries: Maximum number of passes over the failed list.
            delay_between_requests: Seconds to sleep after each retried
                request.  Rate-limit errors are additionally backed off
                inside generate_single_audio().
        """
        if not self.failed_numbers:
            return

        logger.info(f"Retrying {len(self.failed_numbers)} failed numbers...")
        for retry_count in range(max_retries):
            if not self.failed_numbers:
                break
            logger.info(f"Retry attempt {retry_count + 1}/{max_retries}")

            # Work on a snapshot so failures of this pass can be re-collected.
            current_failed = self.failed_numbers.copy()
            self.failed_numbers = []
            for number, _ in tqdm(current_failed, desc=f"Retry {retry_count + 1}"):
                success, error = self.generate_single_audio(number)
                if not success:
                    self.failed_numbers.append((number, error))
                time.sleep(delay_between_requests)  # Short pause between retried requests

    def create_zip_archive(self, zip_filename="number_audio_files.zip"):
        """Create a zip archive of all generated audio files.

        Args:
            zip_filename: Path of the archive to write.

        Returns:
            True when the archive was written, False when there were no WAV
            files or writing failed (the error is logged).
        """
        logger.info("Creating zip archive...")
        zip_path = Path(zip_filename)
        # Sorted so the archive's member order is deterministic.
        audio_files = sorted(self.output_dir.glob("*.wav"))
        if not audio_files:
            logger.error("No audio files found to zip!")
            return False

        try:
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for audio_file in tqdm(audio_files, desc="Adding files to zip"):
                    # Store under the bare file name (no directory component)
                    zipf.write(audio_file, audio_file.name)
            file_size = zip_path.stat().st_size / (1024 * 1024)  # MB
            logger.info(f"Zip archive created: {zip_filename} ({file_size:.1f} MB)")
            logger.info(f"Archive contains {len(audio_files)} audio files")
            return True
        except Exception as e:
            logger.error(f"Error creating zip archive: {str(e)}")
            return False

    def generate_summary_report(self, total_expected=None):
        """Log a summary report of the generation process.

        Args:
            total_expected: Number of files that should exist.  When omitted,
                the count of numbers passed through generate_batch() is used,
                or ``DEFAULT_TOTAL_EXPECTED`` if no batch has been run.
        """
        if self.requested_numbers:
            # Count only files for numbers this run was asked to produce, so
            # leftovers from other runs do not inflate the result.
            successful_count = sum(
                1 for n in self.requested_numbers if self._output_path(n).exists()
            )
        else:
            successful_count = len(list(self.output_dir.glob("*.wav")))

        if total_expected is None:
            total_expected = len(self.requested_numbers) or self.DEFAULT_TOTAL_EXPECTED

        failed_count = len(self.failed_numbers)
        # Guard against an explicit total of 0.
        success_rate = (successful_count / total_expected) * 100 if total_expected else 0.0

        logger.info("\n" + "="*50)
        logger.info("GENERATION SUMMARY REPORT")
        logger.info("="*50)
        logger.info(f"Total numbers to process: {total_expected}")
        logger.info(f"Successfully generated: {successful_count}")
        logger.info(f"Failed: {failed_count}")
        logger.info(f"Success rate: {success_rate:.1f}%")
        if self.failed_numbers:
            # Show at most the first ten failures to keep the report short.
            logger.info(f"\nFailed numbers: {[num for num, _ in self.failed_numbers[:10]]}")
            if len(self.failed_numbers) > 10:
                logger.info(f"... and {len(self.failed_numbers) - 10} more")
        logger.info("="*50)
def main():
    """Command-line entry point.

    Parses the arguments, generates the audio files for the requested range,
    retries failures, zips the results and logs a summary.  Exits with
    status 1 when the API key is missing, on Ctrl-C, or on an unexpected
    error; invalid argument values are rejected by the argument parser.
    """
    parser = argparse.ArgumentParser(description='Generate TTS audio files for numbers 1-8000')
    parser.add_argument('--voice', default='nova',
                        choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
                        help='Voice to use for TTS (alloy=neutral, echo=male, fable=british, onyx=deep, nova=young female, shimmer=soft female)')
    parser.add_argument('--model', default='tts-1',
                        choices=['tts-1', 'tts-1-hd'],
                        help='TTS model to use')
    parser.add_argument('--start', type=int, default=1, help='Starting number')
    parser.add_argument('--end', type=int, default=8000, help='Ending number')
    parser.add_argument('--delay', type=int, default=20,
                        help='Delay in seconds between API requests (default: 20s for rate limiting)')
    parser.add_argument('--zip-name', default='number_audio_files.zip',
                        help='Name for the output zip file')
    args = parser.parse_args()

    # Reject values the generator cannot handle sensibly before any API call.
    if args.start < 0:
        parser.error("--start must not be negative")
    if args.end < args.start:
        parser.error("--end must be greater than or equal to --start")
    if args.delay < 0:
        parser.error("--delay must not be negative")

    # Check for API key
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        logger.error("OpenAI API key not found. Please set OPENAI_API_KEY environment variable.")
        sys.exit(1)

    logger.info("Starting TTS Number Generator")
    logger.info(f"Range: {args.start} to {args.end}")
    logger.info(f"Voice: {args.voice}, Model: {args.model}")

    try:
        # Initialize generator
        generator = TTSNumberGenerator(voice=args.voice, model=args.model)

        # Generate audio files
        start_time = time.time()
        generator.generate_batch(args.start, args.end, args.delay)

        # Retry failed ones
        generator.retry_failed()

        # Create zip archive
        generator.create_zip_archive(args.zip_name)

        # Generate summary
        generator.generate_summary_report()

        total_time = time.time() - start_time
        logger.info(f"Total execution time: {total_time/60:.1f} minutes")
    except KeyboardInterrupt:
        logger.info("Generation interrupted by user")
        sys.exit(1)
    except Exception as e:
        # logger.exception keeps the traceback in the log for diagnosis.
        logger.exception(f"Unexpected error: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment