Last active
August 9, 2025 18:48
-
-
Save patrickisgreat/c161e78d6880fcfa4b5dddaf8ec2cdc3 to your computer and use it in GitHub Desktop.
OpenAI TTS Number Generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| OpenAI TTS Number Generator | |
| Generates WAV files for numbers 1-8000 using OpenAI's Text-to-Speech API | |
| """ | |
| import os | |
| import sys | |
| import time | |
| import zipfile | |
| from pathlib import Path | |
| from openai import OpenAI | |
| import logging | |
| from tqdm import tqdm | |
| import argparse | |
# Configure logging: every record at INFO level or above is written both to
# the 'tts_generation.log' file and to standard output.
logging.basicConfig(
    handlers=[
        logging.FileHandler("tts_generation.log"),
        logging.StreamHandler(sys.stdout),
    ],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the generator class and by main()
logger = logging.getLogger(__name__)
class TTSNumberGenerator:
    """Generate one WAV file per number using OpenAI's text-to-speech API.

    Files are written to the ``number_audio_files`` directory and named with
    the zero-padded number (``00042.wav``). Numbers whose generation failed
    are collected in ``failed_numbers`` as ``(number, error_message)`` tuples.
    """

    # Total assumed by the summary report when no batch has been run yet
    DEFAULT_EXPECTED_TOTAL = 8000

    def __init__(self, api_key=None, voice="alloy", model="tts-1"):
        """
        Initialize the TTS Number Generator

        Args:
            api_key: OpenAI API key (if None, will use OPENAI_API_KEY env var)
            voice: Voice to use (alloy, echo, fable, onyx, nova, shimmer)
            model: Model to use (tts-1 or tts-1-hd)
        """
        self.client = OpenAI(api_key=api_key)
        self.voice = voice
        self.model = model
        self.output_dir = Path("number_audio_files")
        self.failed_numbers = []
        # (start, end) of the most recent generate_batch() call; used by the
        # summary report so its totals match what was actually requested.
        self.batch_range = None

        # Create output directory
        self.output_dir.mkdir(exist_ok=True)
        logger.info(f"Initialized TTS Generator with voice: {voice}, model: {model}")

    def _audio_path(self, number):
        """Return the output path of the WAV file for ``number``."""
        return self.output_dir / f"{number:05d}.wav"

    @staticmethod
    def _is_rate_limit_error(exc):
        """Return True when ``exc`` looks like an HTTP 429 / rate-limit error."""
        # Prefer a structured status code when the exception carries one;
        # fall back to inspecting the message text otherwise.
        if getattr(exc, "status_code", None) == 429:
            return True
        message = str(exc).lower()
        return "rate limit" in message or "429" in message

    def number_to_words(self, num):
        """Convert an integer to English words for better TTS pronunciation.

        Args:
            num: Integer to convert.

        Returns:
            The spelled-out number (e.g. ``"four hundred twenty one"``).
            Negative values are prefixed with ``"minus"``. Magnitudes above
            999,999 fall back to the plain digit string.
        """
        ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
                "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
                "sixteen", "seventeen", "eighteen", "nineteen"]
        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]

        if num < 0:
            # Without this guard a negative value would index `ones` from the
            # end of the list and silently produce the wrong word.
            return "minus " + self.number_to_words(-num)
        if num == 0:
            return "zero"
        if num < 20:
            return ones[num]
        if num < 100:
            result = tens[num // 10]
            if num % 10 != 0:
                result += " " + ones[num % 10]
            return result
        if num < 1000:
            result = ones[num // 100] + " hundred"
            remainder = num % 100
            if remainder != 0:
                result += " " + self.number_to_words(remainder)
            return result
        if num < 1000000:
            result = self.number_to_words(num // 1000) + " thousand"
            remainder = num % 1000
            if remainder != 0:
                result += " " + self.number_to_words(remainder)
            return result
        return str(num)  # Fallback for numbers > 999,999

    def generate_single_audio(self, number, retry_delay=60):
        """Generate audio for a single number with rate limit handling.

        Args:
            number: The number to synthesize.
            retry_delay: Base wait in seconds after a rate-limit error; the
                wait doubles after every further rate-limited attempt.

        Returns:
            ``(True, None)`` on success, otherwise ``(False, error_message)``.
        """
        max_retries = 5
        for attempt in range(1, max_retries + 1):
            try:
                # Convert number to words for better pronunciation
                text = self.number_to_words(number)

                # Generate speech
                response = self.client.audio.speech.create(
                    model=self.model,
                    voice=self.voice,
                    input=text,
                    response_format="wav"
                )

                # Write to a temporary name and rename afterwards, so an
                # interrupted write never leaves a truncated NNNNN.wav that a
                # later run would treat as already generated.
                output_file = self._audio_path(number)
                temp_file = output_file.with_name(output_file.name + ".part")
                with open(temp_file, "wb") as f:
                    f.write(response.content)
                temp_file.replace(output_file)
                return True, None
            except Exception as e:
                if not self._is_rate_limit_error(e):
                    # Non-rate-limit error, don't retry
                    error_msg = f"Error generating audio for {number}: {str(e)}"
                    logger.error(error_msg)
                    return False, error_msg

                if attempt == max_retries:
                    # Nothing left to retry: skip the (longest) back-off sleep
                    break

                wait_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
                logger.warning(f"Rate limit hit for {number}. Waiting {wait_time}s (attempt {attempt}/{max_retries})")
                time.sleep(wait_time)

        # All retries exhausted
        error_msg = f"Failed to generate audio for {number} after {max_retries} rate limit retries"
        logger.error(error_msg)
        return False, error_msg

    def generate_batch(self, start_num, end_num, delay_between_requests=20):
        """Generate audio files for an inclusive range with careful rate limiting.

        Numbers whose WAV file already exists are skipped and counted as
        successful, which lets an interrupted run be resumed. Failures are
        appended to ``failed_numbers``.

        Args:
            start_num: First number to generate (inclusive).
            end_num: Last number to generate (inclusive).
            delay_between_requests: Seconds to sleep after each API request.

        Returns:
            The count of numbers in the range that have an audio file.
        """
        total_numbers = end_num - start_num + 1
        if total_numbers <= 0:
            logger.warning(f"Empty range {start_num} to {end_num}; nothing to generate")
            return 0

        logger.info(f"Generating audio for numbers {start_num} to {end_num}")
        logger.info(f"Using {delay_between_requests}s delay between requests to respect rate limits")

        # Remember the requested range so the summary report reflects it
        self.batch_range = (start_num, end_num)
        successful = 0

        with tqdm(total=total_numbers, desc="Generating audio files") as pbar:
            for i in range(start_num, end_num + 1):
                # Check if file already exists
                if self._audio_path(i).exists():
                    logger.debug(f"File for {i} already exists, skipping")
                    successful += 1
                    pbar.update(1)
                    continue

                success, error = self.generate_single_audio(i)
                if success:
                    successful += 1
                    logger.debug(f"Successfully generated audio for {i}")
                else:
                    self.failed_numbers.append((i, error))
                pbar.update(1)

                # Rate limiting - wait between requests (except for last request)
                if i < end_num:
                    time.sleep(delay_between_requests)

        logger.info(f"Batch complete: {successful}/{total_numbers} successful")
        return successful

    def retry_failed(self, max_retries=3, delay_between_requests=0.5):
        """Retry every number recorded in ``failed_numbers``.

        Args:
            max_retries: Maximum number of passes over the failed numbers.
            delay_between_requests: Seconds to sleep after each retry request.
                Rate-limit back-off itself is handled inside
                ``generate_single_audio``.
        """
        if not self.failed_numbers:
            return

        logger.info(f"Retrying {len(self.failed_numbers)} failed numbers...")
        for retry_count in range(max_retries):
            if not self.failed_numbers:
                break
            logger.info(f"Retry attempt {retry_count + 1}/{max_retries}")

            # Work on a snapshot; numbers that fail again are re-collected
            current_failed = self.failed_numbers
            self.failed_numbers = []
            for number, _ in tqdm(current_failed, desc=f"Retry {retry_count + 1}"):
                success, error = self.generate_single_audio(number)
                if not success:
                    self.failed_numbers.append((number, error))
                time.sleep(delay_between_requests)  # Short pause between retries

    def create_zip_archive(self, zip_filename="number_audio_files.zip"):
        """Create a zip archive of all generated audio files.

        Args:
            zip_filename: Path of the zip file to write.

        Returns:
            True if the archive was written, False if there were no audio
            files or an error occurred.
        """
        logger.info("Creating zip archive...")
        zip_path = Path(zip_filename)
        # Sorted so archive members appear in numeric (zero-padded) order
        audio_files = sorted(self.output_dir.glob("*.wav"))
        if not audio_files:
            logger.error("No audio files found to zip!")
            return False

        try:
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for audio_file in tqdm(audio_files, desc="Adding files to zip"):
                    # Store only the file name, without the directory part
                    zipf.write(audio_file, audio_file.name)

            file_size = zip_path.stat().st_size / (1024 * 1024)  # MB
            logger.info(f"Zip archive created: {zip_filename} ({file_size:.1f} MB)")
            logger.info(f"Archive contains {len(audio_files)} audio files")
            return True
        except Exception as e:
            logger.error(f"Error creating zip archive: {str(e)}")
            return False

    def generate_summary_report(self):
        """Log a summary of the generation process.

        When ``generate_batch`` has been called, the totals cover the range
        of its most recent call. Otherwise the report falls back to
        ``DEFAULT_EXPECTED_TOTAL`` and counts every WAV file in the output
        directory.
        """
        batch_range = getattr(self, "batch_range", None)
        if batch_range is None:
            total_expected = self.DEFAULT_EXPECTED_TOTAL
            successful_count = len(list(self.output_dir.glob("*.wav")))
        else:
            first, last = batch_range
            total_expected = last - first + 1
            # Count only files that belong to the requested range, so leftovers
            # from earlier runs cannot inflate the success rate.
            successful_count = sum(
                1 for n in range(first, last + 1) if self._audio_path(n).exists()
            )
        failed_count = len(self.failed_numbers)
        success_rate = (successful_count / total_expected) * 100 if total_expected > 0 else 0.0

        logger.info("\n" + "="*50)
        logger.info("GENERATION SUMMARY REPORT")
        logger.info("="*50)
        logger.info(f"Total numbers to process: {total_expected}")
        logger.info(f"Successfully generated: {successful_count}")
        logger.info(f"Failed: {failed_count}")
        logger.info(f"Success rate: {success_rate:.1f}%")
        if self.failed_numbers:
            # Show at most the first ten failures to keep the report short
            logger.info(f"\nFailed numbers: {[num for num, _ in self.failed_numbers[:10]]}")
            if len(self.failed_numbers) > 10:
                logger.info(f"... and {len(self.failed_numbers) - 10} more")
        logger.info("="*50)
def main():
    """Command-line entry point: generate, retry, zip and report.

    Parses the command-line options, checks that OPENAI_API_KEY is set, then
    runs the batch generation, retries failures, builds the zip archive and
    logs a summary. Exits with status 2 on invalid arguments and status 1 on
    a missing API key, user interruption or an unexpected error.
    """
    parser = argparse.ArgumentParser(description='Generate TTS audio files for numbers 1-8000')
    parser.add_argument('--voice', default='nova',
                        choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
                        help='Voice to use for TTS (alloy=neutral, echo=male, fable=british, onyx=deep, nova=young female, shimmer=soft female)')
    parser.add_argument('--model', default='tts-1',
                        choices=['tts-1', 'tts-1-hd'],
                        help='TTS model to use')
    parser.add_argument('--start', type=int, default=1, help='Starting number')
    parser.add_argument('--end', type=int, default=8000, help='Ending number')
    parser.add_argument('--delay', type=int, default=20,
                        help='Delay in seconds between API requests (default: 20s for rate limiting)')
    parser.add_argument('--zip-name', default='number_audio_files.zip',
                        help='Name for the output zip file')
    args = parser.parse_args()

    # Reject impossible values up front instead of failing (or silently doing
    # nothing) deep inside the generation loop. parser.error() exits with 2.
    if args.start < 0:
        parser.error("--start must be zero or greater")
    if args.end < args.start:
        parser.error("--end must be greater than or equal to --start")
    if args.delay < 0:
        parser.error("--delay must be zero or greater")

    # Check for API key
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        logger.error("OpenAI API key not found. Please set OPENAI_API_KEY environment variable.")
        sys.exit(1)

    logger.info("Starting TTS Number Generator")
    logger.info(f"Range: {args.start} to {args.end}")
    logger.info(f"Voice: {args.voice}, Model: {args.model}")

    try:
        # Initialize generator with the key that was just validated
        generator = TTSNumberGenerator(api_key=api_key, voice=args.voice, model=args.model)

        # Generate audio files
        start_time = time.time()
        generator.generate_batch(args.start, args.end, args.delay)

        # Retry failed ones
        generator.retry_failed()

        # Create zip archive
        generator.create_zip_archive(args.zip_name)

        # Generate summary
        generator.generate_summary_report()

        total_time = time.time() - start_time
        logger.info(f"Total execution time: {total_time/60:.1f} minutes")
    except KeyboardInterrupt:
        logger.info("Generation interrupted by user")
        sys.exit(1)
    except Exception as e:
        # logger.exception keeps the traceback in the log for diagnosis
        logger.exception(f"Unexpected error: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment