Created
April 1, 2025 21:57
-
-
Save pavelanni/e233adda266865c0b2fb50b7a9bbac44 to your computer and use it in GitHub Desktop.
Text-to-speech generation script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import asyncio | |
import os | |
from pathlib import Path | |
from openai import AsyncOpenAI | |
# Shared async API client used by text_to_speech(). It is constructed with no
# arguments, so credentials and endpoint come from the library's defaults
# (presumably the OPENAI_API_KEY environment variable -- confirm).
openai = AsyncOpenAI()
async def text_to_speech(text_file: str, instructions_file: str = 'instructions.txt', voice: str = 'onyx') -> None:
    """Convert a UTF-8 text file into an MP3 file next to it.

    The whole content of ``text_file`` is sent as the speech input to the
    ``gpt-4o-mini-tts`` model through the module-level ``openai`` client,
    and the streamed audio is saved under the same path with the extension
    replaced by ``.mp3``.

    Args:
        text_file: Path of the text file to read (decoded as UTF-8).
        instructions_file: Path of a UTF-8 file with voice instructions.
            If the file does not exist, an empty instruction string is used.
        voice: Name of the voice passed to the speech endpoint.

    Raises:
        OSError: If ``text_file`` cannot be read, if ``instructions_file``
            exists but cannot be read, or if the output cannot be written.
        Exception: Whatever the ``openai`` client raises for a failed request.
    """
    # Read the text to be spoken.
    with open(text_file, 'r', encoding='utf-8') as f:
        input_text = f.read()

    # The instructions file is optional. Try to open it and fall back to an
    # empty string instead of checking for existence first, which avoids the
    # check-then-open race.
    try:
        with open(instructions_file, 'r', encoding='utf-8') as f:
            instructions = f.read()
    except FileNotFoundError:
        instructions = ''

    # Output name: same path as the input, extension replaced with .mp3.
    output_file = str(Path(text_file).with_suffix('.mp3'))
    # Stream into a temporary sibling file so that a failed or cancelled
    # request never leaves a truncated .mp3 (or destroys a previous good one).
    partial_file = output_file + '.part'

    try:
        async with openai.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice=voice,
            input=input_text,
            instructions=instructions,
            response_format="mp3",
        ) as response:
            with open(partial_file, "wb") as f:
                async for chunk in response.iter_bytes():
                    f.write(chunk)
        # Only publish the result once the whole stream has been written.
        os.replace(partial_file, output_file)
    finally:
        # After a successful replace the partial file is already gone;
        # after a failure this removes the incomplete download.
        try:
            os.remove(partial_file)
        except FileNotFoundError:
            pass

    print(f"Created {output_file}")
def main():
    """Command-line entry point: parse arguments and run the conversion.

    Accepts one positional text file plus optional ``--instructions``/``-i``
    and ``--voice``/``-v`` flags, then drives ``text_to_speech`` to
    completion on a fresh asyncio event loop.
    """
    cli = argparse.ArgumentParser(description='Convert text file to speech MP3')
    cli.add_argument('text_file', help='Input text file to convert')
    cli.add_argument(
        '--instructions', '-i',
        default='instructions.txt',
        help='File containing voice instructions (default: instructions.txt)',
    )
    cli.add_argument(
        '--voice', '-v',
        default='onyx',
        choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
        help='Voice to use for the speech (default: onyx)',
    )
    options = cli.parse_args()

    # Build the coroutine first, then hand it to the event loop.
    job = text_to_speech(options.text_file, options.instructions, options.voice)
    asyncio.run(job)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment