# Source: GitHub Gist nonchris/d987ed199a307b2fc7fd1d9d63097965 (last active July 11, 2024 10:36)
# Description: Simple Text to Speech GUI for OpenAI's API written in Python
# NOTE: GitHub flagged this file as containing hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than it appears. Review it in an editor that reveals
# hidden Unicode characters.
""" | |
This is a very unclean, quick hack for educational purposes. | |
It allows you to interact with open ai's TTS backend via the API. | |
You can choose between all their voices. | |
Note: This code has a few known bugs/ issues: | |
* It uses a deprecated function for writing the audio | |
* The filename field for the user is just ignored. | |
* The API key is hardcoded in the software and can be extracted easily | |
* You can't paste using right click (ctrl+v works though) | |
* You can't change the language in the ui | |
...and probably a few more. | |
The UI part was mostly AI generated and isn't audited so far. | |
It just works good enough, for now. | |
You can use it if you like to, but keep these limitations in mind. | |
I had less than 25 minutes to build this. | |
And the key we're using was handed to everyone anyway so there was no security issue | |
(it had a hard limit and was invalidated afterwards) | |
""" | |
# To get started, execute these commands in your terminal or inside your IDE:
#   python3 -m venv venv           (note: the command for python may vary on Windows, it could be py or python too)
#   source venv/bin/activate       (note: you might need activate.bat or activate.ps1 on Windows)
#   python3 -m pip install openai
#   python3 app.py                 (use the name you saved this file under)
#
# Build as .exe for Windows:
#   first replace the "YOUR TOKEN" placeholder (the hard-coded API key right below the imports)
#   with your actual token
#   pip install pyinstaller~=5.13
#   pyinstaller .\text-to-speech-gui-openai.py --onefile --name text-zu-sprache.exe
import ctypes
import datetime as dt
import os
import subprocess
import sys
import tkinter as tk
import tkinter.font as tkfont
from os.path import expanduser
from pathlib import Path
from tkinter import filedialog
from tkinter import ttk
from typing import Optional

from openai import OpenAI
# SECURITY: a key embedded here is shipped in plain text inside the built executable
# and can be extracted easily. Only embed a key that is rate-limited and disposable.
#
# Replace ONLY the value of EMBEDDED_API_KEY with the real token before building.
_API_KEY_PLACEHOLDER = "YOUR TOKEN"
EMBEDDED_API_KEY = "YOUR TOKEN"

# An embedded real key always wins, so the built .exe behaves exactly as before.
# While the placeholder is still in place, a key that is already present in the
# environment is kept instead of being overwritten with the placeholder. If neither
# is available the placeholder is still exported, so the client can be constructed
# and the failure is reported when a request is made.
if EMBEDDED_API_KEY != _API_KEY_PLACEHOLDER or not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = EMBEDDED_API_KEY

# Shared API client used by call_voice(); it reads OPENAI_API_KEY from the environment.
client = OpenAI()
def call_voice(text: str, output_path: Path, voice: str, *, model: str = "tts-1") -> None:
    """Synthesize *text* with OpenAI's speech endpoint and save the audio to *output_path*.

    The audio is streamed into a temporary ``<name>.part`` file next to the target and
    only moved into place once the download has finished, so a failed or interrupted
    request never leaves a truncated file behind under the final name.

    Args:
        text: The text to be spoken.
        output_path: Destination file; an existing file at that path is replaced.
        voice: Voice name passed through to the API (e.g. ``"alloy"``).
        model: Speech model name passed through to the API (keyword-only).

    Raises:
        OSError: If the temporary or the final file cannot be written.
        Exception: Whatever the OpenAI client raises for a failed request is
            propagated unchanged.
    """
    target = Path(output_path)
    partial = target.with_name(target.name + ".part")
    try:
        # ``with_streaming_response`` is the replacement the openai package documents
        # for the deprecated ``create(...).stream_to_file(...)`` combination.
        with client.audio.speech.with_streaming_response.create(
            model=model,
            voice=voice,
            input=text,
        ) as response:
            response.stream_to_file(partial)
        os.replace(partial, target)
    finally:
        try:
            partial.unlink()
        except FileNotFoundError:
            # Expected after a successful run: os.replace() already moved the file.
            pass
class App:
    """Tkinter front end: enter text, pick a voice and save the spoken result as a .mp3 file.

    Creating an instance builds the window and immediately enters Tk's main loop, so
    ``App()`` only returns after the window has been closed.
    """

    # Voices offered in the drop-down; the first entry is pre-selected.
    VOICES = ("alloy", "echo", "fable", "onyx", "nova", "shimmer")
    # File naming: "<DEFAULT_STEM>_<n><AUDIO_SUFFIX>" when the user gives no name.
    DEFAULT_STEM = "speech"
    AUDIO_SUFFIX = ".mp3"

    def __init__(self) -> None:
        """Build the window and run the Tk main loop until the window is closed."""
        # Path of the most recently generated file; None until a generation succeeded.
        self.file_path: Optional[Path] = None
        self.window = tk.Tk()
        if os.name == "nt":
            try:
                # NOTE(review): this runs after tk.Tk() has created the window; the
                # Windows API docs suggest setting DPI awareness earlier. The order is
                # kept because the layout was built with it - confirm before moving.
                ctypes.windll.shcore.SetProcessDpiAwareness(1)
            except (AttributeError, OSError) as error:
                # Purely cosmetic call: a missing DLL/function must not stop the app.
                print(f"Could not enable DPI awareness: {error}")
        self.window.title("Text-zu-Sprache Generator")
        self.custom_font = tkfont.Font(size=11)
        self.window.geometry("700x600")
        self.init_ui()
        self.window.mainloop()

    def init_ui(self) -> None:
        """Create all widgets from top to bottom: file name, voice, text, status, buttons."""
        self.whitespace()
        self._add_label("Dateiname:")
        self.output_file_entry = tk.Entry(self.window, font=self.custom_font)
        self.output_file_entry.pack()
        self._add_label("(Optional. Standard ist speech_0.mp3, speech_1.mp3 usw.)")

        self.whitespace(2)
        self._add_label("Stimme:")
        self.voice_var = tk.StringVar(value=self.VOICES[0])
        self.voice_dropdown = ttk.Combobox(
            self.window,
            textvariable=self.voice_var,
            values=list(self.VOICES),
            font=self.custom_font,
        )
        self.voice_dropdown.pack()

        self.whitespace(2)
        self._add_label("Text:")
        self.text_entry = tk.Text(self.window, font=self.custom_font, wrap=tk.WORD, height=10)
        self.text_entry.pack(expand=True, fill=tk.BOTH)

        self.whitespace(2)
        # Status line for success and error messages.
        self.result_label = tk.Label(self.window, text="", font=self.custom_font)
        self.result_label.pack()

        button_frame = tk.Frame(self.window)
        button_frame.pack()
        tk.Button(
            button_frame, text="Generieren", command=self.generate_speech, font=self.custom_font
        ).pack(side=tk.LEFT)
        tk.Button(
            button_frame, text="Ordner öffnen", command=self.open_folder, font=self.custom_font
        ).pack(side=tk.LEFT)

    def generate_speech(self) -> None:
        """Send the entered text to ``call_voice`` and report the outcome in the status line.

        The file is written to ``~/audio``. The optional file name field is honoured;
        existing files are never overwritten (see ``_resolve_output_path``).
        """
        text = self.text_entry.get("1.0", tk.END).strip()
        if not text:
            self._show_result("Fehler: Text darf nicht leer sein.")
            return
        voice = self.voice_var.get()

        # NOTE: call_voice() runs synchronously inside this Tk callback, so the window
        # does not react until the request has finished.
        try:
            output_dir = Path(expanduser("~")) / "audio"
            output_dir.mkdir(parents=True, exist_ok=True)
            file = self._resolve_output_path(output_dir, self.output_file_entry.get())
            print(file)

            start = dt.datetime.now()
            print(f"Starting to generate at {start}")
            call_voice(text, file, voice)
            now = dt.datetime.now()
            print(f"Done generating at {now}, this took: {(now - start).total_seconds()} seconds")
        except Exception as e:
            # GUI boundary: every failure (file system, network, API) is shown to the
            # user instead of disappearing in Tk's callback handler.
            self._show_result(f"Fehler: {e}")
        else:
            self.file_path = file
            self._show_result(f"Erfolgreich! Datei gespeichert unter:\n{file}")

    def open_folder(self) -> None:
        """Open the folder of the last generated file in the platform's file manager."""
        if not self.file_path:
            self._show_result("Fehler: Keine Datei vorhanden.")
            return

        folder = self.file_path.parent
        try:
            if os.name == "nt":
                os.startfile(folder)
            elif sys.platform == "darwin":
                # macOS also reports os.name == "posix" but ships "open", not "xdg-open".
                subprocess.call(["open", str(folder)])
            elif os.name == "posix":
                subprocess.call(["xdg-open", str(folder)])
            else:
                self._show_result("Fehler: Plattform nicht unterstützt.")
        except OSError as e:
            # E.g. the launcher binary is not installed.
            self._show_result(f"Fehler: {e}")

    def whitespace(self, rows: int = 1) -> None:
        """Insert *rows* empty label lines as vertical spacing."""
        for _ in range(rows):
            self._add_label("")

    def _add_label(self, text: str) -> None:
        """Pack a label showing *text* in the window, using the shared font."""
        tk.Label(self.window, text=text, font=self.custom_font).pack()

    def _show_result(self, message: str) -> None:
        """Display *message* in the status line."""
        self.result_label.config(text=message, font=self.custom_font)

    @classmethod
    def _resolve_output_path(cls, directory: Path, requested_name: str = "") -> Path:
        """Return a path inside *directory* that does not exist yet.

        Args:
            directory: Folder the file is going to be written to.
            requested_name: Optional file name typed by the user. Only its final path
                component is used, so the result always stays inside *directory*;
                a trailing ".mp3" (any case) is optional.

        Returns:
            Without a usable name: ``speech_0.mp3``, ``speech_1.mp3``, ... (first free
            index). With a name: ``<name>.mp3``, or ``<name>_1.mp3``, ``<name>_2.mp3``,
            ... when that file already exists.
        """
        suffix = cls.AUDIO_SUFFIX
        name = Path(requested_name.strip()).name
        if name.lower().endswith(suffix):
            name = name[: -len(suffix)]

        if name in ("", ".", ".."):
            # Nothing usable was entered: fall back to the numbered default scheme.
            stem = cls.DEFAULT_STEM
            candidate = directory / f"{stem}_0{suffix}"
        else:
            stem = name
            candidate = directory / f"{stem}{suffix}"

        index = 0
        while candidate.exists():
            index += 1
            candidate = directory / f"{stem}_{index}{suffix}"
        return candidate
def main() -> None:
    """Launch the GUI by constructing the application object."""
    App()


if __name__ == "__main__":
    main()
# (GitHub Gist page footer, not part of the program: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")