Created
December 10, 2023 17:15
-
-
Save scottire/df7d2602c68b49dd27bce10680114426 to your computer and use it in GitHub Desktop.
Transcribe, ChatGPT, gTTS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask, request, jsonify, send_file | |
import tempfile | |
import whisper | |
import gtts | |
import os | |
from openai import OpenAI | |
# Client used by respond() for chat completions.
client = OpenAI()

# Running transcript of the dialogue as "User: ..." / "Assistant: ..." lines.
# It lives at module level, so every request appends to the same list.
conversation = []

# System message sent with every completion request.
system_prompt = """
You are a helpful assistant to help people learn Spanish.
Your task is to role play a native conversation with the user.
The transcription mightn't be perfect, but you should be able to understand the gist of what they're saying.
You're very concise and complimentary. After responding, you should keep the conversation going by asking a question."""

app = Flask(__name__)

# Load the Whisper "base" model once at import time so requests reuse it.
model = whisper.load_model("base")
@app.route('/chat', methods=['POST'])
def chat():
    """Handle one spoken turn of the conversation.

    Expects a POST whose ``file`` field holds the user's recording. The
    audio is transcribed, the transcription is appended to the module-level
    ``conversation`` list, a reply is requested from the chat model, and the
    reply is sent back as a synthesized audio attachment.

    Returns:
        A ``(message, 400)`` tuple when no usable file was uploaded,
        otherwise the response built by ``send_file``.
    """
    if 'file' not in request.files:
        return "No file part", 400
    file = request.files['file']
    # Reject a missing filename as well as an empty one, so the view can
    # never fall through and return None.
    if not file.filename:
        return "No selected file", 400

    temp_path = save_temp_audio_file(file)
    try:
        audio_transcription = transcribe(temp_path)
    finally:
        # The upload is only needed for transcription; delete it so each
        # request does not leave a stray file in the temp directory.
        os.remove(temp_path)

    conversation.append(f'User: {audio_transcription}')
    response = respond(0, system_prompt)
    conversation.append(f'Assistant: {response}')
    response_audio = say(response)
    return send_file(response_audio, as_attachment=True)
def save_temp_audio_file(audio_file):
    """Write an uploaded audio object to a new temporary ``.wav`` file.

    Args:
        audio_file: Object exposing ``save(path)``, such as the upload taken
            from ``request.files``.

    Returns:
        Path of the temporary file. The file is not deleted here; the
        caller is responsible for removing it.
    """
    # Reserve a unique name and close the handle straight away, so that
    # ``save`` can reopen the path itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as placeholder:
        wav_path = placeholder.name
    audio_file.save(wav_path)
    return wav_path
def transcribe(audio_file):
    """Transcribe a Spanish audio recording to text.

    Args:
        audio_file: Path of the audio file handed to the Whisper model.

    Returns:
        The transcribed text with surrounding whitespace removed.
    """
    result = model.transcribe(audio_file, language='es')
    # Whisper returns a dict (text, segments, language). Only the text
    # belongs in the conversation; returning the dict would put its whole
    # repr into the prompt.
    return result["text"].strip()
def respond(temperature, system_prompt):
    """Ask the chat model for the assistant's next turn.

    The module-level ``conversation`` list is flattened into a single user
    message that ends with an ``Assistant: `` cue.

    Args:
        temperature: Sampling temperature forwarded to the completion call.
        system_prompt: Text sent as the system message.

    Returns:
        The message content of the first choice in the completion.
    """
    # Put the header, each turn and the trailing cue on separate lines.
    # Without the separators the header runs into the first turn and the
    # last turn runs into "Assistant: ".
    transcript = "\n".join(conversation)
    user_content = f"Conversation so far:\n{transcript}\nAssistant: "

    response = client.chat.completions.create(
        model="gpt-4",
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
    )
    return response.choices[0].message.content
def say(text):
    """Synthesize ``text`` as Spanish speech and save it as an MP3.

    Args:
        text: The text to speak.

    Returns:
        Absolute path of the written ``response.mp3``. The same file name is
        reused on every call, so each call overwrites the previous audio.
    """
    tts = gtts.gTTS(text, lang='es')
    # Return an absolute path so the caller opens exactly the file written
    # here, whichever base directory it resolves relative paths against.
    filename = os.path.abspath("response.mp3")
    tts.save(filename)
    return filename
if __name__ == '__main__':
    # Debug mode stays on by default for local development. Set
    # FLASK_DEBUG to 0/false/no/off to disable it, e.g. when the server is
    # reachable by others, since debug mode enables the interactive debugger.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1").strip().lower() not in (
        "0", "false", "no", "off",
    )
    app.run(debug=debug_enabled)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment