Created
December 4, 2022 06:10
-
-
Save gigq/3c5f4bd83f15dd940c06402aff17b236 to your computer and use it in GitHub Desktop.
OpenAI's ChatGPT + Whisper speech recognition + macOS say
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Make some requests to OpenAI's chatbot"""
import os
import subprocess
import time

import speech_recognition as sr
from playwright.sync_api import sync_playwright
# Set up the speech recognition
r = sr.Recognizer()

# Start the Playwright driver and open a non-headless Chromium window whose
# profile is stored under /tmp/playwright, then open the single page that the
# helper functions below operate on.
PLAY = sync_playwright().start()
BROWSER = PLAY.chromium.launch_persistent_context(user_data_dir="/tmp/playwright", headless=False)
PAGE = BROWSER.new_page()
def get_input_box():
    """Look up the `textarea` element on the current page.

    Returns whatever `PAGE.query_selector("textarea")` yields; callers
    compare the result against None to detect a missing input box.
    """
    textarea = PAGE.query_selector("textarea")
    return textarea
def is_logged_in():
    """Report whether the chat input box can be found on the page."""
    # The presence of the input textarea is used as the logged-in signal.
    input_box = get_input_box()
    return input_box is not None
def send_message(message):
    """Type `message` into the chat input box and submit it with Enter."""
    prompt_box = get_input_box()
    # Click the box first, then replace its contents and submit.
    prompt_box.click()
    prompt_box.fill(message)
    prompt_box.press("Enter")
def get_last_message():
    """Return the text of the most recent conversation message.

    Collects every `div` whose class attribute contains
    `ConversationItem__Message` and returns the inner text of the last one.

    Returns:
        The last message's text, or an empty string when the page has no
        matching message elements yet (previously this raised IndexError).
    """
    messages = PAGE.query_selector_all("div[class*='ConversationItem__Message']")
    if not messages:
        # Nothing has been rendered yet; report "no message" instead of
        # failing on an empty list.
        return ""
    return messages[-1].inner_text()
def start_browser():
    """Open the ChatGPT site and pause for a manual login if one is needed."""
    PAGE.goto("https://chat.openai.com/")
    if is_logged_in():
        print("Logged in")
        return
    # No input box found: ask the user to sign in, then block until they
    # confirm on the terminal.
    print("Please log in to OpenAI Chat")
    print("Press enter when you're done")
    input()
if __name__ == "__main__":
    # Voice loop: listen on the microphone, transcribe with Whisper, send the
    # text to the chat page, then speak any new reply with the `say` command.
    start_browser()
    previous_response = ""
    try:
        with sr.Microphone() as source:
            print("Adjusting for ambient noise...")
            r.adjust_for_ambient_noise(source)
            while True:
                # Listen for the user's input
                print("Listening...")
                audio = r.listen(source)
                # Try to recognize the user's speech
                try:
                    text = r.recognize_whisper(audio, language="english")
                    print("I think you said: %s" % text)
                    if not text.strip():
                        # Nothing was transcribed; don't send an empty prompt.
                        continue
                    send_message(text)
                    time.sleep(5)  # TODO: there are about ten million ways to be smarter than this
                    response = get_last_message()
                    if response != previous_response:
                        previous_response = response
                        # Pass the reply as a single argv entry rather than
                        # through a shell, so quotes, backticks or `$(...)`
                        # in the chatbot's text cannot be executed.
                        subprocess.run(["say", response], check=False)
                except Exception as exc:
                    # `Exception` (not a bare `except`) keeps the loop
                    # best-effort while letting Ctrl-C / SystemExit stop it.
                    print("I'm sorry, I didn't catch that. Could you please repeat it?")
                    print("(%s: %s)" % (type(exc).__name__, exc))
    finally:
        # Release the browser context and the Playwright driver on exit.
        BROWSER.close()
        PLAY.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Credit goes to @taranjeet and @danielgross for the ChatGPT integration: https://github.com/taranjeet/chatgpt-api
The dependencies are somewhat fiddly to install on an Apple Silicon Mac.
I had to install portaudio manually before pyaudio would recognize it, as described here:
https://stackoverflow.com/a/69293153
You'll need to install speech_recognition and whisper from git in order for r.recognize_whisper to work:
pip install git+https://github.com/openai/whisper.git
pip install git+https://github.com/Uberi/speech_recognition