Created
March 5, 2024 19:02
-
-
Save mtanco/8ba24cd0c3d758c8f0249bdc7efe7830 to your computer and use it in GitHub Desktop.
Testing system prompts, models, and specific ways of asking questions to understand how to get executable Python code from an LLM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Code Execution Testing for LLMs
Michelle Tanco - [email protected]
March 4, 2024
We are attempting to get executable Python code from an LLM. This testing suite helps us with that process.
""" | |
import os | |
from h2ogpte import H2OGPTE | |
from h2ogpte.types import SessionError | |
from loguru import logger | |
# System prompt supplied with every query. It asks the model for bare Python
# only: no triple-tick fences, and any explanation confined to comments.
# NOTE: the wording (typos included) is kept verbatim on purpose, since this
# string is the exact prompt whose effect is being tested.
SYSTEM_PROMPT = (
    "You are a Python code generator, you only ever response with Python code that can be executed. "
    "You do not wrap your code in triple ticks and you do not return any non-code unless it "
    "is commented out. All comments and explainations are commented out so as to ensure the code is "
    "executable."
)

# Identifier of the model under test; handed to each query as its `llm` argument.
LLM = "h2oai/h2ogpt-32k-codellama-34b-instruct"
# Prompts are keyed by a stable question number (reported in failure logs) and
# grouped by how the model was observed to respond. The groups are merged in
# this order, which is also the order in which the prompts are sent.

# Requests that tend to come back as executable code (each one worked).
_EXECUTABLE_PROMPTS = {
    1: "Create a function for calculating the factorial of a number.",
    2: "Create a function for calculating the factorial of a number and execute the function 5 times.",
    3: "Create a function for calculating the factorial of a number, comment your code.",
    7: "Print `Hello, world!` to the console.",
    8: "Write code for printing hello world.",
    10: "Write a function for summing two numbers",
    12: "A function for summing two numbers, execute the function 7 times",
}

# Requests that ask for input validation, which the model does not handle well.
_INPUT_VALIDATION_PROMPTS = {
    4: "Create a function for calculating the factorial of a number, check that the input is valid.",
    5: "Write a function for calculating factorials, be sure to handle negative numbers",
}

# "How to"-style and conversational requests: the model insists on explaining
# itself instead of returning bare code.
_EXPLANATION_PRONE_PROMPTS = {
    6: "How do I create a simple Python script that prints 'Hello, world' to the console?",
    9: "How do I create a Python function that takes two numbers as input and returns their sum?",
    11: "I need code to sum two numbers",
    13: "How do I create a Python generator that yields a sequence of numbers.",
    14: "Create a generator for sequences of numbers",
    15: "I need a function for generating sequences of numbers",
}

TEST_PROMPTS = {
    **_EXECUTABLE_PROMPTS,
    **_INPUT_VALIDATION_PROMPTS,
    **_EXPLANATION_PRONE_PROMPTS,
}
# Connect to h2oGPTe; the server address and API key are read from the environment.
client = H2OGPTE(address=os.getenv("H2OGPTE_URL"), api_key=os.getenv("H2OGPTE_API_TOKEN"))

# Throwaway collection and chat session used only for this run. Every query
# below uses rag_type "llm_only", so the collection is created empty.
collection_id = client.create_collection("Fake for testing", "")
chat_session_id = client.create_chat_session(collection_id)

logger.info(f"LLM: {LLM}")
logger.info(f"System Prompt: {SYSTEM_PROMPT}")

try:
    with client.connect(chat_session_id) as session:
        for i, message in TEST_PROMPTS.items():
            logger.info(f"User Input: {message}")
            try:
                reply = session.query(
                    message=message,
                    system_prompt=SYSTEM_PROMPT,
                    timeout=60,
                    rag_config={"rag_type": "llm_only"},
                    llm=LLM,
                )
                logger.info(f"LLM Output: {reply.content}")
                # SECURITY: this executes model-generated code with the full
                # privileges of this process. Only run it in a disposable or
                # sandboxed environment.
                # Each reply runs in its own fresh namespace so generated code
                # cannot overwrite this script's variables (client, session,
                # reply, ...) and definitions from one prompt cannot mask a
                # failure in a later one. __name__ is set so that
                # `if __name__ == "__main__":` blocks still execute, as they
                # did when the code ran in this script's globals.
                exec(reply.content, {"__name__": "__main__"})
                logger.success("The output was executable Python executable!")
            except SessionError as ex:  # Something went wrong with h2oGPTe - stop testing
                logger.error(ex)
                break
            except SyntaxError as ex:  # Output was not executable - document and try next test
                logger.error(f"{LLM}\n{SYSTEM_PROMPT}\nQuestion {i}\n{reply.content}\n{ex}")
            except Exception as ex:  # Code parsed but failed at runtime (or the query failed) - log and continue
                logger.error(type(ex))
                logger.error(ex)
finally:
    # Always remove the temporary collection and chat session, whether the run
    # finished normally, stopped on a SessionError, or was interrupted. This
    # runs after the session context has exited.
    client.delete_collections([collection_id])
    client.delete_chat_sessions([chat_session_id])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment