Created
November 15, 2023 08:10
-
-
Save Curtis-64/54a851c711549d4feec3799d667ff672 to your computer and use it in GitHub Desktop.
regex_generator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstration of generating value from a local LLM by Curtis White
import json
import re

import requests
# Chat-completions endpoint of the LLM server running on this machine.
base_url = "http://localhost:1234/v1/chat/completions"
# Headers sent with every request; the payload is posted as JSON.
headers = {"Content-Type": "application/json"}
def preprocess_response(response):
    """Normalise raw LLM output so that it has a chance of parsing as JSON.

    Three clean-ups are applied, in order:

    1. Markdown code fences are removed, including an optional language tag
       such as ``json`` directly after the opening fence.
    2. A Python raw-string prefix (``r"..."`` / ``r'...'``) is removed, but
       only when the ``r`` stands alone; an ``r`` that ends a word, follows
       a backslash, or follows a quote is left untouched.
    3. Every backslash is doubled so that an unescaped regex such as
       ``"\\d+"`` becomes a valid JSON string.

    NOTE(review): step 3 is unconditional, so a reply that is already
    correctly JSON-escaped ends up with doubled backslashes after parsing.

    Args:
        response: The text returned by the model.

    Returns:
        The cleaned text, ready to be handed to ``json.loads``.
    """
    # "```json" must go as a unit; stripping only the backticks would leave
    # the word "json" in front of the object and break parsing.
    cleaned = re.sub(r"```(?:[\w+-]+(?=\s|$))?", "", response)
    # Only treat "r" as a raw-string prefix when it is not the tail of a
    # word (e.g. the key "number"), an escape (\r) or a one-letter string.
    cleaned = re.sub(r"""(?<![\w\\"'])r(?=["'])""", "", cleaned)
    return cleaned.replace("\\", "\\\\")
def get_regex_from_llm(prompt, timeout=300):
    """Ask the LLM for a regex and return it, or "" if none was obtained.

    The prompt is posted to ``base_url`` as a single user message. The
    model's reply is cleaned with ``preprocess_response`` and parsed as
    JSON; the value stored under the ``"regex"`` key is returned.

    Args:
        prompt: Full prompt text to send as the user message.
        timeout: Seconds to wait for the server before giving up. The
            default is generous because generation length is not capped.

    Returns:
        The regex string, or ``""`` when the request fails, the reply does
        not have the expected structure, the content is not valid JSON, or
        the ``"regex"`` value is missing or not a string. Callers treat
        ``""`` as "retry".
    """
    print("\nSending prompt to LLM:")
    print(prompt)
    data = {
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        # NOTE(review): -1 presumably means "no limit" on this server - confirm.
        "max_tokens": -1,
        "stream": False,
    }
    try:
        response = requests.post(
            base_url, headers=headers, json=data, timeout=timeout
        )
        response.raise_for_status()
        raw_response = response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError) as e:
        # Connection problems, timeouts, HTTP error statuses, or a body
        # that is not JSON at all.
        print(f"Request to LLM failed. Error: {e}")
        return ""
    except (KeyError, IndexError, TypeError) as e:
        # The body was JSON but not shaped like a chat-completions reply.
        print(f"Unexpected response structure from LLM. Error: {e!r}")
        return ""

    print("Raw response from LLM:")
    print(raw_response)
    if not isinstance(raw_response, str):
        # e.g. a null "content" field; nothing to parse.
        return ""

    cleaned_response = preprocess_response(raw_response)
    try:
        response_json = json.loads(cleaned_response)
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON. Error: {e}")
        return ""

    # The model may answer with a bare list/string or a non-string value;
    # only a string under "regex" is usable by re.compile.
    regex = response_json.get("regex", "") if isinstance(response_json, dict) else ""
    if not isinstance(regex, str):
        regex = ""
    print(f"Extracted Regex: {regex}")
    return regex
def test_regex(pattern, test_cases):
    """Check a regex against a mapping of sample strings to expectations.

    Each sample is tested with ``re.match``, i.e. the pattern only has to
    match at the start of the sample, not the whole string.

    Args:
        pattern: The regex source to compile.
        test_cases: Mapping of sample string -> whether it should match.

    Returns:
        A dict mapping each sample to ``True`` when the observed match
        outcome equals the expectation (the case passed), or ``None`` when
        ``pattern`` cannot be compiled.
    """
    print(f"\nTesting Regex: {pattern}")
    try:
        regex = re.compile(pattern)
    except (re.error, TypeError) as e:
        # TypeError covers a non-string pattern (e.g. a list from the JSON).
        print(f"Invalid regex pattern. Error: {e}")
        return None

    results = {}
    for case, expected in test_cases.items():
        matched = bool(regex.match(case))
        passed = matched == expected
        results[case] = passed
        # Report the match outcome and the verdict separately; they differ
        # whenever a sample is expected NOT to match.
        print(
            f"Test Case: {case}, Expected: {expected}, "
            f"Matched: {matched}, Passed: {passed}"
        )
    return results


# The "test_" prefix would make pytest collect this helper as a test case if
# the module's names are ever imported into a test file; opt out explicitly.
test_regex.__test__ = False
# Each task pairs a natural-language request with sample strings and whether
# the generated regex is expected to match them.
tasks = [
    {
        "prompt": "Create a regex that extracts data inside parentheses.",
        "test_cases": {
            "(data)": True,
            "no match": False,
            "(more data)": True,
        },
    },
    {
        "prompt": "Create a regex that checks an email.",
        # The two valid samples must be distinct, well-formed addresses;
        # identical keys would silently collapse into a single test case.
        "test_cases": {
            "user@example.com": True,
            "invalid-email": False,
            "first.last@example.co.uk": True,
        },
    },
    {
        "prompt": "Create a regex that matches phone numbers.",
        "test_cases": {
            "123-456-7890": True,
            "invalid number": False,
            "987 654 3210": True,
        },
    },
]

# Appended to every task prompt so the reply can be parsed as JSON.
prompt_modifier = (
    ". Respond only in json format with the regex as the value for the key "
    "'regex' as a single line. Do not add any explanation nor anything else."
)

# Upper bound on LLM round-trips per task before giving up.
max_attempts_per_task = 5
def main():
    """Run every task, asking the LLM repeatedly until its regex passes.

    For each entry in ``tasks`` the prompt (plus ``prompt_modifier``) is
    sent to the LLM up to ``max_attempts_per_task`` times. An attempt is
    discarded when no regex comes back or the regex does not compile; the
    task succeeds as soon as one regex passes all of its test cases.
    """
    for index, task in enumerate(tasks, start=1):
        print(f"\n--- Starting Task {index}: {task['prompt']} ---")
        full_prompt = task['prompt'] + prompt_modifier
        success = False

        for attempt in range(1, max_attempts_per_task + 1):
            print(f"\n--- Attempt {attempt} for Task {index} ---")
            regex_pattern = get_regex_from_llm(full_prompt)
            if not regex_pattern:
                print("No valid regex pattern received. Retrying...")
                continue

            test_results = test_regex(regex_pattern, task['test_cases'])
            if test_results is None:  # Handle invalid regex pattern
                continue

            success = all(test_results.values())
            if success:
                break

        if success:
            print("Task completed successfully.")
        else:
            print("Task not completed successfully within the attempt limit.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment