Created
March 8, 2025 22:16
-
-
Save lethain/ce2aa71e07a67e26d613386d49154462 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import json | |
import os | |
import re | |
import sys | |
from urllib import request | |
from urllib.error import HTTPError | |
def parse_arguments():
    """Build the command-line parser and return the parsed namespace.

    The only argument is the positional ``file_path`` naming the file to check.
    """
    cli = argparse.ArgumentParser(
        description="Find and fix typos and grammatical errors using OpenAI API.",
    )
    cli.add_argument("file_path", help="Path to the file to check")
    namespace = cli.parse_args()
    return namespace
def read_file(file_path):
    """Return the full UTF-8 decoded text of *file_path*.

    On any failure an error message is printed and the process exits with
    status 1; a missing file gets its own, more specific message.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            text = source.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        sys.exit(1)
    except Exception as exc:
        print(f"Error reading file: {exc}")
        sys.exit(1)
    else:
        return text
def get_openai_api_key():
    """Return the value of the ``OPENAI_API_KEY`` environment variable.

    If the variable is unset or empty, print an error and exit with status 1.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return key
    print("Error: OPENAI_API_KEY environment variable not set")
    sys.exit(1)
def get_corrections_from_openai(content, api_key, *, model="gpt-4", temperature=0.7, timeout=None):
    """
    Send the file content to OpenAI API and get back a list of regex corrections.

    Args:
        content: Text of the document to check; sent as the user message.
        api_key: OpenAI API key, sent as a Bearer token in the Authorization header.
        model: Chat model name placed in the request body.
        temperature: Sampling temperature placed in the request body.
        timeout: Seconds to wait on the HTTP request. ``None`` (the default)
            does not pass a timeout, leaving urllib's default in effect.

    Returns:
        Whatever parse_regex_list returns for the content of the first
        choice's message in the API response.

    On an HTTP error the response body is printed; on any other failure the
    exception is printed. In both cases the process exits with status 1.
    """
    url = "https://api.openai.com/v1/chat/completions"
    # The instruction text is sent as the system message; the document itself
    # goes in the user message.
    prompt = """
Please find all typos and grammatical errors in the following document.
Format your response as a list of regular expressions to be applied, one per line, in the format:
search_pattern|replacement_string
Where 'search_pattern' is a regular expression that matches the error,
and 'replacement_string' is what the matched text should be replaced with.
Only include patterns that need correction, with no additional explanation.
Make the regular expressions as specific as possible to avoid false positives.
"""
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": content}
        ],
        "temperature": temperature
    }
    # Forward a timeout only when the caller asked for one, so the default
    # call is exactly the original urlopen(req).
    open_kwargs = {} if timeout is None else {"timeout": timeout}
    try:
        req = request.Request(url, json.dumps(data).encode('utf-8'), headers)
        with request.urlopen(req, **open_kwargs) as response:
            result = json.loads(response.read().decode('utf-8'))
        return parse_regex_list(result["choices"][0]["message"]["content"])
    except HTTPError as e:
        # The error body carries the API's own explanation of the failure.
        error_message = e.read().decode('utf-8')
        print(f"OpenAI API Error: {error_message}")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)
def parse_regex_list(content):
    """Parse the OpenAI response into a list of regex replacements.

    Each line of *content* is expected to look like ``pattern|replacement``.
    The line is split on the first ``|`` only, so any later ``|`` stays in
    the replacement. Lines without a ``|`` are ignored.

    Args:
        content: Raw text returned by the model.

    Returns:
        A list of ``(pattern, replacement)`` tuples, both stripped of
        surrounding whitespace, in the order they appeared.
    """
    regex_list = []
    for line in content.strip().split('\n'):
        pattern, separator, replacement = line.partition('|')
        if not separator:
            continue
        pattern = pattern.strip()
        replacement = replacement.strip()
        # Validate AFTER stripping: a whitespace-only pattern would otherwise
        # become the empty regex, which matches at every position.
        if pattern and replacement:
            regex_list.append((pattern, replacement))
    return regex_list
def _paragraph_bounds(text, start, end):
    """Return (para_start, para_end) of the context shown around text[start:end]."""
    para_start = max(0, text.rfind('\n\n', 0, start) + 2)
    if para_start <= 1:  # No double newline found or at beginning
        para_start = max(0, text.rfind('\n', 0, start) + 1)
    para_end = text.find('\n\n', end)
    if para_end == -1:  # No double newline found
        para_end = text.find('\n', end)
        if para_end == -1:
            para_end = len(text)
    return para_start, para_end


def _expand_replacement(replacement, match):
    """Substitute ``$1``, ``$2``, ... in *replacement* with the groups of *match*."""
    groups = match.groups()
    expanded = replacement
    # Highest index first so "$1" cannot clobber the prefix of "$10".
    for index in range(len(groups), 0, -1):
        # A group that did not participate in the match is None; use "".
        expanded = expanded.replace(f"${index}", groups[index - 1] or "")
    return expanded


def _confirm_change():
    """Prompt until the user answers Y or N; return True for Y."""
    while True:
        answer = input("\nApply this change? (Y/N): ").strip().upper()
        if answer in ("Y", "N"):
            return answer == "Y"
        print("Invalid input. Please enter Y or N.")


def apply_corrections(file_path, content, regex_corrections):
    """Apply the regex corrections to the file after user confirmation.

    For every match of every pattern, the surrounding context is printed
    before and after the proposed replacement and the user is asked to accept
    or reject it. The resulting text is then written to *file_path*.

    Args:
        file_path: Path of the file that receives the corrected text.
        content: Current text of the file.
        regex_corrections: Sequence of ``(pattern, replacement)`` string
            pairs; ``$1``, ``$2``, ... in a replacement refer to the
            pattern's captured groups.

    A pattern that fails to compile, or that raises while being processed, is
    reported and skipped. A failure to write the file prints an error and
    exits with status 1.
    """
    modified_content = content
    total = len(regex_corrections)
    for i, (pattern, replacement) in enumerate(regex_corrections, 1):
        print(f"\nProcessing correction {i}/{total}: {pattern} -> {replacement}")
        try:
            regex = re.compile(pattern)
            matches_count = sum(1 for _ in regex.finditer(modified_content))
            print(f"\n{matches_count} occurrence{'s' if matches_count != 1 else ''} of {pattern}")
            pos = 0
            # The explicit bound is required: search() clamps an oversized
            # pos to the end of the text, so a pattern that can match the
            # empty string would otherwise keep matching there forever.
            while pos <= len(modified_content):
                match = regex.search(modified_content, pos)
                if not match:
                    break
                start, end = match.span()
                para_start, para_end = _paragraph_bounds(modified_content, start, end)
                print("\n" + "=" * 50)
                print("Before:")
                print(modified_content[para_start:para_end])
                processed_replacement = _expand_replacement(replacement, match)
                modified_text = (
                    modified_content[:start] +
                    processed_replacement +
                    modified_content[end:]
                )
                # The context end shifts by however much the text grew or shrank.
                growth = len(modified_text) - len(modified_content)
                print("\nAfter:")
                print(modified_text[para_start:para_end + growth])
                if _confirm_change():
                    modified_content = modified_text
                    print("Change applied.")
                    # Resume after the inserted text so a replacement that
                    # itself matches the pattern is not offered again.
                    pos = start + len(processed_replacement)
                else:
                    print("Change rejected.")
                    # Move past this match so we don't find it again
                    pos = end
                if start == end:
                    # Zero-width match: step forward one character to
                    # guarantee progress.
                    pos += 1
        except re.error as e:
            print(f"Invalid regex pattern '{pattern}': {e}")
            continue
        except Exception as e:
            print(f"Error processing pattern '{pattern}': {e}")
            continue
    # Write the modified content back to the file
    try:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(modified_content)
        print(f"\nAll accepted corrections applied to {file_path}")
    except Exception as e:
        print(f"Error writing to file: {e}")
        sys.exit(1)
def main():
    """Run the tool end to end: parse args, fetch corrections, apply them.

    Exits with status 0 when the API yields no usable corrections.
    """
    target_path = parse_arguments().file_path
    # Resolve the key before touching the file, so a missing key is reported first.
    key = get_openai_api_key()
    text = read_file(target_path)

    print("Sending content to OpenAI for analysis...")
    fixes = get_corrections_from_openai(text, key)

    if fixes:
        print(f"Found {len(fixes)} potential corrections.")
        # Each correction is confirmed interactively before it is applied.
        apply_corrections(target_path, text, fixes)
    else:
        print("No corrections found or the API did not return proper regex patterns.")
        sys.exit(0)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment