Skip to content

Instantly share code, notes, and snippets.

@lethain
Created March 8, 2025 22:16
Show Gist options
  • Save lethain/ce2aa71e07a67e26d613386d49154462 to your computer and use it in GitHub Desktop.
Save lethain/ce2aa71e07a67e26d613386d49154462 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import json
import os
import re
import sys
from urllib import request
from urllib.error import HTTPError
def parse_arguments():
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its only
        attribute is ``file_path``, the positional path of the file to check.
    """
    cli = argparse.ArgumentParser(
        description="Find and fix typos and grammatical errors using OpenAI API."
    )
    cli.add_argument("file_path", help="Path to the file to check")
    namespace = cli.parse_args()
    return namespace
def read_file(file_path):
    """Return the entire text of ``file_path``, decoded as UTF-8.

    This is a CLI helper: on any failure it prints a message and terminates
    the process with exit status 1 instead of raising to the caller.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            text = handle.read()
    except FileNotFoundError:
        # Reported separately so the user sees the offending path.
        print(f"Error: File not found at {file_path}")
        sys.exit(1)
    except Exception as err:
        print(f"Error reading file: {err}")
        sys.exit(1)
    else:
        return text
def get_openai_api_key():
    """Return the value of the ``OPENAI_API_KEY`` environment variable.

    If the variable is unset or empty, an error is printed and the process
    exits with status 1.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return key
    print("Error: OPENAI_API_KEY environment variable not set")
    sys.exit(1)
def get_corrections_from_openai(content, api_key):
    """Ask the OpenAI chat-completions API for corrections to ``content``.

    The document is sent as the user message, with a system prompt that
    requests one ``search_pattern|replacement_string`` pair per line. The
    text of the first returned choice is handed to ``parse_regex_list``.

    Args:
        content: Full text of the document to check.
        api_key: OpenAI API key, sent as a bearer token.

    Returns:
        Whatever ``parse_regex_list`` returns for the model's reply.

    On any failure (HTTP error, network error or timeout, malformed response)
    a message is printed and the process exits with status 1.
    """
    url = "https://api.openai.com/v1/chat/completions"
    # Without a timeout urlopen can block forever on a stalled connection.
    # The limit is generous because long documents take a while to process.
    timeout_seconds = 300
    # Built from explicit literals so the text sent to the API does not
    # depend on how this function happens to be indented.
    prompt = (
        "\n"
        "Please find all typos and grammatical errors in the following document.\n"
        "Format your response as a list of regular expressions to be applied, one per line, in the format:\n"
        "search_pattern|replacement_string\n"
        "Where 'search_pattern' is a regular expression that matches the error,\n"
        "and 'replacement_string' is what the matched text should be replaced with.\n"
        "Only include patterns that need correction, with no additional explanation.\n"
        "Make the regular expressions as specific as possible to avoid false positives.\n"
    )
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": "gpt-4",  # Or another appropriate model
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": content},
        ],
        "temperature": 0.7,
    }
    try:
        # Supplying a request body makes urllib issue a POST.
        req = request.Request(url, json.dumps(data).encode('utf-8'), headers)
        with request.urlopen(req, timeout=timeout_seconds) as response:
            result = json.loads(response.read().decode('utf-8'))
        return parse_regex_list(result["choices"][0]["message"]["content"])
    except HTTPError as e:
        # The response body of an HTTP error carries the API's own explanation.
        error_message = e.read().decode('utf-8')
        print(f"OpenAI API Error: {error_message}")
        sys.exit(1)
    except Exception as e:
        # Covers timeouts, connection failures and unexpected response shapes.
        print(f"Error: {e}")
        sys.exit(1)
def parse_regex_list(content):
    """Parse the model's reply into ``(pattern, replacement)`` pairs.

    Each useful line has the form ``search_pattern|replacement_string``.
    Lines without a ``|`` are ignored, as are lines whose pattern or
    replacement is empty once surrounding whitespace is removed.

    The line is split on its LAST ``|`` so that a pattern using regex
    alternation (``teh|hte|the``) stays intact; a replacement containing a
    literal ``|`` is therefore not representable in this format.

    Args:
        content: Raw text returned by the model.

    Returns:
        A list of ``(pattern, replacement)`` tuples of stripped strings, in
        the order they appeared.
    """
    regex_list = []
    for line in content.strip().split('\n'):
        pattern, separator, replacement = line.rpartition('|')
        if not separator:
            continue
        pattern = pattern.strip()
        replacement = replacement.strip()
        # Test AFTER stripping: a whitespace-only pattern would otherwise
        # become the empty regex, which matches at every position.
        if pattern and replacement:
            regex_list.append((pattern, replacement))
    return regex_list
def _expand_group_references(replacement, match):
    """Return ``replacement`` with each ``$N`` filled in from ``match``'s groups."""
    groups = match.groups()

    def substitute(reference):
        digits = reference.group(1)
        # Prefer the longest group number that exists, so "$10" means group 10
        # when there are ten groups and group 1 followed by "0" otherwise.
        for width in range(len(digits), 0, -1):
            number = digits[:width]
            if number[0] != "0" and int(number) <= len(groups):
                # A group that did not participate in the match is None;
                # treat it as empty text instead of failing.
                return (groups[int(number) - 1] or "") + digits[width:]
        return reference.group(0)

    # One pass over the template, so captured text that itself contains "$N"
    # is never expanded a second time.
    return re.sub(r"\$([0-9]+)", substitute, replacement)


def _paragraph_bounds(text, start, end):
    """Return ``(para_start, para_end)`` of the context shown around ``text[start:end]``."""
    blank_line = text.rfind('\n\n', 0, start)
    if blank_line != -1:
        para_start = blank_line + 2
    else:
        # No blank line before the match: fall back to the start of its line
        # (rfind's -1 plus 1 gives 0 when there is no newline at all).
        para_start = text.rfind('\n', 0, start) + 1
    para_end = text.find('\n\n', end)
    if para_end == -1:
        para_end = text.find('\n', end)
    if para_end == -1:
        para_end = len(text)
    return para_start, para_end


def _confirm_change():
    """Prompt until the user answers Y or N; return True for Y."""
    while True:
        answer = input("\nApply this change? (Y/N): ").strip().upper()
        if answer in ("Y", "N"):
            return answer == "Y"
        print("Invalid input. Please enter Y or N.")


def apply_corrections(file_path, content, regex_corrections):
    """Interactively apply regex corrections and write the result to ``file_path``.

    For every match of every pattern, the surrounding paragraph is printed
    before and after the proposed replacement and the user is asked to accept
    or reject it. ``$N`` in a replacement refers to capture group N of the
    match. The resulting text is written back to ``file_path`` once all
    corrections have been processed, whether or not any were accepted.

    Args:
        file_path: Path of the file to overwrite with the corrected text.
        content: Current text of the file.
        regex_corrections: Iterable of ``(pattern, replacement)`` string pairs.

    A pattern that fails to compile, or that raises while being processed, is
    reported and skipped. If the file cannot be written, an error is printed
    and the process exits with status 1.
    """
    modified_content = content
    for i, (pattern, replacement) in enumerate(regex_corrections, 1):
        print(f"\nProcessing correction {i}/{len(regex_corrections)}: {pattern} -> {replacement}")
        try:
            regex = re.compile(pattern)
            matches_count = sum(1 for _ in regex.finditer(modified_content))
            print(f"\n{matches_count} occurrence{'s' if matches_count != 1 else ''} of {pattern}")
            pos = 0
            # search() clamps a position past the end back to the end, so the
            # bound must be checked here to stop after a final empty match.
            while pos <= len(modified_content):
                match = regex.search(modified_content, pos)
                if match is None:
                    break
                start, end = match.span()
                para_start, para_end = _paragraph_bounds(modified_content, start, end)
                print("\n" + "=" * 50)
                print("Before:")
                print(modified_content[para_start:para_end])
                processed_replacement = _expand_group_references(replacement, match)
                modified_text = (
                    modified_content[:start]
                    + processed_replacement
                    + modified_content[end:]
                )
                # Shift the paragraph end by however much the text grew or shrank.
                after_end = para_end + (len(modified_text) - len(modified_content))
                print("\nAfter:")
                print(modified_text[para_start:after_end])
                if _confirm_change():
                    modified_content = modified_text
                    print("Change applied.")
                    # Resume at the start of the replacement so matches that
                    # only exist because of this edit are offered too.
                    pos = start
                else:
                    print("Change rejected.")
                    # Move past this match. A zero-width match has end == start,
                    # so step one extra character or it would be offered forever.
                    pos = end if end > start else end + 1
        except re.error as e:
            print(f"Invalid regex pattern '{pattern}': {e}")
            continue
        except Exception as e:
            print(f"Error processing pattern '{pattern}': {e}")
            continue
    # Write the modified content back to the file
    try:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(modified_content)
        print(f"\nAll accepted corrections applied to {file_path}")
    except Exception as e:
        print(f"Error writing to file: {e}")
        sys.exit(1)
def main():
    """Run the whole workflow for the file named on the command line.

    Steps, in order: parse the arguments, fetch the API key, read the file,
    request corrections from OpenAI, then walk the user through them. Exits
    with status 0 when no corrections come back.
    """
    file_path = parse_arguments().file_path
    # Fetch the key before reading the file, so a missing key is reported first.
    api_key = get_openai_api_key()
    content = read_file(file_path)

    print("Sending content to OpenAI for analysis...")
    corrections = get_corrections_from_openai(content, api_key)

    if corrections:
        print(f"Found {len(corrections)} potential corrections.")
        # Each correction is applied only after the user confirms it.
        apply_corrections(file_path, content, corrections)
        return

    print("No corrections found or the API did not return proper regex patterns.")
    sys.exit(0)
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment