lethain · March 8, 2025 22:16
diff --git a/grammar.py b/grammar.py
 #!/usr/bin/env python3

 import argparse
 import json
 import os
 import re
 import sys
 from urllib import request
 from urllib.error import HTTPError

 def parse_arguments():
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        The parsed namespace; its only attribute is ``file_path``, the
        positional path of the file to check.
    """
    cli = argparse.ArgumentParser(
        description="Find and fix typos and grammatical errors using OpenAI API."
    )
    cli.add_argument("file_path", help="Path to the file to check")
    namespace = cli.parse_args()
    return namespace

 def read_file(file_path):
    """Return the whole text of ``file_path``, decoded as UTF-8.

    Any failure is printed and ends the process with exit status 1; a
    missing file gets its own, more specific message.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            text = handle.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        sys.exit(1)
    except Exception as err:
        print(f"Error reading file: {err}")
        sys.exit(1)
    return text

 def get_openai_api_key():
    """Return the value of the ``OPENAI_API_KEY`` environment variable.

    When the variable is unset or empty, an error is printed and the
    process exits with status 1.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return key
    print("Error: OPENAI_API_KEY environment variable not set")
    sys.exit(1)

 def get_corrections_from_openai(content, api_key):
    """
    Ask the OpenAI chat-completions endpoint for typo and grammar fixes.

    The document is sent as the user message, alongside a system prompt that
    requests one ``search_pattern|replacement_string`` line per fix. The text
    of the first choice in the reply is passed to parse_regex_list and that
    result is returned. An HTTP error prints the response body; any other
    failure prints the exception. Both end the process with exit status 1.
    """
    endpoint = "https://api.openai.com/v1/chat/completions"
    
    prompt = """
    Please find all typos and grammatical errors in the following document.
    Format your response as a list of regular expressions to be applied, one per line, in the format:
    
    search_pattern|replacement_string
    
    Where 'search_pattern' is a regular expression that matches the error,
    and 'replacement_string' is what the matched text should be replaced with.
    
    Only include patterns that need correction, with no additional explanation.
    Make the regular expressions as specific as possible to avoid false positives.
    """
    
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    
    payload = {
        "model": "gpt-4", # Or another appropriate model
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": content}
        ],
        "temperature": 0.7
    }
    
    try:
        body = json.dumps(payload).encode('utf-8')
        api_request = request.Request(endpoint, data=body, headers=request_headers)
        with request.urlopen(api_request) as response:
            raw_reply = response.read()
        reply = json.loads(raw_reply.decode('utf-8'))
        message_text = reply["choices"][0]["message"]["content"]
        return parse_regex_list(message_text)
    except HTTPError as http_err:
        error_message = http_err.read().decode('utf-8')
        print(f"OpenAI API Error: {error_message}")
        sys.exit(1)
    except Exception as err:
        print(f"Error: {err}")
        sys.exit(1)

 def parse_regex_list(content):
    """Parse the model's reply into a list of ``(pattern, replacement)`` pairs.

    Each useful line has the form ``search_pattern|replacement_string``. A
    line is split at its first ``|`` only, so the replacement may itself
    contain ``|``. Both halves are stripped of surrounding whitespace.

    Lines without a ``|`` are ignored, as are lines where either half is
    empty once stripped.

    Args:
        content: Raw text returned by the model.

    Returns:
        A list of ``(pattern, replacement)`` string tuples in reply order.
    """
    regex_list = []
    for line in content.strip().split('\n'):
        pattern, separator, replacement = line.partition('|')
        if not separator:
            continue
        # Strip BEFORE validating: a whitespace-only pattern would otherwise
        # pass the check and turn into an empty regex, which matches at
        # every position of the document.
        pattern = pattern.strip()
        replacement = replacement.strip()
        if pattern and replacement:
            regex_list.append((pattern, replacement))
    return regex_list

 def _expand_replacement(replacement, match):
    """Return ``replacement`` with ``$N`` group references filled in from ``match``."""
    group_count = match.re.groups

    def fill(reference):
        digits = reference.group(1)
        # Use the longest digit prefix that names a real group, so "$10" is
        # group 10 when it exists and otherwise group 1 followed by "0".
        for cut in range(len(digits), 0, -1):
            index = int(digits[:cut])
            if index <= group_count:
                # An optional group that did not take part in the match is
                # None; treat it as empty text instead of failing.
                return (match.group(index) or "") + digits[cut:]
        return reference.group(0)

    # A single pass means text inserted for one group is never rescanned
    # for further "$N" references.
    return re.sub(r"\$([1-9]\d*)", fill, replacement)


 def _paragraph_bounds(text, start, end):
    """Return ``(para_start, para_end)`` of the paragraph around ``text[start:end]``."""
    blank_before = text.rfind('\n\n', 0, start)
    if blank_before == -1:
        # No blank line before the match: fall back to the start of the line.
        para_start = text.rfind('\n', 0, start) + 1
    else:
        para_start = blank_before + 2

    para_end = text.find('\n\n', end)
    if para_end == -1:
        # No blank line after the match: fall back to the end of the line,
        # or the end of the text when there is no newline at all.
        para_end = text.find('\n', end)
        if para_end == -1:
            para_end = len(text)
    return para_start, para_end


 def _confirm_change():
    """Prompt until the user answers Y or N; return True for Y."""
    while True:
        user_input = input("\nApply this change? (Y/N): ").strip().upper()
        if user_input in ("Y", "N"):
            return user_input == "Y"
        print("Invalid input. Please enter Y or N.")


 def apply_corrections(file_path, content, regex_corrections):
    """Interactively apply regex corrections and write the result to disk.

    For every match of every pattern, the surrounding paragraph is printed
    before and after the proposed change and the user is asked to accept
    (Y) or reject (N) it. ``$1``, ``$2``, ... in a replacement refer to the
    pattern's capture groups.

    Args:
        file_path: File that receives the corrected text.
        content: Original text of the file.
        regex_corrections: Sequence of ``(pattern, replacement)`` strings.

    An invalid pattern, or an error while processing one, is reported and
    that pattern is skipped; changes accepted so far are kept. The file is
    always rewritten at the end, even if nothing was accepted. A write
    failure is printed and exits the process with status 1.
    """
    modified_content = content
    total = len(regex_corrections)

    for i, (pattern, replacement) in enumerate(regex_corrections, 1):
        print(f"\nProcessing correction {i}/{total}: {pattern} -> {replacement}")

        try:
            regex = re.compile(pattern)

            matches_count = sum(1 for _ in regex.finditer(modified_content))
            print(f"\n{matches_count} occurrence{'s' if matches_count != 1 else ''} of {pattern}")

            pos = 0
            # Bound the loop explicitly: a pattern that can match the empty
            # string would otherwise keep matching at the end of the text.
            while pos <= len(modified_content):
                match = regex.search(modified_content, pos)
                if not match:
                    break

                start, end = match.span()
                para_start, para_end = _paragraph_bounds(modified_content, start, end)

                print("\n" + "="*50)
                print("Before:")
                print(modified_content[para_start:para_end])

                processed_replacement = _expand_replacement(replacement, match)
                modified_text = (
                    modified_content[:start] +
                    processed_replacement +
                    modified_content[end:]
                )

                # The paragraph end shifts by however much the text grew or shrank.
                growth = len(modified_text) - len(modified_content)
                print("\nAfter:")
                print(modified_text[para_start:para_end + growth])

                if _confirm_change():
                    modified_content = modified_text
                    print("Change applied.")
                    # Resume AFTER the inserted text; restarting at `start`
                    # would offer the same spot again whenever the
                    # replacement itself still matches the pattern.
                    pos = start + len(processed_replacement)
                else:
                    print("Change rejected.")
                    pos = end

                if start == end:
                    # An empty match consumes nothing; step one character
                    # forward so the search cannot stall at this position.
                    pos += 1

        except re.error as e:
            print(f"Invalid regex pattern '{pattern}': {e}")
            continue
        except Exception as e:
            print(f"Error processing pattern '{pattern}': {e}")
            continue

    # Write the modified content back to the file
    try:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(modified_content)
        print(f"\nAll accepted corrections applied to {file_path}")
    except Exception as e:
        print(f"Error writing to file: {e}")
        sys.exit(1)

 def main():
    """Run the tool end to end for the file named on the command line.

    Steps: parse arguments, fetch the API key, read the file, request
    corrections, then review them interactively. Exits with status 0
    when no corrections come back.
    """
    file_path = parse_arguments().file_path

    # Same order as before: the key is checked before the file is read.
    api_key = get_openai_api_key()
    content = read_file(file_path)

    print("Sending content to OpenAI for analysis...")
    corrections = get_corrections_from_openai(content, api_key)

    if corrections:
        print(f"Found {len(corrections)} potential corrections.")
        # Each correction is confirmed by the user before it is applied.
        apply_corrections(file_path, content, corrections)
    else:
        print("No corrections found or the API did not return proper regex patterns.")
        sys.exit(0)

 # Run the command-line tool only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3

	import argparse
	import json
	import os
	import re
	import sys
	from urllib import request
	from urllib.error import HTTPError

	def parse_arguments():
	    """Parse command line arguments.

	    Returns:
	        The parsed namespace; its only attribute is ``file_path``, the
	        positional path of the file to check.
	    """
	    parser = argparse.ArgumentParser(description="Find and fix typos and grammatical errors using OpenAI API.")
	    parser.add_argument("file_path", help="Path to the file to check")
	    return parser.parse_args()

	def read_file(file_path):
	    """Read the contents of a file as UTF-8 text.

	    Any failure is printed and ends the process with exit status 1; a
	    missing file gets its own, more specific message.
	    """
	    try:
	        with open(file_path, 'r', encoding='utf-8') as file:
	            return file.read()
	    except FileNotFoundError:
	        print(f"Error: File not found at {file_path}")
	        sys.exit(1)
	    except Exception as e:
	        print(f"Error reading file: {e}")
	        sys.exit(1)

	def get_openai_api_key():
	    """Get the OpenAI API key from the ``OPENAI_API_KEY`` environment variable.

	    When the variable is unset or empty, an error is printed and the
	    process exits with status 1.
	    """
	    api_key = os.environ.get("OPENAI_API_KEY")
	    if not api_key:
	        print("Error: OPENAI_API_KEY environment variable not set")
	        sys.exit(1)
	    return api_key

	def get_corrections_from_openai(content, api_key):
	    """
	    Send the file content to OpenAI API and get back a list of regex corrections.

	    The document is sent as the user message, alongside a system prompt that
	    requests one ``search_pattern|replacement_string`` line per fix. The text
	    of the first choice in the reply is passed to parse_regex_list and that
	    result is returned. An HTTP error prints the response body; any other
	    failure prints the exception. Both end the process with exit status 1.
	    """
	    url = "https://api.openai.com/v1/chat/completions"

	    # The separator is a plain "|": a backslash before it would be an
	    # invalid string escape and would ask the model for "\|" instead.
	    prompt = """
	    Please find all typos and grammatical errors in the following document.
	    Format your response as a list of regular expressions to be applied, one per line, in the format:

	    search_pattern|replacement_string

	    Where 'search_pattern' is a regular expression that matches the error,
	    and 'replacement_string' is what the matched text should be replaced with.

	    Only include patterns that need correction, with no additional explanation.
	    Make the regular expressions as specific as possible to avoid false positives.
	    """

	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {api_key}"
	    }

	    data = {
	        "model": "gpt-4", # Or another appropriate model
	        "messages": [
	            {"role": "system", "content": prompt},
	            {"role": "user", "content": content}
	        ],
	        "temperature": 0.7
	    }

	    try:
	        req = request.Request(url, json.dumps(data).encode('utf-8'), headers)
	        with request.urlopen(req) as response:
	            result = json.loads(response.read().decode('utf-8'))
	            return parse_regex_list(result["choices"][0]["message"]["content"])
	    except HTTPError as e:
	        error_message = e.read().decode('utf-8')
	        print(f"OpenAI API Error: {error_message}")
	        sys.exit(1)
	    except Exception as e:
	        print(f"Error: {e}")
	        sys.exit(1)

	def parse_regex_list(content):
	    """Parse the OpenAI response into a list of ``(pattern, replacement)`` pairs.

	    Each useful line has the form ``search_pattern|replacement_string``. A
	    line is split at its first ``|`` only, so the replacement may itself
	    contain ``|``. Both halves are stripped of surrounding whitespace.

	    Lines without a ``|`` are ignored, as are lines where either half is
	    empty once stripped.

	    Args:
	        content: Raw text returned by the model.

	    Returns:
	        A list of ``(pattern, replacement)`` string tuples in reply order.
	    """
	    regex_list = []
	    for line in content.strip().split('\n'):
	        # Split on a plain "|"; a backslash-escaped pipe in the source was
	        # an invalid string escape and required a literal "\|" in the reply.
	        pattern, separator, replacement = line.partition('|')
	        if not separator:
	            continue
	        # Strip BEFORE validating: a whitespace-only pattern would otherwise
	        # pass the check and turn into an empty regex, which matches at
	        # every position of the document.
	        pattern = pattern.strip()
	        replacement = replacement.strip()
	        if pattern and replacement:
	            regex_list.append((pattern, replacement))
	    return regex_list

	def _expand_replacement(replacement, match):
	    """Return ``replacement`` with ``$N`` group references filled in from ``match``."""
	    group_count = match.re.groups

	    def fill(reference):
	        digits = reference.group(1)
	        # Use the longest digit prefix that names a real group, so "$10" is
	        # group 10 when it exists and otherwise group 1 followed by "0".
	        for cut in range(len(digits), 0, -1):
	            index = int(digits[:cut])
	            if index <= group_count:
	                # An optional group that did not take part in the match is
	                # None; treat it as empty text instead of failing.
	                return (match.group(index) or "") + digits[cut:]
	        return reference.group(0)

	    # A single pass means text inserted for one group is never rescanned
	    # for further "$N" references.
	    return re.sub(r"\$([1-9]\d*)", fill, replacement)


	def _paragraph_bounds(text, start, end):
	    """Return ``(para_start, para_end)`` of the paragraph around ``text[start:end]``."""
	    blank_before = text.rfind('\n\n', 0, start)
	    if blank_before == -1:
	        # No blank line before the match: fall back to the start of the line.
	        para_start = text.rfind('\n', 0, start) + 1
	    else:
	        para_start = blank_before + 2

	    para_end = text.find('\n\n', end)
	    if para_end == -1:
	        # No blank line after the match: fall back to the end of the line,
	        # or the end of the text when there is no newline at all.
	        para_end = text.find('\n', end)
	        if para_end == -1:
	            para_end = len(text)
	    return para_start, para_end


	def _confirm_change():
	    """Prompt until the user answers Y or N; return True for Y."""
	    while True:
	        user_input = input("\nApply this change? (Y/N): ").strip().upper()
	        if user_input in ("Y", "N"):
	            return user_input == "Y"
	        print("Invalid input. Please enter Y or N.")


	def apply_corrections(file_path, content, regex_corrections):
	    """Interactively apply regex corrections and write the result to disk.

	    For every match of every pattern, the surrounding paragraph is printed
	    before and after the proposed change and the user is asked to accept
	    (Y) or reject (N) it. ``$1``, ``$2``, ... in a replacement refer to the
	    pattern's capture groups.

	    Args:
	        file_path: File that receives the corrected text.
	        content: Original text of the file.
	        regex_corrections: Sequence of ``(pattern, replacement)`` strings.

	    An invalid pattern, or an error while processing one, is reported and
	    that pattern is skipped; changes accepted so far are kept. The file is
	    always rewritten at the end, even if nothing was accepted. A write
	    failure is printed and exits the process with status 1.
	    """
	    modified_content = content
	    total = len(regex_corrections)

	    for i, (pattern, replacement) in enumerate(regex_corrections, 1):
	        print(f"\nProcessing correction {i}/{total}: {pattern} -> {replacement}")

	        try:
	            regex = re.compile(pattern)

	            matches_count = sum(1 for _ in regex.finditer(modified_content))
	            print(f"\n{matches_count} occurrence{'s' if matches_count != 1 else ''} of {pattern}")

	            pos = 0
	            # Bound the loop explicitly: a pattern that can match the empty
	            # string would otherwise keep matching at the end of the text.
	            while pos <= len(modified_content):
	                match = regex.search(modified_content, pos)
	                if not match:
	                    break

	                start, end = match.span()
	                para_start, para_end = _paragraph_bounds(modified_content, start, end)

	                print("\n" + "="*50)
	                print("Before:")
	                print(modified_content[para_start:para_end])

	                processed_replacement = _expand_replacement(replacement, match)
	                modified_text = (
	                    modified_content[:start] +
	                    processed_replacement +
	                    modified_content[end:]
	                )

	                # The paragraph end shifts by however much the text grew or shrank.
	                growth = len(modified_text) - len(modified_content)
	                print("\nAfter:")
	                print(modified_text[para_start:para_end + growth])

	                if _confirm_change():
	                    modified_content = modified_text
	                    print("Change applied.")
	                    # Resume AFTER the inserted text; restarting at `start`
	                    # would offer the same spot again whenever the
	                    # replacement itself still matches the pattern.
	                    pos = start + len(processed_replacement)
	                else:
	                    print("Change rejected.")
	                    pos = end

	                if start == end:
	                    # An empty match consumes nothing; step one character
	                    # forward so the search cannot stall at this position.
	                    pos += 1

	        except re.error as e:
	            print(f"Invalid regex pattern '{pattern}': {e}")
	            continue
	        except Exception as e:
	            print(f"Error processing pattern '{pattern}': {e}")
	            continue

	    # Write the modified content back to the file
	    try:
	        with open(file_path, 'w', encoding='utf-8') as file:
	            file.write(modified_content)
	        print(f"\nAll accepted corrections applied to {file_path}")
	    except Exception as e:
	        print(f"Error writing to file: {e}")
	        sys.exit(1)

	def main():
	    """Main function to run the program.

	    Steps: parse arguments, fetch the API key, read the file, request
	    corrections, then review them interactively. Exits with status 0
	    when no corrections come back.
	    """
	    args = parse_arguments()
	    file_path = args.file_path

	    # Get the OpenAI API key
	    api_key = get_openai_api_key()

	    # Read the file content
	    content = read_file(file_path)

	    # Get corrections from OpenAI
	    print("Sending content to OpenAI for analysis...")
	    corrections = get_corrections_from_openai(content, api_key)

	    if not corrections:
	        print("No corrections found or the API did not return proper regex patterns.")
	        sys.exit(0)

	    print(f"Found {len(corrections)} potential corrections.")

	    # Apply the corrections after user confirmation
	    apply_corrections(file_path, content, corrections)

	# Run the command-line tool only when this file is executed as a script.
	if __name__ == "__main__":
	    main()