Skip to content

Instantly share code, notes, and snippets.

@pablogsal
Created April 7, 2025 13:21
Show Gist options
  • Save pablogsal/66b5d2f43a011443c72af3a25f6e3102 to your computer and use it in GitHub Desktop.
import tokenize
from io import BytesIO
from typing import List, Tuple, Optional, Dict, Any
import keyword
from codeop import compile_command
from _suggestions import _generate_suggestions
from textwrap import dedent
class StatementFinder:
    """Finds the smallest surrounding statement given a line number and Python source.

    The source is tokenized and the tokens belonging to the statement that
    contains ``line_number`` are collected.  A bracket stack keeps logical
    statements that span several physical lines together, and explicit
    backslash continuations are honoured as well.
    """

    # Tokens that should begin a statement and cannot occur within brackets.
    # Stored as a class-level frozenset: it is a constant, so there is no need
    # to rebuild a list per instance, and membership tests are O(1).
    should_begin_statement = frozenset({
        "assert", "async", "await", "break", "class", "continue",
        "def", "del", "elif", "except", "finally", "global",
        "import", "nonlocal", "pass", "raise", "return", "try",
        "with", "while", "yield",
    })

    # Maps each opening bracket to its matching closing bracket.
    _MATCHING = {"(": ")", "[": "]", "{": "}"}

    def __init__(self, source: str, line_number: int):
        """
        Args:
            source: Full Python source text to search.
            line_number: 1-based line number that the statement must contain.
        """
        self.source = source
        self.line_number = line_number
        # Tokens of the statement currently being collected.
        self.statement_tokens: list[tokenize.TokenInfo] = []
        # Token lists of complete statements seen so far.
        self.all_statements: list[list[tokenize.TokenInfo]] = []
        # Stack of currently-open bracket characters.
        self.statement_brackets: list[str] = []
        # Opening-bracket tokens seen before (or at) the first bad token.
        self.begin_brackets: list[tokenize.TokenInfo] = []
        self.end_bracket: tokenize.TokenInfo | None = None
        # First significant (non-comment, non-whitespace) token on the target line.
        self.bad_token: tokenize.TokenInfo | None = None
        # Most recent significant token seen during the scan.
        self.prev_token: tokenize.TokenInfo | None = None

    def find_statement(self) -> str:
        """Find and return the smallest complete statement containing the target line."""
        source_tokens = self._get_source_tokens()
        self._obtain_statement(source_tokens)
        return self._reconstruct_statement()

    def _get_source_tokens(self) -> List[tokenize.TokenInfo]:
        """Get all tokens from the source."""
        return list(tokenize.tokenize(BytesIO(self.source.encode('utf-8')).readline))

    def _obtain_statement(self, source_tokens: List[tokenize.TokenInfo]) -> None:
        """Scan ``source_tokens`` and collect the statement containing the target line.

        Populates ``self.statement_tokens`` (and ``self.all_statements``) as a
        side effect.
        """
        previous_row = -1
        previous_token = None
        continuation_line = False
        last_line_to_include = self.line_number
        for token in source_tokens:
            # Stop once we are past the target line, unless the logical line
            # continues (backslash continuation or unclosed brackets).
            if (token.start[0] > last_line_to_include and
                    not continuation_line and
                    not self.statement_brackets):
                break
            if token.start[0] > previous_row:
                if previous_token is not None:
                    continuation_line = previous_token.line.endswith("\\\n")
                if (token.start[0] <= last_line_to_include and
                        not self.statement_brackets):
                    # A new physical line outside brackets starts a new
                    # candidate statement; archive the previous one.
                    if self.statement_tokens:
                        self.all_statements.append(self.statement_tokens[:])
                    self.statement_tokens = []
                    self.begin_brackets = []
                if token.start[0] > last_line_to_include and self.statement_brackets:
                    # Unclosed brackets extend the statement past the target line.
                    last_line_to_include = token.start[0]
                previous_row = token.start[0]
            self.statement_tokens.append(token)
            if (token.start[0] == self.line_number and
                    token.string.strip() and
                    token.type != tokenize.COMMENT):
                self.bad_token = token
            if token.string.strip() and token.type != tokenize.COMMENT:
                self.prev_token = token
            previous_token = token
            # A statement-starting keyword inside brackets cannot be valid;
            # stop scanning here.
            if (self.bad_token and
                    self.bad_token.string in self.should_begin_statement and
                    self.bad_token != self.statement_tokens[0] and
                    self.statement_brackets):
                break
            if not token.string or token.string not in "()[]}{":
                continue
            if token.string in "([{":
                self.statement_brackets.append(token.string)
                if self.bad_token is None or self.bad_token is token:
                    self.begin_brackets.append(token)
            elif token.string in ")]}":
                self.end_bracket = token
                if not self.statement_brackets:
                    # Closing bracket with no opener: malformed, stop here.
                    break
                open_bracket = self.statement_brackets.pop()
                if not self._matching_brackets(open_bracket, token.string):
                    # Mismatched pair: restore the opener and stop.
                    self.statement_brackets.append(open_bracket)
                    break
                if self.begin_brackets and self.bad_token is None:
                    self.begin_brackets.pop()
                self.end_bracket = None
        if self.statement_tokens:
            last_line = self._untokenize(self.statement_tokens)
            if last_line.strip():
                self.all_statements.append(self.statement_tokens)
        elif self.all_statements:
            # Nothing collected for the target line; fall back to the last
            # complete statement that was seen.
            self.statement_tokens = self.all_statements[-1]

    def _matching_brackets(self, open_bracket: str, close_bracket: str) -> bool:
        """Return True if ``close_bracket`` closes ``open_bracket``."""
        return self._MATCHING.get(open_bracket) == close_bracket

    def _untokenize(self, tokens: List[tokenize.TokenInfo]) -> str:
        """Convert tokens back to source code (raw concatenation of strings)."""
        return ''.join(token.string for token in tokens)

    def _reconstruct_statement(self) -> str:
        """Extract the statement directly from the source using token positions.

        Returns the statement's text with trailing whitespace stripped from
        each line, or ``""`` if no statement was found.
        """
        if not self.statement_tokens:
            return ""
        first_token = self.statement_tokens[0]
        last_token = self.statement_tokens[-1]
        lines = self.source.splitlines(keepends=True)
        # Token positions are 1-based; convert to 0-based line indices.
        start_line = first_token.start[0] - 1
        end_line = last_token.end[0] - 1
        statement_lines = []
        for i in range(start_line, end_line + 1):
            if i < len(lines):
                line = lines[i]
                if i == start_line:
                    # For first line, start from first token's column.
                    line = line[first_token.start[1]:]
                if i == end_line:
                    # For last line, end at last token's column.
                    line = line[:last_token.end[1]]
                statement_lines.append(line.rstrip())
        return '\n'.join(statement_lines)
def try_keyword_substitutions(syntax_error: SyntaxError, source: str | None) -> Tuple[str, Tuple[int, int, int, int], str] | None:
    """
    Try to find a valid Python statement by substituting keywords in the problematic code.

    Args:
        syntax_error: A SyntaxError exception containing the error information
        source: Optional source code. If None, will try to read from file.

    Returns:
        A tuple of (suggestion, (start_line, start_offset, end_line, end_offset), modified_statement)
        in the original source, or None if no valid modification was found
    """
    # Get the source file and line number from the exception.
    filename = syntax_error.filename
    line_number = syntax_error.lineno
    if not filename or not isinstance(filename, str):
        return None
    if not line_number or not isinstance(line_number, int):
        return None
    if source is None:
        try:
            # Read the source file
            with open(filename, 'r', encoding='utf-8') as f:
                source = f.read()
        except (IOError, OSError):
            return None
    # Find the problematic statement
    finder = StatementFinder(source, line_number)
    statement = finder.find_statement()
    if not statement or not finder.statement_tokens:
        return None
    # Get the first token's position in the original source, so positions in
    # the extracted statement can be mapped back later.
    first_token = finder.statement_tokens[0]
    source_start_line = first_token.start[0]
    source_start_col = first_token.start[1]
    # Tokenize the statement.  It is, by construction, likely to be invalid
    # Python, and tokenize can raise SyntaxError (including IndentationError)
    # in addition to TokenError on such input.
    try:
        tokens = list(tokenize.tokenize(BytesIO(statement.encode('utf-8')).readline))
    except (tokenize.TokenError, SyntaxError):
        return None
    # Get the lines of the statement
    lines = statement.splitlines(keepends=True)
    # Try substituting each NAME token with a close-matching Python keyword.
    for token in tokens:
        if token.type != tokenize.NAME:
            continue
        # Calculate the position in the source string (tokenize is 1-based).
        line_idx = token.start[0] - 1
        if line_idx >= len(lines):
            continue
        # Get the line and its start/end positions
        line = lines[line_idx]
        start_pos = token.start[1]
        end_pos = token.end[1]
        suggestion = _generate_suggestions(keyword.kwlist, token.string)
        if suggestion:
            # Create modified line by replacing the token
            modified_line = line[:start_pos] + suggestion + line[end_pos:]
            modified_lines = lines[:line_idx] + [modified_line] + lines[line_idx + 1:]
            modified_statement = ''.join(modified_lines)
            # Try to compile; compile_command raises SyntaxError/ValueError
            # when the substitution does not yield valid Python.
            try:
                code = dedent(modified_statement)
                compile_command(code, "<string>", "exec")
                # Calculate positions in original source.  Only the first line
                # of the statement was shifted by the source column offset.
                original_start_line = source_start_line + line_idx
                original_start_col = source_start_col + start_pos if line_idx == 0 else start_pos
                original_end_line = source_start_line + line_idx
                original_end_col = source_start_col + end_pos if line_idx == 0 else end_pos
                return (suggestion,
                        (original_start_line, original_start_col,
                         original_end_line, original_end_col),
                        modified_statement)
            except (SyntaxError, ValueError):
                continue
    return None
# Example usage
if __name__ == "__main__":
    # NOTE(review): the internal indentation of this sample was reconstructed;
    # the "os g" typo (for "as g") is deliberate and triggers the SyntaxError.
    code = """\
from os import path


class A:
    def foo():
        ...

    def bar():
        x= 1
        y = [x for x in
             range(3)]
        with (
            open("test.txt", "w") as f,
            open("test2.txt", "r") os g,
            open("test3.txt", "r") as h,
        ):
            pass
        z = 3


def blech():
    ...
"""
    try:
        # This would normally come from a SyntaxError raised elsewhere.
        compile(code, "<string>", "exec")
    except SyntaxError as e:
        result = try_keyword_substitutions(e, code)
        if result is None:
            # No keyword substitution helps: re-raise the original error
            # instead of silently swallowing it.
            raise
        kw, (start_line, start_col, end_line, end_col), modified = result
        # Enrich the exception with the suggestion and the precise span of
        # the offending token (offsets are 1-based on SyntaxError).
        e.msg += f" Did you mean '{kw}'?"
        e.lineno, e.end_lineno = start_line, end_line
        e.offset, e.end_offset = start_col + 1, end_col + 1
        raise e from None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment