Skip to content

Instantly share code, notes, and snippets.

@user202729
Last active February 6, 2025 05:12
Show Gist options
  • Save user202729/78846233ae50f298cd1d20a8f79cf86e to your computer and use it in GitHub Desktop.
Save user202729/78846233ae50f298cd1d20a8f79cf86e to your computer and use it in GitHub Desktop.
automatically add r to docstrings containing \ (supplementary for https://stackoverflow.com/a/79416828/5267751 )

This shows how to automatically add r to docstrings containing \.

Just run python code_slightly_modified.py [path to your Python file]. It will modify the code in-place.

Note: keep a backup copy of your code before running the script!


How the code was written

  • prompt GPT o3-mini (I used https://duck.ai) with

    write Python program using libcst to automatically add r to docstrings that would raise syntax warning (invalid escape sequence)

  • get the response in original-response.txt

  • slightly modify it to modify the code in-place: code_slightly_modified.py

#!/usr/bin/env python3
"""
This script uses libcst to automatically update docstrings that might
raise an "invalid escape sequence" syntax warning – converting them to raw
docstrings (adding an r prefix) if they are not already and if they contain
backslashes. It processes module-level, function, and class docstrings.
"""
import re
import sys
import libcst as cst
from libcst import MetadataWrapper, CSTTransformer, FunctionDef, ClassDef, Module, SimpleStatementLine, Expr, SimpleString
from typing import Sequence
# Helper: check if the string literal already uses a raw prefix.
def has_raw_prefix(text: str) -> bool:
    """Report whether a string literal's source text carries a raw prefix.

    Walks the leading run of prefix letters (``u``, ``r``, ``b``, ``f`` in
    either case) and answers True as soon as an ``r``/``R`` shows up in it.
    Scanning stops at the first character that is not a prefix letter
    (normally the opening quote), so an ``r`` inside the string body is
    never taken for a prefix.
    """
    for ch in text:
        if ch in "rR":
            return True
        if ch not in "ubfUBF":
            # Left the prefix run without meeting an r.
            return False
    return False
# Helper: update a SimpleString node to be a raw string.
def make_raw_string(node: SimpleString) -> SimpleString:
    """Return *node* with an ``r`` prefix added to its literal source text.

    Args:
        node: A libcst ``SimpleString``; ``node.value`` is the full literal
            as written in the source (prefix, quotes and body).

    Returns:
        ``node`` itself when the literal is already raw, otherwise a copy
        whose ``value`` starts with ``r``. Everything from the opening quote
        onwards is kept verbatim, so the quote style and body are untouched.
    """
    literal = node.value
    if has_raw_prefix(literal):
        return node
    # Only the leading prefix letters are inspected. The quotes are never
    # parsed, so literals such as "'" cannot be mis-sliced.
    prefix_len = re.match(r"[bBuUfF]*", literal).end()
    # Python 3 has no "ur"/"ru" prefix (it is a syntax error) and "u" is a
    # no-op on its own, so it is dropped rather than combined with "r".
    kept = literal[:prefix_len].replace("u", "").replace("U", "")
    return node.with_changes(value="r" + kept + literal[prefix_len:])
# Helper: Given a list of statements, update its docstring (if any) using our transformation.
# A backslash together with the character that follows it.
_ESCAPE_PAIR = re.compile(r"\\(.)", re.DOTALL)
# Characters that form a recognised escape after a backslash in a str literal
# (line continuation, quotes, backslash, C-style escapes, octal digits,
# \x, \N, \u, \U). Anything else triggers "invalid escape sequence".
_STR_ESCAPES = frozenset("\n\r\\'\"abfnrtv01234567xNuU")
# \N, \u and \U are not recognised inside bytes literals.
_STR_ONLY_ESCAPES = frozenset("NuU")


def _should_make_raw(literal_text: str) -> bool:
    """Return True when an ``r`` prefix silences a warning without changing the value.

    That is the case only when the literal has at least one backslash escape
    and none of its escapes is recognised by Python. A recognised escape
    (``\\n``, ``\\\\``, a line continuation, ...) would change meaning in a raw
    string, so its presence vetoes the conversion.
    """
    prefix = literal_text[: len(literal_text) - len(literal_text.lstrip("bBuUfFrR"))]
    recognised = (_STR_ESCAPES - _STR_ONLY_ESCAPES) if "b" in prefix.lower() else _STR_ESCAPES
    escaped = {m.group(1) for m in _ESCAPE_PAIR.finditer(literal_text)}
    return bool(escaped) and escaped.isdisjoint(recognised)


def update_docstring_in_body(body: Sequence[cst.BaseStatement]) -> Sequence[cst.BaseStatement]:
    """Make the leading docstring of *body* raw when that is needed and safe.

    A docstring is taken to be a first statement that is a
    ``SimpleStatementLine`` holding a single ``Expr`` whose value is a
    ``SimpleString``.

    Args:
        body: The statements of a module, class or function body.

    Returns:
        ``body`` unchanged when there is no such docstring, when it is
        already raw, or when it has no invalid escape sequence or mixes
        invalid and valid ones (adding ``r`` would then alter the docstring's
        runtime value). Otherwise a new list whose first statement carries
        the raw docstring.
    """
    if not body:
        return body
    first_stmt = body[0]
    if not (isinstance(first_stmt, SimpleStatementLine) and len(first_stmt.body) == 1):
        return body
    expr = first_stmt.body[0]
    if not (isinstance(expr, Expr) and isinstance(expr.value, SimpleString)):
        return body
    literal_text = expr.value.value
    if has_raw_prefix(literal_text) or not _should_make_raw(literal_text):
        return body
    raw_string = make_raw_string(expr.value)
    new_first_stmt = first_stmt.with_changes(body=[expr.with_changes(value=raw_string)])
    return [new_first_stmt, *body[1:]]
# Define a transformer that will update docstrings in modules,
# class definitions, and function definitions.
class DocstringRawTransformer(cst.CSTTransformer):
    """Transformer that runs ``update_docstring_in_body`` on every module,
    function and class body it leaves."""

    @staticmethod
    def _with_updated_suite(definition):
        """Rebuild *definition* with its suite's statements passed through
        ``update_docstring_in_body``."""
        suite = definition.body
        return definition.with_changes(
            body=suite.with_changes(body=update_docstring_in_body(suite.body))
        )

    def leave_Module(self, original_node: Module, updated_node: Module) -> Module:
        # A Module holds its statements directly, with no intermediate suite.
        return updated_node.with_changes(body=update_docstring_in_body(updated_node.body))

    def leave_FunctionDef(self, original_node: FunctionDef, updated_node: FunctionDef) -> FunctionDef:
        return self._with_updated_suite(updated_node)

    def leave_ClassDef(self, original_node: ClassDef, updated_node: ClassDef) -> ClassDef:
        return self._with_updated_suite(updated_node)
# Provide a main() so this tool can be used from the command line.
def main():
    """Command-line entry point: rewrite the given Python file in place.

    Expects exactly one argument, the path of the file to fix. Exits with a
    usage message otherwise, and with an error message if the file cannot be
    parsed. The file is only written back when the transformation actually
    changed its text.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: {} <python_file_to_fix.py>".format(sys.argv[0]))
    filename = sys.argv[1]
    # newline="" turns off newline translation, so the file's own line
    # endings (LF or CRLF) survive the read/write round trip.
    with open(filename, "r", encoding="utf-8", newline="") as f:
        source = f.read()
    try:
        module = cst.parse_module(source)
    except Exception as e:
        # Top-level boundary: report any parse failure and stop.
        sys.exit("Error parsing {}: {}".format(filename, e))
    wrapper = MetadataWrapper(module)
    transformer = DocstringRawTransformer()
    new_code = wrapper.visit(transformer).code
    if new_code == source:
        # Nothing to fix; leave the file (and its mtime) alone.
        return
    with open(filename, "w", encoding="utf-8", newline="") as f:
        f.write(new_code)


if __name__ == "__main__":
    main()
Below is one complete solution. In this example we use libcst’s codemod‐style transformer to look at the “docstring” (that is, the first statement in a module, function, or class body that is a simple string literal) and then, if its literal is not already “raw” (that is, lacking the r‑ or R‑prefix) and if it contains a backslash (which might trigger an “invalid escape sequence” warning), we change it to a raw string literal by adding an r‑prefix. (You might want to fine‐tune which escapes you want to “fix”, but here we simply check for the presence of a backslash.)
To run this code you’ll need to install libcst (for example, pip install libcst).
Below is the full Python program:
------------------------------------------------------------
#!/usr/bin/env python3
"""
This script uses libcst to automatically update docstrings that might
raise an "invalid escape sequence" syntax warning – converting them to raw
docstrings (adding an r prefix) if they are not already and if they contain
backslashes. It processes module-level, function, and class docstrings.
"""
import re
import sys
import libcst as cst
from libcst import MetadataWrapper, CSTTransformer, FunctionDef, ClassDef, Module, SimpleStatementLine, Expr, SimpleString
from typing import Sequence
# Helper: check if the string literal already uses a raw prefix.
def has_raw_prefix(text: str) -> bool:
    """Tell whether the prefix of a string literal includes ``r`` or ``R``.

    Only the leading run of prefix letters (``u``, ``r``, ``b``, ``f``, any
    case) is examined; the first other character, normally the opening quote,
    ends the scan.
    """
    for ch in text:
        if ch in "rR":
            return True
        if ch not in "ubfUBF":
            # Prefix run is over and held no r.
            return False
    return False
# Helper: update a SimpleString node to be a raw string.
def make_raw_string(node: SimpleString) -> SimpleString:
    """Return *node* with an ``r`` prefix added to its literal source text.

    Args:
        node: A libcst ``SimpleString``; ``node.value`` is the full literal
            as written in the source (prefix, quotes and body).

    Returns:
        ``node`` itself when the literal is already raw, otherwise a copy
        whose ``value`` starts with ``r``. Everything from the opening quote
        onwards is kept verbatim, so the quote style and body are untouched.
    """
    literal = node.value
    if has_raw_prefix(literal):
        return node
    # Only the leading prefix letters are inspected. The quotes are never
    # parsed, so literals such as "'" cannot be mis-sliced.
    prefix_len = re.match(r"[bBuUfF]*", literal).end()
    # Python 3 has no "ur"/"ru" prefix (it is a syntax error) and "u" is a
    # no-op on its own, so it is dropped rather than combined with "r".
    kept = literal[:prefix_len].replace("u", "").replace("U", "")
    return node.with_changes(value="r" + kept + literal[prefix_len:])
# Helper: Given a list of statements, update its docstring (if any) using our transformation.
# A backslash together with the character that follows it.
_ESCAPE_PAIR = re.compile(r"\\(.)", re.DOTALL)
# Characters that form a recognised escape after a backslash in a str literal
# (line continuation, quotes, backslash, C-style escapes, octal digits,
# \x, \N, \u, \U). Anything else triggers "invalid escape sequence".
_STR_ESCAPES = frozenset("\n\r\\'\"abfnrtv01234567xNuU")
# \N, \u and \U are not recognised inside bytes literals.
_STR_ONLY_ESCAPES = frozenset("NuU")


def _should_make_raw(literal_text: str) -> bool:
    """Return True when an ``r`` prefix silences a warning without changing the value.

    That is the case only when the literal has at least one backslash escape
    and none of its escapes is recognised by Python. A recognised escape
    (``\\n``, ``\\\\``, a line continuation, ...) would change meaning in a raw
    string, so its presence vetoes the conversion.
    """
    prefix = literal_text[: len(literal_text) - len(literal_text.lstrip("bBuUfFrR"))]
    recognised = (_STR_ESCAPES - _STR_ONLY_ESCAPES) if "b" in prefix.lower() else _STR_ESCAPES
    escaped = {m.group(1) for m in _ESCAPE_PAIR.finditer(literal_text)}
    return bool(escaped) and escaped.isdisjoint(recognised)


def update_docstring_in_body(body: Sequence[cst.BaseStatement]) -> Sequence[cst.BaseStatement]:
    """Make the leading docstring of *body* raw when that is needed and safe.

    A docstring is taken to be a first statement that is a
    ``SimpleStatementLine`` holding a single ``Expr`` whose value is a
    ``SimpleString``.

    Args:
        body: The statements of a module, class or function body.

    Returns:
        ``body`` unchanged when there is no such docstring, when it is
        already raw, or when it has no invalid escape sequence or mixes
        invalid and valid ones (adding ``r`` would then alter the docstring's
        runtime value). Otherwise a new list whose first statement carries
        the raw docstring.
    """
    if not body:
        return body
    first_stmt = body[0]
    if not (isinstance(first_stmt, SimpleStatementLine) and len(first_stmt.body) == 1):
        return body
    expr = first_stmt.body[0]
    if not (isinstance(expr, Expr) and isinstance(expr.value, SimpleString)):
        return body
    literal_text = expr.value.value
    if has_raw_prefix(literal_text) or not _should_make_raw(literal_text):
        return body
    raw_string = make_raw_string(expr.value)
    new_first_stmt = first_stmt.with_changes(body=[expr.with_changes(value=raw_string)])
    return [new_first_stmt, *body[1:]]
# Define a transformer that will update docstrings in modules,
# class definitions, and function definitions.
class DocstringRawTransformer(cst.CSTTransformer):
    """Transformer that runs ``update_docstring_in_body`` on every module,
    function and class body it leaves."""

    @staticmethod
    def _with_updated_suite(definition):
        """Rebuild *definition* with its suite's statements passed through
        ``update_docstring_in_body``."""
        suite = definition.body
        return definition.with_changes(
            body=suite.with_changes(body=update_docstring_in_body(suite.body))
        )

    def leave_Module(self, original_node: Module, updated_node: Module) -> Module:
        # A Module holds its statements directly, with no intermediate suite.
        return updated_node.with_changes(body=update_docstring_in_body(updated_node.body))

    def leave_FunctionDef(self, original_node: FunctionDef, updated_node: FunctionDef) -> FunctionDef:
        return self._with_updated_suite(updated_node)

    def leave_ClassDef(self, original_node: ClassDef, updated_node: ClassDef) -> ClassDef:
        return self._with_updated_suite(updated_node)
# Provide a main() so this tool can be used from the command line.
def main():
    """Command-line entry point: print the transformed file to stdout.

    Takes exactly one argument, the path of the Python file to process.
    Exits with a usage message when the argument count is wrong and with an
    error message when the file cannot be parsed. The input file itself is
    not modified.
    """
    args = sys.argv
    if len(args) != 2:
        sys.exit("Usage: {} <python_file_to_fix.py>".format(args[0]))
    path = args[1]
    with open(path, "r", encoding="utf-8") as handle:
        text = handle.read()
    try:
        tree = cst.parse_module(text)
    except Exception as exc:
        sys.exit("Error parsing {}: {}".format(path, exc))
    rewritten = MetadataWrapper(tree).visit(DocstringRawTransformer())
    # Emit the transformed source on standard output.
    sys.stdout.write(rewritten.code)


if __name__ == "__main__":
    main()
------------------------------------------------------------
Explanation:
1. The helper function has_raw_prefix() uses a regex to see if the string literal token already begins with a raw prefix (r or R) among its combined prefixes.
2. The make_raw_string() function takes a libcst.SimpleString node and returns a modified SimpleString node where an 'r' has been injected into its prefix (taking care to preserve the original quoting style and other prefixes).
3. The update_docstring_in_body() function checks if the first statement in a list of statements is a docstring (a SimpleStatementLine whose single child is an Expr with a SimpleString), then applies the above modifications if necessary.
4. The DocstringRawTransformer class applies the update_docstring_in_body() transformation to module-level, function, and class-level docstrings.
Run the script from the command line with the name of the Python file to process. The modified code is printed to standard output.
This solution should automatically add the missing r‑prefix to docstrings that contain backslashes which could otherwise raise a syntax warning about invalid escape sequences.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment