Skip to content

Instantly share code, notes, and snippets.

@poliorcetics
Last active August 22, 2020 20:15
Show Gist options
  • Save poliorcetics/b3cba87c02711fe90cfd6c10ece52c1c to your computer and use it in GitHub Desktop.
Save poliorcetics/b3cba87c02711fe90cfd6c10ece52c1c to your computer and use it in GitHub Desktop.
Converter from path-based links to intra-doc links for the `rust-lang/rust` project.
#!/usr/bin/env python3
"""
Converter from path-based links to intra-doc links for
the `rust-lang/rust` project. This is not perfect and
the modified files should still be reviewed after running it.
By default it will only print the changes and not apply them,
use `-a` (`--apply`) to write them.
If you are modifying `core` or `alloc` instead of `std`,
you can pass the `-c core` (`--crate core`) flag to mark
the change in the root crate.
Required: Python 3.6 or higher.
Basic usage: `python3 converter.py path/to/file.rs`
See `python3 converter.py -h` for help.
"""
import re
import argparse as ap
from typing import Match, Optional
# ANSI escape sequences used to colorize the report printed on the terminal.
YELLOW = "\033[93m"  # Deletion reasons.
GREEN = "\033[92m"  # Added lines.
RED = "\033[91m"  # Deleted lines.
ENDC = "\033[0m"  # Emitted after each colored span.
def print_del(line: str, pos: int):
    """Print a removed line in red, preceded by its line number.

    Args:
        line (str): The text of the removed line.
        pos (int): Line number, right-aligned on five columns.
    """
    number = f"{pos:>5}"
    colored = f"{RED}{line}{ENDC}"
    print(f'{number}: "{colored}"')
def prind_add(line: str):
    """Print an added line in green, followed by an empty line.

    NOTE(review): the name looks like a typo of `print_add`; it is kept as-is
    because the rest of the file calls it under this name.

    Args:
        line (str): The text of the added line.
    """
    colored = f"{GREEN}{line}{ENDC}"
    print(f' "{colored}"\n')
def print_del_reason(reason: str):
    """Print, in yellow, why a line was deleted, followed by an empty line.

    Args:
        reason (str): Short human-readable explanation of the deletion.
    """
    message = f"{YELLOW}Deletion: {reason}{ENDC}"
    print(f" \t{message}\n")
# Matches a doc-comment link definition pointing at a rustdoc item page, e.g.
# `/// [`Vec`]: ../../std/vec/struct.Vec.html#method.push`.
# The spaces are captured to preserve indentation.
COMMENT_LINK_REGEX = re.compile(
    # Leading indentation.
    r"^(?P<spaces>\s*)"
    # `///` or `//!` followed by one space.
    r"//(?P<c>!|/) "
    # Link label, kept verbatim (backticks included).
    r"\[(?P<elem>.*?)\]: "
    # Every leading `../`. The repetition lives INSIDE the group: a repeated
    # capture group (`(...)*`) only keeps its last iteration, which made
    # `../../` count as a single `super::`. The group is now never `None`;
    # it is the empty string when there is no `../`.
    r"(?P<supers>(?:\.\./)*)"
    # Optional root crate directory.
    r"(?:(?P<crate>std|core|alloc)/)?"
    # Optional intermediate directories (modules), trailing `/` included.
    r"(?P<intermediates>(?:.*/))?"
    # Item kind prefix of the rustdoc file name.
    r"(?:enum|struct|primitive|trait|constant|type|fn|macro)\."
    # Item name.
    r"(?P<elem2>.*)\.html"
    # Optional anchor to a method/variant of the item.
    r"(?:#(?:method|variant|tymethod)\.(?P<additional>\S*))?$"
)
# Splits an already-built link definition into its label and its target so
# that callers can compare both and detect trivial links such as
# "/// [`Path`]: Path".
LOCAL_PATH = re.compile(
    # Indentation, then `///` or `//!` and one space.
    r"^(?:\s*)//(?:!|/) "
    # Link label; an optional backtick on each side is left out of the group.
    r"\[`?(?P<elem>.*?)`?\]: "
    # Link target: everything up to the end of the line.
    r"(?P<elem2>.*)$"
)
def handle_comment_link(line: str, pos: int, crate: str) -> Optional[str]:
    """Try to parse a line according to the `COMMENT_LINK_REGEX`. If it fails,
    returns the original `line`. If it succeeds, build the new line using
    intra-doc links as well as possible with the available information.

    Once this is done, checks if the link became something like:

    ```rust
    /// [`Path`]: Path
    ```

    In which case it returns `None`, marking a deleted line, else it returns
    the transformed line.

    Links whose path starts with `http://` or `https://` are external and are
    returned untouched. A leading `./` in the path designates the current
    module and is dropped.

    Args:
        line (str): The line to work on.
        pos (int): Line number, used for a pretty display.
        crate (str): The root crate (`std`, `core` or `alloc`).

    Returns:
        Optional[str]: Either the original line (no match or external link),
        the transformed line (non-trivial intra-link) or `None` (trivial
        intra-link).
    """
    com = COMMENT_LINK_REGEX.search(line)
    if com is None:
        return line

    intermediates = com.group("intermediates") or ""
    # An absolute URL is an external link we don't want to mess up. The full
    # scheme is tested so that a local module whose name merely starts with
    # "http" is still converted.
    if intermediates.startswith(("http://", "https://")):
        return line
    # `./foo/` and `foo/` designate the same module; without this a path such
    # as `./foo/struct.Bar.html` would become `.::foo::Bar`.
    if intermediates.startswith("./"):
        intermediates = intermediates[2:]

    # Building the new line.
    new = f"{com.group('spaces')}//{com.group('c')} [{com.group('elem')}]: "

    root = com.group("crate")
    supers = com.group("supers")
    if root is not None:
        # The crate the file lives in is spelled `crate`, others by name.
        new += "crate::" if root == crate else f"{root}::"
    elif supers is not None:
        # One `super::` per `/` found in the captured `../` text.
        new += "super::" * supers.count("/")

    new += intermediates.replace("/", "::")
    new += com.group("elem2")

    additional = com.group("additional")
    if additional is not None:
        new += "::" + additional

    print_del(line, pos)

    # Checking for a local path. Those lines will be deleted instead of
    # replaced.
    local = LOCAL_PATH.match(new)
    if local is not None and local.group("elem") == local.group("elem2"):
        print_del_reason("local path")
        # Don't append the current line to the list of lines to write.
        return None

    prind_add(new)
    return new
# Matches a doc-comment link definition pointing at a rustdoc module page,
# e.g. `/// [`io`]: ../../std/io/index.html`.
COMMENT_MODULE_REGEX = re.compile(
    # Leading indentation, captured to preserve it.
    r"^(?P<spaces>\s*)"
    # `///` or `//!` followed by one space.
    r"//(?P<c>!|/) "
    # Link label, kept verbatim (backticks included).
    r"\[(?P<elem>.*?)\]: "
    # Every leading `../`. The repetition lives INSIDE the group: a repeated
    # capture group (`(...)*`) only keeps its last iteration, which made
    # `../../` count as a single `super::`. The group is now never `None`;
    # it is the empty string when there is no `../`.
    r"(?P<supers>(?:\.\./)*)"
    # Optional root crate directory.
    r"(?:(?P<crate>std|core|alloc)/)?"
    # Module directories, each with its trailing `/` (may be empty).
    r"(?P<mods>(?:.*?/)*)"
    # A module page is always named `index.html`.
    r"index\.html$"
)
def handle_module_link(line: str, pos: int, crate: str) -> Optional[str]:
    """Try to parse a line according to the `COMMENT_MODULE_REGEX`. If it
    fails, returns the original `line`. If it succeeds, build the new line
    using intra-doc links as well as possible with the available information.

    Once this is done, checks if the link became something like:

    ```rust
    /// [`mod1::mod2`]: mod1::mod2
    ```

    In which case it returns `None`, marking a deleted line, else it returns
    the transformed line.

    Links whose path starts with `http://` or `https://` are external and are
    returned untouched, like `handle_comment_link` does. The built path never
    ends with a dangling `::`, and a link to the current module's own
    `index.html` becomes `self`.

    Args:
        line (str): The line to work on.
        pos (int): Line number, used for a pretty display.
        crate (str): The root crate (`std`, `core` or `alloc`).

    Returns:
        Optional[str]: Either the original line (no match or external link),
        the transformed line (non-trivial intra-link) or `None` (trivial
        intra-link).
    """
    com = COMMENT_MODULE_REGEX.search(line)
    if com is None:
        return line

    mods = com.group("mods") or ""
    # An absolute URL is an external link we don't want to mess up: without
    # this guard `https://host/std/index.html` had its `/` replaced by `::`.
    if mods.startswith(("http://", "https://")):
        return line
    # `./foo/` and `foo/` designate the same module.
    if mods.startswith("./"):
        mods = mods[2:]

    root = com.group("crate")
    supers = com.group("supers")
    if root is not None:
        # The crate the file lives in is spelled `crate`, others by name.
        prefix = "crate::" if root == crate else f"{root}::"
    elif supers is not None:
        # One `super::` per `/` found in the captured `../` text.
        prefix = "super::" * supers.count("/")
    else:
        prefix = ""

    target = prefix + mods.replace("/", "::")
    # Both the prefix and the module list end with `::`; drop the dangling one
    # (it used to survive when the module list was empty, e.g. `crate::`).
    if target.endswith("::"):
        target = target[:-2]
    # A bare `index.html` is the page of the current module.
    if not target:
        target = "self"

    # Building the new line.
    new = f"{com.group('spaces')}//{com.group('c')} [{com.group('elem')}]: {target}"

    print_del(line, pos)

    # Checking for a local path. Those lines will be deleted instead of
    # replaced.
    local = LOCAL_PATH.match(new)
    if local is not None and local.group("elem") == local.group("elem2"):
        print_del_reason("local path")
        # Don't append the current line to the list of lines to write.
        return None

    prind_add(new)
    return new
# Detects the start of an `impl` block or of a trait declaration and captures
# the path of the implemented type (or of the declared trait) in `type`.
#
# The `type` group only accepts a plain path (`Foo`, `path::Foo`). Together
# with regex backtracking this keeps the greedy generics pattern from
# swallowing the type: `impl<T> Vec<T> {` used to capture `{`, a type followed
# by a `where` clause captured `where`, and `trait Bar: Sized` captured `Bar:`.
#
# Known limitation: types that are not plain paths (references, slices,
# tuples, ...) are not supported.
IMPL_REGEX = re.compile(
    r"^\s*"
    # `impl`, or `trait` with an optional `pub` / `pub(...)` visibility.
    r"(?:impl|(?:pub(?:\(.+\))? )?trait)"
    # Optional generic parameters of the `impl`, then the mandatory space.
    r"(?:<.*>)? "
    # Optional `Trait for ` part of a trait implementation.
    r"(?:.* for )?"
    # The type path itself; the `where` and `for` keywords are rejected so
    # the engine backtracks to a shorter generics match instead.
    r"(?P<type>(?!(?:where|for)\b)\w+(?:::\w+)*)"
    # Optional generic arguments of the type, excluded from the capture.
    r"(?:<.*>)?"
)
# Matches a doc-comment link definition whose target is only an anchor in the
# current page, e.g. "/// [`push`]: #method.push".
METHOD_ANCHOR = re.compile(
    # Leading indentation, captured to preserve it.
    r"^(?P<spaces>\s*)"
    # `///` or `//!` followed by one space.
    r"//(?P<c>!|/) "
    # Link label, kept verbatim.
    r"\[(?P<elem>.*)\]: "
    # Anchor kind, then the name of the method or variant.
    r"#(?:method|variant|tymethod)\.(?P<additional>\S*)$"
)
def handle_method_anchor(line: str, pos: int, curr_impl: str) -> str:
    """Rewrite an anchor-only link (`#method.name`) as `Type::name`.

    The line is matched against the `METHOD_ANCHOR` regex. A line that does
    not match is returned untouched; otherwise the anchor is replaced by
    `curr_impl` followed by `::` and the method/variant name, and both the old
    and the new line are printed.

    Args:
        line (str): The line to parse.
        pos (int): Position in the file, for pretty printing.
        curr_impl (str): A type or a path to a type, inserted before the
            method/variant name.

    Returns:
        str: Either the original line or the transformed line.
    """
    found = METHOD_ANCHOR.match(line)
    if found is None:
        return line

    indent, marker, label, member = found.group("spaces", "c", "elem", "additional")

    print_del(line, pos)
    rewritten = f"{indent}//{marker} [{label}]: {curr_impl}::{member}"
    prind_add(rewritten)
    return rewritten
# Used to detect an empty doc comment line: optional indentation followed by
# `///` or `//!` and nothing else.
EMPTY_DOC_COMMENT = re.compile(r"^\s*//(?:!|/)$")
# Used to detect any doc comment line, empty or not: same pattern as
# `EMPTY_DOC_COMMENT` minus the final `$`, so text may follow the marker.
IS_DOC_COMMENT_LINE = re.compile(r"^\s*//(?:!|/)")
def apply_regex(path: str, write_result: bool, crate: str):
    """Apply the regex searching for comments to transform, optionally
    rewriting the file with the proposed transformation.

    Every line is stripped of its trailing whitespace, passed through
    `handle_comment_link` and `handle_module_link` (either may delete it),
    then through `handle_method_anchor` once an `impl`/`trait` line has been
    seen. The kept lines are accumulated in memory and, when `write_result` is
    set, written back to `path`.

    Args:
        path (str): The path to the file.
        write_result (bool): If `True`, will rewrite the file to include the
            transformations.
        crate (str): The root crate (`std`, `core`, `alloc`).
    """
    lines = []
    # Type captured on the last line matched by `IMPL_REGEX`, if any.
    in_impl = None
    # Last line that was kept, and its line number in the original file.
    # Deleted lines do not update them.
    prev_line = ""
    prev_pos = 0

    with open(path, "r", encoding="utf-8") as f:
        # `pos` is used to do a nice line counter display.
        for pos, line in enumerate(f, start=1):
            # Keeping the spaces on the left.
            line = line.rstrip()

            if EMPTY_DOC_COMMENT.match(prev_line) is not None:
                # This is notably triggered when a trivial path is deleted
                # in the middle of a bigger comment. The next line (the `line`
                # here) and the previous line (`prev_line`) may be two empty
                # comment lines: we delete one to avoid double /// empty lines.
                if EMPTY_DOC_COMMENT.match(line):
                    print_del(line, pos)
                    print_del_reason("Consecutives empty comments line")
                    continue
                # This is triggered when the previously deleted link was the
                # last line in the doc comment and its previous line was an
                # empty comment line, leading to a single /// line at the end
                # of the comment.
                if IS_DOC_COMMENT_LINE.match(line) is None:
                    # Report the real position of the removed line: `pos - 1`
                    # is wrong as soon as lines were deleted in between.
                    print_del(lines.pop().rstrip(), prev_pos)
                    print_del_reason("Empty comment line at the end of a comment")

            line = handle_comment_link(line, pos, crate)
            if line is None:
                continue

            line = handle_module_link(line, pos, crate)
            if line is None:
                continue

            impl_match = IMPL_REGEX.match(line)
            if impl_match is not None:
                in_impl = impl_match.group("type")

            if in_impl is not None:
                line = handle_method_anchor(line, pos, in_impl)

            prev_line = line
            prev_pos = pos
            # The `writelines` method does not automatically append the '\n'.
            lines.append(line + "\n")

    if write_result:
        with open(path, "w", encoding="utf-8") as f:
            f.writelines(lines)
# ==== MAIN PART ====
PARSER = ap.ArgumentParser(description=(
    "Transform a rust source file to use intra doc links when possible. "
    "By default it only show the transformations but does nothing. "
    "When detecting local paths, it will delete instead of replace. "
    "Note: THIS DOES NOT CATCH EVERYTHING. You should still review after that."
))
PARSER.add_argument(
    "files", metavar="FILES", type=str, nargs="+", help="The files to transform."
)
PARSER.add_argument(
    "-a", "--apply", action="store_true",
    help="Apply the transformations. Default is False.",
)
# No `nargs="?"` here: with it, a bare `-c` produced `None` (the implicit
# `const`), which bypassed `choices` and silently disabled the `crate::`
# replacement. The option now always requires a value.
PARSER.add_argument(
    "-c", "--crate", type=str, default="std", action="store",
    choices=("alloc", "core", "std"), help=(
        "The root crate in which the file is. This will help put `crate::` "
        "instead of `std::` for example. Default is `std`."
    )
)


def main(argv=None):
    """Parse the command line and process every given file.

    Args:
        argv: Arguments to parse; `None` (the default) lets `argparse` read
            them from `sys.argv`.
    """
    args = PARSER.parse_args(argv)
    for path in args.files:
        # Header: the path underlined with `=`.
        print(f"{path}")
        print("=" * len(path) + "\n")
        apply_regex(path, args.apply, args.crate)


# Only run when executed as a script, so that importing this module neither
# parses `sys.argv` nor touches any file.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment