-
-
Save poliorcetics/b3cba87c02711fe90cfd6c10ece52c1c to your computer and use it in GitHub Desktop.
Converter from path-based links to intra-doc links for the `rust-lang/rust` project.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Converter from path-based links to intra-doc links for
the `rust-lang/rust` project. This is not perfect and
the modified files should still be reviewed after running it.

By default it will only print the changes and not apply them,
use `-a` (`--apply`) to write them.

If you are modifying `core` or `alloc` instead of `std`,
you can pass the `-c core` (`--crate core`) flag to mark
the change in the root crate.

Required: Python 3.6 or higher.

Basic usage: `python3 converter.py path/to/file.rs`

See `python3 converter.py -h` for help.
"""
import re | |
import argparse as ap | |
from typing import Match, Optional | |
# ANSI escape sequences used to colorize the diff-like terminal output.
YELLOW = "\033[93m"  # used for deletion reasons
GREEN = "\033[92m"  # used for added lines
RED = "\033[91m"  # used for deleted lines
ENDC = "\033[0m"  # emitted after each colored span
def print_del(line: str, pos: int):
    """Print a deleted line in red, prefixed by its right-aligned line number.

    Args:
        line (str): The text of the line being removed.
        pos (int): The line number to display in front of `line`.
    """
    colored = f"{RED}{line}{ENDC}"
    print(f'{pos:>5}: "{colored}"')
def print_add(line: str):
    """Print an added (replacement) line in green, followed by a blank line.

    Args:
        line (str): The text of the line being added.
    """
    print(f' "{GREEN}{line}{ENDC}"\n')


# Historical, misspelled name: kept as an alias because existing callers use it.
prind_add = print_add
def print_del_reason(reason: str):
    """Print, in yellow, why a line was deleted, followed by a blank line.

    Args:
        reason (str): Short explanation shown after the `Deletion: ` label.
    """
    message = f"{YELLOW}Deletion: {reason}{ENDC}"
    print(f" \t{message}\n")
# Matches a doc-comment link definition whose target is a rustdoc item page,
# e.g. `/// [`Vec`]: ../vec/struct.Vec.html#method.push`.
# The spaces are captured to preserve indentation.
COMMENT_LINK_REGEX = re.compile(
    r"^(?P<spaces>\s*)"
    r"//(?P<c>!|/) "
    r"\[(?P<elem>.*?)\]: "
    # The repetition lives INSIDE the named group: a group that is itself
    # repeated only keeps its last repetition, which made `../../`
    # indistinguishable from `../`. The group is now the whole run of `../`
    # (an empty string, not `None`, when there is none).
    r"(?P<supers>(?:\.\./)*)"
    r"(?:(?P<crate>std|core|alloc)/)?"
    r"(?P<intermediates>(?:.*/))?"
    r"(?:enum|struct|primitive|trait|constant|type|fn|macro)\."
    r"(?P<elem2>.*)\.html"
    r"(?:#(?:method|variant|tymethod)\.(?P<additional>\S*))?$"
)
# Matches any doc-comment link definition (`/// [label]: target`), ignoring
# optional backticks around the label. Callers compare the `elem` (label) and
# `elem2` (target) groups to spot links whose target is just the label itself.
LOCAL_PATH = re.compile(
    r"^(?:\s*)//(?:!|/) "
    r"\[`?(?P<elem>.*?)`?\]: "
    r"(?P<elem2>.*)$"
)
def handle_comment_link(line: str, pos: int, crate: str) -> Optional[str]:
    """Convert a path-based link to a rustdoc item page into an intra-doc link.

    Try to parse `line` according to `COMMENT_LINK_REGEX`. If it fails, or if
    the target looks like an external `http(s)` link, the original `line` is
    returned. If it succeeds, the new line is built using intra-doc links as
    well as possible with the available information.

    Once this is done, checks if the link became something like:

    ```rust
    /// [`Path`]: Path
    ```

    In which case it returns `None`, marking a deleted line, else it returns
    the transformed line.

    Args:
        line (str): The line to work on.
        pos (int): Line number, used for a pretty display.
        crate (str): The root crate (`std`, `core` or `alloc`).

    Returns:
        Optional[str]: Either the original line (no match), the transformed
            line (non-trivial intra-link) or `None` (trivial intra-link).
    """
    com = COMMENT_LINK_REGEX.search(line)
    if com is None:
        return line

    intermediates = com.group("intermediates")
    # Starting with http(s): probably an external link we don't want to mess up.
    if intermediates is not None and intermediates.startswith("http"):
        return line

    # Building the new line.
    new = f"{com.group('spaces')}//{com.group('c')} [{com.group('elem')}]: "

    root = com.group("crate")
    if root is not None:
        new += f"{'crate' if root == crate else root}::"
    else:
        # Count every leading `../` straight from the line: a regex group
        # that is itself repeated only remembers its last repetition, so
        # relying on the `supers` group alone could report a single level for
        # `../../`. The link target starts right after the `]: ` that
        # follows the `elem` group.
        target_start = com.end("elem") + len("]: ")
        leading_supers = re.match(r"(?:\.\./)*", line[target_start:]).group(0)
        new += "super::" * leading_supers.count("/")

    # A lone `./` adds nothing to the path and is dropped.
    if intermediates is not None and intermediates != "./":
        new += intermediates.replace("/", "::")

    new += com.group("elem2")

    additional = com.group("additional")
    # An empty anchor name (`#method.`) would only add a dangling `::`.
    if additional:
        new += "::" + additional

    print_del(line, pos)

    # Checking for a local path. Those lines will be deleted instead of replaced.
    local = LOCAL_PATH.match(new)
    if local is not None and local.group("elem") == local.group("elem2"):
        print_del_reason("local path")
        # Don't append the current line to the list of lines to write.
        return None

    prind_add(new)
    return new
# Matches a doc-comment link definition whose target is a rustdoc module page,
# e.g. `//! [`io`]: ../io/index.html`.
COMMENT_MODULE_REGEX = re.compile(
    r"^(?P<spaces>\s*)"
    r"//(?P<c>!|/) "
    r"\[(?P<elem>.*?)\]: "
    # The repetition lives INSIDE the named group: a group that is itself
    # repeated only keeps its last repetition, which made `../../`
    # indistinguishable from `../`. The group is now the whole run of `../`
    # (an empty string, not `None`, when there is none).
    r"(?P<supers>(?:\.\./)*)"
    r"(?:(?P<crate>std|core|alloc)/)?"
    r"(?P<mods>(?:.*?/)*)"
    r"index\.html$"
)
def handle_module_link(line: str, pos: int, crate: str) -> Optional[str]:
    """Convert a path-based link to a rustdoc module page into an intra-doc link.

    Try to parse `line` according to `COMMENT_MODULE_REGEX`. The original
    `line` is returned untouched when it does not match, when the target looks
    like an external `http(s)` link, or when no path at all can be built from
    it (e.g. a bare `index.html`). Otherwise the new line is built using
    intra-doc links as well as possible with the available information.

    Once this is done, checks if the link became something like:

    ```rust
    /// [`mod1::mod2`]: mod1::mod2
    ```

    In which case it returns `None`, marking a deleted line, else it returns
    the transformed line.

    Args:
        line (str): The line to work on.
        pos (int): Line number, used for a pretty display.
        crate (str): The root crate (`std`, `core` or `alloc`).

    Returns:
        Optional[str]: Either the original line (nothing to convert), the
            transformed line (non-trivial intra-link) or `None` (trivial
            intra-link).
    """
    com = COMMENT_MODULE_REGEX.search(line)
    if com is None:
        return line

    mods = com.group("mods") or ""
    # Starting with http(s): probably an external link we don't want to mess
    # up (same rule as in `handle_comment_link`).
    if mods.startswith("http"):
        return line

    # Collect the path segments and join them at the end, so that the result
    # never ends with a dangling `::` (e.g. `std/index.html` -> `crate`).
    segments = []
    root = com.group("crate")
    if root is not None:
        segments.append("crate" if root == crate else root)
    else:
        # Count every leading `../` straight from the line: a regex group
        # that is itself repeated only remembers its last repetition, so
        # relying on the `supers` group alone could report a single level for
        # `../../`. The link target starts right after the `]: ` that
        # follows the `elem` group.
        target_start = com.end("elem") + len("]: ")
        leading_supers = re.match(r"(?:\.\./)*", line[target_start:]).group(0)
        segments.extend(["super"] * leading_supers.count("/"))

    # Empty pieces come from the trailing `/`; `.` adds nothing to the path.
    segments.extend(part for part in mods.split("/") if part not in ("", "."))

    if not segments:
        # Nothing to express as a path: leave the line for manual review.
        return line

    # Building the new line.
    new = (
        f"{com.group('spaces')}//{com.group('c')} [{com.group('elem')}]: "
        + "::".join(segments)
    )

    print_del(line, pos)

    # Checking for a local path. Those lines will be deleted instead of replaced.
    local = LOCAL_PATH.match(new)
    if local is not None and local.group("elem") == local.group("elem2"):
        print_del_reason("local path")
        # Don't append the current line to the list of lines to write.
        return None

    prind_add(new)
    return new
# Matches the opening line of an `impl` block or a `trait` definition and
# captures, in the `type` group, the name (or `::`-separated path) of the
# implementing type / the trait, without its generic parameters.
IMPL_REGEX = re.compile(
    r"^\s*"
    r"(?:(?:unsafe )?impl|(?:pub(?:\(.+\))? )?(?:unsafe )?trait)"
    # Non-greedy: a greedy `<.*>` ran up to the LAST `> ` of the line, so
    # `impl<T> Vec<T> {` ended up capturing `{` as the type.
    r"(?:<.*?>)? "
    r"(?:.* for )?"
    # Stop at `<` (generics), `{` (body) and a lone `:` (supertraits) while
    # still accepting `::` path separators. `+` is excluded so that a `> `
    # found in the middle of the bounds (`<T: Into<u8> + Copy>`) is rejected
    # and the generics above are extended to their real end.
    r"(?P<type>[^\s<{:+]+(?:::[^\s<{:+]+)*)"
    r"(?:<.*>)?"
)
# Matches a doc-comment link definition whose target is only a page anchor,
# e.g. `/// [`push`]: #method.push`. The name following the anchor kind is
# captured in the `additional` group.
METHOD_ANCHOR = re.compile(
    r"^(?P<spaces>\s*)"
    r"//(?P<c>!|/) "
    r"\[(?P<elem>.*)\]: "
    r"#(?:method|variant|tymethod)\.(?P<additional>\S*)$"
)
def handle_method_anchor(line: str, pos: int, curr_impl: str) -> str:
    """Rewrite an anchor-only link (`#method.foo`) as `curr_impl::foo`.

    The line is checked against the `METHOD_ANCHOR` regex. A line that does
    not match is handed back untouched; a matching one is rebuilt with the
    same indentation, comment marker and link label, its target replaced by
    `curr_impl` joined to the anchored name with `::`. The old and new lines
    are printed.

    Args:
        line (str): The line to parse.
        pos (int): Position in the file, for pretty printing.
        curr_impl (str): The type (or path to a type) of the current `impl`
            block, inserted before the method/variant name.

    Returns:
        str: Either the original line or the transformed line.
    """
    found = METHOD_ANCHOR.match(line)
    if found is None:
        return line

    indent, marker, label, item = found.group("spaces", "c", "elem", "additional")
    rewritten = f"{indent}//{marker} [{label}]: {curr_impl}::{item}"

    print_del(line, pos)
    prind_add(rewritten)
    return rewritten
# Used to detect an empty doc comment line (`///` or `//!` with nothing after
# it, optionally indented).
EMPTY_DOC_COMMENT = re.compile(r"^\s*//(?:!|/)$")

# Used to detect doc comment lines, empty or not. This is the same regex as
# `EMPTY_DOC_COMMENT` without the ending `$`.
IS_DOC_COMMENT_LINE = re.compile(r"^\s*//(?:!|/)")
def apply_regex(path: str, write_result: bool, crate: str):
    """Apply the link conversions to one file, optionally rewriting it.

    The file is read line by line. Each line has its trailing whitespace
    stripped and goes through `handle_comment_link`, `handle_module_link` and,
    once an `impl`/`trait` header has been seen, `handle_method_anchor`. Every
    proposed change is printed. Lines for which a handler returns `None` are
    dropped, as are empty doc comment lines that directly follow another empty
    doc comment line or that end a doc comment.

    Args:
        path (str): The path to the file.
        write_result (bool): If `True`, will rewrite the file to include the
            transformations.
        crate (str): The root crate (`std`, `core`, `alloc`).
    """
    lines = []
    # Type captured from the most recent `impl`/`trait` header, if any.
    in_impl = None
    # Last line that was kept, and its line number in the original file.
    prev_line = ""
    prev_pos = 0

    with open(path, "r", encoding="utf-8") as f:
        # `pos` is only used to do a nice line counter display.
        for pos, raw_line in enumerate(f, start=1):
            # Keeping the spaces on the left.
            line = raw_line.rstrip()

            if EMPTY_DOC_COMMENT.match(prev_line) is not None:
                # This is notably triggered when a trivial path is deleted
                # in the middle of a bigger comment. The next line (the `line`
                # here) and the previous line (`prev_line`) may be two empty
                # comment lines: we delete one to avoid double /// empty lines.
                if EMPTY_DOC_COMMENT.match(line):
                    print_del(line, pos)
                    print_del_reason("Consecutives empty comments line")
                    continue

                # This is triggered when the previously deleted link was the
                # last line in the doc comment and its previous line was an
                # empty comment line, leading to a single /// line at the end
                # of the comment.
                if IS_DOC_COMMENT_LINE.match(line) is None:
                    # Report the position recorded when that line was kept:
                    # deleted lines may sit in between, so it is not
                    # necessarily `pos - 1`.
                    print_del(lines.pop().rstrip(), prev_pos)
                    print_del_reason("Empty comment line at the end of a comment")

            line = handle_comment_link(line, pos, crate)
            if line is None:
                continue

            line = handle_module_link(line, pos, crate)
            if line is None:
                continue

            impl_match = IMPL_REGEX.match(line)
            if impl_match is not None:
                in_impl = impl_match.group("type")

            if in_impl is not None:
                line = handle_method_anchor(line, pos, in_impl)

            prev_line = line
            prev_pos = pos
            # The `writelines` method does not automatically append the '\n'.
            lines.append(line + "\n")

    if write_result:
        with open(path, "w", encoding="utf-8") as f:
            f.writelines(lines)
# ==== MAIN PART ====

# Command-line interface: one or more files, `-a/--apply` to write the
# changes and `-c/--crate` to name the root crate the files belong to.
PARSER = ap.ArgumentParser(description=(
    "Transform a rust source file to use intra doc links when possible. "
    "By default it only show the transformations but does nothing. "
    "When detecting local paths, it will delete instead of replace. "
    "Note: THIS DOES NOT CATCH EVERYTHING. You should still review after that."
))
PARSER.add_argument(
    "files", metavar="FILES", type=str, nargs="+", help="The files to transform."
)
PARSER.add_argument(
    "-a", "--apply", action="store_true",
    help="Apply the transformations. Default is False.",
)
PARSER.add_argument(
    "-c", "--crate", type=str, nargs="?", default="std", action="store",
    choices=("alloc", "core", "std"), help=(
        "The root crate in which the file is. This will help put `crate::` "
        "instead of `std::` for example. Default is `std`."
    )
)


def main(argv=None):
    """Parse the command line and process every requested file.

    Args:
        argv (Optional[List[str]]): Arguments to parse. `None` (the default)
            makes `argparse` read them from `sys.argv`.
    """
    args = PARSER.parse_args(argv)
    for path in args.files:
        # Header: the file path underlined with `=`.
        print(f"{path}")
        print("=" * len(path) + "\n")
        apply_regex(path, args.apply, args.crate)


# Only run when executed as a script, so that importing this module does not
# parse `sys.argv` nor touch any file.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment