Skip to content

Instantly share code, notes, and snippets.

@rldotai
Created September 12, 2022 19:24

Revisions

  1. rldotai created this gist Sep 12, 2022.
    51 changes: 51 additions & 0 deletions get_comments.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,51 @@
    import io, tokenize


    def extract_comments(code: str | io.TextIOBase) -> str:
    """
    Extract comments from a piece of Python code, returning a string of
    *just* the comments.
    Example:
    >>> extract_comments(r'''
    ... # A comment
    ... def identity(x):
    ... "This is a docstring, not a comment."
    ... # Here's a comment inside a function
    ... return x # and an inline comment
    ...
    ... ''')
    "# A comment\n# Here's a comment inside a function\n# and an inline comment\n"
    A modified version of: https://stackoverflow.com/a/34512388
    set to use Python 3.
    """
    res = []
    last = None
    if isinstance(code, str):
    buffer = io.StringIO(code)
    else:
    buffer = code
    # pass in stringio.readline to generate_tokens
    for toktype, tokval, begin, end, line in tokenize.generate_tokens(buffer.readline):
    if toktype == tokenize.COMMENT:
    res.append((toktype, tokval))
    elif toktype in (tokenize.NEWLINE, tokenize.NL) and last == tokenize.COMMENT:
    res.append((toktype, tokval))
    else:
    pass

    # Record the token type (for preserving newlines)
    last = toktype
    return tokenize.untokenize(res)


    if __name__ == "__main__":
        import argparse

        # Command-line entry point: print the comments found in the given file.
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "input", type=argparse.FileType("r"), help="Input to extract comments from"
        )
        args = parser.parse_args()

        # argparse.FileType opens the file during parsing and nothing else
        # closes it, so release the handle explicitly once it has been read.
        with args.input as source:
            comments = extract_comments(source.read())
        print(comments)