Created
September 12, 2022 19:24
Revisions
-
rldotai created this gist
Sep 12, 2022. There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,51 @@ import io, tokenize def extract_comments(code: str | io.TextIOBase) -> str: """ Extract comments from a piece of Python code, returning a string of *just* the comments. Example: >>> extract_comments(r''' ... # A comment ... def identity(x): ... "This is a docstring, not a comment." ... # Here's a comment inside a function ... return x # and an inline comment ... ... ''') "# A comment\n# Here's a comment inside a function\n# and an inline comment\n" A modified version of: https://stackoverflow.com/a/34512388 set to use Python 3. """ res = [] last = None if isinstance(code, str): buffer = io.StringIO(code) else: buffer = code # pass in stringio.readline to generate_tokens for toktype, tokval, begin, end, line in tokenize.generate_tokens(buffer.readline): if toktype == tokenize.COMMENT: res.append((toktype, tokval)) elif toktype in (tokenize.NEWLINE, tokenize.NL) and last == tokenize.COMMENT: res.append((toktype, tokval)) else: pass # Record the token type (for preserving newlines) last = toktype return tokenize.untokenize(res) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument( "input", type=argparse.FileType("r"), help="Input to extract comments from" ) args = parser.parse_args() print(extract_comments(args.input.read()))