import difflib
import glob
import os
import sys
from tokenize import tokenize as std_tokenize
from tokenize import ASYNC, AWAIT, NAME

import click

ASYNC_TO_SYNC = {
    '__aenter__': '__enter__',
    '__aexit__': '__exit__',
    '__aiter__': '__iter__',
    '__anext__': '__next__',
    # TODO 'raise StopIteration' still works inside generators in Python 2,
    # but the right change here is 'raise StopAsyncIteration' -> 'return':
    # PEP 479 turns StopIteration raised inside a generator into a
    # RuntimeError, and we want bleached code to run on Python 3.7+.
    'StopAsyncIteration': 'StopIteration',
}

# This script removes 'async' and 'await' keywords and rewrites some tokens.
#
# This is done using `tokenize` from the Python standard library to parse an
# input file into tokens. Since untokenize(tokenize(f)) != f with the standard
# library functions, we have to provide our own wrappers around them:
# * tokenize outputs tuples: each token and the whitespace that needs to go
#   before that token
# * bleach_tokens rewrites this token stream to remove the async color
# * untokenize assembles the tokens by simply concatenating all values
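#
# For example (a hypothetical snippet, assuming Python 3.6 where ASYNC and
# AWAIT are distinct token types), this input:
#
#     async def fetch(self):
#         async with self.pool as conn:
#             return await conn.get()
#
# is bleached into:
#
#     def fetch(self):
#         with self.pool as conn:
#             return conn.get()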
def tokenize(f):
    last_end = (0, 0)
    for toknum, tokval, start, end, _ in std_tokenize(f.readline):
        if start == end:
            continue  # zero-width token (e.g. ENCODING, ENDMARKER)
        if start[0] > last_end[0]:
            # First token on a new line: the newline itself is carried by the
            # previous NEWLINE/NL token's value, so just reset the column.
            last_end = (start[0], 0)
        space = ''
        if start > last_end:
            # Tracking end positions keeps this correct even after
            # multi-line tokens such as triple-quoted strings.
            assert start[0] == last_end[0]
            space = ' ' * (start[1] - last_end[1])
        yield (space, toknum, tokval)
        last_end = end
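# For instance, tokenizing the hypothetical input b'x = 1\n' yields:
#
#   ('', NAME, 'x'), (' ', OP, '='), (' ', NUMBER, '1'), ('', NEWLINE, '\n')
#
# (the zero-width ENCODING and ENDMARKER tokens are dropped).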
def bleach_tokens(tokens):
    # TODO __await__, ...?
    used_space = None
    for space, toknum, tokval in tokens:
        if toknum in [ASYNC, AWAIT]:  # TODO Python 3.7+ tokenizes these as NAME
            # When removing 'async' or 'await', reuse the whitespace that
            # preceded it for the next token, so that 'print(await stuff)'
            # becomes 'print(stuff)' and not 'print( stuff)'.
            used_space = space
        else:
            if toknum == NAME and tokval in ASYNC_TO_SYNC:
                tokval = ASYNC_TO_SYNC[tokval]
            if used_space is None:
                used_space = space
            yield (used_space, tokval)
            used_space = None
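# For example, the token stream for 'print(await stuff)' comes in as
#
#   ('', NAME, 'print'), ('', OP, '('), ('', AWAIT, 'await'),
#   (' ', NAME, 'stuff'), ('', OP, ')'), ...
#
# and goes out as ('', 'print'), ('', '('), ('', 'stuff'), ('', ')'), ...
# which reassembles to 'print(stuff)'.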
def untokenize(tokens):
    output = ''.join(space + tokval for space, tokval in tokens)
    for line in output.split('\n'):
        # Removing tokens can leave trailing whitespace behind; strip it.
        yield line.rstrip(' ')
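# A minimal sketch of the whole pipeline on an in-memory file (hypothetical
# input, assuming Python 3.6 token types):
#
#   import io
#   source = io.BytesIO(b'async def f():\n    return await g()\n')
#   bleached = '\n'.join(untokenize(bleach_tokens(tokenize(source))))
#   assert bleached == 'def f():\n    return g()\n'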
def get_diff(initial, result, filename):
    return difflib.unified_diff(
        [l + '\n' for l in initial.split('\n')],
        [l + '\n' for l in result.split('\n')],
        fromfile='{} (original)'.format(filename),
        tofile='{} (bleached)'.format(filename))
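# get_diff yields standard unified-diff lines, e.g. (hypothetical, abridged):
#
#   --- foo.py (original)
#   +++ foo.py (bleached)
#   @@ -1,2 +1,2 @@
#   -async def f():
#   +def f():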
def get_paths(path):
    if os.path.isfile(path):
        yield path
    else:
        # Recursively find all Python files under the given directory.
        yield from glob.iglob(os.path.join(path, '**', '*.py'), recursive=True)
@click.command()
@click.option(
    '-w', '--write', is_flag=True, help='write changes to the filesystem')
@click.argument('inpath', type=click.Path(exists=True, dir_okay=True))
@click.argument('outpath', type=click.Path(dir_okay=True))
def bleach(write, inpath, outpath):
    for filepath in get_paths(inpath):
        with open(filepath, 'rb') as f:
            initial = f.read()
            f.seek(0)
            tokens = tokenize(f)
            tokens = bleach_tokens(tokens)
            # The token generators read from f lazily, so consume them
            # before the file is closed.
            result = '\n'.join(untokenize(tokens))
        # Always show the diff; only touch the filesystem with -w.
        diff = get_diff(initial.decode('utf8'), result, filepath)
        sys.stdout.writelines(diff)
        if write:
            outfilepath = filepath.replace(inpath, outpath)
            os.makedirs(os.path.dirname(outfilepath) or '.', exist_ok=True)
            # TODO get encoding from tokenize instead of assuming UTF-8
            with open(outfilepath, 'w') as f:
                print(result, file=f, end='')


if __name__ == '__main__':
    assert sys.version_info.major >= 3
    bleach()
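# Example invocations (hypothetical paths):
#
#   python bleach.py trio_code/ sync_code/       # dry run: print diffs only
#   python bleach.py -w trio_code/ sync_code/    # print diffs and write files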