Created
August 14, 2019 23:38
-
-
Save charmoniumQ/2a8c11f083fd64f112c7db4247ef2f64 to your computer and use it in GitHub Desktop.
This implements a preprocessor macro system.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import click | |
import re | |
import io | |
import contextlib | |
#import colorama | |
#from termcolor import cprint | |
''' | |
# Overview # | |
This implements a preprocessor macro system. | |
There are two commands: capture (which records source code) and output | |
(which execs code, copying stdout in place to the file). You can | |
capture Python source code, or source code from your language. | |
# Capture # | |
To capture, place code between capture-begin and -end tags. After the | |
begin tag, declare a name for the capture. See the (fairly liberal) | |
regex in code for exact specification. See Examples 1 and 2. | |
You can also capture an indentation, trimming off a certain number of | |
characters from the end. See Example 4. | |
# Output # | |
You can exec captured code and place its output in this | |
file. Do this by writing output-begin and -end tags, using the name of | |
the capture. The actual capture site can be after the output site. | |
There is no problem in using a capture before it is defined. See the | |
regex in code for exact specification. See Example 5. | |
The output will be placed between these two lines. Anything between | |
these two lines will be nuked. | |
The code will be executed in an environment which has access to a | |
dictionary `captures` of all of the captured code. Again, a capture | |
can be accessed prior to its actual capture-declaration site, since | |
the capturing-phase is run before the output-phase. | |
# Example # | |
// file main.c | |
// PY_AUTOGEN_CAPTURE_BEGIN vars (example 2) | |
int a; | |
float b; | |
float c; | |
// PY_AUTOGEN_CAPTURE_END | |
/* | |
PY_AUTOGEN_CAPTURE_BEGIN gen_print_stmt (example 1) | |
import re | |
types_and_names = [] | |
for line in captures['vars']: | |
match = re.match(r'\s*(\S+)\s+(\S+)\s*;', line) | |
if match: | |
types_and_names.append(match.groups()) | |
type_fmt = {'float': '%f', 'int': '%d'} | |
ind = captures['ind'] | |
fmt_str = ' '.join( | |
f'{name}={type_fmt[type]}' for type, name in types_and_names | |
) | |
args = ', '.join(name for type, name in types_and_names) | |
print(f'{ind}printf("{fmt_str}", {args});') | |
PY_AUTOGEN_CAPTURE_END | |
*/ | |
int main() { | |
printf( | |
// PY_AUTOGEN_CAPTURE_INDENTATION 3 ind (example 4) | |
// PY_AUTOGEN_OUTPUT_BEGIN gen_print_stmt (example 5) | |
// PY_AUTOGEN_OUTPUT_END | |
); | |
return 0; | |
} | |
''' | |
# Tag patterns. Each one is applied with ``.match`` to a single line of the
# processed file. Anything before the tag (for example a comment marker) is
# accepted, and re.DOTALL lets ``.`` also consume the line's newline.
#
# For the tags that carry a name, the name is the first run of non-whitespace
# characters after the tag; whatever follows it on the line is ignored. The
# trailing part is optional so that a tag sitting on the very last line of a
# file without a final newline is still recognised.

# Groups: (text preceding the tag, number of characters to trim, capture name).
capture_indentation = re.compile(
    r'(.*)PY_AUTOGEN_CAPTURE_INDENTATION\s+(\d+)\s+(\S+)(?:\s.*)?', re.DOTALL)
# Group 1: name of the capture whose output goes below this line.
begin_output = re.compile(r'.*PY_AUTOGEN_OUTPUT_BEGIN\s+(\S+)(?:\s.*)?', re.DOTALL)
end_output = re.compile(r'.*PY_AUTOGEN_OUTPUT_END.*', re.DOTALL)
# Group 1: name under which the following lines are captured.
begin_capture = re.compile(r'.*PY_AUTOGEN_CAPTURE_BEGIN\s+(\S+)(?:\s.*)?', re.DOTALL)
end_capture = re.compile(r'.*PY_AUTOGEN_CAPTURE_END.*', re.DOTALL)
def find_captures(iter):
    """Collect every named capture declared in the lines of ``iter``.

    Two kinds of capture are recognised:

    * a region between a capture-begin and a capture-end tag line, stored as
      the list of lines strictly between the two tag lines;
    * a capture-indentation tag, stored as the text that precedes the tag on
      its line with the requested number of trailing characters removed.

    Begin and indentation tags are only interpreted outside of a capture
    region; inside a region only the end tag is looked for, and every other
    line is recorded verbatim.

    Args:
        iter: iterable of text lines, for example an open text file.

    Returns:
        dict mapping each capture name to a list of lines (region capture)
        or to a string (indentation capture). A later declaration with the
        same name replaces an earlier one.

    Raises:
        ValueError: if the input ends while a capture region is still open.
    """
    captures = {}
    capturing = False
    capture_name = ''
    for line in iter:
        if capturing:
            if end_capture.match(line):
                capturing = False
            else:
                captures[capture_name].append(line)
            continue

        begin_match = begin_capture.match(line)
        if begin_match:
            capture_name = begin_match.group(1)
            captures[capture_name] = []
            capturing = True
            continue

        indent_match = capture_indentation.match(line)
        if indent_match:
            prefix, trim_str, indent_name = indent_match.groups()
            # Compute the kept length explicitly: a negative-stop slice such
            # as prefix[:-0] is prefix[:0], which would drop everything when
            # zero characters are to be trimmed.
            keep = max(len(prefix) - int(trim_str), 0)
            captures[indent_name] = prefix[:keep]

    if capturing:
        raise ValueError(f'EOF reached while capturing {capture_name}')
    return captures
def exec_code(code, locals):
    """Execute captured Python source and return what it printed.

    Args:
        code: iterable of source strings (typically the captured lines,
            each still carrying its newline); they are concatenated without
            a separator before execution.
        locals: dict used as the namespace of the executed code. It is
            passed as the second argument of ``exec``, i.e. as the globals
            dict, so the executed code may add names to it.

    Returns:
        An ``io.StringIO`` positioned at its start that holds everything the
        code wrote to stdout. Non-empty output always ends with a newline,
        so a line emitted after it (such as the end-output tag) starts on a
        line of its own.
    """
    captured_stdout = io.StringIO()
    with contextlib.redirect_stdout(captured_stdout):
        # NOTE(security): this runs arbitrary code taken from the processed
        # file; only use the tool on files you trust.
        exec(''.join(code), locals)
    output = captured_stdout.getvalue()
    if output and not output.endswith('\n'):
        # Without this, the end tag would be glued onto the last generated
        # line and the generated text would no longer be replaced on the
        # next run.
        output += '\n'
    return io.StringIO(output)
def substitute_output(iter, captures):
    """Yield the lines of ``iter`` with every output region regenerated.

    Lines outside an output region are passed through unchanged. An
    output-begin tag line is passed through and immediately followed by the
    stdout of the capture it names, executed with a namespace that contains
    the ``captures`` dict. The previous contents of the region are dropped,
    and the output-end tag line is passed through again.

    Args:
        iter: iterable of text lines, for example an open text file.
        captures: dict of captures, indexed by the name given on the
            output-begin tag.

    Yields:
        The lines of the rewritten text.

    Raises:
        ValueError: if the input ends before an open output region is
            closed by an output-end tag.
    """
    skipping_stale = False
    for text in iter:
        if skipping_stale:
            # Inside an output region: discard the old generated lines and
            # resume pass-through at the end tag.
            if end_output.match(text):
                skipping_stale = False
                yield text
            continue

        yield text
        opened = begin_output.match(text)
        if opened is None:
            continue
        skipping_stale = True
        target = opened.group(1)
        yield from exec_code(captures[target], dict(captures=captures))

    if skipping_stale:
        raise ValueError(f'EOF reached while looking for end-output tag ({end_output!r})')
    return captures
@click.command()
@click.argument('file', type=click.File('r+'))
def main(file):
    """Regenerate the output regions of FILE in place.

    FILE is read once to collect all captures, read a second time to build
    the rewritten text, and then overwritten with that text.
    """
    captures = find_captures(file)
    file.seek(0)
    # Build the complete new contents before writing anything, because the
    # same file object is both the input and the output.
    lines = list(substitute_output(file, captures))
    file.seek(0)
    file.writelines(lines)
    # The regenerated text can be shorter than the previous contents; cut
    # the file at the current position so no stale tail is left behind.
    file.truncate()


if __name__ == '__main__':
    main()
# TODO: rewrite using iterator-labeler | |
# each elem maps to a concrete label or a special 'propagate' label | |
# groups = groupby(propagate_labels(map_with_arg(labeler, iterator))) | |
# for label, group in groups: | |
# if label == 'blah' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment