Created
August 10, 2020 00:27
-
-
Save nevercast/53820d1694865f0de707f3ce2db93ab5 to your computer and use it in GitHub Desktop.
Check Python file for side effects on import
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#### MIT Licence | |
# Copyright 2020 Josh "nevercast" Lloyd | |
# This notice must remain intact for all copies or substantial portions of the Software | |
#### | |
# First release: 2020-08-10 | |
from __future__ import print_function | |
import ast | |
import sys | |
def _ast_node_map(*nodes):
    """ Resolve AST node class names against this interpreter's `ast` module.

        Each name in `nodes` is looked up as an attribute of `ast`. Names
        that this version of Python does not define are skipped, which lets
        callers list node types from several Python versions at once.

        Returns a tuple of the node classes that were found, in the order
        their names were given.
    """
    found = []
    for node_name in nodes:
        node_type = getattr(ast, node_name, None)
        if node_type is not None:
            found.append(node_type)
    return tuple(found)
# "Clean" nodes: definitions, which do not inherently have side effects
CLEAN_NODES = _ast_node_map('AsyncFunctionDef', 'FunctionDef', 'ClassDef')

# Import nodes are collected separately; a deep search would need to push them onto a stack
IMPORT_NODES = _ast_node_map('Import', 'ImportFrom')

# "Dirty" nodes are not rejected on sight. They pass only when everything they contain
# is itself Dirty or Clean.
DIRTY_NODES = _ast_node_map('If', 'Try', 'TryExcept')

# Any other node type is rejected immediately as a side effect.
# NOTE: 'Assign' is not always bad - it is how globals are set. Rejecting every assignment
#       leaves it to the developer to judge whether a particular one matters, and seeing
#       all global assignments made on import can be useful insight in itself. If this
#       tool were ever used as a lint checker it would need options for treating Assign
#       more leniently; that is out of scope for now.

# Whether `if __name__ ...` checks can be recognised. The check is deliberately crude: the
# `If` statement is rendered to text and its first line is compared as a string, which was
# faster than covering every edge case through the AST. It is only supported when the
# unparser in use gives back valid Python source, i.e. astunparse.
CAN_HANDLE_DUNDER_NAME = False
# Fallback printer: turns a node into a string using the raw AST dump
def _ast_unparser(node, tree=None, code=None):
    """ Return a string representation of `node`.

        `tree` and `code` are accepted so every printer shares one
        signature; this fallback does not use them.
    """
    dumped = ast.dump(node)
    return dumped
# Python 3.8 offers a better printer for ast => code
if hasattr(ast, 'get_source_segment'):
    def _ast_unparser(node, tree=None, code=None):
        """ Return the original source text of `node`, or its AST dump.

            node: the AST node to render.
            tree: accepted for signature compatibility; unused here.
            code: the source text `node` was parsed from. When it is None
                  or empty, the AST dump of the node is returned instead.

            The AST dump is also returned when the source segment cannot be
            determined, so callers always receive a string.
        """
        # An empty string has no lines to slice a segment out of, so it is
        # treated the same as "no source available".
        if not code:
            return ast.dump(node)
        segment = ast.get_source_segment(code, node)
        # get_source_segment returns None when the node lacks location information
        if segment is None:
            return ast.dump(node)
        return segment
# astunparse, when it is installed, is the preferred ast printer
try:
    import astunparse
except ImportError:
    # Optional dependency: keep whichever printer was selected earlier
    pass
else:
    def _ast_unparser(node, tree=None, code=None):
        """ Return `node` rendered by astunparse; `tree` and `code` are unused. """
        return astunparse.unparse(node)

    # With this printer in place, `if __name__ ...` checks can be recognised
    CAN_HANDLE_DUNDER_NAME = True
def verify_file_is_sideeffect_free(filename_or_ast, inspect_imports=False):
    """ Print every top-level statement that would act as a side effect on import.

        The module body is searched breadth first:
          * nodes in CLEAN_NODES are accepted,
          * nodes in IMPORT_NODES are counted but not inspected further,
          * nodes in DIRTY_NODES are accepted only if the statements nested
            in them (including `else:` and `finally:` bodies) are accepted,
          * every other node is rejected.
        For each rejection the top-level statement containing it is printed
        once, followed by a summary line with the number of imports seen.

        filename_or_ast: path of a Python source file (a str), or an already
                         parsed tree exposing a `body` list of statements.
                         A tree passed in is not modified.
        inspect_imports: accepted for interface compatibility; currently unused.

        Returns None; the report is written to stdout.
    """
    from collections import deque

    if isinstance(filename_or_ast, str):
        with open(filename_or_ast) as file_handle:
            source_code = file_handle.read()
        tree = ast.parse(source_code)
    else:
        tree = filename_or_ast
        try:
            import inspect
            source_code = inspect.getsource(tree)
        except Exception:
            # Best effort only. None (not "") is what the source-segment based
            # printer checks for before falling back to an AST dump.
            source_code = None

    # Attributes of a Dirty node whose contents are queued for inspection.
    # 'orelse' and 'finalbody' cover the `else:` and `finally:` bodies.
    block_list_attrs = ('body', 'values', 'ops', 'comparators', 'handlers', 'orelse', 'finalbody')
    # A recognised `if __name__ ...` statement does not have its bodies inspected
    guard_list_attrs = ('values', 'ops', 'comparators')
    scalar_attrs = ('op', 'left', 'right', 'value')

    rejections = {}  # root statement: [rejected nodes nested inside it]
    imports = []
    # Parent links are kept in a local mapping rather than written onto the nodes,
    # so the tree is left untouched and any `parent` attribute it already carries
    # cannot be mistaken for one of ours.
    parent_of = {}
    pending = deque(tree.body)

    def collect_children(parent_node, children):
        """ Record `parent_node` as the parent of each child and queue the children. """
        for child in children:
            parent_of[child] = parent_node
        pending.extend(children)

    while pending:
        node = pending.popleft()
        if isinstance(node, CLEAN_NODES):
            continue
        if isinstance(node, IMPORT_NODES):
            imports.append(node)
            continue
        if isinstance(node, DIRTY_NODES):
            list_attrs = block_list_attrs
            # Special handling for `if __name__`: checking this through the AST gets
            # complicated fast (think `if __name__.startswith()`), so the statement is
            # rendered to source and its first line is searched for __name__ instead.
            if CAN_HANDLE_DUNDER_NAME and isinstance(node, ast.If):
                if_source = _ast_unparser(node=node, tree=tree, code=source_code).strip().splitlines()[0]
                if '__name__' in if_source:
                    list_attrs = guard_list_attrs
                    print('#', if_source, ' # Ignored as a side effect')
            for list_attr in list_attrs:
                collect_children(node, getattr(node, list_attr, []))
            for scalar_attr in scalar_attrs:
                if hasattr(node, scalar_attr):
                    collect_children(node, [getattr(node, scalar_attr)])
        else:
            # Rejected: follow the recorded parents up to the top-level statement
            rejected_node = node
            root = node
            while root in parent_of:
                root = parent_of[root]
            nested = rejections.setdefault(root, [])
            if rejected_node is not root and rejected_node not in nested:
                nested.append(rejected_node)

    # Print the rejections, one top-level statement per rejected root.
    # NOTE: the nested nodes gathered per root are not printed yet; listing each
    #       offending line inside a Dirty block is hard to read on a terminal
    #       without colouring, which is a possible future addition.
    for root in rejections:
        print(_ast_unparser(node=root, tree=tree, code=source_code).strip())

    print('#', len(imports), 'imports were not checked.')
if __name__ == '__main__':
    def main():
        """ Command-line entry point: check the one file named on the command line.

            Prints a usage line and returns when the argument count is wrong.
        """
        arguments = sys.argv[1:]
        if len(arguments) != 1:
            print('Syntax:', __file__, 'file_to_check.py')
            return

        target_file = arguments[0]
        print('# Checking', target_file)
        verify_file_is_sideeffect_free(target_file)

    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment