Skip to content

Instantly share code, notes, and snippets.

@seberg
Last active April 24, 2020 02:49
Show Gist options
  • Save seberg/548a2fa9187739ff33ec406e933fa8a4 to your computer and use it in GitHub Desktop.
Save seberg/548a2fa9187739ff33ec406e933fa8a4 to your computer and use it in GitHub Desktop.
Hacky script to analyze python kwargs passing
#!/usr/bin/env python3
from pathlib import Path
import ast
import tokenize
class FindFuncs(ast.NodeVisitor):
    """Collect keyword-argument statistics from every call in an AST.

    Very dirtily adapted and stolen from NumPy tests...

    After ``visit(tree)`` the following lists are filled:

    * ``all_kwargs`` -- the name of every explicitly named keyword argument.
    * ``identical_kwargs`` -- the names that were passed as ``name=name``
      and, when ``include_attributes`` is true, as ``name=<expr>.name``.
    * ``num_args`` -- one entry per call: ``len(node.args)``.
    * ``num_kwargs`` -- one entry per call: ``len(node.keywords)``; this
      count still includes ``**mapping`` unpackings.

    Parameters
    ----------
    filename
        Stored on the instance; not used by the visitor itself.
    include_attributes : bool
        Whether ``name=<expr>.name`` counts as an identical keyword.
    """

    def __init__(self, filename, include_attributes=True):
        super().__init__()
        self.__filename = filename
        self.all_kwargs = []
        self.identical_kwargs = []
        self.num_args = []
        self.num_kwargs = []
        self.include_attributes = include_attributes

    def visit_Call(self, node):
        """Record argument counts and keyword names for one call node."""
        # Recurse first so that calls nested inside this call are recorded
        # as well (and before the enclosing call).
        ast.NodeVisitor.generic_visit(self, node)
        self.num_args.append(len(node.args))
        self.num_kwargs.append(len(node.keywords))
        for keyword in node.keywords:
            name = keyword.arg
            if name is None:
                # ``**mapping`` unpacking carries no keyword name; recording
                # ``None`` would skew the named-keyword totals.
                continue
            self.all_kwargs.append(name)

            value = keyword.value
            if isinstance(value, ast.Name):
                passed = value.id
            elif self.include_attributes and isinstance(value, ast.Attribute):
                passed = value.attr
            else:
                continue
            if passed == name:
                self.identical_kwargs.append(name)
def skip_hidden_and_tests_and_benchmarks(path):
    """Return True when *path* should be left out of the scan.

    A path is skipped when one of its components is named exactly
    ``benchmarks``, ``build``, ``tests`` or ``test``, or when any component
    other than ``..`` starts with a dot.  *path* must provide ``.parts``
    (e.g. a ``pathlib.Path``).
    """
    excluded_names = ("benchmarks", "build", "tests", "test")
    for component in path.parts:
        if component in excluded_names:
            return True
        # ".." is a parent-directory reference, not a hidden entry.
        if component != ".." and component.startswith("."):
            return True
    return False
def find_all_kwargs(base=".", skip_func=lambda x: False, verbose=False,
                    include_attributes=False):
    """Scan every ``*.py`` entry below *base* and gather call statistics.

    Parameters
    ----------
    base : str or Path
        Directory (or file) that is searched recursively via ``rglob``.
    skip_func : callable
        Called with each candidate path; a true result skips that path.
    verbose : bool
        Print every scanned and skipped path.
    include_attributes : bool
        Forwarded to ``FindFuncs``.

    Returns
    -------
    tuple
        ``(all_kwargs, identical_kwargs, num_args, num_kwargs, num_files)``
        where the first four are the concatenated ``FindFuncs`` lists of all
        successfully scanned files and ``num_files`` counts those files.

    Raises
    ------
    ValueError
        If *base* does not exist.

    Notes
    -----
    Files that cannot be opened, decoded or parsed are skipped (and reported
    only when *verbose* is true) rather than aborting the whole scan.
    """
    base = Path(base)
    if not base.exists():
        raise ValueError(f"Base path {base} does not exist.")

    all_kwargs = []
    identical_kwargs = []
    num_args = []
    num_kwargs = []
    num_files = 0

    for path in base.rglob("*.py"):
        if skip_func(path):
            if verbose:
                print(f"(skipped: {path})")
            continue

        try:
            # tokenize.open honours the file's encoding declaration.
            with tokenize.open(path) as file:
                tree = ast.parse(file.read())
            funcs = FindFuncs(path, include_attributes=include_attributes)
            funcs.visit(tree)
        except (SyntaxError, ImportError, ValueError, OSError) as e:
            # ValueError covers UnicodeDecodeError (undecodable bytes) and
            # sources that ast.parse rejects outright; OSError covers
            # directories named "*.py" and unreadable files.
            if verbose:
                print(f"(skipped due to error: {path}; {e})")
            continue
        else:
            if verbose:
                print(f"scanned: {path}")

        num_files += 1
        all_kwargs.extend(funcs.all_kwargs)
        identical_kwargs.extend(funcs.identical_kwargs)
        num_args.extend(funcs.num_args)
        num_kwargs.extend(funcs.num_kwargs)

    # oops, way too many returns now :)
    return all_kwargs, identical_kwargs, num_args, num_kwargs, num_files
def _print_summary(title, kwargs, identical):
    """Print the kwargs total and the share of identical ones below *title*."""
    print(title)
    print(" Total kwargs:", len(kwargs), "out which identical:", len(identical))
    if len(identical) > 0:
        print(f" Thus {len(identical)/len(kwargs) * 100:3.1f}% are identical.")
    print()


def main(argv=None):
    """Command-line entry point: scan the given paths and print statistics.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments without the program name; defaults to
        ``sys.argv[1:]``.
    """
    import sys
    from collections import Counter
    from argparse import ArgumentParser

    if argv is None:
        argv = sys.argv[1:]

    # The text must go to ``description``; the first positional parameter of
    # ArgumentParser is ``prog`` (the program name shown in the usage line).
    parser = ArgumentParser(
        description="Recursively search and analyze all python files "
                    "contained in the given paths.")
    parser.add_argument("paths", default=[], nargs="+",
                        help="(Base)paths to scan")
    parser.add_argument("--skip", "-s", default=False, action="store_true",
                        help="Skip paths including '/test/', '/tests/', "
                             "'/build/', '/benchmarks/' or a folder/file "
                             "starting with '.'.")
    parser.add_argument("--verbose", "-v", default=False, action="store_true",
                        help="Print out skipped and scanned paths.")
    parser.add_argument("--include-attributes", "-i", default=False,
                        action="store_true",
                        help="Include things like `dtype=(expression).dtype`.")
    args = parser.parse_args(argv)

    if args.skip:
        skip_func = skip_hidden_and_tests_and_benchmarks
    else:
        def skip_func(path):
            return False

    kwargs = []
    identical = []
    # Accumulated over all paths so the final count statistics cover the
    # same files as the identical-kwargs table.
    all_num_args = []
    all_num_kwargs = []
    num_files = 0

    for path in args.paths:
        res = find_all_kwargs(path,
                              skip_func=skip_func, verbose=args.verbose,
                              include_attributes=args.include_attributes)
        path_kwargs, path_identical, num_args, num_kwargs, new_files = res
        _print_summary(f"Scanned {new_files} python files in {path}",
                       path_kwargs, path_identical)

        kwargs.extend(path_kwargs)
        identical.extend(path_identical)
        all_num_args.extend(num_args)
        all_num_kwargs.extend(num_kwargs)
        num_files += new_files

    if len(args.paths) > 1:
        _print_summary(f"Overall Scanned {num_files} python files.",
                       kwargs, identical)

    c = Counter(identical)
    print("Table of most common identical kwargs:")
    print(f" {'name':>20s} | value")
    print(f" {'-' * 20}-|------")
    for name, num in c.most_common(50):
        print(f" {name:>20s} | {num:5d}")
    print()
    print("Argument count statistics:")
    print(Counter(all_num_args))
    print("Kwarg count statistics:")
    print(Counter(all_num_kwargs))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment