Last active
April 24, 2020 02:49
-
-
Save seberg/548a2fa9187739ff33ec406e933fa8a4 to your computer and use it in GitHub Desktop.
Hacky script to analyze python kwargs passing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from pathlib import Path | |
import ast | |
import tokenize | |
class FindFuncs(ast.NodeVisitor):
    """Collect argument statistics for every call expression in an AST.

    After ``visit(tree)`` the following lists are filled, one entry per
    call (``num_*``) or per named keyword argument (``*_kwargs``):

    * ``num_args``: number of positional argument entries of each call.
    * ``num_kwargs``: number of keyword entries of each call; this counts
      ``**mapping`` unpackings as well.
    * ``all_kwargs``: name of every named keyword argument.
    * ``identical_kwargs``: names of keyword arguments whose value is a
      plain name spelled like the keyword (``f(x=x)``) or, when
      ``include_attributes`` is true, an attribute access whose final
      attribute is spelled like the keyword (``f(dtype=arr.dtype)``).
    """

    # Very dirtily adapted and stolen from NumPy tests...
    def __init__(self, filename, include_attributes=True):
        super().__init__()
        # Stored for reference only; nothing in this class reads it.
        self.__filename = filename
        self.all_kwargs = []
        self.identical_kwargs = []
        self.num_args = []
        self.num_kwargs = []
        self.include_attributes = include_attributes

    def visit_Call(self, node):
        """Record the argument statistics of one call node."""
        # Descend first so that calls nested inside this call's function
        # expression or arguments are recorded before the call itself.
        ast.NodeVisitor.generic_visit(self, node)

        self.num_args.append(len(node.args))
        self.num_kwargs.append(len(node.keywords))

        for keyword in node.keywords:
            name = keyword.arg
            if name is None:
                # ``**mapping`` unpacking has no keyword name; it is not a
                # named keyword argument and can never be "identical".
                continue
            self.all_kwargs.append(name)

            value = keyword.value
            if isinstance(value, ast.Name):
                passed = value.id
            elif self.include_attributes and isinstance(value, ast.Attribute):
                passed = value.attr
            else:
                # Any other expression (literal, call, subscript, ...).
                continue

            if passed == name:
                self.identical_kwargs.append(name)
def skip_hidden_and_tests_and_benchmarks(path):
    """Return True when *path* should be excluded from the scan.

    A path is excluded when any of its components is exactly
    ``benchmarks``, ``build``, ``tests`` or ``test``, or when any component
    starts with a dot (the parent reference ``..`` excepted).  All entries
    of ``path.parts`` are examined, the file name included.
    """
    excluded_names = ("benchmarks", "build", "tests", "test")
    for component in path.parts:
        if component in excluded_names:
            return True
        # Hidden file or directory; ".." only refers to the parent folder.
        if component != ".." and component.startswith("."):
            return True
    return False
def find_all_kwargs(base=".", skip_func=lambda x: False, verbose=False,
                    include_attributes=False):
    """Scan all ``*.py`` files below *base* and gather call statistics.

    Parameters
    ----------
    base : str or path-like
        Path that is searched recursively via ``Path.rglob("*.py")``.
    skip_func : callable
        Called with each candidate path; the file is skipped when the
        result is true.
    verbose : bool
        When true, print one line per skipped, failed or scanned file.
    include_attributes : bool
        Forwarded to ``FindFuncs``.

    Returns
    -------
    tuple
        ``(all_kwargs, identical_kwargs, num_args, num_kwargs, num_files)``.
        The four lists are the per-file ``FindFuncs`` results concatenated
        in scan order; ``num_files`` counts the files scanned successfully.

    Raises
    ------
    ValueError
        If *base* does not exist.
    """
    base = Path(base)
    if not base.exists():
        raise ValueError(f"Base path {base} does not exist.")

    all_kwargs = []
    identical_kwargs = []
    num_args = []
    num_kwargs = []
    num_files = 0

    # The scan is best effort: one file that cannot be opened (OSError),
    # decoded (UnicodeDecodeError is a ValueError), parsed (SyntaxError, or
    # ValueError for e.g. null bytes) or walked (RecursionError) must not
    # abort the whole run.
    skippable_errors = (SyntaxError, ImportError, ValueError, OSError,
                        RecursionError)

    for path in base.rglob("*.py"):
        if skip_func(path):
            if verbose:
                print(f"(skipped: {path})")
            continue

        try:
            # tokenize.open honours the PEP 263 encoding declaration.
            with tokenize.open(path) as file:
                tree = ast.parse(file.read())
            funcs = FindFuncs(path, include_attributes=include_attributes)
            funcs.visit(tree)
        except skippable_errors as e:
            if verbose:
                print(f"(skipped due to error: {path}; {e})")
            continue

        if verbose:
            print(f"scanned: {path}")
        num_files += 1
        all_kwargs.extend(funcs.all_kwargs)
        identical_kwargs.extend(funcs.identical_kwargs)
        num_args.extend(funcs.num_args)
        num_kwargs.extend(funcs.num_kwargs)

    # oops, way too many returns now :)
    return all_kwargs, identical_kwargs, num_args, num_kwargs, num_files
def main(argv=None):
    """Command-line entry point: scan the given paths and print statistics.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments without the program name.  ``None`` lets
        argparse read ``sys.argv[1:]``.
    """
    from argparse import ArgumentParser
    from collections import Counter

    # The text must be passed as ``description``; the first positional
    # parameter of ArgumentParser is ``prog`` (the program name shown in
    # the usage line).
    parser = ArgumentParser(
        description="Recursively search and analyze all python files "
                    "contained in the given paths.")
    parser.add_argument("paths", default=[], nargs="+",
                        help="(Base)paths to scan")
    parser.add_argument("--skip", "-s", default=False, action="store_true",
                        help="Skip paths including '/test/', '/tests/', "
                             "'/build/', '/benchmarks/' or a folder/file "
                             "starting with '.'.")
    parser.add_argument("--verbose", "-v", default=False, action="store_true",
                        help="Print out skipped and scanned paths.")
    parser.add_argument("--include-attributes", "-i", default=False,
                        action="store_true",
                        help="Include things like `dtype=(expression).dtype`.")
    args = parser.parse_args(argv)

    if args.skip:
        skip_func = skip_hidden_and_tests_and_benchmarks
    else:
        skip_func = lambda x: False

    # Totals over all paths; the per-call counts are accumulated as well so
    # that the final statistics cover every path, not only the last one.
    kwargs = []
    identical = []
    all_num_args = []
    all_num_kwargs = []
    num_files = 0

    for path in args.paths:
        res = find_all_kwargs(path,
                              skip_func=skip_func, verbose=args.verbose,
                              include_attributes=args.include_attributes)
        path_kwargs, path_identical, num_args, num_kwargs, new_files = res

        print(f"Scanned {new_files} python files in {path}")
        print(" Total kwargs:", len(path_kwargs), "out which identical:", len(path_identical))
        # A non-empty identical list implies a non-empty kwargs list, so
        # the division below cannot hit zero.
        if len(path_identical) > 0:
            print(f" Thus {len(path_identical)/len(path_kwargs) * 100:3.1f}% are identical.")
        print()

        kwargs.extend(path_kwargs)
        identical.extend(path_identical)
        all_num_args.extend(num_args)
        all_num_kwargs.extend(num_kwargs)
        num_files += new_files

    if len(args.paths) > 1:
        print(f"Overall Scanned {num_files} python files.")
        print(" Total kwargs:", len(kwargs), "out which identical:", len(identical))
        if len(identical) > 0:
            print(f" Thus {len(identical)/len(kwargs) * 100:3.1f}% are identical.")
        print()

    c = Counter(identical)
    print("Table of most common identical kwargs:")
    print(f" {'name':>20s} | value")
    print(f" {'-' * 20}-|------")
    for name, num in c.most_common(50):
        print(f" {name:>20s} | {num:5d}")
    print()
    print("Argument count statistics:")
    print(Counter(all_num_args))
    print("Kwarg count statistics:")
    print(Counter(all_num_kwargs))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment