Created
September 28, 2013 18:49
-
-
Save cpcloud/6745173 to your computer and use it in GitHub Desktop.
Command line tool to show raised exceptions that don't have a message. Originally created for use with the pandas data analysis library codebase.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from __future__ import print_function | |
import re | |
import os | |
import fnmatch | |
import ast | |
import argparse | |
import inspect | |
import tempfile | |
import subprocess | |
import operator | |
import tokenize | |
import string | |
import warnings | |
try: | |
from importlib import import_module | |
except ImportError: | |
import_module = __import__ | |
from numpy import nan as NA, logical_not | |
from pandas import DataFrame | |
from pandas.core.config import option_context | |
# Recreate the ``str.format`` replacement-field grammar (the "Format String
# Syntax" section of the standard library docs) as one regular expression.
# Each name below mirrors the grammar production of the same name.
identifier = tokenize.Name
integer = tokenize.Intnumber
attribute_name = identifier
arg_name = r'({identifier})|({integer})'.format(integer=integer,
                                                 identifier=identifier)
# one or more printable characters other than the closing bracket
index_string = '[' + re.escape(string.printable.replace(']', '')) + ']+'
element_index = r'({integer})|({index_string})'.format(
    integer=integer, index_string=index_string)
# arg_name is optional so that auto-numbered fields such as ``{}`` and
# ``{:>4}`` are recognised too
field_name = r'({0})?(\.({1})|\[({2})\])*'.format(arg_name, attribute_name,
                                                   element_index)
# A character class rather than ``r|s``: spliced in after the ``!`` the
# alternation would bind as ``(!r)|(s)`` and never match ``!s``.
conversion = '[rsa]'
# fill is a *single* character (anything printable except a brace), so the
# escaped characters must be wrapped in a character class; without the
# brackets the pattern would only match all of them in sequence.
fill = ('[' +
        re.escape(string.printable.replace('{', '').replace('}', '')) +
        ']')
align = '[<>=^]'
sign = r'[+\- ]'
width = integer
precision = integer
typ = '[bcdeEfFgGnosxX%]'
format_spec = (r'(({fill})?{align})?({sign})?#?0?({width})?,?'
               r'(\.({precision}))?({typ})?').format(fill=fill,
                                                     align=align,
                                                     sign=sign,
                                                     width=width,
                                                     precision=precision,
                                                     typ=typ)
# group 1 of the compiled pattern is the whole field name
replacement_field = (r'\{' +
                     r'({field_name})(!{conversion})?'
                     r'(:({format_spec}))?'.format(field_name=field_name,
                                                   conversion=conversion,
                                                   format_spec=format_spec) +
                     r'\}')
FMT_SPEC_RE = re.compile(replacement_field)
# printf-style conversion: %[(key)][flags][width][.precision][length]type
# Groups: 1 = width, 2 = precision, 3 = conversion type.
_INTERP_SPEC_RE = re.compile(
    r'%(?:\([^)]*\))?[#0\- +]*(\*|\d+)?(?:\.(\*|\d+))?[hlL]?'
    r'([diouxXeEfFgGcrsa%])')


def get_num_format_spec(s):
    """Return how many arguments the template string `s` expects.

    ``str.format`` replacement fields are counted first: every distinct
    field name counts once (``'{0} {0!r}'`` needs one argument) and every
    auto-numbered field (``{}``) counts separately.  Only when no
    replacement field is found is `s` scanned for printf-style (``%``)
    conversions instead.

    Parameters
    ----------
    s : str
        The message template.

    Returns
    -------
    int
        Number of expected arguments; 0 if `s` contains no placeholders.
    """
    named = set()
    anonymous = 0
    for match in FMT_SPEC_RE.finditer(s):
        # group 1 is the field name of the replacement field
        name = match.group(1)
        if name:
            named.add(name)
        else:
            anonymous += 1
    n = len(named) + anonymous

    # either not a format string or nothing in it
    if not n:
        # Each specifier is matched on its own, so adjacent ones such as
        # '%s%s' are counted individually.
        for match in _INTERP_SPEC_RE.finditer(s):
            width, precision, kind = match.groups()
            if kind == '%':
                # '%%' is a literal percent sign and consumes no argument
                continue
            # a '*' width or precision is read from the argument tuple
            n += 1 + (width == '*') + (precision == '*')
    return n
def parse_interp_string(node):
    """Return the template of a ``'...' % args`` expression.

    Only works for non-nested nodes: the left operand has to be a string
    literal and the right operand a tuple, a string literal or a dict whose
    keys are all string literals.

    Parameters
    ----------
    node : ast.AST
        The expression to inspect.

    Returns
    -------
    str
        The text of the left-hand string literal.

    Raises
    ------
    AssertionError
        If `node` does not have the shape described above.  The checks are
        explicit ``raise`` statements rather than ``assert`` so they still
        run under ``python -O``.
    """
    def literal_text(candidate):
        # String literals are ``Str`` nodes on older interpreters and
        # ``Constant`` nodes holding a str on newer ones.  Comparing the
        # class name avoids touching the deprecated ``ast.Str`` attribute.
        kind = type(candidate).__name__
        if kind == 'Str':
            return candidate.s
        if kind == 'Constant' and isinstance(candidate.value, str):
            return candidate.value
        return None

    if not isinstance(node, ast.BinOp):
        raise AssertionError('node is not a binary operator')
    if not isinstance(node.op, ast.Mod):
        raise AssertionError('node operator is not %')

    template = literal_text(node.left)
    if template is None:
        raise AssertionError('lhs is not a str')

    rhs = node.right
    if isinstance(rhs, ast.Dict):
        if any(literal_text(key) is None for key in rhs.keys):
            raise AssertionError('all dict keys must be strings')
    elif not isinstance(rhs, ast.Tuple) and literal_text(rhs) is None:
        raise AssertionError('rhs is not a tuple, string, or dict')
    return template
def parse_format_string(node):
    """Return the template of a ``'...'.format(...)`` call.

    Parameters
    ----------
    node : ast.AST
        The expression to inspect.

    Returns
    -------
    str
        The text of the string literal that ``format`` is called on.

    Raises
    ------
    AssertionError
        If `node` is not a call of the ``format`` method on a string
        literal.  Other string methods (e.g. ``', '.join(names)``) are
        rejected because their receiver is not a format template.  The
        checks are explicit ``raise`` statements rather than ``assert`` so
        they still run under ``python -O``.
    """
    if not isinstance(node, ast.Call):
        raise AssertionError('node is not a method call')

    func = node.func
    if not isinstance(func, ast.Attribute):
        raise AssertionError(
            'func not a method, {0!r}'.format(func.__class__.__name__))
    if func.attr != 'format':
        raise AssertionError(
            'method is not format, {0!r}'.format(func.attr))

    # String literals are ``Str`` nodes on older interpreters and
    # ``Constant`` nodes holding a str on newer ones.
    receiver = func.value
    kind = type(receiver).__name__
    if kind == 'Str':
        return receiver.s
    if kind == 'Constant' and isinstance(receiver.value, str):
        return receiver.value
    raise AssertionError('object is not a string')
def parse_num_format_string_args(node):
    """Return the number of positional arguments of the call `node`.

    Raises AttributeError when `node` has no ``args`` attribute, i.e. when
    it is not a call.
    """
    positional = node.args
    return len(positional)
def parse_num_interp_string_args(node):
    """Return the number of elements in the right operand of `node`.

    Raises AttributeError when `node` has no ``right`` attribute or when
    that operand has no ``elts`` attribute.
    """
    operands = node.right.elts
    return len(operands)
def try_parse_raise_arg(node):
    """Return the message text carried by `node`, or `node` itself.

    The interpretations are tried in order and the first one that succeeds
    wins:

    1. a plain string (``node.s``),
    2. an interpolated string (``parse_interp_string``),
    3. a format spec string (``parse_format_string``).

    If none of them applies, `node` is returned unchanged.
    """
    # string
    try:
        return node.s
    except AttributeError:
        pass

    # interpolated string first, then format spec string
    for parser in (parse_interp_string, parse_format_string):
        try:
            return parser(node)
        except AssertionError:
            continue

    return node
def check_valid_msg(node):
    """Check that a message template gets as many arguments as it expects.

    Two shapes are verified: ``'...' % (a, b)`` and ``'...'.format(a, b)``.
    Anything else, or anything whose template is not a literal and so
    cannot be inspected, is reported as valid.

    Parameters
    ----------
    node : ast.AST
        The first argument of an exception constructor.

    Returns
    -------
    bool
        False only when the template could be parsed and the number of
        supplied arguments differs from the number of placeholders.
    """
    # Case 1: interpolation with a tuple operand (only tuples have ``elts``)
    try:
        n = parse_num_interp_string_args(node)
    except AttributeError:
        pass
    else:
        try:
            s = parse_interp_string(node)
        except AssertionError:
            # e.g. ``fmt % (a, b)``: the template is not a literal, so there
            # is nothing to compare against.  Treat it like the unparseable
            # ``.format`` case below instead of letting the error escape.
            return True
        return n == get_num_format_spec(s)

    # Case 2: a call, possibly ``'...'.format(...)``
    try:
        n = parse_num_format_string_args(node)
    except AttributeError:
        # neither an interpolation nor a call: no arguments and no template
        return True
    try:
        s = parse_format_string(node)
    except AssertionError:
        return True
    return n == get_num_format_spec(s)
class RaiseVisitor(ast.NodeVisitor):
    """Record a summary of every ``raise`` statement found in one file.

    Parameters
    ----------
    pyfile : str
        Name of the file being visited; used as the first part of each key.
    asserts : dict
        Mapping that is filled in place.  Keys are
        ``(pyfile, lineno, col_offset)`` tuples.  Values are lists shaped
        like one of:

        * ``['reraise']`` -- a bare ``raise``
        * ``['variable']`` -- the raised object is not a (dotted) name
          being called, e.g. ``raise exc``
        * ``[name]`` -- a dotted name raised without calling it, e.g.
          ``raise errors.Bad``
        * ``[name, 'no arguments']`` -- constructor call without positional
          arguments
        * ``[name, message, is_valid]`` -- constructor call; ``message`` is
          the result of ``try_parse_raise_arg`` and ``is_valid`` the result
          of ``check_valid_msg`` for the first positional argument
    """

    def __init__(self, pyfile, asserts):
        self.pyfile = pyfile
        self.asserts = asserts

    @staticmethod
    def _dotted_name(node):
        """Return ``'a.b.C'`` for a Name/Attribute chain, else None."""
        parts = []
        while isinstance(node, ast.Attribute):
            parts.append(node.attr)
            node = node.value
        if not isinstance(node, ast.Name):
            return None
        parts.append(node.id)
        return '.'.join(reversed(parts))

    def visit_Raise(self, node):
        k = self.pyfile, node.lineno, node.col_offset

        # The raised expression is stored in ``exc`` by Python 3 and in
        # ``type`` by Python 2.
        exc = node.exc if hasattr(node, 'exc') else node.type

        if exc is None:
            # a bare ``raise``
            self.asserts[k] = ['reraise']
            return

        if isinstance(exc, ast.Call):
            name = self._dotted_name(exc.func)
            if name is None:
                # calling something that is not a plain dotted name
                self.asserts[k] = ['variable']
                return

            entry = [name]
            if exc.args:
                # only the first positional argument is treated as message
                arg = exc.args[0]
                entry.extend([try_parse_raise_arg(arg), check_valid_msg(arg)])
            else:
                # no arguments (e.g., raise Exception())
                entry.append('no arguments')
            self.asserts[k] = entry
        elif isinstance(exc, ast.Attribute):
            # e.g. ``raise module.Error`` without a constructor call
            self.asserts[k] = [self._dotted_name(exc) or 'variable']
        else:
            # a plain name or any other expression
            self.asserts[k] = ['variable']
def parse_file(pyfile, asserts):
    """Parse `pyfile` and record its ``raise`` statements in `asserts`.

    Files that cannot be parsed are skipped silently so that one bad file
    does not abort a scan of a whole directory tree.

    Parameters
    ----------
    pyfile : str
        Path of the Python source file.
    asserts : dict
        Mapping that is passed to ``RaiseVisitor`` and filled in place.
    """
    # Read raw bytes: the parser then applies the file's own encoding
    # declaration instead of the platform default used by text mode.
    with open(pyfile, 'rb') as pyf:
        source = pyf.read()

    try:
        parsed = ast.parse(source, pyfile, 'exec')
    except (SyntaxError, ValueError):
        # SyntaxError: not valid source for the running interpreter;
        # ValueError: e.g. source that contains null bytes
        return

    RaiseVisitor(pyfile, asserts).visit(parsed)
def path_matches(path, pattern):
    """Return True if the regular expression `pattern` occurs in `path`."""
    found = re.search(pattern, path)
    return bool(found)
def regex_or(*patterns):
    """Combine `patterns` into one parenthesised regex alternation."""
    alternatives = '|'.join(patterns)
    return '(' + alternatives + ')'
def get_asserts_from_path(path, file_filters, dir_filters):
    """Collect the ``raise`` statements of every ``*.py`` file under `path`.

    Parameters
    ----------
    path : str
        A single file, which is parsed directly without any filtering, or a
        directory that is walked recursively.
    file_filters : sequence of str or None
        Regular expressions; a file whose name matches any of them is
        skipped.  None selects the default (``__init__.py``); an empty
        sequence disables file filtering.
    dir_filters : sequence of str or None
        Regular expressions; a directory whose path matches any of them is
        skipped.  None selects the defaults (``build``, ``.tox`` and egg
        directories); an empty sequence disables directory filtering.

    Returns
    -------
    dict
        The mapping filled in by ``parse_file``.
    """
    if file_filters is None:
        file_filters = (r'__init__\.py',)
    if dir_filters is None:
        dir_filters = ('build', r'\.tox', r'.*\.egg.*')

    # An empty alternation ``()`` matches every string and would exclude
    # everything, so "no filters" is represented by None instead.
    file_pattern = regex_or(*file_filters) if file_filters else None
    dir_pattern = regex_or(*dir_filters) if dir_filters else None

    asserts = {}

    if os.path.isfile(path):
        parse_file(path, asserts)
        return asserts

    for root, _, filenames in os.walk(path):
        # the pattern is searched for in the whole directory path, so the
        # sub-directories of an excluded directory are excluded as well
        if dir_pattern is not None and path_matches(root, dir_pattern):
            continue

        for fn in fnmatch.filter(filenames, '*.py'):
            if file_pattern is not None and path_matches(fn, file_pattern):
                continue
            parse_file(os.path.join(root, fn), asserts)

    return asserts
def obj_path_from_string(dotted_name, full_path):
    """Return the directory holding the source of `dotted_name`.

    Parameters
    ----------
    dotted_name : str
        Either an importable module (``'pkg.mod'``) or an attribute of one
        (``'pkg.mod.obj'``).
    full_path : bool
        Use ``inspect.getabsfile`` when true, ``inspect.getfile`` otherwise.

    Returns
    -------
    str
        ``os.path.dirname`` of the file that defines the object.

    Raises
    ------
    ImportError
        If neither `dotted_name` nor its parent can be imported.
    AttributeError
        If the parent module has no attribute with the trailing name.
    """
    try:
        obj = import_module(dotted_name)
    except ImportError:
        # not a module: try again as ``<module>.<attribute>``
        module_name, _, obj_name = dotted_name.rpartition('.')
        if not module_name:
            # nothing left to import once the last component is split off
            raise ImportError("'{0}' is not a valid Python "
                              "module".format(dotted_name))
        try:
            module = import_module(module_name)
        except ImportError:
            raise ImportError("'{0}' is not a valid Python "
                              "module".format(module_name))
        try:
            obj = getattr(module, obj_name)
        except AttributeError:
            raise AttributeError("module '{0}' has no attribute "
                                 "'{1}'".format(module_name, obj_name))

    if full_path:
        path = inspect.getabsfile(obj)
    else:
        path = inspect.getfile(obj)

    if path.endswith('pyc'):
        # Drop only the trailing 'c'; ``str.strip`` would also eat leading
        # 'c' characters of a relative path such as 'core/frame.pyc'.
        path = path[:-1]
    return os.path.dirname(path)
def get_asserts_from_obj(dotted_name, file_filters, dir_filters, full_path):
    """Resolve `dotted_name` to a directory and collect its raises.

    The directory comes from ``obj_path_from_string``; the search itself,
    including the meaning of both filter arguments, is delegated to
    ``get_asserts_from_path``.
    """
    return get_asserts_from_path(
        obj_path_from_string(dotted_name, full_path),
        file_filters,
        dir_filters,
    )
def asserts_to_frame(asserts):
    """Turn the collected raises into a DataFrame with one row per raise.

    Parameters
    ----------
    asserts : dict
        Maps ``(filename, line, col)`` tuples to lists holding between one
        and three of the values ``code``, ``msg`` and ``valid``.

    Returns
    -------
    DataFrame
        Columns ``filename, line, col, code, msg, valid``.  Values missing
        from a short entry are NaN, and the directory of the installed
        pandas package is removed from the start of each filename.
    """
    import pandas

    columns = ['filename', 'line', 'col', 'code', 'msg', 'valid']

    rows = []
    for key, found in asserts.items():
        row = list(key) + list(found)
        # Pad short entries explicitly: building the frame fails when no
        # row happens to provide a value for every column.
        row.extend([NA] * (len(columns) - len(row)))
        rows.append(row)

    df = DataFrame(rows, columns=columns).fillna(NA)
    # ``convert_objects`` is not available in newer pandas releases, where
    # ``infer_objects`` performs the dtype inference instead.
    if hasattr(df, 'infer_objects'):
        df = df.infer_objects()
    else:
        df = df.convert_objects()

    pandas_path = inspect.getfile(pandas)
    prefix = os.path.dirname(os.path.abspath(pandas_path)) + os.sep
    # plain prefix removal, so the path is never interpreted as a regex
    df['filename'] = [name[len(prefix):] if name.startswith(prefix) else name
                      for name in df['filename']]
    return df
def parse_args(argv=None):
    """Build the command line parser and parse `argv`.

    Parameters
    ----------
    argv : list of str, optional
        The arguments to parse.  With the default of None, argparse reads
        them from ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        The parsed options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-k', '--kind', default='all',
                        choices=('all', 'a', 'empty', 'e', 'nonempty', 'n'),
                        help='The type of nodes you want to look for')
    parser.add_argument('-m', '--module', default='pandas',
                        help=('The name of a module or file to search for '
                              'nodes in'))
    parser.add_argument('-i', '--file-filters', default=None, nargs='*',
                        help=("A list of regular expressions describing files "
                              "you want to ignore"))
    parser.add_argument('-d', '--dir-filters', default=None, nargs='*',
                        help=('A list of regular expressions describing'
                              ' directories you want to ignore'))
    parser.add_argument('-p', '--full-path', action='store_true',
                        help=('Display the entire path of the file if this '
                              'is given'))
    parser.add_argument('-t', '--type', nargs='*',
                        help='The types of exceptions to report')
    # 'append' adds each given value after the default entry 'filename'
    parser.add_argument('-b', '--sort-by', default=['filename'],
                        action='append',
                        help=('A list of columns or index levels you want to '
                              'sort by'))
    parser.add_argument('-I', '--ignore', help='Ignore these kinds of '
                        'exceptions', default=('NotImplementedError',
                                               'StopIteration'),
                        nargs='*')
    parser.add_argument('-v', '--keep-exception-variables',
                        action='store_true',
                        help='Show raises with a variable as the argument')
    parser.add_argument('-r', '--keep-reraises', action='store_true',
                        help='Show reraised exceptions in the output')
    parser.add_argument('-V', '--validate', action='store_true',
                        help='Add a column to show the validity format strings')
    parser.add_argument('-P', '--page', action='store_true',
                        help='Show the data in a pager')
    return parser.parse_args(argv)
def _build_exc_regex(exc_list): | |
return r'(.*(?:{0}).*)'.format('|'.join(exc_list)) | |
def main(args):
    """Collect, filter and print the raises of ``args.module``.

    Parameters
    ----------
    args : argparse.Namespace
        The options produced by ``parse_args``.

    Returns
    -------
    int
        0, or the pager's return code when ``args.page`` is set.

    Notes
    -----
    The report is also stored in the module-level name ``df``.
    """
    global df

    asserts = get_asserts_from_obj(args.module, args.file_filters,
                                   args.dir_filters, args.full_path)
    if not asserts:
        print("No asserts found in '{0}'".format(args.module))
        return 0

    df = asserts_to_frame(asserts)

    # Drop the ignored exception names.  An empty list must not filter at
    # all: joining it gives the pattern '', which matches every row.
    if args.ignore:
        ignored = df.code.str.contains('|'.join(args.ignore), case=False)
        df = df[~ignored.fillna(False).astype(bool)]

    atype = args.kind
    msg = 'No'
    if atype.startswith('e'):
        df = df[df.msg.isnull()]
        msg += ' empty'
    elif atype.startswith('n'):
        df = df[df.msg.notnull()]
        msg += ' nonempty'

    if not args.keep_exception_variables:
        df = df[df.code != 'variable']

    if not args.keep_reraises:
        df = df[df.code != 'reraise']

    if args.validate:
        # keep only the rows whose validity check failed
        df = df[~df.valid.astype(bool)]
        msg += ' invalid'

    if df.empty:
        print("{0} exceptions matching {1} found in module "
              "'{2}'".format(msg, args.type or 'all exceptions', args.module))
        return 0

    # widen the display so that the longest filename is not truncated
    max_cols = int(df.filename.map(lambda x: len(repr(x))).max())
    with option_context('display.max_colwidth', max_cols,
                        'display.max_seq_items', max_cols):
        if args.type is not None:
            # case-insensitive "contains any of the requested names"
            wanted = df.code.str.contains(
                '(?:{0})'.format('|'.join(args.type)), case=False)
            df = df[wanted.fillna(False).astype(bool)]

            if df.empty:
                msg = "{0} {1} found in '{2}'".format(msg, args.type,
                                                      args.module)
                print(msg)
                return 0

        # ``sort_values`` replaces ``DataFrame.sort`` in newer pandas
        sorter = getattr(df, 'sort_values', None) or df.sort
        df = sorter(args.sort_by)

        if args.page:
            import shlex

            # text mode, because ``to_string`` writes str
            with tempfile.NamedTemporaryFile(mode='w+') as tmpf:
                df.to_string(buf=tmpf)
                # the pager opens the file by name, so the buffered text
                # has to reach the disk before the pager starts
                tmpf.flush()
                # PAGER may carry options, e.g. 'less -S'
                pager = shlex.split(os.environ.get('PAGER', 'less'))
                return subprocess.call(pager + [tmpf.name])
        else:
            print(df.to_string())

    return 0
# the most recent report; main() rebinds this name
df = None


if __name__ == '__main__':
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        # hand main()'s return value to the shell as the exit status
        raise SystemExit(main(parse_args()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example
You can reproduce this with
or
from the top-level pandas directory. (There are more rows than shown here. I omitted some for brevity's sake.)
Description
The above image shows all of the exceptions whose constructor has the name `Exception` (`--kind` option) in the `pandas.core` module (`--module` option) and sorts the result first by the file name then by the line number (`--sort-by` option; it defaults to sorting by file name, and other values of this parameter are appended to the list of columns to sort by).
Notes
python setup.py develop
. If you didn't do that then the results will reflect the current installed version of pandas, which is probably not what you want.python scripts/parse_except.py --help
PAGER
(defaults to `less`).