Skip to content

Instantly share code, notes, and snippets.

@cpcloud
Created September 28, 2013 18:49
Show Gist options
  • Save cpcloud/6745173 to your computer and use it in GitHub Desktop.
Save cpcloud/6745173 to your computer and use it in GitHub Desktop.
Command line tool to show raised exceptions that don't have a message. Originally created for use with the pandas data analysis library codebase.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import re
import os
import fnmatch
import ast
import argparse
import inspect
import tempfile
import subprocess
import operator
import tokenize
import string
import warnings
try:
from importlib import import_module
except ImportError:
import_module = __import__
from numpy import nan as NA, logical_not
from pandas import DataFrame
from pandas.core.config import option_context
# Recreate the str.format() "format string syntax" as a regular expression.
# The pieces below mirror the grammar in the Python documentation and are
# assembled bottom-up into ``replacement_field``.
# NOTE: tokenize.Name / tokenize.Intnumber are regex fragments exposed by the
# tokenize module; they are not part of its documented API.
identifier = tokenize.Name
integer = tokenize.Intnumber
attribute_name = identifier
arg_name = r'({identifier})|({integer})'.format(integer=integer,
                                                identifier=identifier)
# any run of printable characters except the closing bracket
index_string = '[' + re.escape(string.printable.replace(']', '')) + ']+'
element_index = r'({integer})|({index_string})'.format(
    integer=integer, index_string=index_string)
field_name = r'({0})(\.({1})|\[({2})\])*'.format(arg_name, attribute_name,
                                                  element_index)
# alternation of the conversion flags; it is wrapped in a non-capturing group
# where it is used so that the leading '!' applies to every alternative
conversion = 'r|s|a'
# the fill is a *single* character (anything but a brace), so it has to be a
# character class rather than the literal run of all printable characters
fill = ('[' +
        re.escape(string.printable.replace('{', '').replace('}', '')) +
        ']')
align = '[<>=^]'
sign = r'[+\- ]'
width = integer
precision = integer
typ = '[bcdeEfFgGnosxX%]'
format_spec = (
    r'(({fill})?{align})?({sign})?#?0?({width})?,?'
    r'(\.({precision}))?({typ})?'
).format(fill=fill, align=align, sign=sign, width=width,
         precision=precision, typ=typ)
# The literal braces are added outside of the .format() call so that they are
# not mistaken for replacement fields of the template itself.
replacement_field = (
    r'\{' +
    r'({field_name})(!(?:{conversion}))?(:({format_spec}))?'.format(
        field_name=field_name, conversion=conversion,
        format_spec=format_spec) +
    r'\}')
FMT_SPEC_RE = re.compile(replacement_field)
def get_num_format_spec(s):
    """Count the arguments that the template string `s` expects.

    The string is first treated as a ``str.format`` template: the result is
    the number of distinct replacement fields found by ``FMT_SPEC_RE``.  If
    there are none, the string is treated as a ``%``-interpolation template
    and every conversion specifier is counted.

    Parameters
    ----------
    s : str
        The template text.

    Returns
    -------
    int
        Number of expected arguments; 0 if `s` contains no specifiers.
    """
    # if it's a format string
    n = len(frozenset(FMT_SPEC_RE.findall(s)))
    if n:
        return n
    # either not a format string or nothing in it: fall back to printf-style
    # specifiers -- optional mapping key, flags, width, precision, length
    # modifier and finally the conversion type
    rx = (r'%(?:\({identifier}\))?[#0\- +]*(?:\*|\d+)?(?:\.(?:\*|\d+))?'
          r'[hlL]?(?P<conv>[diouxXeEfFgGcrsa%])').format(identifier=identifier)
    for match in re.finditer(rx, s):
        if match.group('conv') == '%':
            # '%%' is an escaped percent sign and consumes no argument
            continue
        # every specifier is counted on its own (adjacent ones such as
        # '%s%s' are two arguments); a '*' width or precision takes one
        # additional argument each
        n += 1 + match.group(0).count('*')
    return n
def parse_interp_string(node):
    """Return the template text of a ``'literal' % args`` expression.

    Only works for non-nested nodes.

    Parameters
    ----------
    node : ast.AST
        Candidate node; expected to be a ``%`` binary operation whose left
        side is a string literal and whose right side is a tuple, a string
        literal or a dict with string-literal keys.

    Returns
    -------
    str
        The text of the left-hand string literal.

    Raises
    ------
    AssertionError
        If `node` does not have the shape described above.  The error is
        raised explicitly (not through ``assert``) so that callers relying on
        it keep working when Python runs with ``-O``.
    """
    def literal_text(candidate):
        # String literals are ast.Str on old interpreters and ast.Constant
        # holding a str on Python >= 3.8; the class is looked up by name so
        # that neither of them has to exist.
        kind = type(candidate).__name__
        if kind == 'Str':
            return candidate.s
        if kind == 'Constant' and isinstance(candidate.value, str):
            return candidate.value
        return None

    def require(condition, message):
        if not condition:
            raise AssertionError(message)

    require(isinstance(node, ast.BinOp), 'node is not a binary operator')
    require(isinstance(node.op, ast.Mod), 'node operator is not %')
    template = literal_text(node.left)
    require(template is not None, 'lhs is not a str')
    rhs = node.right
    require(isinstance(rhs, (ast.Tuple, ast.Dict)) or
            literal_text(rhs) is not None,
            'rhs is not a tuple, string, or dict')
    if isinstance(rhs, ast.Dict):
        require(all(literal_text(key) is not None for key in rhs.keys),
                'all dict keys must be strings')
    return template
def parse_format_string(node):
    """Return the template text of a ``'literal'.format(...)`` call.

    Parameters
    ----------
    node : ast.AST
        Candidate node; expected to be a call of the ``format`` method on a
        string literal.

    Returns
    -------
    str
        The text of the string literal whose ``format`` method is called.

    Raises
    ------
    AssertionError
        If `node` is not such a call.  Raised explicitly (not through
        ``assert``) so that the behaviour does not change under ``-O``.
    """
    if not isinstance(node, ast.Call):
        raise AssertionError('node is not a method call')
    func = node.func
    if not isinstance(func, ast.Attribute):
        raise AssertionError(
            'func not a method, {0!r}'.format(func.__class__.__name__))
    # other string methods (', '.join(...), 'x'.upper(), ...) are not
    # templates and must not have their arguments counted as format fields
    if func.attr != 'format':
        raise AssertionError('method is not format')
    target = func.value
    # String literals are ast.Str on old interpreters and ast.Constant holding
    # a str on Python >= 3.8; the class is looked up by name so that neither
    # of them has to exist.
    kind = type(target).__name__
    if kind == 'Str':
        return target.s
    if kind == 'Constant' and isinstance(target.value, str):
        return target.value
    raise AssertionError('object is not a string')
def parse_num_format_string_args(node):
    """Count the arguments passed in a call node.

    Both positional and keyword arguments are counted, since a template such
    as ``'{name}'.format(name=x)`` is filled from a keyword argument.

    Parameters
    ----------
    node : ast.AST
        Usually an ``ast.Call``.

    Returns
    -------
    int
        Number of positional plus keyword arguments.

    Raises
    ------
    AttributeError
        If `node` has no ``args`` attribute (i.e. it is not a call); callers
        use this to detect non-call nodes.
    """
    positional = node.args
    keywords = getattr(node, 'keywords', None) or ()
    return len(positional) + len(keywords)
def parse_num_interp_string_args(node):
    """Count the elements on the right-hand side of a binary operation.

    `node` is expected to be something like ``'...' % (a, b)``.  An
    AttributeError propagates when `node` has no ``right`` operand or when
    that operand has no ``elts`` (i.e. it is not a tuple/list display).
    """
    rhs_items = node.right.elts
    return len(rhs_items)
def try_parse_raise_arg(node):
    """Extract the message text from the first argument of a raise.

    The following interpretations are tried in order and the first one that
    succeeds wins:

    1. `node` carries its text in an ``s`` attribute (a string literal)
    2. `node` is an interpolated string, see ``parse_interp_string``
    3. `node` is a ``.format`` call, see ``parse_format_string``

    If none of them applies, `node` itself is returned unchanged.
    """
    # string
    try:
        return node.s
    except AttributeError:
        pass
    # interpolated string
    try:
        return parse_interp_string(node)
    except AssertionError:
        pass
    # format spec string
    try:
        return parse_format_string(node)
    except AssertionError:
        return node
def check_valid_msg(node):
    """Check that a message template receives as many arguments as it needs.

    Parameters
    ----------
    node : ast.AST
        The first argument of an exception constructor.

    Returns
    -------
    bool
        False only when `node` is a recognised ``'...' % (...)`` or
        ``'...'.format(...)`` expression whose number of arguments differs
        from the number of specifiers in the template.  Anything that cannot
        be analysed is reported as valid (True).
    """
    try:
        n = parse_num_interp_string_args(node)
    except AttributeError:
        try:
            n = parse_num_format_string_args(node)
        except AttributeError:
            # neither a binary operation nor a call: nothing to validate
            return True
        parse_template = parse_format_string
    else:
        parse_template = parse_interp_string
    try:
        s = parse_template(node)
    except AssertionError:
        # e.g. ``fmt % (a, b)`` or ``func(a)``: the template is not a literal
        # that can be inspected, so it cannot be called invalid.  This also
        # keeps an unparsable ``%`` expression from aborting the whole scan.
        return True
    return n == get_num_format_spec(s)
class RaiseVisitor(ast.NodeVisitor):
    """Collect a description of every ``raise`` statement in a syntax tree.

    Parameters
    ----------
    pyfile : str
        Name of the file the tree was parsed from; used in the result keys.
    asserts : dict
        Mapping that is filled in place.  Keys are ``(pyfile, lineno,
        col_offset)`` tuples, values are lists starting with a "code":

        * ``['reraise']`` for a bare ``raise``
        * ``['variable']`` for ``raise name`` and for any expression that is
          not recognised as a (dotted) name or a call of one
        * ``['pkg.Error']`` for ``raise pkg.Error`` (no call)
        * ``[name, 'no arguments']`` for ``raise Error()``
        * ``[name, message, is_valid]`` for ``raise Error(message, ...)``
          where `message` comes from ``try_parse_raise_arg`` and `is_valid`
          from ``check_valid_msg``
    """

    def __init__(self, pyfile, asserts):
        self.pyfile = pyfile
        self.asserts = asserts

    @staticmethod
    def _dotted_name(expr):
        """Return ``'a.b.c'`` for a Name/Attribute chain, otherwise None."""
        parts = []
        while isinstance(expr, ast.Attribute):
            parts.append(expr.attr)
            expr = expr.value
        if not isinstance(expr, ast.Name):
            return None
        parts.append(expr.id)
        return '.'.join(reversed(parts))

    def visit_Raise(self, node):
        """Record what `node` raises under its (file, line, column) key."""
        k = self.pyfile, node.lineno, node.col_offset
        # the raised expression is stored in ``exc`` by Python 3 and in
        # ``type`` by Python 2
        exc = node.exc if hasattr(node, 'exc') else node.type
        if exc is None:
            # a bare ``raise`` re-raises the exception being handled
            self.asserts[k] = ['reraise']
            return
        if isinstance(exc, ast.Attribute):
            name = self._dotted_name(exc)
            self.asserts[k] = ['variable' if name is None else name]
            return
        # try to get the name of the exception constructor
        name = None
        if isinstance(exc, ast.Call):
            name = self._dotted_name(exc.func)
        if name is None:
            # not a constructor call whose name can be read off the source
            self.asserts[k] = ['variable']
            return
        # is constructor, try parsing its contents
        row = self.asserts[k] = [name]
        if not exc.args:
            # no arguments (e.g., raise Exception())
            row.append('no arguments')
            return
        # only the first argument is inspected
        arg = exc.args[0]
        v = try_parse_raise_arg(arg)
        is_valid = check_valid_msg(arg)
        row.extend([v, is_valid])
def parse_file(pyfile, asserts):
    """Parse one Python source file and record its ``raise`` statements.

    Parameters
    ----------
    pyfile : str
        Path of the file to parse.
    asserts : dict
        Filled in place by ``RaiseVisitor``.

    Returns
    -------
    None
        Files that cannot be parsed are skipped silently.
    """
    # Read raw bytes and let the parser decode them: it honours the PEP 263
    # coding declaration, which a text-mode read with the locale's default
    # encoding does not.
    with open(pyfile, 'rb') as pyf:
        source = pyf.read()
    try:
        parsed = ast.parse(source, pyfile, 'exec')
    except (SyntaxError, ValueError, TypeError):
        # not valid source for this interpreter (depending on the Python
        # version, embedded null bytes are reported as ValueError or
        # TypeError rather than SyntaxError)
        return
    RaiseVisitor(pyfile, asserts).visit(parsed)
def path_matches(path, pattern):
    """Tell whether the regular expression `pattern` occurs in `path`.

    The pattern is searched for anywhere in the string, it does not have to
    match at the beginning.
    """
    hit = re.compile(pattern).search(path)
    return hit is not None
def regex_or(*patterns):
    """Combine `patterns` into a single group matching any one of them."""
    alternatives = '|'.join(patterns)
    return '(' + alternatives + ')'
def get_asserts_from_path(path, file_filters, dir_filters):
    """Collect the ``raise`` statements of a file or of a directory tree.

    Parameters
    ----------
    path : str
        A single file, or a directory that is walked recursively for
        ``*.py`` files.
    file_filters : sequence of str or None
        Regular expressions; a file whose name matches any of them is
        skipped.  None selects the default (``__init__.py``); an empty
        sequence disables the filter.
    dir_filters : sequence of str or None
        Regular expressions; a directory whose path matches any of them is
        skipped.  None selects the defaults (build, .tox and egg
        directories); an empty sequence disables the filter.

    Returns
    -------
    dict
        The mapping filled by ``parse_file`` for every file visited.
    """
    if file_filters is None:
        file_filters = ('__init__.py',)
    if dir_filters is None:
        dir_filters = ('build', '.tox', r'.*\.egg.*')
    # An empty filter list must exclude nothing.  regex_or() without any
    # pattern would produce '()', which matches every path and would
    # therefore filter out every file.
    file_pattern = regex_or(*file_filters) if file_filters else None
    dir_pattern = regex_or(*dir_filters) if dir_filters else None
    asserts = {}
    if os.path.isfile(path):
        # a single file is parsed regardless of the filters
        parse_file(path, asserts)
        return asserts
    for root, _, filenames in os.walk(path):
        if dir_pattern is not None and path_matches(root, dir_pattern):
            continue
        for fn in fnmatch.filter(filenames, '*.py'):
            if file_pattern is not None and path_matches(fn, file_pattern):
                continue
            parse_file(os.path.join(root, fn), asserts)
    return asserts
def obj_path_from_string(dotted_name, full_path):
    """Return the directory containing the source of a module or object.

    Parameters
    ----------
    dotted_name : str
        Either an importable module name (``'pandas.core'``) or the dotted
        path of an object inside a module (``'pandas.core.frame.DataFrame'``).
    full_path : bool
        If true the absolute, normalised path is used
        (``inspect.getabsfile``), otherwise whatever ``inspect.getfile``
        reports.

    Returns
    -------
    str
        ``os.path.dirname`` of the source file of the module/object.

    Raises
    ------
    ImportError
        If neither `dotted_name` nor its parent can be imported.
    AttributeError
        If the parent module imports but has no attribute named like the
        last component of `dotted_name`.
    """
    try:
        obj = import_module(dotted_name)
    except ImportError:
        # not a module: try again as "<module>.<attribute>"
        module_name, _, obj_name = dotted_name.rpartition('.')
        if not module_name:
            # there is no parent module to fall back to
            raise ImportError("'{0}' is not a valid Python "
                              "module".format(dotted_name))
        try:
            module = import_module(module_name)
        except ImportError:
            raise ImportError("'{0}' is not a valid Python "
                              "module".format(module_name))
        try:
            obj = getattr(module, obj_name)
        except AttributeError:
            raise AttributeError("module '{0}' has no attribute "
                                 "'{1}'".format(module_name, obj_name))
    if full_path:
        path = inspect.getabsfile(obj)
    else:
        path = inspect.getfile(obj)
    if path.endswith('pyc'):
        # map the byte-compiled file to its source; only the trailing 'c' is
        # dropped (str.strip would also eat a leading 'c' of the path)
        path = path[:-1]
    return os.path.dirname(path)
def get_asserts_from_obj(dotted_name, file_filters, dir_filters, full_path):
    """Collect the ``raise`` statements found next to a module or object.

    `dotted_name` is resolved to a directory by ``obj_path_from_string``
    (`full_path` is handed through to it) and that directory is then scanned
    by ``get_asserts_from_path`` using the two filter lists.
    """
    return get_asserts_from_path(obj_path_from_string(dotted_name, full_path),
                                 file_filters, dir_filters)
def asserts_to_frame(asserts):
    """Turn the collected ``raise`` descriptions into a DataFrame.

    Parameters
    ----------
    asserts : dict
        Maps ``(filename, line, col)`` to a list of up to three values
        (code, message, validity) as produced by ``RaiseVisitor``.

    Returns
    -------
    DataFrame
        One row per entry with the columns filename, line, col, code, msg
        and valid.  Values missing from short entries are NaN and the
        directory of the installed pandas package is removed from the file
        names.
    """
    import pandas
    columns = 'filename', 'line', 'col', 'code', 'msg', 'valid'
    pandas_path = inspect.getfile(pandas)
    pandas_dir = os.path.dirname(os.path.abspath(pandas_path))
    prefix = pandas_dir + os.sep
    rows = []
    for key, info in asserts.items():
        row = list(key) + list(info)
        # entries have between one and three values; pad them explicitly so
        # that the frame can be built even if no entry has all three
        row.extend([NA] * (len(columns) - len(row)))
        row[0] = row[0].replace(prefix, '')
        rows.append(row)
    df = DataFrame(rows, columns=columns).fillna(NA)
    # soft type inference of the object columns: ``infer_objects`` on current
    # pandas, ``convert_objects`` on the old releases that predate it
    convert = getattr(df, 'infer_objects', None) or df.convert_objects
    return convert()
def parse_args():
    """Build the command line interface and parse ``sys.argv``.

    Returns the ``argparse.Namespace`` with the attributes kind, module,
    file_filters, dir_filters, full_path, type, sort_by, ignore,
    keep_exception_variables, keep_reraises, validate and page.
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # what to look for and where
    add('-k', '--kind',
        default='all',
        choices=('all', 'a', 'empty', 'e', 'nonempty', 'n'),
        help='The type of nodes you want to look for')
    add('-m', '--module',
        default='pandas',
        help='The name of a module or file to search for nodes in')
    add('-i', '--file-filters',
        default=None,
        nargs='*',
        help='A list of regular expressions describing files you want to '
             'ignore')
    add('-d', '--dir-filters',
        default=None,
        nargs='*',
        help='A list of regular expressions describing directories you want '
             'to ignore')
    add('-p', '--full-path',
        action='store_true',
        help='Display the entire path of the file if this is given')

    # which rows to report and in what order
    add('-t', '--type',
        nargs='*',
        help='The types of exceptions to report')
    add('-b', '--sort-by',
        default=['filename'],
        action='append',
        help='A list of columns or index levels you want to sort by')
    add('-I', '--ignore',
        help='Ignore these kinds of exceptions',
        default=('NotImplementedError', 'StopIteration'),
        nargs='*')
    add('-v', '--keep-exception-variables',
        action='store_true',
        help='Show raises with a variable as the argument')
    add('-r', '--keep-reraises',
        action='store_true',
        help='Show reraised exceptions in the output')
    add('-V', '--validate',
        action='store_true',
        help='Add a column to show the validity format strings')

    # presentation
    add('-P', '--page',
        action='store_true',
        help='Show the data in a pager')
    return cli.parse_args()
def _build_exc_regex(exc_list):
return r'(.*(?:{0}).*)'.format('|'.join(exc_list))
def main(args):
    """Find, filter and display the ``raise`` statements of a module.

    Parameters
    ----------
    args : argparse.Namespace
        The options produced by ``parse_args``.

    Returns
    -------
    int
        0 when nothing was found or the table was printed; the pager's exit
        status when ``--page`` is given.

    Notes
    -----
    The resulting frame is also stored in the module-level ``df``.
    NOTE(review): the frame handling below relies on the pandas API of the
    time this script was written (``DataFrame.sort``, ``str.match``
    returning the matched groups) -- confirm against the installed pandas.
    """
    global df
    asserts = get_asserts_from_obj(args.module, args.file_filters,
                                   args.dir_filters, args.full_path)
    if not asserts:
        print("No asserts found in '{0}'".format(args.module))
        return 0
    df = asserts_to_frame(asserts)
    if args.ignore:
        ignored = df.code.str.contains('|'.join(args.ignore), case=False)
        filt = logical_not(ignored.fillna(False))
    else:
        # An empty ignore list (a bare ``-I``) must ignore nothing: joining
        # it gives '', which is contained in every code and would therefore
        # drop every row.
        filt = df.code.map(lambda _: True)
    atype = args.kind
    msg = 'No'
    if atype.startswith('e'):
        ind = df.msg.isnull()
        msg += ' empty'
    elif atype.startswith('n'):
        ind = df.msg.notnull()
        msg += ' nonempty'
    else:
        ind = slice(None)
    df = df[ind][filt]
    if not args.keep_exception_variables:
        df = df[df.code != 'variable']
    if not args.keep_reraises:
        df = df[df.code != 'reraise']
    if args.validate:
        # keep only the rows whose message failed validation
        df = df[~df.valid.astype(bool)]
        msg += ' invalid'
    if df.empty:
        print("{0} exceptions matching {1} found in module "
              "'{2}'".format(msg, args.type or 'all exceptions', args.module))
        return 0
    # make the columns wide enough to show the longest file name in full
    max_cols = int(df.filename.map(lambda x: len(repr(x))).max())
    with option_context('display.max_colwidth', max_cols,
                        'display.max_seq_items', max_cols):
        if args.type is not None:
            regex = _build_exc_regex(args.type)
            vals = df.code.str.match(regex, re.IGNORECASE)
            df = df[vals.str[0].notnull()]
        if df.empty:
            msg = "{0} {1} found in '{2}'".format(msg, args.type, args.module)
            print(msg)
            return 0
        df = df[filt].sort(args.sort_by)
        if args.page:
            # text mode, so that the rendered table can be written as str
            with tempfile.NamedTemporaryFile(mode='w+') as tmpf:
                df.to_string(buf=tmpf)
                # the pager opens the file by name, so the buffered text has
                # to be on disk before it starts
                tmpf.flush()
                return subprocess.call([os.environ.get('PAGER', 'less'),
                                        tmpf.name])
        else:
            print(df.to_string())
            return 0
# Module-level handle on the last frame built by main(); main() rebinds it
# through ``global df``.
df = None
if __name__ == '__main__':
    # UserWarnings are silenced for the duration of the run only
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        status = main(parse_args())
    # hand main()'s return value (e.g. the pager's exit status) back to the
    # shell instead of discarding it
    raise SystemExit(status)
@cpcloud
Copy link
Author

cpcloud commented Sep 28, 2013

Example

parse-except

You can reproduce this with

python scripts/parse_except.py --sort-by line --module pandas.core --type Exception

or

python scripts/parse_except.py -b line -m pandas.core -t Exception

from the top level pandas directory. (There are more rows than shown here. I omitted some for brevity's sake.)

Description

The above image shows all of the exceptions whose constructor name matches Exception (--type option) in the pandas.core module (--module option), sorted first by file name and then by line number (--sort-by option; it defaults to sorting by file name, and any other values given are appended to the list of columns to sort by).

Notes

  • This assumes you have installed pandas with python setup.py develop. If you didn't do that then the results will reflect the current installed version of pandas, which is probably not what you want.
  • To see all of the options and a short description you can do: python scripts/parse_except.py --help
  • The --page option probably won't work on Windows, since it shows the output in your PAGER (defaults to less).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment