cpcloud · September 28, 2013 18:49 · cpcloud · Sep 28, 2013
diff --git a/gistfile1.py b/gistfile1.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 from __future__ import print_function

 import re
 import os
 import fnmatch
 import ast
 import argparse
 import inspect
 import tempfile
 import subprocess
 import operator
 import tokenize
 import string
 import warnings

 try:
    from importlib import import_module
 except ImportError:
    def import_module(name):
        """Import the dotted module ``name`` and return that module.

        Fallback for interpreters without :mod:`importlib`.  A bare
        ``__import__('a.b')`` returns the top-level package ``a``, so the
        requested module is fetched from ``sys.modules`` after importing.
        """
        import sys

        __import__(name)
        return sys.modules[name]


 from numpy import nan as NA, logical_not
 from pandas import DataFrame
 from pandas.core.config import option_context


 # recreate the format spec mini language
 # Recreate the ``str.format`` replacement-field grammar as a regular
 # expression.  ``tokenize.Name`` / ``tokenize.Intnumber`` are the regex
 # source strings exposed by the tokenize module for names and integers.
 identifier = tokenize.Name
 integer = tokenize.Intnumber
 attribute_name = identifier
 arg_name = r'({identifier})|({integer})'.format(integer=integer,
                                                 identifier=identifier)
 # an element index may hold any printable character except the closing ']'
 index_string = '[' + re.escape(string.printable.replace(']', '')) + ']+'
 element_index = r'({integer})|({index_string})'.format(integer=integer,
                                                        index_string=index_string)
 field_name = r'({0})(\.({1})|\[({2})\])*'.format(arg_name, attribute_name,
                                                  element_index)
 conversion = 'r|s'

 # The fill is a *single* character, so it has to be a character class; the
 # bare escaped string only matches every printable character in sequence.
 fill = ('[' +
         re.escape(string.printable.replace('{', '').replace('}', '')) +
         ']')
 align = '[<>=^]'
 sign = r'[+\- ]'
 width = integer
 precision = integer
 typ = '[bcdeEfFgGnosxX%]'
 format_spec = (r'(({fill})?{align})?({sign})?#?0?({width})?,?'
                r'(\.({precision}))?({typ})?').format(fill=fill, align=align,
                                                      sign=sign, width=width,
                                                      precision=precision,
                                                      typ=typ)
 # ``conversion`` is an alternation, so it is wrapped in a non-capturing group
 # to keep the leading '!' attached to both alternatives.  Raw strings avoid
 # the invalid '\{' / '\}' escape sequences of the plain literals.
 replacement_field = (r'\{' +
                      (r'({field_name})(!(?:{conversion}))?'
                       r'(:({format_spec}))?').format(field_name=field_name,
                                                      conversion=conversion,
                                                      format_spec=format_spec) +
                      r'\}')

 FMT_SPEC_RE = re.compile(replacement_field)


 def get_num_format_spec(s):
    """Count the arguments the template string ``s`` expects.

    ``s`` is first scanned for ``str.format`` replacement fields using the
    module-level ``FMT_SPEC_RE``; the result is the number of *distinct*
    argument names (``'{0.a} {0.b}'`` needs one argument).  If no field is
    found, ``s`` is treated as a ``%``-style template and every conversion
    specifier is counted.

    Returns 0 when ``s`` contains neither kind of placeholder.
    """
    # group 2 of FMT_SPEC_RE is the argument name/index at the head of a field
    n = len(frozenset(m.group(2) for m in FMT_SPEC_RE.finditer(s)))

    # either not a format string or nothing in it
    if not n:
        rx = (r'%(?:\({identifier}\))?[#0\- +]*(?:\d+)?(?:\.\d+)?[hlL]?'
              r'[diouxXeEfFgGcrs]').format(identifier=identifier)
        # '%%' is a literal percent sign, not a conversion; each specifier is
        # matched on its own so that adjacent ones ('%s%s') are all counted
        n = len(re.findall(rx, s.replace('%%', '')))
    return n


 def parse_interp_string(node):
    """Return the template of a ``'...' % args`` expression node.

    Only works for non-nested nodes: ``node`` must be an ``ast.BinOp`` using
    ``%`` whose left operand is a string literal and whose right operand is
    a tuple, string or dict literal (with string-literal keys).

    Raises
    ------
    AssertionError
        If ``node`` does not have that shape.  The exception is raised
        explicitly (rather than via ``assert``) because callers use it for
        control flow and ``assert`` statements vanish under ``python -O``.
    """
    if not isinstance(node, ast.BinOp):
        raise AssertionError('node is not a binary operator')
    if not isinstance(node.op, ast.Mod):
        raise AssertionError('node operator is not %')
    if not isinstance(node.left, ast.Str):
        raise AssertionError('lhs is not a str')
    if not isinstance(node.right, (ast.Tuple, ast.Str, ast.Dict)):
        raise AssertionError('rhs is not a tuple, string, or dict')
    if isinstance(node.right, ast.Dict):
        if not all(isinstance(key, ast.Str) for key in node.right.keys):
            raise AssertionError('all dict keys must be strings')
    return node.left.s


 def parse_format_string(node):
    """Return the template of a ``'...'.format(...)`` call node.

    Raises
    ------
    AssertionError
        If ``node`` is not a call of the ``format`` method on a string
        literal.  Raised explicitly so the check survives ``python -O``;
        callers catch it to fall back to other interpretations.
    """
    if not isinstance(node, ast.Call):
        raise AssertionError('node is not a method call')
    if not isinstance(node.func, ast.Attribute):
        raise AssertionError(
            'func not a method, {0!r}'.format(node.func.__class__.__name__))
    # other string methods (e.g. ', '.join(...)) are not format templates
    if node.func.attr != 'format':
        raise AssertionError('method is not format')
    if not isinstance(node.func.value, ast.Str):
        raise AssertionError('object is not a string')
    return node.func.value.s


 def parse_num_format_string_args(node):
    """Return how many arguments the call ``node`` passes.

    Positional and keyword arguments are both counted, since
    ``'{0} {name}'.format(a, name=b)`` fills two fields.

    Raises
    ------
    AttributeError
        If ``node`` has no ``args`` attribute (i.e. is not a call); callers
        rely on this to detect non-call nodes.
    """
    return len(node.args) + len(getattr(node, 'keywords', ()))


 def parse_num_interp_string_args(node):
    """Return the number of elements on the right-hand side of ``node``.

    ``node`` is expected to be a binary operation whose right operand has an
    ``elts`` sequence (a tuple or list literal).  An ``AttributeError``
    escapes when either attribute is missing.
    """
    rhs = node.right
    return len(rhs.elts)


 def try_parse_raise_arg(node):
    """Extract the message text from the first argument of a raise.

    The interpretations are tried in order: a node exposing ``.s`` (string
    literal), a ``%``-interpolated string, a ``str.format`` call.  When none
    applies the node itself is returned unchanged.
    """
    try:
        # string
        return node.s
    except AttributeError:
        pass

    # interpolated string first, then format spec string
    for parser in (parse_interp_string, parse_format_string):
        try:
            return parser(node)
        except AssertionError:
            continue
    return node


 def check_valid_msg(node):
    """Check that a message template receives as many arguments as it needs.

    ``node`` is the first argument of an exception constructor.  The number
    of supplied arguments is compared with the number of placeholders found
    by ``get_num_format_spec``.

    Returns ``True`` when the counts agree *or* when the template cannot be
    determined statically (e.g. ``fmt % (a, b)`` with a non-literal ``fmt``),
    so that only provable mismatches are reported as invalid.
    """
    n, s = 0, ''

    try:
        n = parse_num_interp_string_args(node)
    except AttributeError:
        try:
            n = parse_num_format_string_args(node)
        except AttributeError:
            # neither an interpolation nor a call: nothing to compare
            pass
        else:
            try:
                s = parse_format_string(node)
            except AssertionError:
                return True
    else:
        try:
            s = parse_interp_string(node)
        except AssertionError:
            # the rhs has elements but the lhs is not a literal template;
            # previously this escaped and aborted the whole scan
            return True
    return n == get_num_format_spec(s)


 class RaiseVisitor(ast.NodeVisitor):
    """Collect a description of every ``raise`` statement in a module.

    Each raise is stored in ``asserts`` under the key
    ``(pyfile, lineno, col_offset)``.  The value is a list that starts with
    the exception's name (or ``'variable'`` / ``'reraise'``) and, for
    constructor calls, continues with either ``'no arguments'`` or the
    message and its validity flag.
    """

    def __init__(self, pyfile, asserts):
        self.pyfile = pyfile
        self.asserts = asserts

    @staticmethod
    def _dotted_name(node):
        """Return ``'a'`` / ``'a.b'`` for simple name nodes, else ``None``."""
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name):
            return '{0}.{1}'.format(node.value.id, node.attr)
        return None

    def _record_constructor(self, k, call):
        """Record ``raise Name(...)`` / ``raise mod.Name(...)`` under ``k``."""
        name = self._dotted_name(call.func)
        if name is None:
            # callee too complex to name; labelled like a raised variable
            self.asserts[k] = ['variable']
            return

        record = self.asserts[k] = [name]
        if not call.args:
            # no arguments (e.g., raise Exception())
            record.append('no arguments')
            return

        arg = call.args[0]
        try:
            v = try_parse_raise_arg(arg)
            try:
                is_valid = check_valid_msg(arg)
            except AssertionError:
                # template not statically analysable: do not flag it
                is_valid = True
        except AttributeError:
            # kept from the original: unexpected node shapes are labelled
            # rather than aborting the scan
            record.append('no constructor')
        else:
            record.extend([v, is_valid])

    def visit_Raise(self, node):
        k = self.pyfile, node.lineno, node.col_offset

        # Python 2 names the raised expression ``type``, Python 3 ``exc``
        exc = getattr(node, 'type', None)
        if exc is None:
            exc = getattr(node, 'exc', None)

        if exc is None:
            # bare ``raise``
            self.asserts[k] = ['reraise']
        elif isinstance(exc, ast.Attribute):
            # raise module.Error (no call)
            name = self._dotted_name(exc)
            self.asserts[k] = [NA if name is None else name]
        elif isinstance(exc, ast.Call):
            self._record_constructor(k, exc)
        else:
            # a plain name or any other expression
            self.asserts[k] = ['variable']


 def parse_file(pyfile, asserts):
    """Parse ``pyfile`` and record its raise statements in ``asserts``.

    The file is read as text and handed to ``ast.parse``; a file that does
    not parse (``SyntaxError``) is skipped silently.  ``asserts`` is updated
    in place by a ``RaiseVisitor``; nothing is returned.
    """
    with open(pyfile, 'r') as handle:
        text = handle.read()

    try:
        tree = ast.parse(text, pyfile, 'exec')
    except SyntaxError:
        return None

    RaiseVisitor(pyfile, asserts).visit(tree)


 def path_matches(path, pattern):
    """Return True if the regex ``pattern`` matches anywhere in ``path``."""
    found = re.compile(pattern).search(path)
    return found is not None


 def regex_or(*patterns):
    """Combine ``patterns`` into one grouped alternation regex.

    With no patterns a regex that can never match is returned; the naive
    result ``'()'`` would match every string and so filter everything out.
    """
    if not patterns:
        return '(?!)'
    return '({0})'.format('|'.join(patterns))


 def get_asserts_from_path(path, file_filters, dir_filters):
    """Collect raise statements from a file or from a directory tree.

    Parameters
    ----------
    path : str
        A Python file, or a directory that is walked recursively.
    file_filters : sequence of str or None
        Regexes; file names matching any of them are skipped.  ``None``
        selects the default (``__init__.py``); an empty sequence disables
        file filtering.
    dir_filters : sequence of str or None
        Regexes; directories whose path matches any of them are skipped.
        ``None`` selects the defaults (build, .tox and egg directories); an
        empty sequence disables directory filtering.

    Returns
    -------
    dict
        The mapping filled in by ``parse_file``.
    """
    if file_filters is None:
        file_filters = r'__init__\.py$',
    if dir_filters is None:
        # raw strings: '\.' is not a valid escape in a plain literal
        dir_filters = 'build', r'\.tox', r'.*\.egg.*'

    # an empty alternation would match everything, so treat it as "no filter"
    file_rx = regex_or(*file_filters) if file_filters else None
    dir_rx = regex_or(*dir_filters) if dir_filters else None

    asserts = {}

    if os.path.isfile(path):
        parse_file(path, asserts)
        return asserts

    for root, _, filenames in os.walk(path):
        if dir_rx is not None and path_matches(root, dir_rx):
            continue

        for fn in fnmatch.filter(filenames, '*.py'):
            if file_rx is not None and path_matches(fn, file_rx):
                continue
            parse_file(os.path.join(root, fn), asserts)

    return asserts


 def obj_path_from_string(dotted_name, full_path):
    """Return the directory containing the object named by ``dotted_name``.

    ``dotted_name`` is imported as a module if possible; otherwise its last
    component is looked up as an attribute of the module named by the rest.

    Parameters
    ----------
    dotted_name : str
        e.g. ``'pandas'``, ``'pandas.core.frame'`` or ``'pandas.DataFrame'``.
    full_path : bool
        Use ``inspect.getabsfile`` instead of ``inspect.getfile``.

    Raises
    ------
    ImportError
        If no module can be imported for the name.
    AttributeError
        If the module exists but lacks the requested attribute.
    """
    try:
        obj = import_module(dotted_name)
    except ImportError:
        module_name, _, obj_name = dotted_name.rpartition('.')
        if not module_name:
            # nothing left to import; importing '' is an error of its own
            raise ImportError("'{0}' is not a valid Python "
                              "module".format(dotted_name))

        try:
            module = import_module(module_name)
        except ImportError:
            raise ImportError("'{0}' is not a valid Python "
                              "module".format(module_name))

        try:
            obj = getattr(module, obj_name)
        except AttributeError:
            raise AttributeError("module '{0}' has no attribute "
                                 "'{1}'".format(module_name, obj_name))

    path = inspect.getabsfile(obj) if full_path else inspect.getfile(obj)

    if path.endswith('.pyc'):
        # drop only the trailing 'c'; str.strip('c') also ate leading ones
        path = path[:-1]
    return os.path.dirname(path)


 def get_asserts_from_obj(dotted_name, file_filters, dir_filters, full_path):
    """Resolve ``dotted_name`` to a directory and scan it.

    The directory comes from ``obj_path_from_string``; the scan and its
    return value are those of ``get_asserts_from_path``.
    """
    location = obj_path_from_string(dotted_name, full_path)
    found = get_asserts_from_path(location, file_filters, dir_filters)
    return found


 def asserts_to_frame(asserts):
    """Turn the collected raise records into a DataFrame.

    Parameters
    ----------
    asserts : dict
        Maps ``(filename, line, col)`` to a list of up to three values
        (code, message, validity flag).

    Returns
    -------
    DataFrame
        One row per record with the columns ``filename``, ``line``, ``col``,
        ``code``, ``msg`` and ``valid``; missing trailing values are NaN.
        The pandas installation directory is stripped from the front of
        ``filename``.
    """
    import pandas

    columns = 'filename', 'line', 'col', 'code', 'msg', 'valid'
    rows = []
    for key, info in asserts.items():
        row = list(key) + list(info)
        # records differ in length; pad so that every row fills all columns
        row.extend([NA] * (len(columns) - len(row)))
        rows.append(row)

    df = DataFrame(rows, columns=columns).fillna(NA)
    # ``convert_objects`` exists only in old pandas releases
    convert = (getattr(df, 'convert_objects', None) or
               getattr(df, 'infer_objects', None))
    if convert is not None:
        df = convert()

    pandas_dir = os.path.dirname(os.path.abspath(inspect.getfile(pandas)))
    prefix = pandas_dir + os.sep
    # plain prefix removal: the path is not a regular expression
    df['filename'] = df.filename.map(
        lambda p: p[len(prefix):] if p.startswith(prefix) else p)
    return df


 def parse_args(argv=None):
    """Parse the command line.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` (the default) makes argparse read
        ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-k', '--kind', default='all',
                        choices=('all', 'a', 'empty', 'e', 'nonempty', 'n'),
                        help='The type of nodes you want to look for')
    parser.add_argument('-m', '--module', default='pandas',
                        help=('The name of a module or file to search for '
                              'nodes in'))
    parser.add_argument('-i', '--file-filters', default=None, nargs='*',
                        help=("A list of regular expressions describing files "
                              "you want to ignore"))
    parser.add_argument('-d', '--dir-filters', default=None, nargs='*',
                        help=('A list of regular expressions describing'
                              ' directories you want to ignore'))
    parser.add_argument('-p', '--full-path', action='store_true',
                        help=('Display the entire path of the file if this '
                              'is given'))
    parser.add_argument('-t', '--type', nargs='*',
                        help='The types of exceptions to report')
    # with action='append' the given columns are added after 'filename'
    parser.add_argument('-b', '--sort-by', default=['filename'],
                        action='append',
                        help=('A list of columns or index levels you want to '
                              'sort by'))
    parser.add_argument('-I', '--ignore', help='Ignore these kinds of '
                        'exceptions', default=('NotImplementedError',
                                               'StopIteration'),
                        nargs='*')
    parser.add_argument('-v', '--keep-exception-variables',
                        action='store_true',
                        help='Show raises with a variable as the argument')
    parser.add_argument('-r', '--keep-reraises', action='store_true',
                        help='Show reraised exceptions in the output')
    parser.add_argument('-V', '--validate', action='store_true',
                        help=('Only show exceptions whose message has '
                              'mismatched format arguments'))
    parser.add_argument('-P', '--page', action='store_true',
                        help='Show the data in a pager')
    return parser.parse_args(argv)


 def _build_exc_regex(exc_list):
    """Return a regex capturing any text that contains one of ``exc_list``."""
    alternatives = '|'.join(exc_list)
    return '(.*(?:' + alternatives + ').*)'


 def main(args):
    """Scan ``args.module`` for raise statements and print a report.

    ``args`` is the namespace produced by ``parse_args``.  The collected
    records are filtered by ``--ignore``, ``--kind``, the variable/reraise
    switches, ``--validate`` and ``--type``, sorted by ``--sort-by`` and then
    printed or shown in a pager.  The resulting frame is also stored in the
    module-level ``df``.

    Returns 0, or the pager's exit status when ``--page`` is used.

    NOTE(review): ``DataFrame.sort`` and the ``str.match(...).str[0]`` idiom
    belong to the pandas API of the time the script was written -- confirm
    against the installed pandas before relying on them.
    """
    global df
    asserts = get_asserts_from_obj(args.module, args.file_filters,
                                   args.dir_filters, args.full_path)

    if not asserts:
        print("No asserts found in '{0}'".format(args.module))
        return 0

    df = asserts_to_frame(asserts)
    if args.ignore:
        filt = logical_not(df.code.str.contains('|'.join(args.ignore),
                                                case=False).fillna(False))
    else:
        # an empty pattern matches every row and would drop all of them
        filt = df.code.map(lambda _: True)

    atype = args.kind

    msg = 'No'

    if atype.startswith('e'):
        ind = df.msg.isnull()
        msg += ' empty'
    elif atype.startswith('n'):
        ind = df.msg.notnull()
        msg += ' nonempty'
    else:
        ind = slice(None)

    df = df[ind][filt]

    if not args.keep_exception_variables:
        df = df[df.code != 'variable']

    if not args.keep_reraises:
        df = df[df.code != 'reraise']

    if args.validate:
        # keep only the rows whose message failed the argument-count check
        df = df[~df.valid.astype(bool)]
        msg += ' invalid'

    if df.empty:
        print("{0} exceptions matching {1} found in module "
              "'{2}'".format(msg, args.type or 'all exceptions', args.module))
        return 0
    # widen the display so that the longest file name is not truncated
    max_cols = int(df.filename.map(lambda x: len(repr(x))).max())

    with option_context('display.max_colwidth', max_cols,
                        'display.max_seq_items', max_cols):
        if args.type is not None:
            regex = _build_exc_regex(args.type)
            vals = df.code.str.match(regex, re.IGNORECASE)
            df = df[vals.str[0].notnull()]

        if df.empty:
            msg = "{0} {1} found in '{2}'".format(msg, args.type, args.module)
            print(msg)
            return 0

        df = df[filt].sort(args.sort_by)
        if args.page:
            with tempfile.NamedTemporaryFile() as tmpf:
                df.to_string(buf=tmpf)
                # the pager reads the file by name, so the buffered table
                # has to reach the disk before it starts
                tmpf.flush()
                return subprocess.call([os.environ.get('PAGER', 'less'),
                                        tmpf.name])
        else:
            print(df.to_string())
    return 0


 # last report frame built by main(); kept at module level for inspection
 df = None
 if __name__ == '__main__':
    import sys

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        status = main(parse_args())
    # hand main()'s result (0 or the pager's return code) to the shell
    sys.exit(status)
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	from __future__ import print_function

	import re
	import os
	import fnmatch
	import ast
	import argparse
	import inspect
	import tempfile
	import subprocess
	import operator
	import tokenize
	import string
	import warnings

	try:
	    from importlib import import_module
	except ImportError:
	    def import_module(name):
	        """Import the dotted module ``name`` and return that module.

	        Fallback for interpreters without :mod:`importlib`.  A bare
	        ``__import__('a.b')`` returns the top-level package ``a``, so the
	        requested module is fetched from ``sys.modules`` after importing.
	        """
	        import sys

	        __import__(name)
	        return sys.modules[name]


	from numpy import nan as NA, logical_not
	from pandas import DataFrame
	from pandas.core.config import option_context


	# recreate the format spec mini language
	# Recreate the ``str.format`` replacement-field grammar as a regular
	# expression.  ``tokenize.Name`` / ``tokenize.Intnumber`` are the regex
	# source strings exposed by the tokenize module for names and integers.
	# (The alternations use a plain '|'; the escaped '\|' in this copy matched
	# a literal pipe character.)
	identifier = tokenize.Name
	integer = tokenize.Intnumber
	attribute_name = identifier
	arg_name = r'({identifier})|({integer})'.format(integer=integer,
	                                                identifier=identifier)
	# an element index may hold any printable character except the closing ']'
	index_string = '[' + re.escape(string.printable.replace(']', '')) + ']+'
	element_index = r'({integer})|({index_string})'.format(integer=integer,
	                                                       index_string=index_string)
	field_name = r'({0})(\.({1})|\[({2})\])*'.format(arg_name, attribute_name,
	                                                 element_index)
	conversion = 'r|s'

	# The fill is a *single* character, so it has to be a character class; the
	# bare escaped string only matches every printable character in sequence.
	fill = ('[' +
	        re.escape(string.printable.replace('{', '').replace('}', '')) +
	        ']')
	align = '[<>=^]'
	sign = r'[+\- ]'
	width = integer
	precision = integer
	typ = '[bcdeEfFgGnosxX%]'
	format_spec = (r'(({fill})?{align})?({sign})?#?0?({width})?,?'
	               r'(\.({precision}))?({typ})?').format(fill=fill, align=align,
	                                                     sign=sign, width=width,
	                                                     precision=precision,
	                                                     typ=typ)
	# ``conversion`` is an alternation, so it is wrapped in a non-capturing group
	# to keep the leading '!' attached to both alternatives.  Raw strings avoid
	# the invalid '\{' / '\}' escape sequences of the plain literals.
	replacement_field = (r'\{' +
	                     (r'({field_name})(!(?:{conversion}))?'
	                      r'(:({format_spec}))?').format(field_name=field_name,
	                                                     conversion=conversion,
	                                                     format_spec=format_spec) +
	                     r'\}')

	FMT_SPEC_RE = re.compile(replacement_field)


	def get_num_format_spec(s):
	    """Count the arguments the template string ``s`` expects.

	    ``s`` is first scanned for ``str.format`` replacement fields using the
	    module-level ``FMT_SPEC_RE``; the result is the number of *distinct*
	    argument names (``'{0.a} {0.b}'`` needs one argument).  If no field is
	    found, ``s`` is treated as a ``%``-style template and every conversion
	    specifier is counted.

	    Returns 0 when ``s`` contains neither kind of placeholder.
	    """
	    # group 2 of FMT_SPEC_RE is the argument name/index at the head of a field
	    n = len(frozenset(m.group(2) for m in FMT_SPEC_RE.finditer(s)))

	    # either not a format string or nothing in it
	    if not n:
	        rx = (r'%(?:\({identifier}\))?[#0\- +]*(?:\d+)?(?:\.\d+)?[hlL]?'
	              r'[diouxXeEfFgGcrs]').format(identifier=identifier)
	        # '%%' is a literal percent sign, not a conversion; each specifier is
	        # matched on its own so that adjacent ones ('%s%s') are all counted
	        n = len(re.findall(rx, s.replace('%%', '')))
	    return n


	def parse_interp_string(node):
	    """Return the template of a ``'...' % args`` expression node.

	    Only works for non-nested nodes: ``node`` must be an ``ast.BinOp`` using
	    ``%`` whose left operand is a string literal and whose right operand is
	    a tuple, string or dict literal (with string-literal keys).

	    Raises
	    ------
	    AssertionError
	        If ``node`` does not have that shape.  The exception is raised
	        explicitly (rather than via ``assert``) because callers use it for
	        control flow and ``assert`` statements vanish under ``python -O``.
	    """
	    if not isinstance(node, ast.BinOp):
	        raise AssertionError('node is not a binary operator')
	    if not isinstance(node.op, ast.Mod):
	        raise AssertionError('node operator is not %')
	    if not isinstance(node.left, ast.Str):
	        raise AssertionError('lhs is not a str')
	    if not isinstance(node.right, (ast.Tuple, ast.Str, ast.Dict)):
	        raise AssertionError('rhs is not a tuple, string, or dict')
	    if isinstance(node.right, ast.Dict):
	        if not all(isinstance(key, ast.Str) for key in node.right.keys):
	            raise AssertionError('all dict keys must be strings')
	    return node.left.s


	def parse_format_string(node):
	    """Return the template of a ``'...'.format(...)`` call node.

	    Raises
	    ------
	    AssertionError
	        If ``node`` is not a call of the ``format`` method on a string
	        literal.  Raised explicitly so the check survives ``python -O``;
	        callers catch it to fall back to other interpretations.
	    """
	    if not isinstance(node, ast.Call):
	        raise AssertionError('node is not a method call')
	    if not isinstance(node.func, ast.Attribute):
	        raise AssertionError(
	            'func not a method, {0!r}'.format(node.func.__class__.__name__))
	    # other string methods (e.g. ', '.join(...)) are not format templates
	    if node.func.attr != 'format':
	        raise AssertionError('method is not format')
	    if not isinstance(node.func.value, ast.Str):
	        raise AssertionError('object is not a string')
	    return node.func.value.s


	def parse_num_format_string_args(node):
	    """Return how many arguments the call ``node`` passes.

	    Positional and keyword arguments are both counted, since
	    ``'{0} {name}'.format(a, name=b)`` fills two fields.

	    Raises
	    ------
	    AttributeError
	        If ``node`` has no ``args`` attribute (i.e. is not a call); callers
	        rely on this to detect non-call nodes.
	    """
	    return len(node.args) + len(getattr(node, 'keywords', ()))


	def parse_num_interp_string_args(node):
	    """Return the number of elements on the right-hand side of ``node``.

	    ``node`` is expected to be a binary operation whose right operand has an
	    ``elts`` sequence (a tuple or list literal).  An ``AttributeError``
	    escapes when either attribute is missing.
	    """
	    rhs = node.right
	    return len(rhs.elts)


	def try_parse_raise_arg(node):
	    """Extract the message text from the first argument of a raise.

	    The interpretations are tried in order: a node exposing ``.s`` (string
	    literal), a ``%``-interpolated string, a ``str.format`` call.  When none
	    applies the node itself is returned unchanged.
	    """
	    try:
	        # string
	        return node.s
	    except AttributeError:
	        pass

	    # interpolated string first, then format spec string
	    for parser in (parse_interp_string, parse_format_string):
	        try:
	            return parser(node)
	        except AssertionError:
	            continue
	    return node


	def check_valid_msg(node):
	    """Check that a message template receives as many arguments as it needs.

	    ``node`` is the first argument of an exception constructor.  The number
	    of supplied arguments is compared with the number of placeholders found
	    by ``get_num_format_spec``.

	    Returns ``True`` when the counts agree *or* when the template cannot be
	    determined statically (e.g. ``fmt % (a, b)`` with a non-literal ``fmt``),
	    so that only provable mismatches are reported as invalid.
	    """
	    n, s = 0, ''

	    try:
	        n = parse_num_interp_string_args(node)
	    except AttributeError:
	        try:
	            n = parse_num_format_string_args(node)
	        except AttributeError:
	            # neither an interpolation nor a call: nothing to compare
	            pass
	        else:
	            try:
	                s = parse_format_string(node)
	            except AssertionError:
	                return True
	    else:
	        try:
	            s = parse_interp_string(node)
	        except AssertionError:
	            # the rhs has elements but the lhs is not a literal template;
	            # left uncaught this would abort the whole scan
	            return True
	    return n == get_num_format_spec(s)


	class RaiseVisitor(ast.NodeVisitor):
	    """Collect a description of every ``raise`` statement in a module.

	    Each raise is stored in ``asserts`` under the key
	    ``(pyfile, lineno, col_offset)``.  The value is a list that starts with
	    the exception's name (or ``'variable'`` / ``'reraise'``) and, for
	    constructor calls, continues with either ``'no arguments'`` or the
	    message and its validity flag.
	    """

	    def __init__(self, pyfile, asserts):
	        self.pyfile = pyfile
	        self.asserts = asserts

	    @staticmethod
	    def _dotted_name(node):
	        """Return ``'a'`` / ``'a.b'`` for simple name nodes, else ``None``."""
	        if isinstance(node, ast.Name):
	            return node.id
	        if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name):
	            return '{0}.{1}'.format(node.value.id, node.attr)
	        return None

	    def _record_constructor(self, k, call):
	        """Record ``raise Name(...)`` / ``raise mod.Name(...)`` under ``k``."""
	        name = self._dotted_name(call.func)
	        if name is None:
	            # callee too complex to name; labelled like a raised variable
	            self.asserts[k] = ['variable']
	            return

	        record = self.asserts[k] = [name]
	        if not call.args:
	            # no arguments (e.g., raise Exception())
	            record.append('no arguments')
	            return

	        arg = call.args[0]
	        try:
	            v = try_parse_raise_arg(arg)
	            try:
	                is_valid = check_valid_msg(arg)
	            except AssertionError:
	                # template not statically analysable: do not flag it
	                is_valid = True
	        except AttributeError:
	            # unexpected node shapes are labelled rather than aborting
	            record.append('no constructor')
	        else:
	            record.extend([v, is_valid])

	    def visit_Raise(self, node):
	        k = self.pyfile, node.lineno, node.col_offset

	        # Python 2 names the raised expression ``type``, Python 3 ``exc``
	        exc = getattr(node, 'type', None)
	        if exc is None:
	            exc = getattr(node, 'exc', None)

	        if exc is None:
	            # bare ``raise``
	            self.asserts[k] = ['reraise']
	        elif isinstance(exc, ast.Attribute):
	            # raise module.Error (no call)
	            name = self._dotted_name(exc)
	            self.asserts[k] = [NA if name is None else name]
	        elif isinstance(exc, ast.Call):
	            self._record_constructor(k, exc)
	        else:
	            # a plain name or any other expression
	            self.asserts[k] = ['variable']


	def parse_file(pyfile, asserts):
	    """Parse ``pyfile`` and record its raise statements in ``asserts``.

	    The file is read as text and handed to ``ast.parse``; a file that does
	    not parse (``SyntaxError``) is skipped silently.  ``asserts`` is updated
	    in place by a ``RaiseVisitor``; nothing is returned.
	    """
	    with open(pyfile, 'r') as handle:
	        text = handle.read()

	    try:
	        tree = ast.parse(text, pyfile, 'exec')
	    except SyntaxError:
	        return None

	    RaiseVisitor(pyfile, asserts).visit(tree)


	def path_matches(path, pattern):
	    """Return True if the regex ``pattern`` matches anywhere in ``path``."""
	    found = re.compile(pattern).search(path)
	    return found is not None


	def regex_or(*patterns):
	    """Combine ``patterns`` into one grouped alternation regex.

	    The patterns are joined with a plain ``|``; the escaped ``\\|`` in this
	    copy matched a literal pipe.  With no patterns a regex that can never
	    match is returned; ``'()'`` would match every string and so filter
	    everything out.
	    """
	    if not patterns:
	        return '(?!)'
	    return '({0})'.format('|'.join(patterns))


	def get_asserts_from_path(path, file_filters, dir_filters):
	    """Collect raise statements from a file or from a directory tree.

	    Parameters
	    ----------
	    path : str
	        A Python file, or a directory that is walked recursively.
	    file_filters : sequence of str or None
	        Regexes; file names matching any of them are skipped.  ``None``
	        selects the default (``__init__.py``); an empty sequence disables
	        file filtering.
	    dir_filters : sequence of str or None
	        Regexes; directories whose path matches any of them are skipped.
	        ``None`` selects the defaults (build, .tox and egg directories); an
	        empty sequence disables directory filtering.

	    Returns
	    -------
	    dict
	        The mapping filled in by ``parse_file``.
	    """
	    if file_filters is None:
	        file_filters = r'__init__\.py$',
	    if dir_filters is None:
	        # the egg pattern had lost its '*' quantifiers in this copy
	        dir_filters = 'build', r'\.tox', r'.*\.egg.*'

	    # an empty alternation would match everything, so treat it as "no filter"
	    file_rx = regex_or(*file_filters) if file_filters else None
	    dir_rx = regex_or(*dir_filters) if dir_filters else None

	    asserts = {}

	    if os.path.isfile(path):
	        parse_file(path, asserts)
	        return asserts

	    for root, _, filenames in os.walk(path):
	        if dir_rx is not None and path_matches(root, dir_rx):
	            continue

	        for fn in fnmatch.filter(filenames, '*.py'):
	            if file_rx is not None and path_matches(fn, file_rx):
	                continue
	            parse_file(os.path.join(root, fn), asserts)

	    return asserts


	def obj_path_from_string(dotted_name, full_path):
	    """Return the directory containing the object named by ``dotted_name``.

	    ``dotted_name`` is imported as a module if possible; otherwise its last
	    component is looked up as an attribute of the module named by the rest.

	    Parameters
	    ----------
	    dotted_name : str
	        e.g. ``'pandas'``, ``'pandas.core.frame'`` or ``'pandas.DataFrame'``.
	    full_path : bool
	        Use ``inspect.getabsfile`` instead of ``inspect.getfile``.

	    Raises
	    ------
	    ImportError
	        If no module can be imported for the name.
	    AttributeError
	        If the module exists but lacks the requested attribute.
	    """
	    try:
	        obj = import_module(dotted_name)
	    except ImportError:
	        module_name, _, obj_name = dotted_name.rpartition('.')
	        if not module_name:
	            # nothing left to import; importing '' is an error of its own
	            raise ImportError("'{0}' is not a valid Python "
	                              "module".format(dotted_name))

	        try:
	            module = import_module(module_name)
	        except ImportError:
	            raise ImportError("'{0}' is not a valid Python "
	                              "module".format(module_name))

	        try:
	            obj = getattr(module, obj_name)
	        except AttributeError:
	            raise AttributeError("module '{0}' has no attribute "
	                                 "'{1}'".format(module_name, obj_name))

	    path = inspect.getabsfile(obj) if full_path else inspect.getfile(obj)

	    if path.endswith('.pyc'):
	        # drop only the trailing 'c'; str.strip('c') also eats leading ones
	        path = path[:-1]
	    return os.path.dirname(path)


	def get_asserts_from_obj(dotted_name, file_filters, dir_filters, full_path):
	    """Resolve ``dotted_name`` to a directory and scan it.

	    The directory comes from ``obj_path_from_string``; the scan and its
	    return value are those of ``get_asserts_from_path``.
	    """
	    location = obj_path_from_string(dotted_name, full_path)
	    found = get_asserts_from_path(location, file_filters, dir_filters)
	    return found


	def asserts_to_frame(asserts):
	    """Turn the collected raise records into a DataFrame.

	    Parameters
	    ----------
	    asserts : dict
	        Maps ``(filename, line, col)`` to a list of up to three values
	        (code, message, validity flag).

	    Returns
	    -------
	    DataFrame
	        One row per record with the columns ``filename``, ``line``, ``col``,
	        ``code``, ``msg`` and ``valid``; missing trailing values are NaN.
	        The pandas installation directory is stripped from the front of
	        ``filename``.
	    """
	    import pandas

	    columns = 'filename', 'line', 'col', 'code', 'msg', 'valid'
	    rows = []
	    for key, info in asserts.items():
	        row = list(key) + list(info)
	        # records differ in length; pad so that every row fills all columns
	        row.extend([NA] * (len(columns) - len(row)))
	        rows.append(row)

	    df = DataFrame(rows, columns=columns).fillna(NA)
	    # ``convert_objects`` exists only in old pandas releases
	    convert = (getattr(df, 'convert_objects', None) or
	               getattr(df, 'infer_objects', None))
	    if convert is not None:
	        df = convert()

	    pandas_dir = os.path.dirname(os.path.abspath(inspect.getfile(pandas)))
	    prefix = pandas_dir + os.sep
	    # plain prefix removal: the path is not a regular expression
	    df['filename'] = df.filename.map(
	        lambda p: p[len(prefix):] if p.startswith(prefix) else p)
	    return df


	def parse_args(argv=None):
	    """Parse the command line.

	    Parameters
	    ----------
	    argv : list of str, optional
	        Arguments to parse; ``None`` (the default) makes argparse read
	        ``sys.argv[1:]``.

	    Returns
	    -------
	    argparse.Namespace
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument('-k', '--kind', default='all',
	                        choices=('all', 'a', 'empty', 'e', 'nonempty', 'n'),
	                        help='The type of nodes you want to look for')
	    parser.add_argument('-m', '--module', default='pandas',
	                        help=('The name of a module or file to search for '
	                              'nodes in'))
	    parser.add_argument('-i', '--file-filters', default=None, nargs='*',
	                        help=("A list of regular expressions describing files "
	                              "you want to ignore"))
	    parser.add_argument('-d', '--dir-filters', default=None, nargs='*',
	                        help=('A list of regular expressions describing'
	                              ' directories you want to ignore'))
	    parser.add_argument('-p', '--full-path', action='store_true',
	                        help=('Display the entire path of the file if this '
	                              'is given'))
	    parser.add_argument('-t', '--type', nargs='*',
	                        help='The types of exceptions to report')
	    # with action='append' the given columns are added after 'filename'
	    parser.add_argument('-b', '--sort-by', default=['filename'],
	                        action='append',
	                        help=('A list of columns or index levels you want to '
	                              'sort by'))
	    parser.add_argument('-I', '--ignore', help='Ignore these kinds of '
	                        'exceptions', default=('NotImplementedError',
	                                               'StopIteration'),
	                        nargs='*')
	    parser.add_argument('-v', '--keep-exception-variables',
	                        action='store_true',
	                        help='Show raises with a variable as the argument')
	    parser.add_argument('-r', '--keep-reraises', action='store_true',
	                        help='Show reraised exceptions in the output')
	    parser.add_argument('-V', '--validate', action='store_true',
	                        help=('Only show exceptions whose message has '
	                              'mismatched format arguments'))
	    parser.add_argument('-P', '--page', action='store_true',
	                        help='Show the data in a pager')
	    return parser.parse_args(argv)


	def _build_exc_regex(exc_list):
	    """Return a regex capturing any text that contains one of ``exc_list``.

	    The surrounding ``.*`` and the plain ``|`` separator are restored here;
	    this copy had lost the ``*`` quantifiers and escaped the pipe.
	    """
	    alternatives = '|'.join(exc_list)
	    return '(.*(?:' + alternatives + ').*)'


	def main(args):
	    """Scan ``args.module`` for raise statements and print a report.

	    ``args`` is the namespace produced by ``parse_args``.  The collected
	    records are filtered by ``--ignore``, ``--kind``, the variable/reraise
	    switches, ``--validate`` and ``--type``, sorted by ``--sort-by`` and then
	    printed or shown in a pager.  The resulting frame is also stored in the
	    module-level ``df``.

	    Returns 0, or the pager's exit status when ``--page`` is used.

	    NOTE(review): ``DataFrame.sort`` and the ``str.match(...).str[0]`` idiom
	    belong to the pandas API of the time the script was written -- confirm
	    against the installed pandas before relying on them.
	    """
	    global df
	    asserts = get_asserts_from_obj(args.module, args.file_filters,
	                                   args.dir_filters, args.full_path)

	    if not asserts:
	        print("No asserts found in '{0}'".format(args.module))
	        return 0

	    df = asserts_to_frame(asserts)
	    if args.ignore:
	        # plain '|' alternation; the escaped form matched a literal pipe
	        filt = logical_not(df.code.str.contains('|'.join(args.ignore),
	                                                case=False).fillna(False))
	    else:
	        # an empty pattern matches every row and would drop all of them
	        filt = df.code.map(lambda _: True)

	    atype = args.kind

	    msg = 'No'

	    if atype.startswith('e'):
	        ind = df.msg.isnull()
	        msg += ' empty'
	    elif atype.startswith('n'):
	        ind = df.msg.notnull()
	        msg += ' nonempty'
	    else:
	        ind = slice(None)

	    df = df[ind][filt]

	    if not args.keep_exception_variables:
	        df = df[df.code != 'variable']

	    if not args.keep_reraises:
	        df = df[df.code != 'reraise']

	    if args.validate:
	        # keep only the rows whose message failed the argument-count check
	        df = df[~df.valid.astype(bool)]
	        msg += ' invalid'

	    if df.empty:
	        print("{0} exceptions matching {1} found in module "
	              "'{2}'".format(msg, args.type or 'all exceptions', args.module))
	        return 0
	    # widen the display so that the longest file name is not truncated
	    max_cols = int(df.filename.map(lambda x: len(repr(x))).max())

	    with option_context('display.max_colwidth', max_cols,
	                        'display.max_seq_items', max_cols):
	        if args.type is not None:
	            regex = _build_exc_regex(args.type)
	            vals = df.code.str.match(regex, re.IGNORECASE)
	            df = df[vals.str[0].notnull()]

	        if df.empty:
	            msg = "{0} {1} found in '{2}'".format(msg, args.type, args.module)
	            print(msg)
	            return 0

	        df = df[filt].sort(args.sort_by)
	        if args.page:
	            with tempfile.NamedTemporaryFile() as tmpf:
	                df.to_string(buf=tmpf)
	                # the pager reads the file by name, so the buffered table
	                # has to reach the disk before it starts
	                tmpf.flush()
	                return subprocess.call([os.environ.get('PAGER', 'less'),
	                                        tmpf.name])
	        else:
	            print(df.to_string())
	    return 0


	# last report frame built by main(); kept at module level for inspection
	df = None
	if __name__ == '__main__':
	    import sys

	    with warnings.catch_warnings():
	        warnings.simplefilter('ignore', UserWarning)
	        status = main(parse_args())
	    # hand main()'s result (0 or the pager's return code) to the shell
	    sys.exit(status)