d1manson · February 15, 2016 13:05
diff --git a/row_indexer.py b/row_indexer.py
 class RowIndexer(object):
     """Attribute-style view of a single row of a pandas DataFrame.

     This class is syntactic sugar for performing a slow iteration over
     rows in a pandas DataFrame, where each row needs to have some (slow)
     computation performed and the results assigned to multiple columns
     in the row. Here is a bare-bones example::

         df = ...  # create a DataFrame
         theRow = RowIndexer(df, 3)  # here we explicitly choose row 3
         theRow.something = "hello"
         theRow.other_thing = 99.2
         # df.something[3]:  "hello"
         # df.other_thing[3]: 99.2

     The intended use case is within a generator, which yields a new
     RowIndexer each time.

     If the dataframe's index has names, you can access those by name on
     the indexer (e.g. if ``'day'`` is one of the levels, then ``theRow.day``
     will return the value of that level of the index).
     """

     class _MissingKeyError(KeyError, AttributeError):
         """Raised when a name is neither a column nor an index level.

         It is a ``KeyError`` so existing ``except KeyError`` handlers keep
         working, and an ``AttributeError`` so that ``hasattr`` and
         ``getattr(row, name, default)`` behave normally.
         """

     def __init__(self, obj, idx, **kwargs):
         """Bind the indexer to row ``idx`` of DataFrame ``obj``.

         Any extra ``kwargs`` are stored as plain instance attributes. They
         are effectively read-only: reading finds them before any column is
         consulted, while assignment through the indexer always targets the
         DataFrame and never overwrites them.
         """
         # __setattr__ is overridden to write into the DataFrame, so the
         # indexer's own state has to be stored through object.__setattr__.
         object.__setattr__(self, '_obj', obj)
         object.__setattr__(self, '_idx', idx)
         object.__setattr__(self, '_with_column_sub', None)

         # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
         for name, value in kwargs.items():
             object.__setattr__(self, name, value)

     def _sub_key(self, key):
         """Return the active column_sub tuple with '[attr]' replaced by key."""
         template = self._with_column_sub
         spot = template.index('[attr]')
         return template[:spot] + (key,) + template[spot + 1:]

     def _cell_value(self, key):
         """Read the cell at (this row, column ``key``) of the DataFrame."""
         # DataFrame.get_value was removed in pandas 1.0; use it when the
         # installed pandas still has it, otherwise fall back to ``.at``.
         # The method is looked up on the type so that a column that happens
         # to be called 'get_value' cannot shadow it.
         # NOTE(review): the ``.at`` fallback has not been checked against
         # partially-specified keys on multi-level columns.
         getter = getattr(type(self._obj), 'get_value', None)
         if getter is not None:
             return getter(self._obj, self._idx, key)
         return self._obj.at[self._idx, key]

     def _store_cell(self, key, value):
         """Write ``value`` to the cell at (this row, column ``key``)."""
         # Same compatibility shim as _cell_value, for set_value.
         setter = getattr(type(self._obj), 'set_value', None)
         if setter is not None:
             setter(self._obj, self._idx, key, value)
         else:
             self._obj.at[self._idx, key] = value

     def _index_level_value(self, name):
         """Return this row's value for the index level called ``name``.

         Raises ValueError when no index level has that name.
         """
         position = list(self._obj.index.names).index(name)
         if self._obj.index.nlevels > 1:
             return self._idx[position]
         # With a single-level index the row label itself is the value;
         # subscripting it would fail (or slice a string label).
         return self._idx

     def __getattr__(self, key):
         """Look ``key`` up as a column of the row, then as an index level.

         Several spellings of the key are tried in order: the key as given,
         the key substituted into the active ``column_sub`` tuple (if any),
         and the key padded with ``''`` to the number of column levels.

         Raises an error that is both a KeyError and an AttributeError when
         none of them matches.
         """
         # __getattr__ only runs after normal lookup failed.  If one of our
         # own state attributes is genuinely absent (instance created without
         # __init__), fail fast instead of recursing through self._obj.
         if (key in ('_obj', '_idx', '_with_column_sub')
                 and key not in self.__dict__):
             raise self._MissingKeyError(key)

         # expand key into tuple if multi indexed columns exist or
         # column_sub is in use
         candidates = [key]
         if self._with_column_sub is not None:
             candidates.append(self._sub_key(key))
         n_levels = self._obj.columns.nlevels
         if n_levels > 1 and not isinstance(key, tuple):
             candidates.append((key,) + ('',) * (n_levels - 1))

         # for each candidate: try column indexing first, then index names
         for candidate in candidates:
             for lookup in (self._cell_value, self._index_level_value):
                 try:
                     return lookup(candidate)
                 except Exception:
                     # Best-effort probe: a failed lookup (whatever its
                     # exception type) just means "try the next spelling".
                     # Unlike a bare except this lets KeyboardInterrupt and
                     # SystemExit propagate.
                     continue

         # str() each candidate: tuple keys cannot be joined directly.
         raise self._MissingKeyError(
             "Could not find key '"
             + "' or '".join(str(c) for c in candidates)
             + "' in column or index names.")

     def __getitem__(self, key):
         """``row[key]`` is equivalent to ``row.key``."""
         return self.__getattr__(key)

     def __setattr__(self, key, value):
         """Assign ``value`` to column ``key`` of this row in the DataFrame.

         The key is first expanded to a tuple when a ``column_sub`` is active
         or when the DataFrame has multi-level columns.
         """
         if self._with_column_sub is not None:
             key = self._sub_key(key)
         elif self._obj.columns.nlevels > 1:
             key = (key,) + ('',) * (self._obj.columns.nlevels - 1)
         try:
             self._store_cell(key, value)
         except TypeError:
             # This exception seems to occur when value is iterable and the
             # key does not yet exist.  To overcome this, we convert the
             # column to object dtype and try again.
             column = self._obj[key]
             # Series.get_values was removed in pandas 1.0; fall back to
             # .values when it is not available.
             to_array = getattr(type(column), 'get_values', None)
             raw = to_array(column) if to_array is not None else column.values
             self._obj[key] = raw.astype(object)
             self._store_cell(key, value)

     def _update(self, dict_):
         """Convenience method, equivalent to looping over the items in
         ``dict_`` and setting them as attributes on this row.
         """
         for name, value in dict_.items():
             self.__setattr__(name, value)

     @contextmanager
     def column_sub(self, *args, **kwargs):
         """Temporarily address columns through a multi-level key template.

         Example::

             row.usa = 21
             with row.column_sub('europe'):
                 row.uk = 34  # sets row['europe', 'uk']
             with row.column_sub('[attr]', 'male'):
                 row.mean_age = 21  # sets row['mean_age', 'male']

         Applies to both getting and setting.  ``'[attr]'`` marks where the
         attribute name goes; when omitted it is appended as the last level.

         If the required number of index levels is greater than that already
         in use, the existing column names will be padded as specified by
         kwarg ``padding=''``, with the padding added "below" existing names;
         this matches what reset index seems to do.  (The padding itself is
         done by the module-level helper ``_pad_columns_to_level``.)

         You may want to do ``df.sort_index(axis=1, inplace=True)`` when you
         are finished.

         Raises ValueError if no sub label is given, if ``'[attr]'`` appears
         more than once, or if any kwarg other than ``padding`` is passed.
         """
         n_spots = sum(1 for a in args if a == '[attr]')
         if not args or n_spots > 1 or n_spots == len(args):
             raise ValueError("you must specify at most one '[attr]' spot, and "
                              "at leat one sub label.")

         # 'padding' is the only supported keyword argument.
         padding = kwargs.pop('padding', "")
         if kwargs:
             raise ValueError("unrecognised kwargs")

         # establish total levels required
         if '[attr]' not in args:
             args = args + ('[attr]',)
         target_nlevels = len(args)

         _pad_columns_to_level(self._obj, target_nlevels, padding)

         # Remember the enclosing substitution so nested uses restore it on
         # exit instead of dropping back to "no substitution".
         previous = self._with_column_sub
         object.__setattr__(self, '_with_column_sub', tuple(args))
         try:
             yield
         finally:
             object.__setattr__(self, '_with_column_sub', previous)

     def __dir__(self):
         """List column names, instance attributes and class attributes.

         Only string names are returned (for tuple column keys, the first
         element), because ``dir()`` sorts the result and mixed types are not
         sortable on Python 3.
         """
         text_types = (str, type(u''))
         names = set()
         for column in self._obj.columns:
             label = column[0] if isinstance(column, tuple) and column else column
             if isinstance(label, text_types):
                 names.add(label)
         names.update(self.__dict__)
         names.update(dir(type(self)))
         return list(names)

     def __repr__(self):
         """Show the row's values, plus the active column_sub if there is one."""
         text = repr(self._obj.loc[self._idx])
         if self._with_column_sub:
             text += "\nwith sub: " + str(self._with_column_sub)
         return text
	class RowIndexer(object):
	    """Attribute-style view of a single row of a pandas DataFrame.

	    This class is syntactic sugar for performing a slow iteration over
	    rows in a pandas DataFrame, where each row needs to have some (slow)
	    computation performed and the results assigned to multiple columns
	    in the row. Here is a bare-bones example::

	        df = ...  # create a DataFrame
	        theRow = RowIndexer(df, 3)  # here we explicitly choose row 3
	        theRow.something = "hello"
	        theRow.other_thing = 99.2
	        # df.something[3]:  "hello"
	        # df.other_thing[3]: 99.2

	    The intended use case is within a generator, which yields a new
	    RowIndexer each time.

	    If the dataframe's index has names, you can access those by name on
	    the indexer (e.g. if ``'day'`` is one of the levels, then ``theRow.day``
	    will return the value of that level of the index).
	    """

	    class _MissingKeyError(KeyError, AttributeError):
	        """Raised when a name is neither a column nor an index level.

	        It is a ``KeyError`` so existing ``except KeyError`` handlers keep
	        working, and an ``AttributeError`` so that ``hasattr`` and
	        ``getattr(row, name, default)`` behave normally.
	        """

	    def __init__(self, obj, idx, **kwargs):
	        """Bind the indexer to row ``idx`` of DataFrame ``obj``.

	        Any extra ``kwargs`` are stored as plain instance attributes. They
	        are effectively read-only: reading finds them before any column is
	        consulted, while assignment through the indexer always targets the
	        DataFrame and never overwrites them.
	        """
	        # __setattr__ is overridden to write into the DataFrame, so the
	        # indexer's own state has to be stored through object.__setattr__.
	        object.__setattr__(self, '_obj', obj)
	        object.__setattr__(self, '_idx', idx)
	        object.__setattr__(self, '_with_column_sub', None)

	        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
	        for name, value in kwargs.items():
	            object.__setattr__(self, name, value)

	    def _sub_key(self, key):
	        """Return the active column_sub tuple with '[attr]' replaced by key."""
	        template = self._with_column_sub
	        spot = template.index('[attr]')
	        return template[:spot] + (key,) + template[spot + 1:]

	    def _cell_value(self, key):
	        """Read the cell at (this row, column ``key``) of the DataFrame."""
	        # DataFrame.get_value was removed in pandas 1.0; use it when the
	        # installed pandas still has it, otherwise fall back to ``.at``.
	        # The method is looked up on the type so that a column that happens
	        # to be called 'get_value' cannot shadow it.
	        # NOTE(review): the ``.at`` fallback has not been checked against
	        # partially-specified keys on multi-level columns.
	        getter = getattr(type(self._obj), 'get_value', None)
	        if getter is not None:
	            return getter(self._obj, self._idx, key)
	        return self._obj.at[self._idx, key]

	    def _store_cell(self, key, value):
	        """Write ``value`` to the cell at (this row, column ``key``)."""
	        # Same compatibility shim as _cell_value, for set_value.
	        setter = getattr(type(self._obj), 'set_value', None)
	        if setter is not None:
	            setter(self._obj, self._idx, key, value)
	        else:
	            self._obj.at[self._idx, key] = value

	    def _index_level_value(self, name):
	        """Return this row's value for the index level called ``name``.

	        Raises ValueError when no index level has that name.
	        """
	        position = list(self._obj.index.names).index(name)
	        if self._obj.index.nlevels > 1:
	            return self._idx[position]
	        # With a single-level index the row label itself is the value;
	        # subscripting it would fail (or slice a string label).
	        return self._idx

	    def __getattr__(self, key):
	        """Look ``key`` up as a column of the row, then as an index level.

	        Several spellings of the key are tried in order: the key as given,
	        the key substituted into the active ``column_sub`` tuple (if any),
	        and the key padded with ``''`` to the number of column levels.

	        Raises an error that is both a KeyError and an AttributeError when
	        none of them matches.
	        """
	        # __getattr__ only runs after normal lookup failed.  If one of our
	        # own state attributes is genuinely absent (instance created without
	        # __init__), fail fast instead of recursing through self._obj.
	        if (key in ('_obj', '_idx', '_with_column_sub')
	                and key not in self.__dict__):
	            raise self._MissingKeyError(key)

	        # expand key into tuple if multi indexed columns exist or
	        # column_sub is in use
	        candidates = [key]
	        if self._with_column_sub is not None:
	            candidates.append(self._sub_key(key))
	        n_levels = self._obj.columns.nlevels
	        if n_levels > 1 and not isinstance(key, tuple):
	            candidates.append((key,) + ('',) * (n_levels - 1))

	        # for each candidate: try column indexing first, then index names
	        for candidate in candidates:
	            for lookup in (self._cell_value, self._index_level_value):
	                try:
	                    return lookup(candidate)
	                except Exception:
	                    # Best-effort probe: a failed lookup (whatever its
	                    # exception type) just means "try the next spelling".
	                    # Unlike a bare except this lets KeyboardInterrupt and
	                    # SystemExit propagate.
	                    continue

	        # str() each candidate: tuple keys cannot be joined directly.
	        raise self._MissingKeyError(
	            "Could not find key '"
	            + "' or '".join(str(c) for c in candidates)
	            + "' in column or index names.")

	    def __getitem__(self, key):
	        """``row[key]`` is equivalent to ``row.key``."""
	        return self.__getattr__(key)

	    def __setattr__(self, key, value):
	        """Assign ``value`` to column ``key`` of this row in the DataFrame.

	        The key is first expanded to a tuple when a ``column_sub`` is active
	        or when the DataFrame has multi-level columns.
	        """
	        if self._with_column_sub is not None:
	            key = self._sub_key(key)
	        elif self._obj.columns.nlevels > 1:
	            key = (key,) + ('',) * (self._obj.columns.nlevels - 1)
	        try:
	            self._store_cell(key, value)
	        except TypeError:
	            # This exception seems to occur when value is iterable and the
	            # key does not yet exist.  To overcome this, we convert the
	            # column to object dtype and try again.
	            column = self._obj[key]
	            # Series.get_values was removed in pandas 1.0; fall back to
	            # .values when it is not available.
	            to_array = getattr(type(column), 'get_values', None)
	            raw = to_array(column) if to_array is not None else column.values
	            self._obj[key] = raw.astype(object)
	            self._store_cell(key, value)

	    def _update(self, dict_):
	        """Convenience method, equivalent to looping over the items in
	        ``dict_`` and setting them as attributes on this row.
	        """
	        for name, value in dict_.items():
	            self.__setattr__(name, value)

	    @contextmanager
	    def column_sub(self, *args, **kwargs):
	        """Temporarily address columns through a multi-level key template.

	        Example::

	            row.usa = 21
	            with row.column_sub('europe'):
	                row.uk = 34  # sets row['europe', 'uk']
	            with row.column_sub('[attr]', 'male'):
	                row.mean_age = 21  # sets row['mean_age', 'male']

	        Applies to both getting and setting.  ``'[attr]'`` marks where the
	        attribute name goes; when omitted it is appended as the last level.

	        If the required number of index levels is greater than that already
	        in use, the existing column names will be padded as specified by
	        kwarg ``padding=''``, with the padding added "below" existing names;
	        this matches what reset index seems to do.  (The padding itself is
	        done by the module-level helper ``_pad_columns_to_level``.)

	        You may want to do ``df.sort_index(axis=1, inplace=True)`` when you
	        are finished.

	        Raises ValueError if no sub label is given, if ``'[attr]'`` appears
	        more than once, or if any kwarg other than ``padding`` is passed.
	        """
	        n_spots = sum(1 for a in args if a == '[attr]')
	        if not args or n_spots > 1 or n_spots == len(args):
	            raise ValueError("you must specify at most one '[attr]' spot, and "
	                             "at leat one sub label.")

	        # 'padding' is the only supported keyword argument.
	        padding = kwargs.pop('padding', "")
	        if kwargs:
	            raise ValueError("unrecognised kwargs")

	        # establish total levels required
	        if '[attr]' not in args:
	            args = args + ('[attr]',)
	        target_nlevels = len(args)

	        _pad_columns_to_level(self._obj, target_nlevels, padding)

	        # Remember the enclosing substitution so nested uses restore it on
	        # exit instead of dropping back to "no substitution".
	        previous = self._with_column_sub
	        object.__setattr__(self, '_with_column_sub', tuple(args))
	        try:
	            yield
	        finally:
	            object.__setattr__(self, '_with_column_sub', previous)

	    def __dir__(self):
	        """List column names, instance attributes and class attributes.

	        Only string names are returned (for tuple column keys, the first
	        element), because ``dir()`` sorts the result and mixed types are not
	        sortable on Python 3.
	        """
	        text_types = (str, type(u''))
	        names = set()
	        for column in self._obj.columns:
	            label = column[0] if isinstance(column, tuple) and column else column
	            if isinstance(label, text_types):
	                names.add(label)
	        names.update(self.__dict__)
	        names.update(dir(type(self)))
	        return list(names)

	    def __repr__(self):
	        """Show the row's values, plus the active column_sub if there is one."""
	        text = repr(self._obj.loc[self._idx])
	        if self._with_column_sub:
	            text += "\nwith sub: " + str(self._with_column_sub)
	        return text