echasnovski · June 17, 2020 10:18
diff --git a/python-snippets-from-r.py b/python-snippets-from-r.py
 import numpy as np
 import pandas as pd


 def nest(df, cols, nest_name="data", keep=False):
     """Collapse every group of rows into one row holding a sub-data-frame

     Parameters
     ----------
     df : Data frame.
     cols : List of column names.
         Columns that define the groups.
     nest_name : String, optional.
         Name of the created list-column of data frames, by default "data".
     keep : Boolean, optional.
         Whether the grouping columns also stay inside the nested data
         frames, by default `False`.

     Returns
     -------
     res : Data frame with one row per group.
         Holds the grouping columns followed by the list-column `nest_name`.
     """
     grouped = df.groupby(cols)
     keys = list(grouped.groups.keys())

     # Fetch the rows of every group first, strip grouping columns afterwards
     pieces = [grouped.get_group(key) for key in keys]
     if not keep:
         pieces = [piece.drop(cols, axis=1) for piece in pieces]

     res = pd.DataFrame(keys, columns=cols)
     res[nest_name] = pieces
     return res


 def unnest(df, col):
     """Expand a list-column of data frames back into ordinary rows

     Parameters
     ----------
     df : Data frame with list-column of data frames.
         Preferably output of `nest()`.
     col : String.
         Name of the column to unnest.

     Returns
     -------
     res : Data frame.
         Every row of `df` (without `col`) is repeated once per row of its
         nested data frame; the columns of the nested data frames follow.
     """
     nested = df[col]

     # Inner part: all nested data frames stacked on top of each other
     inner = pd.concat(nested.values, axis=0).reset_index(drop=True)

     # Outer part: all columns except `col`, with each row repeated as many
     # times as its nested data frame has rows
     repeats = [len(frame) for frame in nested]
     rows = np.arange(len(df)).repeat(repeats)
     other_cols = np.flatnonzero(~df.columns.isin([col]))
     outer = df.iloc[rows, other_cols].reset_index(drop=True)

     return pd.concat([outer, inner], axis=1)

  
 def complete(df, cols, fill_val=None):
     """Complete a data frame with missing combinations of columns

     Parameters
     ----------
     df : Data frame.
     cols : List of strings or dictionary.
         If list, should contain column names, combinations of which should be
         added. If dictionary, keys represent column names and values - unique
         values of corresponding columns (if `None`, they will be inferred from
         columns with names stored in keys).
     fill_val : Value appropriate for `value` in `fillna()` DataFrame method.
         Values to use in other columns inside added combinations. It is
         applied with `fillna()` to the whole result, so missing values that
         were already present in `df` are filled as well.

     Returns
     -------
     res : Data frame.
         One row per combination (joined with matching rows of `df`), with
         columns in the same order as in `df`.

     Raises
     ------
     ValueError
         If `cols` is neither a list nor a dictionary.

     Examples
     --------
     >>> df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [False, True]})
     # Basic completion from values present in data frame
     >>> df1 = complete(df, ["a", "b"])
     # Order of columns matters for row ordering
     >>> df2 = complete(df, ["b", "a"])
     # Using dict to mimic list input
     >>> df3 = complete(df, {"a": None, "b": None})
     # Extended functionality with dict `cols`
     >>> df4 = complete(df, {"a": [3, 2, 1], "b": ["x", "y", "z"]})
     # Using `fill_val`
     >>> df5 = complete(df, {"b": ["x", "y", "z"]}, fill_val={"a": 0, "c": False})
     """

     def col_sort_unique(col, val=None):
         # Explicitly supplied values are used as is (order included);
         # otherwise take sorted unique values present in the column
         if val is None:
             val = np.sort(df[col].unique())
         return val

     if isinstance(cols, list):
         col_names = cols
         unique_col_vals = [col_sort_unique(col) for col in cols]
     elif isinstance(cols, dict):
         col_names = list(cols)
         unique_col_vals = [col_sort_unique(col, val) for col, val in cols.items()]
     else:
         # Fail right away instead of hitting an unrelated error further down
         raise ValueError("`cols` should be list or dict.")

     # All combinations of the requested values, one row per combination
     comb = pd.MultiIndex.from_product(unique_col_vals, names=col_names)

     # Left join (on the columns shared with `df`) keeps every combination;
     # combinations absent from `df` get missing values in other columns
     res = pd.DataFrame(index=comb).reset_index().merge(df, how="left")
     if fill_val is not None:
         res = res.fillna(fill_val)

     # Restore original column order
     return res[df.columns]
diff --git a/python-snippets.py b/python-snippets.py
 def corners(a):
     """Extract corners from ndarray

     Keep only the first and last elements along every axis (a single element
     for an axis of length one, nothing for an axis of length zero). In other
     words, output elements are taken from `a` at positions where every index
     is "extreme".

     Parameters
     ----------
     a : ndarray

     Returns
     -------
     corners : ndarray
         Copy of the selected elements (not a view of `a`). Has the same
         number of dimensions as `a`; every axis has length two, or the
         original length if that was less than two.

     Examples
     --------
     >>> a = np.arange(24).reshape((2, 3, 4))
     >>> corners(a)
     array([[[ 0,  3],
             [ 8, 11]],

            [[12, 15],
             [20, 23]]])
     >>> corners(a[0])
     array([[ 0,  3],
            [ 8, 11]])
     >>> corners(a[0, 0])
     array([0, 3])
     >>> corners(a[[0], 0])
     array([[0, 3]])
     """
     # For an axis of length d > 1, stepping by `d - 1` from the start visits
     # exactly positions 0 and d - 1. For d <= 1 a step of one keeps the axis
     # as is, so zero-length axes stay empty instead of raising IndexError.
     corner_slices = tuple(slice(None, None, max(d - 1, 1)) for d in a.shape)

     # Slicing gives a view; copy so the result does not share data with `a`
     return a[corner_slices].copy()
	import numpy as np
	import pandas as pd


	def nest(df, cols, nest_name="data", keep=False):
	    """Nest non-grouping columns into a list-column of data frames

	    Parameters
	    ----------
	    df : Data frame.
	    cols : List of column names.
	        Columns specifying grouping.
	    nest_name : String, optional.
	        Name of list-column of data frames, by default "data".
	    keep : Boolean, optional.
	        Should the grouping columns be kept in the list column, by default
	        `False`.

	    Returns
	    -------
	    res : Data frame with nested columns.
	        Has one row per group: the grouping columns followed by the
	        list-column `nest_name`.
	    """
	    grouped = df.groupby(cols)
	    keys = list(grouped.groups.keys())

	    # Fetch the rows of every group first, strip grouping columns afterwards
	    pieces = [grouped.get_group(key) for key in keys]
	    if not keep:
	        pieces = [piece.drop(cols, axis=1) for piece in pieces]

	    res = pd.DataFrame(keys, columns=cols)
	    res[nest_name] = pieces
	    return res


	def unnest(df, col):
	    """Unnest previously nested column

	    Parameters
	    ----------
	    df : Data frame with list-column of data frames.
	        Preferably output of `nest()`.
	    col : String.
	        Name of column to unnest.

	    Returns
	    -------
	    res : Data frame with unnested column.
	        Every row of `df` (without `col`) is repeated once per row of its
	        nested data frame; the columns of the nested data frames follow.
	    """
	    # Concatenate by row data frames from nested column
	    nest_part = pd.concat(df[col].values, axis=0).reset_index(drop=True)

	    # Repeat rows of the rest part of data frame by the lengths of
	    # corresponding unnested data frames
	    sub_df_lens = [len(sub_df) for sub_df in df[col]]
	    row_inds = np.repeat(np.arange(len(df)), sub_df_lens)
	    col_inds = (~df.columns.isin([col])).nonzero()[0]
	    rest_part = df.iloc[row_inds, col_inds].reset_index(drop=True)

	    return pd.concat([rest_part, nest_part], axis=1)


	def complete(df, cols, fill_val=None):
	    """Complete a data frame with missing combinations of columns

	    Parameters
	    ----------
	    df : Data frame.
	    cols : List of strings or dictionary.
	        If list, should contain column names, combinations of which should be
	        added. If dictionary, keys represent column names and values - unique
	        values of corresponding columns (if `None`, they will be inferred from
	        columns with names stored in keys).
	    fill_val : Value appropriate for `value` in `fillna()` DataFrame method.
	        Values to use in other columns inside added combinations. It is
	        applied with `fillna()` to the whole result, so missing values that
	        were already present in `df` are filled as well.

	    Returns
	    -------
	    res : Data frame.
	        One row per combination (joined with matching rows of `df`), with
	        columns in the same order as in `df`.

	    Raises
	    ------
	    ValueError
	        If `cols` is neither a list nor a dictionary.

	    Examples
	    --------
	    >>> df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [False, True]})
	    # Basic completion from values present in data frame
	    >>> df1 = complete(df, ["a", "b"])
	    # Order of columns matters for row ordering
	    >>> df2 = complete(df, ["b", "a"])
	    # Using dict to mimic list input
	    >>> df3 = complete(df, {"a": None, "b": None})
	    # Extended functionality with dict `cols`
	    >>> df4 = complete(df, {"a": [3, 2, 1], "b": ["x", "y", "z"]})
	    # Using `fill_val`
	    >>> df5 = complete(df, {"b": ["x", "y", "z"]}, fill_val={"a": 0, "c": False})
	    """

	    def col_sort_unique(col, val=None):
	        # Explicitly supplied values are used as is (order included);
	        # otherwise take sorted unique values present in the column
	        if val is None:
	            val = np.sort(df[col].unique())
	        return val

	    if isinstance(cols, list):
	        col_names = cols
	        unique_col_vals = [col_sort_unique(col) for col in cols]
	    elif isinstance(cols, dict):
	        col_names = list(cols)
	        unique_col_vals = [col_sort_unique(col, val) for col, val in cols.items()]
	    else:
	        # Fail right away instead of hitting an unrelated error further down
	        raise ValueError("`cols` should be list or dict.")

	    # All combinations of the requested values, one row per combination
	    comb = pd.MultiIndex.from_product(unique_col_vals, names=col_names)

	    # Left join (on the columns shared with `df`) keeps every combination;
	    # combinations absent from `df` get missing values in other columns
	    res = pd.DataFrame(index=comb).reset_index().merge(df, how="left")
	    if fill_val is not None:
	        res = res.fillna(fill_val)

	    # Restore original column order
	    return res[df.columns]
	def corners(a):
	    """Extract corners from ndarray

	    Keep only the first and last elements along every axis (a single element
	    for an axis of length one, nothing for an axis of length zero). In other
	    words, output elements are taken from `a` at positions where every index
	    is "extreme".

	    Parameters
	    ----------
	    a : ndarray

	    Returns
	    -------
	    corners : ndarray
	        Copy of the selected elements (not a view of `a`). Has the same
	        number of dimensions as `a`; every axis has length two, or the
	        original length if that was less than two.

	    Examples
	    --------
	    >>> a = np.arange(24).reshape((2, 3, 4))
	    >>> corners(a)
	    array([[[ 0,  3],
	            [ 8, 11]],

	           [[12, 15],
	            [20, 23]]])
	    >>> corners(a[0])
	    array([[ 0,  3],
	           [ 8, 11]])
	    >>> corners(a[0, 0])
	    array([0, 3])
	    >>> corners(a[[0], 0])
	    array([[0, 3]])
	    """
	    # For an axis of length d > 1, stepping by `d - 1` from the start visits
	    # exactly positions 0 and d - 1. For d <= 1 a step of one keeps the axis
	    # as is, so zero-length axes stay empty instead of raising IndexError.
	    corner_slices = tuple(slice(None, None, max(d - 1, 1)) for d in a.shape)

	    # Slicing gives a view; copy so the result does not share data with `a`
	    return a[corner_slices].copy()