DGrady · August 18, 2018 04:16 · Syrus · Aug 18, 2018
diff --git a/describe_population.py b/describe_population.py
 import pandas as pd

 def describe_population(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarize how well-populated and how varied each column of ``df`` is.

    The result has one row per input column, holding its dtype, the number
    of null and non-null entries, and the number of distinct values. The
    two ratio columns (``pop/N`` and ``dist/pop``) are given as percents.
    The row count is recorded in the name of the result's column index.
    """
    row_count = len(df)
    null_counts = df.isnull().sum()
    filled_counts = row_count - null_counts
    unique_counts = df.nunique()

    # Insertion order of this mapping fixes the column order of the report.
    report_columns = {
        'dtype': df.dtypes,
        'na': null_counts,
        'populated': filled_counts,
        'distinct': unique_counts,
        'pop/N': 100 * filled_counts / row_count,
        'dist/pop': 100 * unique_counts / filled_counts,
    }

    summary = pd.DataFrame()
    for label, values in report_columns.items():
        summary[label] = values

    summary.columns.name = f"N = {row_count:,}"
    return summary
	import pandas as pd

	def describe_population(df: pd.DataFrame) -> pd.DataFrame:
		"""
		Report the populated and uniqueness counts for each column of the input.

		The result has one row per column of ``df`` and these columns:

		- ``dtype``: the column's dtype
		- ``na``: number of null entries
		- ``populated``: number of non-null entries
		- ``distinct``: number of distinct values, as counted by ``nunique()``
		- ``pop/N``: ``populated`` as a percent of the row count
		- ``dist/pop``: ``distinct`` as a percent of ``populated``

		The ratio columns are given as percents. The row count ``N`` is
		stored, formatted with thousands separators, as the name of the
		result's column index.
		"""

		N = len(df)

		dtypes = df.dtypes

		distincts = df.nunique()
		nas = df.isnull().sum()
		pop = N - nas

		out = pd.DataFrame()

		out['dtype'] = dtypes

		out['na'] = nas
		out['populated'] = pop
		out['distinct'] = distincts

		# Multiply before dividing so the ratios come out as percents.
		out['pop/N'] = 100 * pop / N
		out['dist/pop'] = 100 * distincts / pop

		# Keep the row count visible by naming the column index after it.
		out.columns.name = "N = {:,}".format(N)

		return out
No results found