Skip to content

Instantly share code, notes, and snippets.

@VictorBezak
Last active December 6, 2020 16:31
Show Gist options
  • Save VictorBezak/e6cb5c113a758e965f0e852e237bebe0 to your computer and use it in GitHub Desktop.
Save VictorBezak/e6cb5c113a758e965f0e852e237bebe0 to your computer and use it in GitHub Desktop.
Box Plot Statistics
def box_describe(df_copy):
    """Return ``describe()`` statistics extended with box-plot measures.

    The result has one row per column reported by ``df_copy.describe()``
    and, in addition to the usual summary columns, contains:

    * ``q1`` / ``q2`` / ``q3`` -- the 25% / 50% / 75% quantiles, renamed.
    * ``iqr`` -- interquartile range, ``q3 - q1``.
    * ``lower_fence`` / ``upper_fence`` -- ``q1 - 1.5 * iqr`` and
      ``q3 + 1.5 * iqr``.
    * ``outliers`` -- number of values strictly outside the fences.
    * ``outlier%`` -- ``outliers`` as a percentage of ``count``.

    Columns that ``describe()`` leaves out (e.g. strings when numeric
    columns are present) are skipped instead of raising ``KeyError``.

    Args:
        df_copy: The DataFrame to summarise. It is not modified.

    Returns:
        A new DataFrame of per-column statistics.
    """
    # .T already yields a fresh frame, so a chained rename is equivalent
    # to the in-place one and avoids mutating through an alias.
    df_stats = df_copy.describe().T.rename(
        columns={'25%': 'q1', '50%': 'q2', '75%': 'q3'}
    )

    df_stats['iqr'] = df_stats['q3'] - df_stats['q1']
    df_stats['lower_fence'] = df_stats['q1'] - (1.5 * df_stats['iqr'])
    df_stats['upper_fence'] = df_stats['q3'] + (1.5 * df_stats['iqr'])

    # Walk only the described columns: the input may hold columns that
    # have no row in df_stats, and looking their fences up would fail.
    outlier_counts = []
    for column, lower_fence, upper_fence in zip(
        df_stats.index, df_stats['lower_fence'], df_stats['upper_fence']
    ):
        values = df_copy[column]
        below = (values < lower_fence).sum()
        above = (values > upper_fence).sum()
        outlier_counts.append(below + above)

    df_stats['outliers'] = outlier_counts
    df_stats['outlier%'] = (df_stats['outliers'] / df_stats['count']) * 100
    return df_stats
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment