Skip to content

Instantly share code, notes, and snippets.

@adhadse
Created April 5, 2021 08:54
Show Gist options
  • Save adhadse/2f462c5a6c820e4184eaac604fa8c8a8 to your computer and use it in GitHub Desktop.
Save adhadse/2f462c5a6c820e4184eaac604fa8c8a8 to your computer and use it in GitHub Desktop.
def check_outliers_iqr(dataframe, col_name):
    """Report whether ``col_name`` holds any value outside its IQR limits.

    The limits come from ``determine_outlier_thresholds_iqr`` called with its
    default thresholds.

    Args:
        dataframe: pandas DataFrame containing the column.
        col_name: label of the column to inspect.

    Returns:
        True when at least one value is strictly above the upper limit or
        strictly below the lower limit, otherwise False.
    """
    lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name)
    column = dataframe[col_name]
    outside = (column > upper_limit) | (column < lower_limit)
    # Test the mask itself: reducing the filtered rows with any(axis=None)
    # would look at the truthiness of their values, so an outlier row made
    # only of zeros would be reported as "no outliers".
    return bool(outside.any())
def replace_with_thresholds_iqr(dataframe, cols, th1=0.05, th3=0.95, replace=False,
                                skip_cols=('Outcome',)):
    """Print a per-column IQR outlier report and optionally cap the outliers.

    For every column in ``cols`` (except those in ``skip_cols``) the limits
    are obtained from ``determine_outlier_thresholds_iqr`` and the values
    outside them are counted. With ``replace=True`` those values are
    overwritten with the nearest limit, in place.

    Args:
        dataframe: pandas DataFrame to inspect; modified in place only when
            ``replace`` is true.
        cols: iterable of column labels to examine.
        th1: forwarded to ``determine_outlier_thresholds_iqr`` (presumably
            the lower quantile; confirm against that helper).
        th3: forwarded to ``determine_outlier_thresholds_iqr`` (presumably
            the upper quantile; confirm against that helper).
        replace: when true, cap outliers at the limits.
        skip_cols: collection of column labels to leave out of the report.
            Defaults to ``('Outcome',)``, the column this function has
            always skipped.

    Returns:
        None. The report is printed as a table with the columns
        'Outliers (Previously)', 'Outliers', 'Count', 'Column',
        'Lower Limit' and 'Upper Limit'.
    """
    from tabulate import tabulate

    data = []
    for col_name in cols:
        if col_name in skip_cols:
            continue

        lower_limit, upper_limit = determine_outlier_thresholds_iqr(
            dataframe, col_name, th1, th3)

        # Detect outliers against the same limits that are reported, and on
        # the dataframe that was passed in (not a module-level global).
        too_high = dataframe[col_name] > upper_limit
        too_low = dataframe[col_name] < lower_limit
        outliers_ = bool((too_high | too_low).any())
        count = int((too_high | too_low).sum()) if outliers_ else None

        if outliers_ and replace:
            if lower_limit >= 0:
                dataframe.loc[too_low, col_name] = lower_limit
            # A negative lower limit is never written back: we don't want to
            # replace values with negative numbers, so only the upper side
            # is capped in that case.
            dataframe.loc[too_high, col_name] = upper_limit

        # Status after the (optional) replacement, against the same limits.
        current = dataframe[col_name]
        outliers_status = bool(
            ((current > upper_limit) | (current < lower_limit)).any())

        data.append([outliers_, outliers_status, count, col_name,
                     lower_limit, upper_limit])

    table = tabulate(
        data,
        headers=['Outliers (Previously)', 'Outliers', 'Count', 'Column',
                 'Lower Limit', 'Upper Limit'],
        tablefmt='rst',
        numalign='right',
    )
    print("Removing Outliers using IQR")
    print(table)
# Print the IQR outlier report for every column of df. `replace` is left at
# its default (False), so this call only reports and does not overwrite values.
# NOTE(review): df is not defined in the visible code; presumably it is
# loaded earlier in the session. Confirm before running.
replace_with_thresholds_iqr(df, df.columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment