adhadse · April 5, 2021 08:54
diff --git a/checking_outliers_and_limiting_them.py b/checking_outliers_and_limiting_them.py
 def check_outliers_iqr(dataframe, col_name):
    """Report whether ``col_name`` holds any value outside its IQR limits.

    Args:
        dataframe: Table containing the column to inspect.
        col_name: Label of the column to test.

    Returns:
        bool: True if at least one value of ``dataframe[col_name]`` is above
        the upper limit or below the lower limit returned by
        ``determine_outlier_thresholds_iqr(dataframe, col_name)``; False
        otherwise.
    """
    lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name)
    column = dataframe[col_name]
    # Reduce the boolean mask itself. Reducing the filtered frame instead
    # would test the truthiness of every cell in the outlier rows, so rows
    # made up only of zeros/False would be reported as "no outliers".
    return bool(((column > upper_limit) | (column < lower_limit)).any())

 def replace_with_thresholds_iqr(dataframe, cols, th1=0.05, th3=0.95, replace=False):
    """Print an outlier report for ``cols`` and optionally cap the outliers.

    For every column in ``cols`` except ``'Outcome'`` the limits are obtained
    from ``determine_outlier_thresholds_iqr(dataframe, col_name, th1, th3)``.
    The report is printed as an RST table via ``tabulate``.

    Args:
        dataframe: Table to inspect; modified in place when ``replace`` is
            true.
        cols: Iterable of column labels to process.
        th1: Lower threshold forwarded to ``determine_outlier_thresholds_iqr``.
        th3: Upper threshold forwarded to ``determine_outlier_thresholds_iqr``.
        replace: When true, values above the upper limit are set to the upper
            limit, and values below the lower limit are set to the lower
            limit unless that limit is negative. When false nothing is
            modified and only the report is printed.

    Returns:
        None. The report is written to standard output.
    """
    from tabulate import tabulate

    data = []
    for col_name in cols:
        if col_name == 'Outcome':
            continue

        # Check the frame that was passed in, not the module-level ``df``.
        # NOTE(review): check_outliers_iqr is called without th1/th3, so the
        # two status flags use that helper's own limits while Count and the
        # printed limits use th1/th3 -- confirm this is intended.
        outliers_ = check_outliers_iqr(dataframe, col_name)
        count = None
        lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name, th1, th3)

        if outliers_:
            above = dataframe[col_name] > upper_limit
            below = dataframe[col_name] < lower_limit
            count = dataframe.loc[above | below, col_name].count()
            if replace:
                if lower_limit < 0:
                    # We don't want to replace with negative values, so only
                    # the upper tail is capped.
                    dataframe.loc[above, col_name] = upper_limit
                else:
                    dataframe.loc[below, col_name] = lower_limit
                    dataframe.loc[above, col_name] = upper_limit

        # Re-check after the (optional) replacement to show the new status.
        outliers_status = check_outliers_iqr(dataframe, col_name)
        data.append([outliers_, outliers_status, count, col_name, lower_limit, upper_limit])

    table = tabulate(
        data,
        headers=['Outliers (Previously)', 'Outliers', 'Count', 'Column', 'Lower Limit', 'Upper Limit'],
        tablefmt='rst',
        numalign='right',
    )
    print("Removing Outliers using IQR")
    print(table)
    
 # Report-only pass over every column of df: ``replace`` keeps its default
 # of False, so the table is printed and df itself is left untouched.
 replace_with_thresholds_iqr(dataframe=df, cols=df.columns)
	def check_outliers_iqr(dataframe, col_name):
	    """Tell whether any value of ``col_name`` lies outside its IQR limits.

	    Args:
	        dataframe: Table containing the column to inspect.
	        col_name: Label of the column to test.

	    Returns:
	        bool: True when some value of ``dataframe[col_name]`` is greater
	        than the upper limit or smaller than the lower limit given by
	        ``determine_outlier_thresholds_iqr(dataframe, col_name)``.
	    """
	    lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name)
	    values = dataframe[col_name]
	    # The mask is reduced directly: calling ``.any(axis=None)`` on the
	    # filtered frame would look at the truthiness of every cell in the
	    # outlier rows and miss rows that contain only zeros/False.
	    outside = (values > upper_limit) | (values < lower_limit)
	    return bool(outside.any())

	def replace_with_thresholds_iqr(dataframe, cols, th1=0.05, th3=0.95, replace=False):
	    """Print an outlier report for ``cols`` and optionally cap the outliers.

	    Every column in ``cols`` other than ``'Outcome'`` is checked against
	    the limits from
	    ``determine_outlier_thresholds_iqr(dataframe, col_name, th1, th3)``,
	    and the result is printed as an RST table built with ``tabulate``.

	    Args:
	        dataframe: Table to inspect; modified in place when ``replace``
	            is true.
	        cols: Iterable of column labels to process.
	        th1: Lower threshold forwarded to
	            ``determine_outlier_thresholds_iqr``.
	        th3: Upper threshold forwarded to
	            ``determine_outlier_thresholds_iqr``.
	        replace: When true, values above the upper limit are set to the
	            upper limit, and values below the lower limit are set to the
	            lower limit unless that limit is negative.

	    Returns:
	        None. The report is written to standard output.
	    """
	    from tabulate import tabulate

	    data = []
	    for col_name in cols:
	        if col_name == 'Outcome':
	            continue

	        # Use the ``dataframe`` argument rather than the global ``df``.
	        # NOTE(review): check_outliers_iqr does not receive th1/th3, so
	        # the status flags may be based on different limits than Count
	        # and the printed limits -- confirm this is intended.
	        outliers_ = check_outliers_iqr(dataframe, col_name)
	        count = None
	        lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name, th1, th3)

	        if outliers_:
	            too_high = dataframe[col_name] > upper_limit
	            too_low = dataframe[col_name] < lower_limit
	            count = dataframe.loc[too_high | too_low, col_name].count()
	            if replace:
	                if lower_limit < 0:
	                    # We don't want to replace with negative values, so
	                    # only the upper tail is capped.
	                    dataframe.loc[too_high, col_name] = upper_limit
	                else:
	                    dataframe.loc[too_low, col_name] = lower_limit
	                    dataframe.loc[too_high, col_name] = upper_limit

	        # Status after the (optional) replacement.
	        outliers_status = check_outliers_iqr(dataframe, col_name)
	        data.append([outliers_, outliers_status, count, col_name, lower_limit, upper_limit])

	    table = tabulate(
	        data,
	        headers=['Outliers (Previously)', 'Outliers', 'Count', 'Column', 'Lower Limit', 'Upper Limit'],
	        tablefmt='rst',
	        numalign='right',
	    )
	    print("Removing Outliers using IQR")
	    print(table)

	# Print the outlier report for all columns of df; ``replace`` stays at
	# its default of False, so no values are changed.
	replace_with_thresholds_iqr(dataframe=df, cols=df.columns)