Created
June 16, 2018 14:23
-
-
Save abirjameel/179640efa45b35ad275dbb47e41ae318 to your computer and use it in GitHub Desktop.
Function for Normalizing pandas DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def normalize(df):
    """
    Function for min-max Scaling a pandas DataFrame

    @param:
        df: a pandas DataFrame. It is not modified; a copy is scaled.
    Returns:
        A tuple (result, param):
        result -- a copy of df in which every column has been rescaled
                  as (value - min) / (max - min).
        param  -- a dict holding, for every column, the keys
                  'max_value_<column>' and 'min_value_<column>' with the
                  column's original maximum and minimum.

    A column whose maximum equals its minimum has a zero range; it is
    mapped to 0.0 instead of being divided by zero (which would fill
    the column with NaN).
    """
    result = df.copy()
    param = {}
    for feature_name in df.columns:
        column = df[feature_name]
        # Compute each statistic once and reuse it for both the
        # returned coefficients and the scaling itself.
        max_value = column.max()
        min_value = column.min()
        param['max_value_' + str(feature_name)] = max_value
        param['min_value_' + str(feature_name)] = min_value

        value_range = max_value - min_value
        if value_range == 0:
            # Constant column: (value - min) is already 0 everywhere, so
            # dividing by 1 yields 0.0 rather than 0/0 = NaN.
            value_range = 1
        result[feature_name] = (column - min_value) / value_range
    return result, param
def normalize_zscore(df):
    """
    Function for Z-score Scaling a pandas DataFrame

    @param:
        df: a pandas DataFrame. It is not modified; a copy is scaled.
    Returns:
        A tuple (result, param):
        result -- a copy of df in which every column has been rescaled
                  as (value - mean) / std.
        param  -- a dict holding, for every column, the keys
                  'mean_<column>' and 'std_<column>' with the column's
                  original mean and standard deviation. These are the
                  keys denorm_zscore reads to undo the scaling.

    A column whose standard deviation is 0 is only centred (divided by
    1), so it becomes 0.0 instead of NaN. The std stored in param is
    still the real value (0), so rescaling restores the original column.
    """
    result = df.copy()
    param = {}
    for feature_name in df.columns:
        column = df[feature_name]
        # Compute each statistic once and reuse it for both the
        # returned coefficients and the scaling itself.
        mean = column.mean()
        std = column.std()
        param['mean_' + str(feature_name)] = mean
        param['std_' + str(feature_name)] = std

        # Zero-variance column: avoid 0/0 by leaving the centred values
        # (all zero) unscaled.
        scale = 1 if std == 0 else std
        result[feature_name] = (column - mean) / scale
    return result, param
def denorm_zscore(normalizedDF, param):
    """
    Function for rescaling a Z-score scaled DataFrame

    @param:
        normalizedDF: a pandas DataFrame of Z-score scaled values.
            It is not modified; the rescaling is done on a copy.
        param: a dict containing, for every column of normalizedDF,
            the keys 'mean_<column>' and 'std_<column>' (the format
            produced by normalize_zscore).
    Returns:
        A new DataFrame in which every column has been rescaled as
        (value * std) + mean.
    Raises:
        KeyError: if param lacks the mean/std entry for a column.
    """
    # Work on a copy so the caller's scaled DataFrame stays intact.
    result = normalizedDF.copy()
    for feature_name in normalizedDF.columns:
        mean = param["mean_" + str(feature_name)]
        std = param["std_" + str(feature_name)]
        result[feature_name] = (normalizedDF[feature_name] * std) + mean
    return result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment