Created
November 26, 2019 06:37
-
-
Save analyticsindiamagazine/88c6aa8ace948f17e64b31fc652e1a5d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A function to find all the non-numeric values
def non_numerals(series):
    """Return the distinct values of *series* that ``float()`` cannot parse.

    Every unique value is passed to ``float()``; the values for which the
    conversion fails are collected in the order ``series.unique()`` yields
    them.

    Args:
        series: Object exposing ``unique()`` (e.g. a pandas Series).

    Returns:
        list: The unique values rejected by ``float()``. Empty when every
        value converts.
    """
    rejected = []
    for value in series.unique():
        try:
            float(value)
        except (TypeError, ValueError, OverflowError):
            # TypeError: None / non-scalar objects; ValueError: text that is
            # not a number; OverflowError: an int too large for a float.
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            rejected.append(value)
    return rejected
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A function to replace the non-numeric values
def replace_nn_with(series, type_, fill_with=None, method='mean'):
    """Replace the non-numeric values of *series* and cast it to *type_*.

    The values reported by ``non_numerals(series)`` are first turned into
    NaN. All NaNs (those and any that were already present) are then filled
    either with *fill_with* or with a statistic of the remaining values, and
    the result is converted to *type_*. Progress is reported with ``print``.

    Args:
        series: pandas Series to clean. It is not modified; a new Series is
            returned.
        type_: Target dtype handed to ``Series.astype``.
        fill_with: Replacement value. When ``None`` (the default) the value
            is computed according to *method*. Falsy values such as ``0``
            are honoured.
        method: ``'mean'``, ``'median'`` or ``'min'``; only used when
            *fill_with* is ``None``.

    Returns:
        The cleaned Series cast to *type_*, or ``0`` (after printing a hint)
        when *method* is not one of the supported names.
    """
    nn = non_numerals(series)
    print('-'*30)
    print('-'*30)
    print("Non Numerals in column ", series.name, " : ", nn)

    # replace() returns a new object, so the caller's Series stays untouched.
    series = series.replace(nn, np.nan)
    # Counts every NaN that is about to be filled, including pre-existing ones.
    nulls = series.isnull().sum()

    # Compare against None explicitly so that fill_with=0 is not mistaken
    # for "no fill value supplied".
    if fill_with is not None:
        series = series.fillna(fill_with)
        print("Filling Non Numerals with {}".format(fill_with))
    else:
        labels = {'mean': 'MEAN', 'median': 'MEDIAN', 'min': 'MINIMUM'}
        if method not in labels:
            print('Please pass a valid method as a string -- ("mean" or "median" or "min")')
            # Kept for backward compatibility: callers receive 0, not a Series.
            return 0
        # Cast to float first: the Series may still hold numeric strings.
        rep = getattr(series.astype(float), method)()
        print("Filling Non Numerals with {} = ".format(labels[method]), rep)
        series = series.fillna(rep)

    try:
        converted = series.astype(type_)
    except (TypeError, ValueError):
        # A string holding a decimal (e.g. "1.5") cannot be cast straight to
        # int, so go through float first.
        converted = series.astype(float).astype(type_)
    print(nulls, ": observations replaced")
    return converted
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment