Created
May 10, 2016 05:33
-
-
Save fluffywaffles/50af362c19dda63cb921d3895c077785 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def mean(lst):
    '''
    Calculate the arithmetic mean of the input values.

    ``lst`` may be any iterable of numbers (a list, a tuple, a generator,
    a ``filter`` object, ...). It is consumed exactly once.

    Returns the mean as a float, or None when there are no values.
    '''
    # Materialise first: iterators have no len() and can only be walked once,
    # but both the sum and the count are needed.
    values = list(lst)
    if not values:
        return None
    # float() keeps true division on interpreters where int / int truncates.
    return float(sum(values)) / len(values)
def list_mode(lst):
    '''
    Return the most frequently occurring value in the input.

    ``lst`` may be any iterable. When several values share the highest
    count, the one that appears first in the input wins.

    Raises ValueError when the input is empty.
    '''
    from collections import Counter

    values = list(lst)
    try:
        # One counting pass instead of calling .count() once per element.
        counts = Counter(values)
    except TypeError:
        # Unhashable elements (e.g. lists) cannot be dict keys; fall back to
        # the quadratic equality-based count.
        return max(values, key=values.count)
    # max() keeps the first maximal element, which gives the documented
    # first-occurrence tie-break.
    return max(values, key=counts.__getitem__)
def bias_replace_missing_with_avg(data_set, attribute_metadata):
    '''
    Return a copy of data_set with missing values (None) filled in.

    For every attribute index of the first row, the attribute's values are
    fetched with getall(), a best guess is computed from the values that are
    not None, and every None in that attribute is replaced by the guess:

    * attribute_metadata[attr]["is_nominal"] is truthy -> the most common
      value (list_mode), since a mean is not meaningful for categories;
    * otherwise -> the arithmetic mean of the known values.

    An attribute with no known values at all is left as None.

    The result is a new list of rows (lists); the input is not modified by
    this function itself. An empty data_set yields an empty list.
    '''
    if not data_set:
        return []

    # Work attribute-by-attribute (column-wise), then rebuild the rows below.
    filled_columns = []
    for attr in range(len(data_set[0])):
        # NOTE(review): getall is defined outside this block; presumably it
        # returns the values of attribute `attr`, one per row of data_set,
        # in row order -- confirm against its definition.
        values = getall(data_set, attr)
        is_nominal = attribute_metadata[attr]["is_nominal"]

        # Build a real list: filter() is a one-shot iterator on Python 3,
        # which breaks helpers that need len() or .count().
        known = [value for value in values if value is not None]

        if not known:
            guess = None  # nothing to estimate from; keep the gaps
        elif is_nominal:
            guess = list_mode(known)
        else:
            guess = mean(known)

        filled_columns.append(
            [guess if value is None else value for value in values]
        )

    # Undo the partitioning: row i takes the i-th entry of every column.
    return [
        [column[i] for column in filled_columns]
        for i in range(len(data_set))
    ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment