fluffywaffles · May 10, 2016 05:33
diff --git a/bias2.py b/bias2.py
 def mean(lst):
     '''
     Calculate the arithmetic mean of the input values.

     lst may be any iterable of numbers (a list, a generator, or the
     iterator that filter() returns on Python 3). It is copied into a list
     first so that it can be both counted and summed.

     Returns the mean as a float, or None when there are no values.
     '''
     values = list(lst)
     count = len(values)
     if count == 0:
         return None
     return float(sum(values)) / count


 def list_mode(lst):
     '''
     Return the most common value in the input, or None if it is empty.

     lst may be any iterable; it is copied into a list first. When several
     values share the highest count, the one that appears first wins.
     '''
     from collections import Counter

     values = list(lst)
     if not values:
         return None
     try:
         # Count every value once up front instead of rescanning the whole
         # list for each element.
         counts = Counter(values)
     except TypeError:
         # Unhashable values (e.g. lists) cannot be keys of a Counter; fall
         # back to counting by equality.
         return max(values, key=values.count)
     # Taking max over the original order keeps the first-seen tie-break.
     return max(values, key=counts.__getitem__)


 def bias_replace_missing_with_avg(data_set, attribute_metadata):
     '''
     Replace None values (missing values) with a per-attribute best guess.

     The values of each attribute are fetched with getall(data_set, attr),
     for attr in range(len(data_set[0])). The best guess for an attribute
     is computed from its non-None values only:

     - the most common value when attribute_metadata[attr]["is_nominal"]
       is truthy (averaging category labels is not meaningful);
     - the arithmetic mean otherwise.

     An attribute with no known values at all keeps its None entries.

     Returns a list with one list per entry of data_set, holding that
     entry's values with the missing ones filled in. An empty data_set
     gives an empty list.
     '''
     if not data_set:
         return []

     filled_in = []
     for attr in range(len(data_set[0])):
         attr_values = getall(data_set, attr)
         # A real list (not filter()) so the helpers can call len() and
         # .count() on it under Python 3 as well.
         known = [value for value in attr_values if value is not None]
         if not known:
             best_guess = None
         elif attribute_metadata[attr]["is_nominal"]:
             best_guess = list_mode(known)
         else:
             best_guess = mean(known)
         filled_in.append(
             [best_guess if value is None else value for value in attr_values]
         )

     # Undo the per-attribute partitioning: rebuild one row per data_set entry.
     return [
         [column[i] for column in filled_in]
         for i in range(len(data_set))
     ]
	def mean(lst):
	    '''
	    Calculate the arithmetic mean of the input values.

	    lst may be any iterable of numbers (a list, a generator, or the
	    iterator that filter() returns on Python 3). It is copied into a list
	    first so that it can be both counted and summed.

	    Returns the mean as a float, or None when there are no values.
	    '''
	    values = list(lst)
	    count = len(values)
	    if count == 0:
	        return None
	    return float(sum(values)) / count


	def list_mode(lst):
	    '''
	    Return the most common value in the input, or None if it is empty.

	    lst may be any iterable; it is copied into a list first. When several
	    values share the highest count, the one that appears first wins.
	    '''
	    from collections import Counter

	    values = list(lst)
	    if not values:
	        return None
	    try:
	        # Count every value once up front instead of rescanning the whole
	        # list for each element.
	        counts = Counter(values)
	    except TypeError:
	        # Unhashable values (e.g. lists) cannot be keys of a Counter; fall
	        # back to counting by equality.
	        return max(values, key=values.count)
	    # Taking max over the original order keeps the first-seen tie-break.
	    return max(values, key=counts.__getitem__)


	def bias_replace_missing_with_avg(data_set, attribute_metadata):
	    '''
	    Replace None values (missing values) with a per-attribute best guess.

	    The values of each attribute are fetched with getall(data_set, attr),
	    for attr in range(len(data_set[0])). The best guess for an attribute
	    is computed from its non-None values only:

	    - the most common value when attribute_metadata[attr]["is_nominal"]
	      is truthy (averaging category labels is not meaningful);
	    - the arithmetic mean otherwise.

	    An attribute with no known values at all keeps its None entries.

	    Returns a list with one list per entry of data_set, holding that
	    entry's values with the missing ones filled in. An empty data_set
	    gives an empty list.
	    '''
	    if not data_set:
	        return []

	    filled_in = []
	    for attr in range(len(data_set[0])):
	        attr_values = getall(data_set, attr)
	        # A real list (not filter()) so the helpers can call len() and
	        # .count() on it under Python 3 as well.
	        known = [value for value in attr_values if value is not None]
	        if not known:
	            best_guess = None
	        elif attribute_metadata[attr]["is_nominal"]:
	            best_guess = list_mode(known)
	        else:
	            best_guess = mean(known)
	        filled_in.append(
	            [best_guess if value is None else value for value in attr_values]
	        )

	    # Undo the per-attribute partitioning: rebuild one row per data_set entry.
	    return [
	        [column[i] for column in filled_in]
	        for i in range(len(data_set))
	    ]