Olshansk · May 25, 2020 23:22
diff --git a/joint_probability_matricies_4.py b/joint_probability_matricies_4.py
 def create_joint_probability_matrix(data_GT, data_P, bins):

    # https://stackoverflow.com/questions/38931566
    def background_gradient(s, m=None, M=None, cmap='Reds', low=0, high=0):
        if m is None:
            m = s.min().min()
        if M is None:
            M = s.max().max()
        rng = M - m
        norm = colors.Normalize(m - (rng * low), M + (rng * high))
        normed = s.apply(lambda x: norm(x.values))
        cm = plt.cm.get_cmap(cmap)
        c = normed.applymap(lambda x: colors.rgb2hex(cm(x)))
        ret = c.applymap(lambda x: 'background-color: %s' % x)
        return ret
    
    assert(len(data_GT) == len(data_P))
    
    bucket_GT = pd.cut(data_GT, bins=bins, include_lowest=True, right=True)
    bucket_P = pd.cut(data_P, bins=bins, include_lowest=True, right=True)

    # Generate a pandas dataframe where the index represents the student number
    df_GT = pd.DataFrame({'bucket': bucket_GT}).reset_index()
    df_P = pd.DataFrame({'bucket': bucket_P}).reset_index()

    # Merged the actual predicted grades 
    merged_df = pd.merge(df_GT, df_P, on=['index'], suffixes=('_grouth_truth', '_predicted'))

    # Create a multi-leveled
    merged_df = merged_df.groupby(['bucket_grouth_truth', 'bucket_predicted']).count()

    # https://stackoverflow.com/a/43921476/768439
    # Convert multi-leveled pandas index into a 2d numpy array
    m, n = len(merged_df.index.levels[0]), len(merged_df.index.levels[1])
    jp_matrix = merged_df.values.reshape(m, n)
    
    # Convert counts to percentages
    bin_size = (bins[1] - bins[0])
    total = len(data_GT)  # equal to len(data_P)
    axis = np.linspace(bin_size/2, bins[-1] + bin_size/2, len(bins))[:-1]
    jp_df = pd.DataFrame(jp_matrix, columns=axis, index=axis)
    jp_df = jp_df.applymap(lambda val: 0 if math.isnan(val) else (val / len(data_GT)))
    
    # Format the output table
    jp_df.columns.name = 'Predicted'
    jp_df.index.name = 'Ground Truth'
    formatted_df = jp_df.style.set_caption("Probability Distribution Matrix").apply(background_gradient, high=1, axis=None).format("{:.2f}")
    
    return formatted_df
	def create_joint_probability_matrix(data_GT, data_P, bins):

	# https://stackoverflow.com/questions/38931566
	def background_gradient(s, m=None, M=None, cmap='Reds', low=0, high=0):
	if m is None:
	m = s.min().min()
	if M is None:
	M = s.max().max()
	rng = M - m
	norm = colors.Normalize(m - (rng * low), M + (rng * high))
	normed = s.apply(lambda x: norm(x.values))
	cm = plt.cm.get_cmap(cmap)
	c = normed.applymap(lambda x: colors.rgb2hex(cm(x)))
	ret = c.applymap(lambda x: 'background-color: %s' % x)
	return ret

	assert(len(data_GT) == len(data_P))

	bucket_GT = pd.cut(data_GT, bins=bins, include_lowest=True, right=True)
	bucket_P = pd.cut(data_P, bins=bins, include_lowest=True, right=True)

	# Generate a pandas dataframe where the index represents the student number
	df_GT = pd.DataFrame({'bucket': bucket_GT}).reset_index()
	df_P = pd.DataFrame({'bucket': bucket_P}).reset_index()

	# Merged the actual predicted grades
	merged_df = pd.merge(df_GT, df_P, on=['index'], suffixes=('_grouth_truth', '_predicted'))

	# Create a multi-leveled
	merged_df = merged_df.groupby(['bucket_grouth_truth', 'bucket_predicted']).count()

	# https://stackoverflow.com/a/43921476/768439
	# Convert multi-leveled pandas index into a 2d numpy array
	m, n = len(merged_df.index.levels[0]), len(merged_df.index.levels[1])
	jp_matrix = merged_df.values.reshape(m, n)

	# Convert counts to percentages
	bin_size = (bins[1] - bins[0])
	total = len(data_GT) # equal to len(data_P)
	axis = np.linspace(bin_size/2, bins[-1] + bin_size/2, len(bins))[:-1]
	jp_df = pd.DataFrame(jp_matrix, columns=axis, index=axis)
	jp_df = jp_df.applymap(lambda val: 0 if math.isnan(val) else (val / len(data_GT)))

	# Format the output table
	jp_df.columns.name = 'Predicted'
	jp_df.index.name = 'Ground Truth'
	formatted_df = jp_df.style.set_caption("Probability Distribution Matrix").apply(background_gradient, high=1, axis=None).format("{:.2f}")

	return formatted_df
No results found