Skip to content

Instantly share code, notes, and snippets.

@Olshansk
Last active May 25, 2020 23:22
Show Gist options
  • Select an option

  • Save Olshansk/3f2acd5715725029679d0d9d0430900e to your computer and use it in GitHub Desktop.

Select an option

Save Olshansk/3f2acd5715725029679d0d9d0430900e to your computer and use it in GitHub Desktop.
Joint Probability Matrices - Create Transition Matrix Function
def create_joint_probability_matrix(data_GT, data_P, bins):
# https://stackoverflow.com/questions/38931566
def background_gradient(s, m=None, M=None, cmap='Reds', low=0, high=0):
if m is None:
m = s.min().min()
if M is None:
M = s.max().max()
rng = M - m
norm = colors.Normalize(m - (rng * low), M + (rng * high))
normed = s.apply(lambda x: norm(x.values))
cm = plt.cm.get_cmap(cmap)
c = normed.applymap(lambda x: colors.rgb2hex(cm(x)))
ret = c.applymap(lambda x: 'background-color: %s' % x)
return ret
assert(len(data_GT) == len(data_P))
bucket_GT = pd.cut(data_GT, bins=bins, include_lowest=True, right=True)
bucket_P = pd.cut(data_P, bins=bins, include_lowest=True, right=True)
# Generate a pandas dataframe where the index represents the student number
df_GT = pd.DataFrame({'bucket': bucket_GT}).reset_index()
df_P = pd.DataFrame({'bucket': bucket_P}).reset_index()
# Merged the actual predicted grades
merged_df = pd.merge(df_GT, df_P, on=['index'], suffixes=('_grouth_truth', '_predicted'))
# Create a multi-leveled
merged_df = merged_df.groupby(['bucket_grouth_truth', 'bucket_predicted']).count()
# https://stackoverflow.com/a/43921476/768439
# Convert multi-leveled pandas index into a 2d numpy array
m, n = len(merged_df.index.levels[0]), len(merged_df.index.levels[1])
jp_matrix = merged_df.values.reshape(m, n)
# Convert counts to percentages
bin_size = (bins[1] - bins[0])
total = len(data_GT) # equal to len(data_P)
axis = np.linspace(bin_size/2, bins[-1] + bin_size/2, len(bins))[:-1]
jp_df = pd.DataFrame(jp_matrix, columns=axis, index=axis)
jp_df = jp_df.applymap(lambda val: 0 if math.isnan(val) else (val / len(data_GT)))
# Format the output table
jp_df.columns.name = 'Predicted'
jp_df.index.name = 'Ground Truth'
formatted_df = jp_df.style.set_caption("Probability Distribution Matrix").apply(background_gradient, high=1, axis=None).format("{:.2f}")
return formatted_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment