Skip to content

Instantly share code, notes, and snippets.

@gituser768
Last active January 16, 2022 20:18
Show Gist options
  • Save gituser768/5b69fd0e24482857bf54ed417f554b72 to your computer and use it in GitHub Desktop.
Save gituser768/5b69fd0e24482857bf54ed417f554b72 to your computer and use it in GitHub Desktop.
Simulation of variance of estimate of F1 as precision and recall vary from 0 to 1
from collections import defaultdict
import numpy as np
import numpy.linalg as la
import pandas as pd
import numpy.random as rn
import matplotlib.pyplot as plt
def hmean(a, b):
    """Return the element-wise harmonic mean of *a* and *b*.

    Computes ``2 / (1/a + 1/b)`` with NumPy broadcasting. Where either
    input is zero the reciprocal is infinite and the result is 0; NaN
    results (e.g. from a NaN input) are replaced by 0 and infinite results
    by large finite values via ``np.nan_to_num``.

    Args:
        a: Scalar or array-like of values.
        b: Scalar or array-like of values, broadcastable against *a*.

    Returns:
        A NumPy scalar or array holding the harmonic mean.
    """
    # Coerce so that Python scalars follow NumPy division semantics
    # (1/0 -> inf) instead of raising ZeroDivisionError.
    a = np.asarray(a)
    b = np.asarray(b)
    # Zeros and NaNs are expected inputs here and map to a result of 0,
    # so the divide/invalid floating-point warnings carry no information.
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.nan_to_num(2 / (1 / a + 1 / b))
def main(p=0.5, n_iter=10000, r_steps=30, p_steps=40, num_samples=(100,)):
    """Simulate and plot the standard deviation of the F1 estimate.

    For every point on a (precision, recall) grid, repeatedly draws a
    labelled sample and a classifier's predictions with that nominal
    precision and recall, computes the empirical F1, and finally draws a
    filled contour plot of the standard deviation of F1 across repetitions
    (one figure per sample size).

    Args:
        p: Probability that an example is positive (class prevalence).
        n_iter: Number of Monte Carlo repetitions per sample size.
        r_steps: Number of recall grid points in (0, 1].
        p_steps: Number of precision grid points in (0, 1].
        num_samples: Iterable of sample sizes to simulate.
    """
    # Drop the first grid point (0) so neither nominal metric is zero.
    recall = np.linspace(0, 1, r_steps + 1)[1:]
    precision = np.linspace(0, 1, p_steps + 1)[1:]

    # Joint probability P(pred=1, y=0) required to reach each
    # (precision, recall) pair, shape (p_steps, r_steps):
    # TP * (1 / precision - 1) with TP = recall * p.
    p_1_0 = recall * p / precision[:, np.newaxis] - recall * p
    # Conditional false-positive rate P(pred=1 | y=0).
    recall_neg = p_1_0 / (1 - p)
    # A false-positive rate above 1 cannot be realised, so those grid
    # cells do not correspond to any classifier; small slack absorbs
    # floating-point rounding on the boundary.
    feasible = recall_neg <= 1 + 1e-12

    f1s = defaultdict(list)
    for _ in range(n_iter):
        for n in num_samples:
            # True labels: positive with probability p, consistent with
            # the use of p as P(y=1) in p_1_0 above.
            pos = rn.rand(n) < p
            # Positives predicted positive, one column per recall value.
            pred_tpos = (rn.rand(n, r_steps) < recall) & pos[:, np.newaxis]
            # Negatives predicted positive, per (precision, recall) cell.
            pred_fpos = (
                (rn.rand(n, p_steps, r_steps) < recall_neg)
                & ~pos[:, np.newaxis, np.newaxis]
            )
            pred = pred_tpos[:, np.newaxis] | pred_fpos

            true_pos = pred_tpos.sum(0)
            # Empty denominators (no positives drawn / nothing predicted
            # positive) yield NaN, which hmean maps to an F1 of 0.
            with np.errstate(divide="ignore", invalid="ignore"):
                recall_hat = true_pos / pos.sum()
                precision_hat = true_pos / pred.sum(0)
            # recall_hat (r_steps,) broadcasts against
            # precision_hat (p_steps, r_steps).
            f1s[n].append(hmean(recall_hat, precision_hat))

    for n in num_samples:
        f1_std = np.ma.masked_where(~feasible, np.stack(f1s[n]).std(0))
        # Open the figure before drawing so each sample size gets its own
        # figure and no empty trailing figure is left behind.
        plt.figure()
        plt.contourf(recall, precision, f1_std)
        plt.colorbar()
        plt.xlabel("recall")
        plt.ylabel("precision")
        plt.title(f"Std of F1 estimate (n={n})")
    plt.show()
# Run the simulation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@gituser768
Copy link
Author

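This is missing the feasibility constraint (here p = precision, r = recall, π = prevalence of the positive class), without which the required false-positive rate would exceed 1:
p ≥ πr/(1−π+πr)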

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment