Last active
October 4, 2021 15:29
-
-
Save gituser768/f000942c4060c7ed9d4dd63f477765f1 to your computer and use it in GitHub Desktop.
Estimating AUC using only positive and unknown examples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: how does ROC AUC computed against noisy "known" labels relate to the
# ROC AUC computed against the ground-truth labels?
#
# A set of rankings of decreasing quality is scored twice with
# roc_auc_score -- once against y_true and once against y_known -- and the
# two resulting series are plotted against each other.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import roc_auc_score

n_per_class = 10000           # number of true positives == number of true negatives
n_total = 2 * n_per_class
n_rankings = 100              # how many rankings (noise levels) to evaluate

# Ground truth: the first half of the samples are positive, the rest negative.
y_true = np.r_[np.ones(n_per_class), np.zeros(n_per_class)]

# Observed labels: a true positive is marked 1 with probability alpha,
# a true negative is marked 1 with probability beta.
alpha = 0.5
beta = 0.2
# NOTE: the builtin `float` replaces `np.float`, an alias that was deprecated
# in NumPy 1.20 and removed in 1.24; the resulting dtype is float64 either way.
y_known = np.r_[
    (np.random.rand(n_per_class) < alpha).astype(float),
    (np.random.rand(n_per_class) < beta).astype(float),
]

# Ranking i is a strictly decreasing score (which orders every positive above
# every negative) plus Gaussian noise with standard deviation 100 * i, so
# i == 0 is noise-free and later rankings get progressively noisier.
rankings = [
    -np.arange(n_total) + 100 * np.random.randn(n_total) * i
    for i in range(n_rankings)
]

true_scores = [roc_auc_score(y_true, y_pred) for y_pred in rankings]
est_scores = [roc_auc_score(y_known, y_pred) for y_pred in rankings]

# Figure 1: estimated AUC against true AUC, one point per ranking.
plt.scatter(est_scores, true_scores)

# Figure 2: both AUC series as a function of the ranking index.
plt.figure()
plt.plot(est_scores)  # this converges slowly but eventually is an affine transform of true_scores
plt.plot(true_scores)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment