Last active
December 12, 2021 23:53
-
-
Save Brideau/15ec54668ea15cb0eada554bae7d6f13 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import faker as f
import numpy as np
from numpy.random import default_rng
from pandas import DataFrame
from scipy.stats import truncnorm
from sklearn.metrics import average_precision_score

# Shared Faker instance used to fabricate names and e-mail addresses.
fake_bad_actor_generator = f.Faker()
# Shared NumPy random generator used to draw the fraud labels (unseeded, so
# every run produces a different dataset).
rng = default_rng()


def generate_potential_bad_actor(position):
    """
    Generate one random 'potential bad actor' record.

    The details of this function don't really matter. It just fabricates a
    person and labels them with whether they were actually determined to be
    fraudulent, and how likely the model thought they were fraudulent.

    Parameters
    ----------
    position : int or float
        Rank of the record. Lower positions get a higher chance of being
        labelled fraudulent and a wider range of possible scores. The
        formula below is scaled for positions between 0 and 999.

    Returns
    -------
    dict
        Keys ``"name"``, ``"email"``, ``"is_fraudulent"`` (0 or 1) and
        ``"probability_of_fraudulent"`` (score rounded to 2 decimals).
    """
    # Shrinks linearly from 1.0 at position 0 towards 0.0 at position 1000.
    # Used both as the Bernoulli success probability and as the upper
    # truncation bound of the simulated model score.
    fraud_likelihood = (1000.0 - position) / 1000.0

    return {
        "name": fake_bad_actor_generator.name(),
        "email": fake_bad_actor_generator.ascii_email(),
        # A single Bernoulli trial: 1 means the record really is fraudulent.
        "is_fraudulent": rng.binomial(1, fraud_likelihood),
        # Standard normal truncated to [0, fraud_likelihood].
        "probability_of_fraudulent": round(
            truncnorm.rvs(a=0.0, b=fraud_likelihood), 2
        ),
    }


# Build 1000 synthetic records, one per rank position 0..999.
ordered_fraud_list = [generate_potential_bad_actor(r) for r in np.arange(0, 1000)]
# sample(frac=1) returns every row in random order, i.e. it shuffles the frame.
ordered_fraud_df = DataFrame(ordered_fraud_list).sample(frac=1)

# Calculate Average Precision
# NOTE: the result is not bound to a name; it is only shown when this runs as
# the last expression of a notebook cell or in a REPL.
average_precision_score(
    y_true=ordered_fraud_df["is_fraudulent"],
    y_score=ordered_fraud_df["probability_of_fraudulent"],
)


def precision_at_k(y_true, y_score, k, pos_label=1):
    """
    Compute the precision among the ``k`` highest-scoring samples.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels. Must be a binary target.
    y_score : array-like
        Predicted scores; a higher score ranks a sample earlier. Must have
        the same number of entries as ``y_true``.
    k : int
        Number of top-ranked samples to inspect. Must be at least 1.
    pos_label : scalar, default 1
        The value in ``y_true`` that counts as the positive class.

    Returns
    -------
    float
        Number of positives among the top ``k`` samples divided by ``k``.
        If ``k`` exceeds the number of samples, every sample is inspected
        but the denominator is still ``k``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, if ``y_true`` is not a binary target,
        or if ``y_true`` and ``y_score`` differ in length.
    """
    # Validate k first: k == 0 would divide by zero, and a negative k would
    # slice from the wrong end and yield a meaningless negative "precision".
    if k < 1:
        raise ValueError("k must be a positive integer.")

    from sklearn.utils import column_or_1d
    from sklearn.utils.multiclass import type_of_target

    if type_of_target(y_true) != "binary":
        raise ValueError("y_true must be a binary column.")

    # Makes this compatible with various array types
    y_true_arr = column_or_1d(y_true)
    y_score_arr = column_or_1d(y_score)

    # The score ordering is used to index the labels, so a length mismatch
    # would either raise an opaque IndexError or silently drop labels.
    if y_true_arr.shape[0] != y_score_arr.shape[0]:
        raise ValueError("y_true and y_score must have the same length.")

    is_positive = y_true_arr == pos_label

    # argsort is ascending; reverse it so the highest scores come first.
    desc_sort_order = np.argsort(y_score_arr)[::-1]
    true_positives = is_positive[desc_sort_order][:k].sum()

    return true_positives / k


# Precision among the 40 highest-scored records of the synthetic dataset.
# NOTE: the result is not bound to a name; it is only shown when this runs as
# the last expression of a notebook cell or in a REPL.
precision_at_k(
    y_true=ordered_fraud_df["is_fraudulent"],
    y_score=ordered_fraud_df["probability_of_fraudulent"],
    k=40,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment