Created
July 4, 2016 05:53
-
-
Save dsal1951/896868a448490f90516a3d241f1fdf4f to your computer and use it in GitHub Desktop.
Data needed for a Lift chart (aka Gains chart) for a predictive model created using Sklearn and Matplotlib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def calc_lift(x, y, clf, bins=10):
    """
    Takes input arrays and trained SkLearn Classifier and returns a Pandas
    DataFrame with the average lift generated by the model in each bin

    Parameters
    -------------------
    x: Numpy array or Pandas Dataframe with shape = [n_samples, n_features]

    y: A 1-d Numpy array or Pandas Series with shape = [n_samples]
       IMPORTANT: Code is only configured for binary target variable
       of 1 for success and 0 for failure

    clf: A trained SkLearn classifier object
         (only its ``predict_proba`` method is used)

    bins: Number of quantile buckets to divide observations across
          Default value is 10

    Returns
    -------------------
    Pandas DataFrame indexed by bin number ('BIN_POSITIVE') with columns:
        LIFT_POSITIVE:       fraction of observations in the bin where y = 1
        LIFT_POSITIVE_INDEX: LIFT_POSITIVE / BASELINE_POSITIVE * 100
        BASELINE_POSITIVE:   fraction of all observations where y = 1
                             (same value on every row)

    Notes
    -------------------
    Bins are numbered in ascending order of predicted probability: bin 0
    holds the observations with the LOWEST predicted probability that y = 1
    and the last bin holds the highest.

    When many observations share the same predicted probability, some
    quantile edges coincide. Duplicate edges are merged, so fewer than
    ``bins`` rows may be returned and the buckets are then not equal sized.
    """
    # Predicted probability that y = 1 (second column of predict_proba)
    prob_positive = clf.predict_proba(x)[:, 1]

    df = pd.DataFrame({'ACTUAL': y, 'PROB_POSITIVE': prob_positive})

    # Observations where y = 1
    total_positive_n = df['ACTUAL'].sum()
    # Total observations
    total_n = df.index.size
    natural_positive_prob = total_positive_n / float(total_n)

    # Quantile bins over the predicted probability; bin 0 = lowest scores.
    # duplicates='drop' merges coinciding quantile edges instead of raising
    # "Bin edges must be unique" when probabilities are heavily tied.
    df['BIN_POSITIVE'] = pd.qcut(df['PROB_POSITIVE'], bins, labels=False,
                                 duplicates='drop')

    pos_group_df = df.groupby('BIN_POSITIVE')

    # Percentage of observations in each bin where y = 1
    lift_positive = pos_group_df['ACTUAL'].sum() / pos_group_df['ACTUAL'].count()
    # Same rate expressed relative to the overall rate (100 = no lift)
    lift_index_positive = (lift_positive / natural_positive_prob) * 100

    # Consolidate results into output DataFrame
    lift_df = pd.DataFrame({'LIFT_POSITIVE': lift_positive,
                            'LIFT_POSITIVE_INDEX': lift_index_positive,
                            'BASELINE_POSITIVE': natural_positive_prob})

    return lift_df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment