Last active
July 2, 2020 12:13
-
-
Save adityajn105/19ab9cbee5d6b4b23c78cf517699d56c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Decile chart, LIFT, KS
def getDeciles(models, X, y, n=10):
    """Build a decile (gains) table with cumulative lift and KS columns.

    Rows are quantile buckets of the predicted score, ordered from the
    highest-score bucket down to the lowest.

    Parameters
    ----------
    models : passed straight to ``predict_probas`` (a helper defined
        elsewhere in this module).
    X : feature matrix handed to ``predict_probas``.
    y : actual targets. The column renaming below assumes exactly two
        classes occur and that they sort as (negative, positive), e.g. 0/1.
    n : number of quantile buckets (default 10, i.e. deciles).

    Returns
    -------
    pandas.DataFrame with columns ZEROS, ONES, POPULATION, EVENT_RATE,
    CUMULATIVE_EVENT_RATE, EVENTS_CAPTURED, NON_EVENT_CAPTURED, LIFT, KS.
    """
    # Use the caller's ``models`` argument; column 1 of the helper's output is
    # taken as the score (presumably the positive-class probability -- verify
    # against ``predict_probas``).
    probs = predict_probas(models, X)[:, 1]
    cuts = pd.qcut(probs, n)

    # Class counts per bucket, reversed so the highest-score bucket is first.
    df = (
        pd.DataFrame({'DECILE': cuts, 'Y': y})
        .groupby('DECILE', observed=False)
        .Y.value_counts()
        .unstack(fill_value=0)[::-1]
    )
    df.index = [f"{rank} {interval}" for rank, interval in enumerate(df.index, start=1)]
    df.columns = ['ZEROS', 'ONES']

    df['POPULATION'] = df.ZEROS + df.ONES
    df['EVENT_RATE'] = df.ONES / df.POPULATION
    df['CUMULATIVE_EVENT_RATE'] = df.ONES.cumsum() / df.POPULATION.cumsum()
    df['EVENTS_CAPTURED'] = df.ONES.cumsum() / df.ONES.sum()
    df['NON_EVENT_CAPTURED'] = df.ZEROS.cumsum() / df.ZEROS.sum()
    # Cumulative lift: share of events captured divided by share of population seen.
    df['LIFT'] = df.EVENTS_CAPTURED * df.POPULATION.sum() / df.POPULATION.cumsum()
    # KS per row: gap between cumulative event and non-event capture rates.
    df['KS'] = df.EVENTS_CAPTURED - df.NON_EVENT_CAPTURED
    return df
def calibration_plot(yi, pi, bins: int = 10, plot=True, show_trend=True, show_bin_size=False, figsize=(8, 5)):
    """
    Bin predicted probabilities into equal-width bins on [0, 1] and compare
    each bin's mean prediction with its observed fraction of positives.

    yi : Actual Targets
    pi : predicted probabilities
    bins : Number of bins (Default 10)
    plot : Give a scatter plot (returns the object produced by plt.scatter)
            else return a DataFrame indexed by bin label with columns
            pi_mean (mean of probabilities), yi_frac (fraction of positives)
            and cnt (size of bin)
    show_trend : show trend using lines (Default True)
    show_bin_size : Show overlayed figure with size of bins (Default False)
    figsize : size of figure (Default (8,5))
    """
    edges = np.linspace(0, 1, bins + 1)
    labels = [f"{lo:.2f}-{hi:.2f}".replace("0.", ".") for lo, hi in zip(edges[:-1], edges[1:])]

    # include_lowest=True closes the first bin on the left so a prediction of
    # exactly 0.0 is counted instead of falling outside every bin.
    binned = pd.cut(pi, bins=edges, labels=labels, include_lowest=True)
    data = pd.DataFrame({"yi_frac": yi, "pi_mean": pi, "cnt": yi, "bi": binned})

    # observed=False keeps empty bins in the result (count 0, NaN statistics)
    # so the x-axis always shows every bin.
    data = data.groupby("bi", observed=False).agg(
        pi_mean=("pi_mean", "mean"),
        yi_frac=("yi_frac", "mean"),
        cnt=("cnt", "size"),
    )

    if not plot:
        return data

    plt.figure(figsize=figsize)
    fig = plt.scatter(data.index, data.yi_frac, c="r")
    plt.xlabel("Probability Bin")
    plt.ylabel("Fraction of Positives")
    plt.title("Calibration Plot")
    if show_trend:
        plt.plot(list(data.index), data.yi_frac, alpha=0.7)
    if show_bin_size:
        # Overlay each bin's share of the samples and annotate the raw count.
        shares = data.cnt / sum(data.cnt)
        plt.bar(data.index, shares, alpha=0.3)
        for idx, cnt, frac in zip(data.index, data.cnt, shares):
            plt.annotate(cnt, (idx, frac + 0.02), rotation=90)
    plt.xticks(data.index, rotation=45)
    return fig
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment