Skip to content

Instantly share code, notes, and snippets.

@adityajn105
Last active July 2, 2020 12:13
Show Gist options
  • Save adityajn105/19ab9cbee5d6b4b23c78cf517699d56c to your computer and use it in GitHub Desktop.
Save adityajn105/19ab9cbee5d6b4b23c78cf517699d56c to your computer and use it in GitHub Desktop.
# Decile chart, LIFT, KS
def getDeciles(models, X, y, n=10):
    """
    Build a decile (gains) table with event rates, LIFT and KS.

    models : models handed to predict_probas for scoring
    X : features to score
    y : actual targets (presumably binary 0/1 -- the two count columns are
        named ZEROS and ONES in the sorted order of the values found in y)
    n : number of quantile buckets (Default 10)

    Returns a DataFrame with one row per bucket, highest-probability bucket
    first, and the columns ZEROS, ONES, POPULATION, EVENT_RATE,
    CUMULATIVE_EVENT_RATE, EVENTS_CAPTURED, NON_EVENT_CAPTURED, LIFT, KS.
    """
    # Score with the models passed in by the caller, not a module-level global.
    # Column 1 of the result is used as the score (presumably P(class == 1)).
    probs = predict_probas(models, X)[:, 1]
    cuts = pd.qcut(probs, n)

    scored = pd.DataFrame({'DECILE': cuts, "P": probs, 'Y': y})
    # Count each target value per bucket, then reverse so the bucket with the
    # highest probabilities comes first.
    df = scored.groupby('DECILE').Y.value_counts().unstack(fill_value=0)[::-1]

    # Prefix every interval with its 1-based rank; iterate over the rows that
    # actually exist instead of assuming there are exactly n of them.
    df.index = [f"{rank} {interval}" for rank, interval in enumerate(df.index, start=1)]
    df.columns = ['ZEROS', 'ONES']

    df['POPULATION'] = df.ZEROS + df.ONES
    df['EVENT_RATE'] = df.ONES / df.POPULATION
    df['CUMULATIVE_EVENT_RATE'] = df.ONES.cumsum() / df.POPULATION.cumsum()
    df['EVENTS_CAPTURED'] = df.ONES.cumsum() / df.ONES.sum()
    df['NON_EVENT_CAPTURED'] = df.ZEROS.cumsum() / df.ZEROS.sum()
    # Lift = share of events captured so far / share of population seen so far.
    df['LIFT'] = df.EVENTS_CAPTURED * df.POPULATION.sum() / df.POPULATION.cumsum()
    # KS = gap between the cumulative event and non-event capture curves.
    df['KS'] = df.EVENTS_CAPTURED - df.NON_EVENT_CAPTURED
    return df
def calibration_plot(yi, pi, bins:int=10, plot=True, show_trend=True, show_bin_size = False, figsize=(8,5) ):
    """
    Bin predicted probabilities into equal-width bins over [0, 1] and compare
    the mean prediction of each bin with its observed fraction of positives.

    yi : Actual Targets
    pi : predicted probabilities
    bins : Number of bins (Default 10)
    plot : Give a scatter plot
           else return fraction of positives, mean of probabilities, sizes of bin
    show_trend : show trend using lines (Default True)
    show_bin_size : Show overlayed figure with size of bins (Default False)
    figsize : size of figure (Default (8,5))

    Returns the object returned by plt.scatter when plot is truthy, otherwise
    a DataFrame indexed by bin label with columns pi_mean, yi_frac and cnt.
    """
    # Work on plain arrays so a pandas index on either input cannot misalign
    # the targets, the probabilities and the bin assignment.
    yi = np.asarray(yi)
    pi = np.asarray(pi)

    edges = np.linspace(0, 1, bins + 1)
    labels = [ f"{s:.2f}-{e:.2f}".replace("0.",".") for s,e in zip(edges[:-1],edges[1:]) ]
    # include_lowest=True keeps predictions of exactly 0.0 in the first bin;
    # without it the right-closed intervals silently drop them.
    bin_of = pd.cut(pi, bins=edges, labels=labels, include_lowest=True)

    data = pd.DataFrame({"yi_frac": yi, "pi_mean": pi, "cnt": yi, "bi": bin_of})
    # observed=False keeps empty bins on the axis (NaN fraction, size 0).
    data = data.groupby('bi', observed=False).agg(
        {"pi_mean": "mean", "yi_frac": "mean", "cnt": "size"}
    )

    if not plot:
        return data

    plt.figure(figsize=figsize)
    fig = plt.scatter( data.index, data.yi_frac, c="r" )
    plt.xlabel("Probability Bin")
    plt.ylabel("Fraction of Positives")
    plt.title("Calibration Plot")
    if show_trend:
        plt.plot( list(data.index), data.yi_frac, alpha=0.7 )
    if show_bin_size:
        # Overlay each bin's share of the samples and annotate the raw count.
        share = data.cnt / data.cnt.sum()
        plt.bar( data.index, share, alpha=0.3 )
        for idx, cnt, frac in zip(data.index, data.cnt, share):
            plt.annotate(cnt, (idx, frac+0.02), rotation=90)
    plt.xticks( data.index, rotation=45)
    return fig
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment