Skip to content

Instantly share code, notes, and snippets.

@lukedeo
Created April 9, 2014 05:18
Show Gist options
  • Save lukedeo/10228233 to your computer and use it in GitHub Desktop.
Save lukedeo/10228233 to your computer and use it in GitHub Desktop.
Performance Plotting for Top Tagging
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.tri as tri
from matplotlib.mlab import griddata
from matplotlib import rc
import numpy.ma as ma
def data_load(filename):
data = np.genfromtxt(filename, delimiter = ',', names = True)
return data
def pt_plot(data, pt_label = 'fjet_pt_flat', weighted = None, log = False, bins = 100, logcount = False):
fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
ax = plt.subplot(1,1,1)
if log:
ax.set_xscale('log')
pt = (data[pt_label]) / 1000
bins = np.linspace(np.min(pt), np.max(pt), bins)
if log:
plt.xlabel(r"$\log(p_T)$ in GeV", fontsize=20)
else:
plt.xlabel(r"$p_T$ in GeV", fontsize=20)
plt.ylabel(r"Count", fontsize=20)
plt.title(r"Distribution of Jet $p_T$")
plt.xlim(np.min(pt), np.max(pt))
if weighted is None:
plt.hist(pt[data['top'] == 1], histtype='step', log = logcount, bins = bins, color = 'red', label = r"$Z'$ jets, $m_{Z'} = 1.75 \mathrm{TeV}$")
plt.hist(pt[data['top'] == 0], histtype='step', log = logcount, bins = bins, color = 'blue', label = "JZ4W Jets")
else:
plt.hist(pt[data['top'] == 1], histtype='step', log = logcount, bins = bins, color = 'red', label = r"$Z'$ jets, $m_{Z'} = \mathsf{1.75 TeV}$", weights=weighted[data['top'] == 1])
plt.hist(pt[data['top'] == 0], histtype='step', log = logcount, bins = bins, color = 'blue', label = "JZ4W Jets", weights=weighted[data['top'] == 0])
plt.legend(loc = 1)
plt.show()
def sb_plot(data, label = 'fjet_pt_flat', weighted = None, log = False, bins = 100, logcount = False):
fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
ax = plt.subplot(1,1,1)
pt = data[label]
if log:
ax.set_xscale('log')
bins = np.linspace(np.min(pt), np.max(pt), bins)
if log:
plt.xlabel(r"$\log(p_T)$ in GeV", fontsize=20)
else:
plt.xlabel(r"" + label, fontsize=20)
plt.ylabel(r"Count", fontsize=20)
plt.title(r"Distribution of Jet " + label)
plt.xlim(np.min(pt), np.max(pt))
if weighted is None:
plt.hist(pt[data['top'] == 1], histtype='step', log = logcount, bins = bins, color = 'red', label = r"$Z'$ jets, $m_{Z'} = 1.75 \mathrm{TeV}$")
plt.hist(pt[data['top'] == 0], histtype='step', log = logcount, bins = bins, color = 'blue', label = "JZ4W Jets")
else:
plt.hist(pt[data['top'] == 1], histtype='step', log = logcount, bins = bins, color = 'red', label = r"$Z'$ jets, $m_{Z'} = \mathsf{1.75 TeV}$", weights=weighted[data['top'] == 1])
plt.hist(pt[data['top'] == 0], histtype='step', log = logcount, bins = bins, color = 'blue', label = "JZ4W Jets", weights=weighted[data['top'] == 0])
plt.legend(loc = 1)
plt.show()
def pre_process(data):
data = data[data['fjet_pt_flat']/1000 < 1100]
data = data[data['fjet_pt_flat']/1000 > 550]
data = data[np.abs(data['fjet_eta_flat']) < 1.2 ]
data = data[data['fjet_Tau2_flat'] > -10]
data = data[data['fjet_Tau3_flat'] > -10]
return data
def general_roc(data, discriminant, bins = 2000):
top = data[:]['top']
qcd_total = np.sum(top == 0)
top_total = np.sum(top == 1)
bincount = bins
top_ind = data[:]['top'] == 1
qcd_ind = data[:]['top'] == 0
discriminant_bins = np.linspace(np.min(discriminant), np.max(discriminant), bins)
sig, b1 = np.histogram(discriminant[top_ind], discriminant_bins)
bkd, b1 = np.histogram(discriminant[qcd_ind], discriminant_bins)
t_efficiency = np.add.accumulate(sig[::-1]) / float(top_total)
qcd_rejection = 1 / (np.add.accumulate(bkd[::-1]) / float(qcd_total))
return t_efficiency, qcd_rejection
def general_roc_weighted(data, discriminant, weights, bins = 2000):
top = data[:]['top']
# qcd_total = np.sum(top == 0)
# top_total = np.sum(top == 1)
bincount = bins
top_ind = data[:]['top'] == 1
qcd_ind = data[:]['top'] == 0
qcd_total = np.sum(weights[qcd_ind])
top_total = np.sum(weights[top_ind])
discriminant_bins = np.linspace(np.min(discriminant), np.max(discriminant), bins)
sig, b1 = np.histogram(discriminant[top_ind], discriminant_bins, weights = weights[top_ind])
bkd, b1 = np.histogram(discriminant[qcd_ind], discriminant_bins, weights = weights[qcd_ind])
t_efficiency = np.add.accumulate(sig[::-1]) / float(top_total)
qcd_rejection = 1 / (np.add.accumulate(bkd[::-1]) / float(qcd_total))
return t_efficiency, qcd_rejection
def tagger_VI_roc(data, bins = 2000):
top = data[:]['top']
qcd_total = np.sum(top == 0)
top_total = np.sum(top == 1)
discriminant = data['fjet_Tau3_flat'] / data['fjet_Tau2_flat']
bincount = bins
top_ind = data[:]['top'] == 1
qcd_ind = data[:]['top'] == 0
d12_cut = ((data['fjet_SPLIT12_flat'] > 40000))
t21_cut = (data['fjet_Tau2_flat'] / data['fjet_Tau1_flat'] > 0.4) & (data['fjet_Tau2_flat'] / data['fjet_Tau1_flat'] < 0.9)
discriminant_bins = np.linspace(np.min(discriminant[d12_cut & t21_cut]), np.max(discriminant[d12_cut & t21_cut]), bins)
sig, b1 = np.histogram(discriminant[top_ind & d12_cut & t21_cut], discriminant_bins)
bkd, b1 = np.histogram(discriminant[qcd_ind & d12_cut & t21_cut], discriminant_bins)
t_efficiency = np.add.accumulate(sig) / float(top_total)
qcd_rejection = 1 / (np.add.accumulate(bkd) / float(qcd_total))
return t_efficiency, qcd_rejection
def tagger_VI_roc_weighted(data, weights, bins = 2000):
top = data[:]['top']
# qcd_total = np.sum(top == 0)
# top_total = np.sum(top == 1)
discriminant = data['fjet_Tau3_flat'] / data['fjet_Tau2_flat']
bincount = bins
top_ind = data[:]['top'] == 1
qcd_ind = data[:]['top'] == 0
qcd_total = np.sum(weights[qcd_ind])
top_total = np.sum(weights[top_ind])
d12_cut = ((data['fjet_SPLIT12_flat'] > 40000))
t21_cut = (data['fjet_Tau2_flat'] / data['fjet_Tau1_flat'] > 0.4) & (data['fjet_Tau2_flat'] / data['fjet_Tau1_flat'] < 0.9)
discriminant_bins = np.linspace(np.min(discriminant[d12_cut & t21_cut]), np.max(discriminant[d12_cut & t21_cut]), bins)
sig, b1 = np.histogram(discriminant[top_ind & d12_cut & t21_cut], discriminant_bins, weights = weights[top_ind & d12_cut & t21_cut])
bkd, b1 = np.histogram(discriminant[qcd_ind & d12_cut & t21_cut], discriminant_bins, weights = weights[qcd_ind & d12_cut & t21_cut])
t_efficiency = np.add.accumulate(sig) / float(top_total)
qcd_rejection = 1 / (np.add.accumulate(bkd) / float(qcd_total))
return t_efficiency, qcd_rejection
def ROC_plotter(taggerdict, min_eff = 0, max_eff = 1, linewidth = 1.4, pp = False, additional = ''):
fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
ax = fig.add_subplot(111)
plt.xlim(min_eff,max_eff)
plt.grid(b = True, which = 'minor')
plt.grid(b = True, which = 'major')
max_ = 0
for tagger, data in taggerdict.iteritems():
sel = data['efficiency'] >= .7 * min_eff
if np.max(data['rejection']) > max_:
max_ = np.max(data['rejection'])
plt.plot(data['efficiency'], data['rejection'], '-', label = r''+tagger, color = data['color'], linewidth=linewidth)
ax = plt.subplot(1,1,1)
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + ax.get_xticklabels() + ax.get_yticklabels()):
item.set_fontsize(20)
plt.ylim(1,10 ** 3)
ax.set_yscale('log')
ax.set_xlabel(r"$\epsilon_{t}$, Top efficiency ($Z', m_{Z'} = 1.75$ TeV)")
ax.set_ylabel(r"QCD (JZ4W) rejection")
plt.legend()
plt.title(r"Top Tagging Efficiency vs. Rejection, $\vert\eta\vert < 1.2, m_{Z'} = 1.75$ TeV " + additional)
if pp:
pp.savefig(fig)
else:
plt.show()
def add_tagger(name, color, tagger_pair, dictref):
dictref.update({name : {'efficiency' : tagger_pair[0], 'rejection' : tagger_pair[1], 'color' : color}})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment