Created
October 11, 2016 17:19
-
-
Save mtzl/904f26090dcc636d4cf49ec6576367db to your computer and use it in GitHub Desktop.
Orca PID
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from km3pipe.io import H5Chain\n", | |
"\n", | |
"prefix = '/home/mlotze/km3net/orca-9m/uncut/'\n", | |
"n_events = {\n", | |
" prefix + 'numuon_cc.h5': 2400,\n", | |
" prefix + 'nuelec_cc.h5': 1200,\n", | |
" prefix + 'nuelec_nc.h5': 1200,\n", | |
" prefix + 'atmuon.h5': 2400,\n", | |
"}\n", | |
"files = list(n_events.keys())\n", | |
"with H5Chain(files) as c:\n", | |
" df = c(n_events)['mva']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.style.use('foursigma')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df.fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from km3pipe.io import guess_mc_feats\n", | |
"\n", | |
"mc_feats = guess_mc_feats(df)\n", | |
"reco_feats = [feat for feat in df.columns if feat not in mc_feats]\n", | |
"reco_feats\n", | |
"X = df[reco_feats]\n", | |
"mc_info = df[mc_feats]\n", | |
"del(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from trawler import TargetGen\n", | |
"\n", | |
"mc_type = mc_info['dusj_MCtype']\n", | |
"is_cc = np.array([True] * mc_type.shape[0])\n", | |
"tg = TargetGen(is_cc, mc_type)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Counter({'cscd': 2400, 'atmu': 2400, 'track': 2400})\n", | |
"Counter({0: 2400, 1: 2400, 2: 2400})\n", | |
"['atmu' 'cscd' 'track']\n" | |
] | |
} | |
], | |
"source": [ | |
"from collections import Counter \n", | |
"from sklearn.preprocessing import LabelEncoder\n", | |
"\n", | |
"target = tg.atmu_track_cscd\n", | |
"le = LabelEncoder()\n", | |
"y = le.fit_transform(target)\n", | |
"\n", | |
"print(Counter(target))\n", | |
"print(Counter(y))\n", | |
"print(le.classes_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'atmu': 0, 'cscd': 1, 'track': 2}" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"num2tag = {num: le.inverse_transform(num) for num in np.unique(y)}\n", | |
"tag2num = {num2tag[num]: num for num, tag in num2tag.items()}\n", | |
"tag2num" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# make energy binning\n", | |
"e_min = 0\n", | |
"e_max = 100\n", | |
"nbins = 25\n", | |
"\n", | |
"binlims = np.linspace(e_min, e_max, nbins+1)\n", | |
"log_binlims = np.logspace(0, 2, nbins+1)\n", | |
"energy = mc_info['dusj_MCen']\n", | |
"log_energy = np.log10(energy) \n", | |
"r = np.sqrt(np.power(X['tf_r_x'], 2) + np.power(X['tf_r_y'], 2))\n", | |
"e_rec_cscd = X['dusj_d_E']\n", | |
"e_rec_trak = X['tf_r_NuE']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['EventID',\n", | |
" 'dusj_EventID',\n", | |
" 'dusj_GlobalWeight',\n", | |
" 'dusj_LivetimeSec',\n", | |
" 'dusj_MCCh',\n", | |
" 'dusj_MC_phi',\n", | |
" 'dusj_MC_theta',\n", | |
" 'dusj_MC_x',\n", | |
" 'dusj_MC_y',\n", | |
" 'dusj_MC_z',\n", | |
" 'dusj_MCby',\n", | |
" 'dusj_MCen',\n", | |
" 'dusj_MCpr',\n", | |
" 'dusj_MCtype',\n", | |
" 'dusj_NEvents',\n", | |
" 'dusj_OneWeight',\n", | |
" 'tf_EventID',\n", | |
" 'tf_MCen']" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sorted(mc_info.columns)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"tm = y == tag2num['track']\n", | |
"cm = y == tag2num['cscd']\n", | |
"mm = y == tag2num['atmu']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"lab = {'y': y}\n", | |
"lab['w2'] = mc_info['dusj_OneWeight']\n", | |
"lab['n_evts'] = mc_info['dusj_NEvents']\n", | |
"lab['livetime'] = mc_info['dusj_LivetimeSec']\n", | |
"lab['target'] = target\n", | |
"lab['mc_bjorken_y'] = mc_info['dusj_MCby']\n", | |
"lab['mc_energy'] = mc_info['dusj_MCen']\n", | |
"lab['log_mc_energy'] = np.log10(mc_info['dusj_MCen'])\n", | |
"lab = pd.DataFrame.from_dict(lab)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from honda_2015 import HondaFlux\n", | |
"zenith = mc_info['dusj_MC_theta']\n", | |
"hf = HondaFlux('pkg/honda_2015/honda_2015/honda2015_frejus_solarmin.h5')\n", | |
"# TODO: currently, atmu weights are = 1\n", | |
"wgt_honda = np.ones_like(lab['mc_energy'])\n", | |
"wgt_honda[tm] = hf('nu_mu', zenith[tm], energy[tm]) * lab['w2'][tm] / lab['n_evts'][tm]\n", | |
"wgt_honda[cm] = hf('nu_e', zenith[cm], energy[cm]) * lab['w2'][cm] / lab['n_evts'][cm]\n", | |
"wgt_honda[mm] = 1 / lab['livetime'][mm]\n", | |
"lab['honda'] = wgt_honda" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"for target in ['track', 'atmu', 'cscd']:\n", | |
" plt.hist('mc_energy', weights='honda', data=lab[lab.target == target], histtype='stepfilled', alpha=.5, bins=log_binlims, label=target, )\n", | |
"plt.legend()\n", | |
"#plt.yscale('log')\n", | |
"#plt.xscale('log')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"lab['mc_energy_bin'] = pd.cut(lab['mc_energy'], binlims, labels=False)\n", | |
"lab['log_mc_energy_bin'] = pd.cut(lab['log_mc_energy'], binlims, labels=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"X_train, X_test, lab_train, lab_test, = train_test_split(X, lab, stratify=lab['y'], test_size=.33, random_state=42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.metrics import roc_auc_score, make_scorer\n", | |
"clf_pipe = Pipeline([\n", | |
" ('remove_constant', VarianceThreshold()), \n", | |
" ('quantile_scaler', RobustScaler()),\n", | |
" #('pca', PCA(svd_solver='auto')),\n", | |
" ('rf', RandomForestClassifier(\n", | |
" n_estimators=319, \n", | |
" max_features='auto',\n", | |
" criterion='entropy',\n", | |
" class_weight='balanced', \n", | |
" n_jobs=-1,\n", | |
" verbose=1\n", | |
" )),\n", | |
"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#skf = StratifiedKFold(n_splits=3, shuffle=True)\n", | |
"#grid = GridSearchCV(\n", | |
"# clf_pipe, \n", | |
"# param_grid={\n", | |
"# 'rf__criterion': ['entropy', 'gini'],\n", | |
"# #'rf__max_features': ['auto', None, 'log2']\n", | |
"# }, \n", | |
"# refit=True, \n", | |
"# #scoring='roc_auc', \n", | |
"# scoring=roc_auc_score(), \n", | |
"# cv=skf, verbose=1,\n", | |
"# #fit_params={'rf__sample_weight': wgt_train,},\n", | |
"#)\n", | |
"grid = clf_pipe" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"clf = grid.fit(X_train, lab_train['y'])\n", | |
"score = clf.predict_proba(X_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"for t in ['cscd', 'track', 'atmu']:\n", | |
" lab_test[t] = score[:, tag2num[t]]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"lab_test.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"lab_test['pred'] = lab_test[['track', 'cscd', 'atmu']].idxmax(axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"binlims\n", | |
"binwidth = (binlims[1] - binlims[0])\n", | |
"x = binlims[:-1]\n", | |
"lines = 'steps-post'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#lab_test.groupby(pd.cut(lab_test['mc_energy'], binlims)).query()\n", | |
"\n", | |
"col = []\n", | |
"for i in range(binlims.shape[0] - 1):\n", | |
" low = binlims[i]\n", | |
" up = binlims[i+1]\n", | |
" dat = lab_test.query('%f <= mc_energy < %f' % (low, up))\n", | |
" c = {}\n", | |
" for pred in ['cscd', 'track', 'atmu']:\n", | |
" n = dat.query('pred == \"%s\"' % pred).honda.sum()\n", | |
" for true in ['cscd', 'track', 'atmu']:\n", | |
" res = dat.query('target == \"%s\" and pred == \"%s\"' % (true, pred))\n", | |
" try:\n", | |
" co = res.honda.sum()/n\n", | |
" except ZeroDivisionError:\n", | |
" co = 0\n", | |
" c[\"%s_as_%s\" % (true, pred)] = co\n", | |
" col.append(c)\n", | |
"col = pd.DataFrame(col)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"col" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"col.atmu_as_track.plot(ls='steps-post')\n", | |
"col.track_as_track.plot(ls='steps-post')\n", | |
"col.cscd_as_track.plot(ls='steps-post')\n", | |
"plt.title('Prediction: Track')\n", | |
"plt.ylim(-0.05, 1.05)\n", | |
"plt.legend(loc=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"col.atmu_as_cscd.plot(ls='steps-post')\n", | |
"col.track_as_cscd.plot(ls='steps-post')\n", | |
"col.cscd_as_cscd.plot(ls='steps-post')\n", | |
"plt.title('Prediction: Cascade')\n", | |
"plt.ylim(-0.05, 1.05)\n", | |
"plt.legend(loc=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(10, 3))\n", | |
"bins = (\n", | |
" np.linspace(0, 2, 11),\n", | |
" np.linspace(0, 1, 11), \n", | |
")\n", | |
"ax1.hist2d('log_mc_energy', 'track', weights='honda', data=lab_test.query('target==\"track\"'), bins=bins)\n", | |
"ax2.hist2d('log_mc_energy', 'track', weights='honda', data=lab_test.query('target==\"cscd\"'), bins=bins)\n", | |
"ax3.hist2d('log_mc_energy', 'track', weights='honda', data=lab_test.query('target==\"atmu\"'))\n", | |
"ax2.set_title('pred = track')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(10, 3))\n", | |
"bins = (\n", | |
" np.linspace(0, 2, 11),\n", | |
" np.linspace(0, 1, 11), \n", | |
")\n", | |
"ax1.hist2d('log_mc_energy', 'cscd', weights='honda', data=lab_test.query('target==\"track\"'), bins=bins)\n", | |
"ax2.hist2d('log_mc_energy', 'cscd', weights='honda', data=lab_test.query('target==\"cscd\"'), bins=bins)\n", | |
"ax3.hist2d('log_mc_energy', 'cscd', weights='honda', data=lab_test.query('target==\"atmu\"'))\n", | |
"ax2.set_title('pred = cscd')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(10, 3))\n", | |
"bins = (\n", | |
" np.linspace(0, 2, 11),\n", | |
" np.linspace(0, 1, 11), \n", | |
")\n", | |
"ax1.hist2d('log_mc_energy', 'atmu', weights='honda', data=lab_test.query('target==\"track\"'), bins=bins)\n", | |
"ax2.hist2d('log_mc_energy', 'atmu', weights='honda', data=lab_test.query('target==\"cscd\"'), bins=bins)\n", | |
"ax3.hist2d('log_mc_energy', 'atmu', weights='honda', data=lab_test.query('target==\"atmu\"'))\n", | |
"ax2.set_title('pred = atmu')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.title('''Prediction: \"It's a Track\"''')\n", | |
"norm = np.sum(np.array((track_as_track, cscd_as_track)), axis=0)\n", | |
"plt.plot(x, track_as_track/norm, label=r'$\\nu_\\mu$', ls=lines)\n", | |
"plt.plot(x, cscd_as_track/norm, label=r'$\\nu_e$', ls=lines)\n", | |
"plt.ylim(-0.05, 1.05)\n", | |
"plt.ylabel('Fraction of \"Track\" population')\n", | |
"plt.legend(loc=0)\n", | |
"plt.xlabel('Energy / GeV')\n", | |
"plt.savefig('plots/pred_track_composition.svg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.title('''Prediction: \"It's a Cascade\"''')\n", | |
"norm = np.sum(np.array((track_as_cscd, cscd_as_cscd)), axis=0)\n", | |
"plt.plot(x, track_as_cscd/norm, label=r'$\\nu_\\mu$', ls=lines)\n", | |
"plt.plot(x, cscd_as_cscd/norm, label=r'$\\nu_e$', ls=lines)\n", | |
"plt.ylim(-0.05, 1.05)\n", | |
"plt.ylabel('Fraction of \"Cascade\" population')\n", | |
"plt.legend(loc=0)\n", | |
"plt.xlabel('Energy / GeV')\n", | |
"plt.savefig('plots/pred_cscd_composition.svg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.title(r'''How get $\\nu_\\mu$ labeled?''')\n", | |
"norm = np.sum(np.array((track_as_track, track_as_cscd)), axis=0)\n", | |
"plt.plot(x, track_as_track/norm, label='Tagged: Track', ls=lines)\n", | |
"plt.plot(x, track_as_cscd/norm, label='Tagged: Cascade', ls=lines)\n", | |
"plt.ylim(-0.05, 1.05)\n", | |
"plt.ylabel(r'% of $\\nu_\\mu$ ')\n", | |
"plt.legend(loc=0)\n", | |
"plt.xlabel('Energy / GeV')\n", | |
"plt.savefig('plots/nu_mu_predictions.svg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.title(r'''How get $\\nu_e$ labeled?''')\n", | |
"norm = np.sum(np.array((cscd_as_track, cscd_as_cscd)), axis=0)\n", | |
"plt.plot(x, cscd_as_track/norm, label='Tagged: Track', ls=lines)\n", | |
"plt.plot(x, cscd_as_cscd/norm, label='Tagged: Cascade', ls=lines)\n", | |
"plt.ylim(-0.05, 1.05)\n", | |
"plt.ylabel(r'% of $\\nu_e$ ')\n", | |
"plt.legend(loc=0)\n", | |
"plt.xlabel('Energy / GeV')\n", | |
"plt.savefig('plots/nu_e_and_NC_predictions.svg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.plot(x, aucs, ls=lines)\n", | |
"plt.ylim(0, 1)\n", | |
"plt.title('Overall (Cut-Independent) PID Performance') \n", | |
"plt.ylabel('Area under ROC Curve (AUC)')\n", | |
"plt.xlabel('Energy / GeV')\n", | |
"plt.savefig('plots/auc.svg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"print('Hi!')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.title(r'How do $\\nu$ get labeled?')\n", | |
"plt.hist(score[y_test == 0], bins=np.linspace(0, 1, 50+1), histtype='stepfilled', alpha=.5, label=r'$\\nu_e$')\n", | |
"plt.hist(score[y_test == 1], bins=np.linspace(0, 1, 50+1), histtype='stepfilled', alpha=.5, label=r'$\\nu_\\mu$')\n", | |
"plt.legend()\n", | |
"plt.xlabel('Confidence of \"Event is Track\"')\n", | |
"plt.yscale('log')\n", | |
"plt.savefig('plots/rf_score.svg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"print(sorted(mc_test.columns))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"out = mc_test.copy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame.from_dict({\n", | |
" 'zenith_cscd': X_test['dusj_d_theta'],\n", | |
" 'zenith_track': np.arccos(-1 * X_test['tf_r_dir_z']) * 180 / np.pi,\n", | |
" 'zenith_mc': mc_test['dusj_MC_theta'],\n", | |
" 'azimuth_cscd': X_test['dusj_d_phi'],\n", | |
" 'azimuth_track': np.arctan2(X_test['tf_r_dir_x'], X_test['tf_r_dir_y']) * 180 / np.pi,\n", | |
" 'azimuth_mc': mc_test['dusj_MC_phi'],\n", | |
" 'energy_cscd': X_test['dusj_d_E'],\n", | |
" 'energy_track': X_test['tf_r_NuE'],\n", | |
" 'energy_mc': mc_test['dusj_MCen'], \n", | |
" 'bjorken_y': X_test['tf_r_by'], \n", | |
" 'bjorken_y_mc': mc_test['dusj_MCby'],\n", | |
" 'proba_track': score, \n", | |
" 'w2': mc_test['dusj_OneWeight'],\n", | |
" 'n_evts': mc_test['dusj_NEvents'],\n", | |
" 'livetime_sec': mc_test['dusj_LivetimeSec'],\n", | |
" 'type_mc': mc_test['dusj_MCtype'],\n", | |
" 'true_label': target_test,\n", | |
" 'true_y': y_test,\n", | |
" 'flux_honda2015': honda_test,\n", | |
" })" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#h5 = h5py.File('df_test.h5', 'a')\n", | |
"#h5.create_dataset('pid_out', data=df.to_records())\n", | |
"#h5.close()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"e_max = 10" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"cut = 'energy_mc < %d and true_label == \"cscd\"' % e_max\n", | |
"plt.hexbin('proba_track', 'bjorken_y_mc', data=df.query(cut), gridsize=10, extent=(0, 1, 0, 1))\n", | |
"plt.ylabel('bjorken_y_mc')\n", | |
"plt.xlabel('confidence \"Event is Track\"')\n", | |
"plt.title(cut)\n", | |
"plt.colorbar()\n", | |
"plt.savefig('plots/pid_vs_mc_bjorken-y_cscd_%d.png' % e_max)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"cut = 'energy_mc < %d and true_label == \"track\"' % e_max\n", | |
"plt.hexbin('proba_track', 'bjorken_y_mc', data=df.query(cut), gridsize=10, extent=(0, 1, 0, 1))\n", | |
"plt.ylabel('bjorken_y_mc')\n", | |
"plt.xlabel('confidence \"Event is Track\"')\n", | |
"plt.title(cut)\n", | |
"plt.colorbar()\n", | |
"plt.savefig('plots/pid_vs_mc_bjorken-y_track_%d.png' % e_max)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
" #variances = clf.best_estimator_.steps[0][1].variances_\n", | |
"#rf = clf.best_estimator_.steps[-1][1]\n", | |
"#imp = pd.DataFrame.from_dict({\n", | |
"# 'features': X_test.columns[variances != 0],\n", | |
"# 'importance': rf.feature_importances_\n", | |
"#})\n", | |
"#imp.sort_values(by='importance')[::-1].head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax_cscd, ax_track) = plt.subplots(ncols=2, figsize=(10, 4))\n", | |
"from matplotlib.colors import LogNorm\n", | |
"lims = (0, 1, 0, 2)\n", | |
"p1 = ax_cscd.hexbin(score[y_test == 0], np.log10(mc_test['dusj_MCen'][y_test == 0]), gridsize=25, norm=LogNorm(), extent=lims)\n", | |
"p2 = ax_track.hexbin(score[y_test == 1], np.log10(mc_test['dusj_MCen'][y_test == 1]), gridsize=25, norm=LogNorm(), extent=lims)\n", | |
"fig.suptitle('Unweighted Input')\n", | |
"plt.colorbar(p1, ax=ax_cscd)\n", | |
"plt.colorbar(p2, ax=ax_track)\n", | |
"ax_cscd.set_title('true cscd')\n", | |
"ax_track.set_title('true track')\n", | |
"ax_cscd.set_ylabel('log10(Energy)')\n", | |
"ax_cscd.set_xlabel('Probability(Track)')\n", | |
"plt.tight_layout()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax_cscd, ax_track) = plt.subplots(ncols=2, figsize=(10, 4))\n", | |
"from matplotlib.colors import LogNorm\n", | |
"lims = (0, 1, 0, 2)\n", | |
"p1 = ax_cscd.hexbin(score[y_test == 0], np.log10(mc_test['dusj_MCen'][y_test == 0]), gridsize=25, norm=LogNorm(), extent=lims)\n", | |
"p2 = ax_track.hexbin(score[y_test == 1], np.log10(mc_test['dusj_MCen'][y_test == 1]), gridsize=25, norm=LogNorm(), extent=lims)\n", | |
"fig.suptitle('Unweighted Input')\n", | |
"plt.colorbar(p1, ax=ax_cscd)\n", | |
"plt.colorbar(p2, ax=ax_track)\n", | |
"ax_cscd.set_title('true cscd')\n", | |
"ax_track.set_title('true track')\n", | |
"ax_cscd.set_ylabel('log10(Energy)')\n", | |
"ax_cscd.set_xlabel('Probability(Track)')\n", | |
"plt.tight_layout()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3))\n", | |
"p1 = ax1.hexbin(pred_atmu, np.log10(X['tf_r_NuE'][Y_abs == 13]), gridsize=25, norm=LogNorm()\n", | |
" )\n", | |
" #, extent=lims\n", | |
"p2 = ax2.hexbin(pred_atmu, np.log10(X['dusj_d_E'][Y_abs == 13]), gridsize=25, norm=LogNorm()\n", | |
" #, extent=lims\n", | |
" )\n", | |
"plt.colorbar(p1, ax=ax1)\n", | |
"plt.colorbar(p2, ax=ax2)\n", | |
"ax1.set_ylabel('log10(NuE)')\n", | |
"ax2.set_ylabel('log10(d_E)')\n", | |
"ax1.set_xlabel('Probability(Track)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax_cscd, ax_trak) = plt.subplots(ncols=2, figsize=(8, 3))\n", | |
"ax_cscd.hist(energy[y == 1], bins=lin_bins, histtype='stepfilled', alpha=.5, label='mc')\n", | |
"ax_cscd.hist(e_rec_cscd[y == 1], bins=lin_bins, histtype='stepfilled', alpha=.5, label='shower reco')\n", | |
"ax_trak.hist(energy[y == 2], bins=lin_bins, histtype='stepfilled', alpha=.5, label='mc')\n", | |
"ax_trak.hist(e_rec_trak[y == 2], bins=lin_bins, histtype='stepfilled', alpha=.5, label='track reco')\n", | |
"ax_cscd.set_title('cscd')\n", | |
"ax_trak.set_title('nu_mu cc')\n", | |
"for ax in ax_trak, ax_cscd:\n", | |
" #ax.set_xscale('log')\n", | |
" ax.set_xlabel('Energy / GeV')\n", | |
" ax.legend()\n", | |
" #ax.set_yscale('log')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3))\n", | |
"lims = (0, 100, 0, 100)\n", | |
"p1 = ax1.hexbin(energy[Y_abs == 12], e_rec_cscd[Y_abs == 12], gridsize=25, norm=LogNorm(), extent=lims)\n", | |
"p2 = ax2.hexbin(energy[Y_abs == 12], e_rec_cscd[Y_abs == 12], gridsize=25, extent=lims)\n", | |
"plt.colorbar(p1, ax=ax1)\n", | |
"plt.colorbar(p2, ax=ax2)\n", | |
"for ax in ax1, ax2:\n", | |
" ax.set_xlabel('e_mc')\n", | |
" ax.set_ylabel('e_reco')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3))\n", | |
"p1 = ax1.hexbin(energy[Y_abs == 14], e_rec_trak[Y_abs == 14], gridsize=25, norm=LogNorm())\n", | |
"p2 = ax2.hexbin(energy[Y_abs == 14], e_rec_trak[Y_abs == 14], gridsize=25,)\n", | |
"plt.colorbar(p1, ax=ax1)\n", | |
"plt.colorbar(p2, ax=ax2)\n", | |
"for ax in ax1, ax2:\n", | |
" ax.set_xlabel('e_mc')\n", | |
" ax.set_ylabel('e_reco')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment