Last active
August 29, 2015 14:23
-
-
Save wiso/a1f8f1bdd4bd7def48fd to your computer and use it in GitHub Desktop.
photon jet sample statistics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import ROOT\n", | |
"import rootnotes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ENTRIES_MC_1000_2000_TOTAL = 102922\n", | |
"ENTRIES_MC_2000_4000_TOTAL = 1376" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Use a minimum bin size of 250 GeV, as in the previous analysis." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"binsize = 250 # GeV\n", | |
"lefts = np.arange(2000, 7000, 250)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" M range events (1000-2000) events (2000-4000) fraction (1000-2000) fraction (2000-4000) \n", | |
"====================================================================================================\n", | |
"[ 2000 - 2250] 27206.000 4.000 26.434% 0.291%\n", | |
"[ 2250 - 2500] 21828.000 2.000 21.208% 0.145%\n", | |
"[ 2500 - 2750] 14010.000 8.000 13.612% 0.581%\n", | |
"[ 2750 - 3000] 8427.000 14.000 8.188% 1.017%\n", | |
"[ 3000 - 3250] 4923.000 37.000 4.783% 2.689%\n", | |
"[ 3250 - 3500] 3140.000 44.000 3.051% 3.198%\n", | |
"[ 3500 - 3750] 1893.000 66.000 1.839% 4.797%\n", | |
"[ 3750 - 4000] 1074.000 130.000 1.044% 9.448%\n", | |
"[ 4000 - 4250] 564.000 267.000 0.548% 19.404%\n", | |
"[ 4250 - 4500] 305.000 242.000 0.296% 17.587%\n", | |
"[ 4500 - 4750] 202.000 185.000 0.196% 13.445%\n", | |
"[ 4750 - 5000] 104.000 118.000 0.101% 8.576%\n", | |
"[ 5000 - 5250] 55.000 89.000 0.053% 6.468%\n", | |
"[ 5250 - 5500] 37.000 61.000 0.036% 4.433%\n", | |
"[ 5500 - 5750] 19.000 32.000 0.018% 2.326%\n", | |
"[ 5750 - 6000] 12.000 17.000 0.012% 1.235%\n", | |
"[ 6000 - 6250] 6.000 14.000 0.006% 1.017%\n", | |
"[ 6250 - 6500] 0.000 11.000 0.000% 0.799%\n", | |
"[ 6500 - 6750] 0.000 6.000 0.000% 0.436%\n", | |
"[ 6750 - 7000] 1.000 4.000 0.001% 0.291%\n" | |
] | |
} | |
], | |
"source": [ | |
"print \"{:^18s} {:^22s} {:^22s} {:^22s} {:^22s}\".format('M range', 'events (1000-2000)', 'events (2000-4000)', 'fraction (1000-2000)', 'fraction (2000-4000)')\n", | |
"print \"=\" * 100\n", | |
"events_1000_2000 = [27206, 21828, 14010, 8427, 4923, 3140, 1893, 1074, 564, 305, 202, 104, 55, 37, 19, 12, 6, 0, 0, 1]\n", | |
"events_2000_4000 = [4, 2, 8, 14, 37, 44, 66, 130, 267, 242, 185, 118, 89, 61, 32, 17, 14, 11, 6, 4]\n", | |
"for l, e1, e2 in zip(lefts, events_1000_2000, events_2000_4000):\n", | |
" fraction_events_1000_2000 = float(e1) / ENTRIES_MC_1000_2000_TOTAL\n", | |
" fraction_events_2000_4000 = float(e2) / ENTRIES_MC_2000_4000_TOTAL\n", | |
" print \"[{:5.0f} - {:5.0f}] {:20.3f} {:20.3f} {:20.3%} {:20.3%}\".format(l, l + binsize, e1, e2, fraction_events_1000_2000, fraction_events_2000_4000)\n", | |
"events_1000_2000 = np.array(events_1000_2000, dtype=float)\n", | |
"events_2000_4000 = np.array(events_2000_4000, dtype=float)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
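"## Minimum events needed to have a small statistical error\n", | |
"\n", | |
"Assuming the relative statistical error on a bin with $N$ events is $1/\\sqrt{N}$, a target relative error $\\epsilon$ requires $N = 1/\\epsilon^2$ events." | |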
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" statistical error minimum events\n", | |
"================================================================================\n", | |
" 30% 11.11\n", | |
" 10% 100.00\n", | |
" 5% 400.00\n" | |
] | |
} | |
], | |
"source": [ | |
"statistical_errors = np.array([30., 10., 5.]) / 100.\n", | |
"minimum_events = 1. / statistical_errors ** 2\n", | |
"print \" statistical error minimum events\"\n", | |
"print \"=\" * 80\n", | |
"for s, m in zip(statistical_errors, minimum_events):\n", | |
" print \"{:10.0%} {:5.2f}\".format(s, m)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Ratio with respect to the present simulation" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
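"For each bin, how many events do we need to reach a target statistical error, relative to the number of events in the present simulation? In other words, by what factor do we need to increase the statistics? Bins with no events in the present simulation give an infinite ratio." | |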
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/usr/lib/python2.7/site-packages/IPython/kernel/__main__.py:1: RuntimeWarning: divide by zero encountered in divide\n", | |
" if __name__ == '__main__':\n" | |
] | |
} | |
], | |
"source": [ | |
"ratios_1000_2000 = minimum_events / events_1000_2000[:, np.newaxis]\n", | |
"ratios_2000_4000 = minimum_events / events_2000_4000[:, np.newaxis]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Sample 1000-2000" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 30.00% 10.00% 5.00%\n", | |
"================================================================================\n", | |
"[ 2000 - 2250] 0.00 0.00 0.01\n", | |
"[ 2250 - 2500] 0.00 0.00 0.02\n", | |
"[ 2500 - 2750] 0.00 0.01 0.03\n", | |
"[ 2750 - 3000] 0.00 0.01 0.05\n", | |
"[ 3000 - 3250] 0.00 0.02 0.08\n", | |
"[ 3250 - 3500] 0.00 0.03 0.13\n", | |
"[ 3500 - 3750] 0.01 0.05 0.21\n", | |
"[ 3750 - 4000] 0.01 0.09 0.37\n", | |
"[ 4000 - 4250] 0.02 0.18 0.71\n", | |
"[ 4250 - 4500] 0.04 0.33 1.31\n", | |
"[ 4500 - 4750] 0.06 0.50 1.98\n", | |
"[ 4750 - 5000] 0.11 0.96 3.85\n", | |
"[ 5000 - 5250] 0.20 1.82 7.27\n", | |
"[ 5250 - 5500] 0.30 2.70 10.81\n", | |
"[ 5500 - 5750] 0.58 5.26 21.05\n", | |
"[ 5750 - 6000] 0.93 8.33 33.33\n", | |
"[ 6000 - 6250] 1.85 16.67 66.67\n", | |
"[ 6250 - 6500] inf inf inf\n", | |
"[ 6500 - 6750] inf inf inf\n", | |
"[ 6750 - 7000] 11.11 100.00 400.00\n" | |
] | |
} | |
], | |
"source": [ | |
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n", | |
"print \"=\" * 80\n", | |
"for l, row in zip(lefts, ratios_1000_2000):\n", | |
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.2f}\".format(_) for _ in row])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Sample 2000-4000" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 30.00% 10.00% 5.00%\n", | |
"================================================================================\n", | |
"[ 2000 - 2250] 2.78 25.00 100.00\n", | |
"[ 2250 - 2500] 5.56 50.00 200.00\n", | |
"[ 2500 - 2750] 1.39 12.50 50.00\n", | |
"[ 2750 - 3000] 0.79 7.14 28.57\n", | |
"[ 3000 - 3250] 0.30 2.70 10.81\n", | |
"[ 3250 - 3500] 0.25 2.27 9.09\n", | |
"[ 3500 - 3750] 0.17 1.52 6.06\n", | |
"[ 3750 - 4000] 0.09 0.77 3.08\n", | |
"[ 4000 - 4250] 0.04 0.37 1.50\n", | |
"[ 4250 - 4500] 0.05 0.41 1.65\n", | |
"[ 4500 - 4750] 0.06 0.54 2.16\n", | |
"[ 4750 - 5000] 0.09 0.85 3.39\n", | |
"[ 5000 - 5250] 0.12 1.12 4.49\n", | |
"[ 5250 - 5500] 0.18 1.64 6.56\n", | |
"[ 5500 - 5750] 0.35 3.12 12.50\n", | |
"[ 5750 - 6000] 0.65 5.88 23.53\n", | |
"[ 6000 - 6250] 0.79 7.14 28.57\n", | |
"[ 6250 - 6500] 1.01 9.09 36.36\n", | |
"[ 6500 - 6750] 1.85 16.67 66.67\n", | |
"[ 6750 - 7000] 2.78 25.00 100.00\n" | |
] | |
} | |
], | |
"source": [ | |
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n", | |
"print \"=\" * 80\n", | |
"for l, row in zip(lefts, ratios_2000_4000):\n", | |
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.2f}\".format(_) for _ in row])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Number of events we want (after selection)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Sample 1000-2000" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 30.00% 10.00% 5.00%\n", | |
"================================================================================\n", | |
"[ 2000 - 2250] 42 378 1513\n", | |
"[ 2250 - 2500] 52 472 1886\n", | |
"[ 2500 - 2750] 82 735 2939\n", | |
"[ 2750 - 3000] 136 1221 4885\n", | |
"[ 3000 - 3250] 232 2091 8363\n", | |
"[ 3250 - 3500] 364 3278 13111\n", | |
"[ 3500 - 3750] 604 5437 21748\n", | |
"[ 3750 - 4000] 1065 9583 38332\n", | |
"[ 4000 - 4250] 2028 18249 72994\n", | |
"[ 4250 - 4500] 3749 33745 134980\n", | |
"[ 4500 - 4750] 5661 50951 203806\n", | |
"[ 4750 - 5000] 10996 98963 395854\n", | |
"[ 5000 - 5250] 20792 187131 748524\n", | |
"[ 5250 - 5500] 30908 278168 1112670\n", | |
"[ 5500 - 5750] 60188 541695 2166779\n", | |
"[ 5750 - 6000] 95298 857683 3430733\n", | |
"[ 6000 - 6250] 190596 1715367 6861467\n", | |
"[ 6250 - 6500] inf inf inf\n", | |
"[ 6500 - 6750] inf inf inf\n", | |
"[ 6750 - 7000] 1143578 10292200 41168800\n" | |
] | |
} | |
], | |
"source": [ | |
"events_desidered = ratios_1000_2000 * ENTRIES_MC_1000_2000_TOTAL\n", | |
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n", | |
"print \"=\" * 80\n", | |
"for l, row in zip(lefts, events_desidered):\n", | |
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.0f}\".format(_) for _ in row])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Sample 2000-4000" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 30.00% 10.00% 5.00%\n", | |
"================================================================================\n", | |
"[ 2000 - 2250] 3822 34400 137600\n", | |
"[ 2250 - 2500] 7644 68800 275200\n", | |
"[ 2500 - 2750] 1911 17200 68800\n", | |
"[ 2750 - 3000] 1092 9829 39314\n", | |
"[ 3000 - 3250] 413 3719 14876\n", | |
"[ 3250 - 3500] 347 3127 12509\n", | |
"[ 3500 - 3750] 232 2085 8339\n", | |
"[ 3750 - 4000] 118 1058 4234\n", | |
"[ 4000 - 4250] 57 515 2061\n", | |
"[ 4250 - 4500] 63 569 2274\n", | |
"[ 4500 - 4750] 83 744 2975\n", | |
"[ 4750 - 5000] 130 1166 4664\n", | |
"[ 5000 - 5250] 172 1546 6184\n", | |
"[ 5250 - 5500] 251 2256 9023\n", | |
"[ 5500 - 5750] 478 4300 17200\n", | |
"[ 5750 - 6000] 899 8094 32376\n", | |
"[ 6000 - 6250] 1092 9829 39314\n", | |
"[ 6250 - 6500] 1390 12509 50036\n", | |
"[ 6500 - 6750] 2548 22933 91733\n", | |
"[ 6750 - 7000] 3822 34400 137600\n" | |
] | |
} | |
], | |
"source": [ | |
"events_desidered = ratios_2000_4000 * ENTRIES_MC_2000_4000_TOTAL\n", | |
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n", | |
"print \"=\" * 80\n", | |
"for l, row in zip(lefts, events_desidered):\n", | |
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.0f}\".format(_) for _ in row])" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment