Created
March 23, 2016 21:10
-
-
Save Uberi/64be9b7946849fa45e50 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### e10s-beta46-noapz: Slow script analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n", | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n", | |
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n" | |
] | |
} | |
], | |
"source": [ | |
"import ujson as json\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import plotly.plotly as py\n", | |
"import plotly.graph_objs as go\n", | |
"import IPython\n", | |
"\n", | |
"from __future__ import division\n", | |
"from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties\n", | |
"from montecarlino import grouped_permutation_test\n", | |
"\n", | |
"%pylab inline\n", | |
"IPython.core.pylabtools.figsize(16, 7)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This notebook borrows heavily from these previously made notebooks:\n", | |
"\n", | |
"* https://github.com/vitillo/e10s_analyses/blob/master/aurora/e10s_experiment.ipynb\n", | |
"* https://gist.github.com/chutten/4c0c2cefdaf7837f1176" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"96" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sc.defaultParallelism" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Get data from beta 46 e10s experiment 3:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# NOTE(review): the experiment id in this path had been replaced by an e-mail obfuscation placeholder; it is restored here from the notebook title -- confirm against the bucket.\n", | |
"dataset = sqlContext.read.load(\"s3://telemetry-parquet/e10s_experiment/e10s-beta46-noapz@experiments.mozilla.org/v20160320\", \"parquet\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"858572" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset.count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Count experiment/control samples:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"191676" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset.filter(dataset[\"experimentBranch\"] == \"experiment-no-addons\").count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"207830" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset.filter(dataset[\"experimentBranch\"] == \"control-no-addons\").count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def row_2_ping(row):\n", | |
"    \"\"\"Build a ping-shaped dict from one dataset row.\n", | |
"\n", | |
"    Each JSON-encoded column listed below is decoded and stored under\n", | |
"    \"payload\"; \"e10s\" is True only when the row's experimentBranch is\n", | |
"    \"experiment-no-addons\".\n", | |
"    \"\"\"\n", | |
"    payload_columns = (\"simpleMeasurements\", \"histograms\", \"keyedHistograms\",\n", | |
"                       \"childPayloads\", \"threadHangStats\")\n", | |
"    payload = {}\n", | |
"    for column in payload_columns:\n", | |
"        payload[column] = json.loads(getattr(row, column))\n", | |
"    return {\"payload\": payload,\n", | |
"            \"e10s\": row.experimentBranch == \"experiment-no-addons\"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Keep only the two no-addons branches and convert each remaining row with row_2_ping.\n", | |
"subset = (dataset.rdd\n", | |
"          .filter(lambda r: r.experimentBranch in [\"experiment-no-addons\", \"control-no-addons\"])\n", | |
"          .map(row_2_ping))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Collect the slow-script histogram, the uptime and the e10s flag of the subset into a local DataFrame.\n", | |
"frame = pd.DataFrame(\n", | |
"    get_pings_properties(\n", | |
"        subset,\n", | |
"        [\"payload/histograms/SLOW_SCRIPT_PAGE_COUNT\",\n", | |
"         \"payload/simpleMeasurements/uptime\",\n", | |
"         \"e10s\"]\n", | |
"    ).collect()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"e10s 399506\n", | |
"payload/histograms/SLOW_SCRIPT_PAGE_COUNT 5644\n", | |
"payload/simpleMeasurements/uptime 399506\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"frame.count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def median_diff(xs, ys):\n", | |
"    \"\"\"Return the median of `xs` minus the median of `ys`.\"\"\"\n", | |
"    median_xs = np.median(xs)\n", | |
"    median_ys = np.median(ys)\n", | |
"    return median_xs - median_ys\n", | |
"\n", | |
"def normalize_uptime_hour(frame):\n", | |
"    \"\"\"Return a copy of `frame` with every column scaled by 60 / uptime.\n", | |
"\n", | |
"    Rows whose uptime is not positive are discarded first, and the uptime\n", | |
"    column itself is removed from the result.\n", | |
"    \"\"\"\n", | |
"    uptime_column = \"payload/simpleMeasurements/uptime\"\n", | |
"    positive = frame[frame[uptime_column] > 0]\n", | |
"    uptime = positive[uptime_column]\n", | |
"    # Metric per hour (assumes uptime is recorded in minutes -- TODO confirm)\n", | |
"    per_hour = 60 * positive.apply(lambda column: column / uptime)\n", | |
"    return per_hour.drop(uptime_column, axis=1)\n", | |
"\n", | |
"def compare_scalars(metric, scalars1, scalars2):\n", | |
"    \"\"\"Print the median difference between two samples and the permutation-test result.\n", | |
"\n", | |
"    metric   -- label inserted into the first printed line\n", | |
"    scalars1 -- first sample\n", | |
"    scalars2 -- second sample; its median is subtracted from that of scalars1\n", | |
"    \"\"\"\n", | |
"    median1, median2 = np.median(scalars1), np.median(scalars2)  # computed once, reused below\n", | |
"    print \"Median difference in {} is {:.2f}, ({:.2f}, {:.2f}).\".format(metric, median1 - median2, median1, median2)\n", | |
"    probability = grouped_permutation_test(median_diff, [scalars1, scalars2], num_samples=10000)\n", | |
"    print \"The probability of this effect being purely by chance is {:.2f}.\".format(probability)\n", | |
"\n", | |
"# Split the pings by branch and convert each side with normalize_uptime_hour.\n", | |
"e10s = normalize_uptime_hour(frame[frame[\"e10s\"] == True])\n", | |
"none10s = normalize_uptime_hour(frame[frame[\"e10s\"] == False])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Median difference in payload/histograms/SLOW_SCRIPT_PAGE_COUNT per hour is 0.10, (0.45, 0.35).\n", | |
"The probablity of this effect being purely by chance is 0.00.\n" | |
] | |
} | |
], | |
"source": [ | |
"histogram = \"payload/histograms/SLOW_SCRIPT_PAGE_COUNT\"\n", | |
"# Rows with no value for this histogram are left out of both samples.\n", | |
"notices_e10s, notices_none10s = e10s[histogram].dropna(), none10s[histogram].dropna()\n", | |
"compare_scalars(\"{} per hour\".format(histogram), notices_e10s, notices_none10s)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(3002, 2629)" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"notices_e10s.size, notices_none10s.size" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~mozilla/378.embed\" height=\"525\" width=\"100%\"></iframe>" | |
], | |
"text/plain": [ | |
"<plotly.tools.PlotlyDisplay object>" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Overlay the two samples; both were already converted to per-hour rates, so the labels say so.\n", | |
"trace1 = go.Histogram(\n", | |
"    x=notices_e10s,\n", | |
"    opacity=0.75,\n", | |
"    name='e10s'\n", | |
")\n", | |
"trace2 = go.Histogram(\n", | |
"    x=notices_none10s,\n", | |
"    opacity=0.75,\n", | |
"    name='non-e10s'\n", | |
")\n", | |
"data = [trace1, trace2]\n", | |
"layout = go.Layout(\n", | |
"    barmode='overlay',\n", | |
"    title='User Count vs. SLOW_SCRIPT_PAGE_COUNT per Hour',\n", | |
"    xaxis=dict(title='SLOW_SCRIPT_PAGE_COUNT per Hour'),\n", | |
"    yaxis=dict(title='User Count')\n", | |
")\n", | |
"fig = go.Figure(data=data, layout=layout)\n", | |
"py.iplot(fig, filename='overlaid-histogram')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The plot doesn't show up on GitHub Gist, so here's a link as well: https://plot.ly/~mozilla/378/" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"percentile e10s none10s diff diff%\n", | |
" 0 0.008477 > 0.006872 0.001605 23.354055\n", | |
" 1 0.020709 > 0.018360 0.002349 12.794922\n", | |
" 2 0.028188 > 0.022058 0.006131 27.793530\n", | |
" 3 0.036898 > 0.029954 0.006944 23.181962\n", | |
" 4 0.044651 > 0.035457 0.009194 25.930293\n", | |
" 5 0.053641 > 0.040717 0.012924 31.742381\n", | |
" 6 0.066530 > 0.043165 0.023364 54.127694\n", | |
" 7 0.077362 > 0.050332 0.027030 53.702332\n", | |
" 8 0.087209 > 0.058907 0.028302 48.045572\n", | |
" 9 0.094964 > 0.066966 0.027998 41.809449\n", | |
" 10 0.101695 > 0.078493 0.023202 29.559180\n", | |
" 11 0.105820 > 0.084626 0.021194 25.044092\n", | |
" 12 0.109290 > 0.091285 0.018004 19.722877\n", | |
" 13 0.111940 > 0.098488 0.013452 13.658488\n", | |
" 14 0.114723 > 0.103434 0.011289 10.913934\n", | |
" 15 0.117188 > 0.106421 0.010767 10.117132\n", | |
" 16 0.120724 > 0.110294 0.010430 9.456740\n", | |
" 17 0.124224 > 0.112150 0.012074 10.766046\n", | |
" 18 0.127709 > 0.114294 0.013414 11.736469\n", | |
" 19 0.131004 > 0.116279 0.014725 12.663755\n", | |
" 20 0.136426 > 0.118110 0.018316 15.507144\n", | |
" 21 0.144231 > 0.120482 0.023749 19.711538\n", | |
" 22 0.148148 > 0.122951 0.025197 20.493827\n", | |
" 23 0.156250 > 0.126800 0.029450 23.225679\n", | |
" 24 0.160531 > 0.130639 0.029892 22.881036\n", | |
" 25 0.169074 > 0.134831 0.034242 25.396313\n", | |
" 26 0.177515 > 0.139302 0.038213 27.431814\n", | |
" 27 0.185549 > 0.145829 0.039719 27.236898\n", | |
" 28 0.192308 > 0.149194 0.043113 28.897329\n", | |
" 29 0.207104 > 0.155039 0.052065 33.582186\n", | |
" 30 0.215440 > 0.163043 0.052396 32.136445\n", | |
" 31 0.220994 > 0.168067 0.052927 31.491713\n", | |
" 32 0.226415 > 0.174398 0.052017 29.826373\n", | |
" 33 0.236529 > 0.183061 0.053467 29.207463\n", | |
" 34 0.245902 > 0.192308 0.053594 27.868852\n", | |
" 35 0.257511 > 0.199867 0.057644 28.840973\n", | |
" 36 0.265699 > 0.206925 0.058774 28.403270\n", | |
" 37 0.276734 > 0.213039 0.063695 29.898545\n", | |
" 38 0.287081 > 0.219780 0.067301 30.622010\n", | |
" 39 0.296137 > 0.225496 0.070641 31.326819\n", | |
" 40 0.309598 > 0.237154 0.072444 30.547164\n", | |
" 41 0.319149 > 0.243902 0.075246 30.851064\n", | |
" 42 0.335196 > 0.251046 0.084150 33.519553\n", | |
" 43 0.348837 > 0.258621 0.090217 34.883721\n", | |
" 44 0.363636 > 0.272727 0.090909 33.333333\n", | |
" 45 0.375000 > 0.284360 0.090640 31.875000\n", | |
" 46 0.386760 > 0.295517 0.091244 30.875930\n", | |
" 47 0.404442 > 0.311140 0.093302 29.987151\n", | |
" 48 0.418820 > 0.324324 0.094496 29.136230\n", | |
" 49 0.437956 > 0.337079 0.100878 29.927007\n", | |
" 50 0.447761 > 0.346821 0.100940 29.104478\n", | |
" 51 0.465116 > 0.359281 0.105835 29.457364\n", | |
" 52 0.483871 > 0.374486 0.109385 29.209367\n", | |
" 53 0.500000 > 0.387097 0.112903 29.166667\n", | |
" 54 0.526316 > 0.405405 0.120910 29.824561\n", | |
" 55 0.545455 > 0.422535 0.122919 29.090909\n", | |
" 56 0.571429 > 0.441176 0.130252 29.523810\n", | |
" 57 0.594059 > 0.457876 0.136183 29.742286\n", | |
" 58 0.618557 > 0.468750 0.149807 31.958763\n", | |
" 59 0.643777 > 0.487805 0.155972 31.974249\n", | |
" 60 0.666667 > 0.504202 0.162465 32.222222\n", | |
" 61 0.697674 > 0.530973 0.166701 31.395349\n", | |
" 62 0.730358 > 0.559078 0.171279 30.635969\n", | |
" 63 0.759494 > 0.582524 0.176969 30.379747\n", | |
" 64 0.779221 > 0.612245 0.166976 27.272727\n", | |
" 65 0.816327 > 0.645161 0.171165 26.530612\n", | |
" 66 0.857143 > 0.683688 0.173455 25.370413\n", | |
" 67 0.882353 > 0.705882 0.176471 25.000000\n", | |
" 68 0.912623 > 0.740741 0.171883 23.204156\n", | |
" 69 0.967742 > 0.785137 0.182605 23.257777\n", | |
" 70 1.032123 > 0.833333 0.198789 23.854727\n", | |
" 71 1.081081 > 0.857143 0.223938 26.126126\n", | |
" 72 1.132075 > 0.902256 0.229820 25.471698\n", | |
" 73 1.184974 > 0.941842 0.243132 25.814485\n", | |
" 74 1.224490 > 0.983607 0.240883 24.489796\n", | |
" 75 1.304348 > 1.052632 0.251716 23.913043\n", | |
" 76 1.367577 > 1.096566 0.271011 24.714549\n", | |
" 77 1.463415 > 1.166516 0.296899 25.451759\n", | |
" 78 1.538462 > 1.224490 0.313972 25.641026\n", | |
" 79 1.578947 > 1.304348 0.274600 21.052632\n", | |
" 80 1.711854 > 1.363636 0.348218 25.535968\n", | |
" 81 1.818182 > 1.428571 0.389610 27.272727\n", | |
" 82 1.935484 > 1.538462 0.397022 25.806452\n", | |
" 83 2.083898 > 1.621622 0.462276 28.507046\n", | |
" 84 2.307692 > 1.752430 0.555263 31.685302\n", | |
" 85 2.500000 > 1.893987 0.606013 31.996658\n", | |
" 86 2.727273 > 2.068966 0.658307 31.818182\n", | |
" 87 3.000000 > 2.222222 0.777778 35.000000\n", | |
" 88 3.157895 > 2.400000 0.757895 31.578947\n", | |
" 89 3.428571 > 2.608696 0.819876 31.428571\n", | |
" 90 3.750000 > 2.866359 0.883641 30.827974\n", | |
" 91 4.000000 > 3.157895 0.842105 26.666667\n", | |
" 92 4.285714 > 3.529412 0.756303 21.428571\n", | |
" 93 5.000000 > 4.000000 1.000000 25.000000\n", | |
" 94 5.800922 > 4.757053 1.043869 21.943609\n", | |
" 95 6.666667 > 5.850000 0.816667 13.960114\n", | |
" 96 8.571429 > 6.666667 1.904762 28.571429\n", | |
" 97 11.982857 > 8.571429 3.411429 39.800000\n", | |
" 98 15.000000 > 12.000000 3.000000 25.000000\n", | |
" 99 20.000000 <= 20.000000 0.000000 0.000000\n", | |
" 100 136.000000 > 60.000000 76.000000 126.666667\n" | |
] | |
} | |
], | |
"source": [ | |
"# Compare the two samples percentile by percentile (0 through 100).\n", | |
"print \"percentile e10s none10s diff diff%\"\n", | |
"percentiles = range(101)\n", | |
"for i, experiment, control in zip(percentiles,\n", | |
"                                  np.percentile(notices_e10s, percentiles),\n", | |
"                                  np.percentile(notices_none10s, percentiles)):\n", | |
"    print \"{:>5} {:>10f} {} {:>10f} {:>10f} {:>10f}\".format(\n", | |
"        i, experiment, \">\" if experiment > control else \"<=\", control,\n", | |
"        experiment - control, 100 * (experiment - control) / control)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment