Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Uberi/64be9b7946849fa45e50 to your computer and use it in GitHub Desktop.
Save Uberi/64be9b7946849fa45e50 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### e10s-beta46-noapz: Slow script analysis"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"import IPython\n",
"\n",
"from __future__ import division\n",
"from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties\n",
"from montecarlino import grouped_permutation_test\n",
"\n",
"%pylab inline\n",
"IPython.core.pylabtools.figsize(16, 7)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook borrows heavily from these previously made notebooks:\n",
"\n",
"* https://github.com/vitillo/e10s_analyses/blob/master/aurora/e10s_experiment.ipynb\n",
"* https://gist.github.com/chutten/4c0c2cefdaf7837f1176"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"96"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sc.defaultParallelism"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get data from beta 46 e10s experiment 3:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Read the experiment's Parquet data from S3 through the Spark SQL context.\n",
"# NOTE(review): the '[email protected]' path segment below looks like an e-mail-obfuscation\n",
"# artifact of the web page this notebook was copied from; the real segment is presumably\n",
"# the experiment id -- confirm the path before re-running.\n",
"dataset = sqlContext.read.load(\"s3://telemetry-parquet/e10s_experiment/[email protected]/v20160320\", \"parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"858572"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Count experiment/control samples:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"191676"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.filter(dataset[\"experimentBranch\"] == \"experiment-no-addons\").count()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"207830"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.filter(dataset[\"experimentBranch\"] == \"control-no-addons\").count()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def row_2_ping(row):\n",
"    \"\"\"Rebuild a ping-shaped dict from one row of the experiment dataset.\n",
"\n",
"    The JSON-encoded columns of ``row`` are decoded and nested under\n",
"    ``payload``; ``e10s`` is True only for rows whose ``experimentBranch``\n",
"    equals \"experiment-no-addons\".\n",
"    \"\"\"\n",
"    json_columns = (\"simpleMeasurements\", \"histograms\", \"keyedHistograms\",\n",
"                    \"childPayloads\", \"threadHangStats\")\n",
"    payload = {}\n",
"    for column in json_columns:\n",
"        payload[column] = json.loads(getattr(row, column))\n",
"    in_experiment = row.experimentBranch == \"experiment-no-addons\"\n",
"    return {\"payload\": payload, \"e10s\": in_experiment}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Keep only rows from the two no-addons branches and convert each one with row_2_ping.\n",
"subset = dataset.rdd.filter(lambda r: r.experimentBranch in [\"experiment-no-addons\", \"control-no-addons\"]).map(row_2_ping)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Pull the slow-script histogram, the uptime and the e10s flag out of every ping and\n",
"# collect the results into a pandas DataFrame with one column per property.\n",
"frame = pd.DataFrame(get_pings_properties(subset, [\"payload/histograms/SLOW_SCRIPT_PAGE_COUNT\", \"payload/simpleMeasurements/uptime\", \"e10s\"]).collect())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"e10s 399506\n",
"payload/histograms/SLOW_SCRIPT_PAGE_COUNT 5644\n",
"payload/simpleMeasurements/uptime 399506\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame.count()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def median_diff(xs, ys):\n",
"    \"\"\"Return the median of ``xs`` minus the median of ``ys``.\"\"\"\n",
"    median_xs = np.median(xs)\n",
"    median_ys = np.median(ys)\n",
"    return median_xs - median_ys\n",
"\n",
"def normalize_uptime_hour(frame):\n",
"    \"\"\"Scale every column of ``frame`` by uptime and drop the uptime column.\n",
"\n",
"    Rows whose uptime is not greater than zero are discarded first. Each\n",
"    remaining column is divided by the uptime column and multiplied by 60.\n",
"    A new frame is returned; ``frame`` itself is not modified.\n",
"    \"\"\"\n",
"    uptime_column = \"payload/simpleMeasurements/uptime\"\n",
"    with_uptime = frame[frame[uptime_column] > 0]\n",
"    uptime = with_uptime[uptime_column]\n",
"    # Metric per hour -- the factor 60 presumably means uptime is in minutes; TODO confirm\n",
"    per_hour = 60 * with_uptime.apply(lambda column: column / uptime)\n",
"    return per_hour.drop(uptime_column, axis=1)\n",
"\n",
"def compare_scalars(metric, scalars1, scalars2):\n",
"    \"\"\"Print the median difference between two samples and a permutation-test result.\n",
"\n",
"    ``metric`` is only used as a label in the first printed line, which shows\n",
"    median(scalars1) - median(scalars2) followed by both medians. The second\n",
"    line prints the value returned by ``grouped_permutation_test`` for\n",
"    ``median_diff`` over the two samples with ``num_samples=10000``.\n",
"    \"\"\"\n",
"    median1 = np.median(scalars1)\n",
"    median2 = np.median(scalars2)\n",
"    summary = \"Median difference in {} is {:.2f}, ({:.2f}, {:.2f}).\".format(\n",
"        metric, median1 - median2, median1, median2)\n",
"    print summary\n",
"\n",
"    chance_probability = grouped_permutation_test(median_diff, [scalars1, scalars2], num_samples=10000)\n",
"    print \"The probablity of this effect being purely by chance is {:.2f}.\".format(chance_probability)\n",
"\n",
"# Split the pings by branch and normalise each half with normalize_uptime_hour.\n",
"e10s = normalize_uptime_hour(frame[frame[\"e10s\"] == True])\n",
"none10s = normalize_uptime_hour(frame[frame[\"e10s\"] == False])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median difference in payload/histograms/SLOW_SCRIPT_PAGE_COUNT per hour is 0.10, (0.45, 0.35).\n",
"The probablity of this effect being purely by chance is 0.00.\n"
]
}
],
"source": [
"# Compare the normalised SLOW_SCRIPT_PAGE_COUNT values of the two branches,\n",
"# leaving out pings that have no value for this histogram.\n",
"histogram = \"payload/histograms/SLOW_SCRIPT_PAGE_COUNT\"\n",
"notices_e10s = e10s[histogram].dropna()\n",
"notices_none10s = none10s[histogram].dropna()\n",
"compare_scalars(histogram + \" per hour\", notices_e10s, notices_none10s)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(3002, 2629)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notices_e10s.size, notices_none10s.size"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~mozilla/378.embed\" height=\"525\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One histogram trace per branch; partial opacity keeps both visible when they overlap.\n",
"# NOTE(review): notices_* come from the uptime-normalised frames, so the x axis shows\n",
"# per-hour rates even though the titles below say \"Value\".\n",
"trace1 = go.Histogram(\n",
" x=notices_e10s,\n",
" opacity=0.75,\n",
" name='e10s'\n",
")\n",
"trace2 = go.Histogram(\n",
" x=notices_none10s,\n",
" opacity=0.75,\n",
" name='non-e10s'\n",
")\n",
"data = [trace1, trace2]\n",
"# barmode='overlay' asks plotly to draw the two histograms on top of each other.\n",
"layout = go.Layout(\n",
" barmode='overlay',\n",
" title='User Count vs. SLOW_SCRIPT_PAGE_COUNT Value',\n",
" xaxis=dict(title='SLOW_SCRIPT_PAGE_COUNT Value'),\n",
" yaxis=dict(title='User Count')\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"# py is plotly.plotly: the figure is displayed through the plot.ly service under this filename.\n",
"py.iplot(fig, filename='overlaid-histogram')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The plot doesn't show up on GitHub Gist, so here's a link as well: https://plot.ly/~mozilla/378/"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"percentile e10s none10s diff diff%\n",
" 0 0.008477 > 0.006872 0.001605 23.354055\n",
" 1 0.020709 > 0.018360 0.002349 12.794922\n",
" 2 0.028188 > 0.022058 0.006131 27.793530\n",
" 3 0.036898 > 0.029954 0.006944 23.181962\n",
" 4 0.044651 > 0.035457 0.009194 25.930293\n",
" 5 0.053641 > 0.040717 0.012924 31.742381\n",
" 6 0.066530 > 0.043165 0.023364 54.127694\n",
" 7 0.077362 > 0.050332 0.027030 53.702332\n",
" 8 0.087209 > 0.058907 0.028302 48.045572\n",
" 9 0.094964 > 0.066966 0.027998 41.809449\n",
" 10 0.101695 > 0.078493 0.023202 29.559180\n",
" 11 0.105820 > 0.084626 0.021194 25.044092\n",
" 12 0.109290 > 0.091285 0.018004 19.722877\n",
" 13 0.111940 > 0.098488 0.013452 13.658488\n",
" 14 0.114723 > 0.103434 0.011289 10.913934\n",
" 15 0.117188 > 0.106421 0.010767 10.117132\n",
" 16 0.120724 > 0.110294 0.010430 9.456740\n",
" 17 0.124224 > 0.112150 0.012074 10.766046\n",
" 18 0.127709 > 0.114294 0.013414 11.736469\n",
" 19 0.131004 > 0.116279 0.014725 12.663755\n",
" 20 0.136426 > 0.118110 0.018316 15.507144\n",
" 21 0.144231 > 0.120482 0.023749 19.711538\n",
" 22 0.148148 > 0.122951 0.025197 20.493827\n",
" 23 0.156250 > 0.126800 0.029450 23.225679\n",
" 24 0.160531 > 0.130639 0.029892 22.881036\n",
" 25 0.169074 > 0.134831 0.034242 25.396313\n",
" 26 0.177515 > 0.139302 0.038213 27.431814\n",
" 27 0.185549 > 0.145829 0.039719 27.236898\n",
" 28 0.192308 > 0.149194 0.043113 28.897329\n",
" 29 0.207104 > 0.155039 0.052065 33.582186\n",
" 30 0.215440 > 0.163043 0.052396 32.136445\n",
" 31 0.220994 > 0.168067 0.052927 31.491713\n",
" 32 0.226415 > 0.174398 0.052017 29.826373\n",
" 33 0.236529 > 0.183061 0.053467 29.207463\n",
" 34 0.245902 > 0.192308 0.053594 27.868852\n",
" 35 0.257511 > 0.199867 0.057644 28.840973\n",
" 36 0.265699 > 0.206925 0.058774 28.403270\n",
" 37 0.276734 > 0.213039 0.063695 29.898545\n",
" 38 0.287081 > 0.219780 0.067301 30.622010\n",
" 39 0.296137 > 0.225496 0.070641 31.326819\n",
" 40 0.309598 > 0.237154 0.072444 30.547164\n",
" 41 0.319149 > 0.243902 0.075246 30.851064\n",
" 42 0.335196 > 0.251046 0.084150 33.519553\n",
" 43 0.348837 > 0.258621 0.090217 34.883721\n",
" 44 0.363636 > 0.272727 0.090909 33.333333\n",
" 45 0.375000 > 0.284360 0.090640 31.875000\n",
" 46 0.386760 > 0.295517 0.091244 30.875930\n",
" 47 0.404442 > 0.311140 0.093302 29.987151\n",
" 48 0.418820 > 0.324324 0.094496 29.136230\n",
" 49 0.437956 > 0.337079 0.100878 29.927007\n",
" 50 0.447761 > 0.346821 0.100940 29.104478\n",
" 51 0.465116 > 0.359281 0.105835 29.457364\n",
" 52 0.483871 > 0.374486 0.109385 29.209367\n",
" 53 0.500000 > 0.387097 0.112903 29.166667\n",
" 54 0.526316 > 0.405405 0.120910 29.824561\n",
" 55 0.545455 > 0.422535 0.122919 29.090909\n",
" 56 0.571429 > 0.441176 0.130252 29.523810\n",
" 57 0.594059 > 0.457876 0.136183 29.742286\n",
" 58 0.618557 > 0.468750 0.149807 31.958763\n",
" 59 0.643777 > 0.487805 0.155972 31.974249\n",
" 60 0.666667 > 0.504202 0.162465 32.222222\n",
" 61 0.697674 > 0.530973 0.166701 31.395349\n",
" 62 0.730358 > 0.559078 0.171279 30.635969\n",
" 63 0.759494 > 0.582524 0.176969 30.379747\n",
" 64 0.779221 > 0.612245 0.166976 27.272727\n",
" 65 0.816327 > 0.645161 0.171165 26.530612\n",
" 66 0.857143 > 0.683688 0.173455 25.370413\n",
" 67 0.882353 > 0.705882 0.176471 25.000000\n",
" 68 0.912623 > 0.740741 0.171883 23.204156\n",
" 69 0.967742 > 0.785137 0.182605 23.257777\n",
" 70 1.032123 > 0.833333 0.198789 23.854727\n",
" 71 1.081081 > 0.857143 0.223938 26.126126\n",
" 72 1.132075 > 0.902256 0.229820 25.471698\n",
" 73 1.184974 > 0.941842 0.243132 25.814485\n",
" 74 1.224490 > 0.983607 0.240883 24.489796\n",
" 75 1.304348 > 1.052632 0.251716 23.913043\n",
" 76 1.367577 > 1.096566 0.271011 24.714549\n",
" 77 1.463415 > 1.166516 0.296899 25.451759\n",
" 78 1.538462 > 1.224490 0.313972 25.641026\n",
" 79 1.578947 > 1.304348 0.274600 21.052632\n",
" 80 1.711854 > 1.363636 0.348218 25.535968\n",
" 81 1.818182 > 1.428571 0.389610 27.272727\n",
" 82 1.935484 > 1.538462 0.397022 25.806452\n",
" 83 2.083898 > 1.621622 0.462276 28.507046\n",
" 84 2.307692 > 1.752430 0.555263 31.685302\n",
" 85 2.500000 > 1.893987 0.606013 31.996658\n",
" 86 2.727273 > 2.068966 0.658307 31.818182\n",
" 87 3.000000 > 2.222222 0.777778 35.000000\n",
" 88 3.157895 > 2.400000 0.757895 31.578947\n",
" 89 3.428571 > 2.608696 0.819876 31.428571\n",
" 90 3.750000 > 2.866359 0.883641 30.827974\n",
" 91 4.000000 > 3.157895 0.842105 26.666667\n",
" 92 4.285714 > 3.529412 0.756303 21.428571\n",
" 93 5.000000 > 4.000000 1.000000 25.000000\n",
" 94 5.800922 > 4.757053 1.043869 21.943609\n",
" 95 6.666667 > 5.850000 0.816667 13.960114\n",
" 96 8.571429 > 6.666667 1.904762 28.571429\n",
" 97 11.982857 > 8.571429 3.411429 39.800000\n",
" 98 15.000000 > 12.000000 3.000000 25.000000\n",
" 99 20.000000 <= 20.000000 0.000000 0.000000\n",
" 100 136.000000 > 60.000000 76.000000 126.666667\n"
]
}
],
"source": [
"# Compare the two samples percentile by percentile (0 through 100).\n",
"# All 101 percentiles are computed with a single np.percentile call per sample\n",
"# instead of re-processing both samples on every loop iteration.\n",
"percentiles = range(101)\n",
"experiment_values = np.percentile(notices_e10s, percentiles)\n",
"control_values = np.percentile(notices_none10s, percentiles)\n",
"\n",
"print \"percentile e10s none10s diff diff%\"\n",
"for i, experiment, control in zip(percentiles, experiment_values, control_values):\n",
"    diff = experiment - control\n",
"    print \"{:>5} {:>10f} {} {:>10f} {:>10f} {:>10f}\".format(\n",
"        i,\n",
"        experiment,\n",
"        \">\" if experiment > control else \"<=\",\n",
"        control,\n",
"        diff,\n",
"        100 * diff / control  # difference as a percentage of the control value\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment