Created
March 30, 2016 20:39
-
-
Save Uberi/94433d777285944bc1913486e333418b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n", | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"import ujson as json\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import plotly.plotly as py\n", | |
"import IPython\n", | |
"import functools\n", | |
"\n", | |
"from __future__ import division\n", | |
"from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties\n", | |
"\n", | |
"%pylab inline\n", | |
"IPython.core.pylabtools.figsize(16, 7)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Nightly-channel pings for submission date 20160127. The build_id pair is\n", | |
"# presumably a (min, max) range -- confirm against moztelemetry's get_pings.\n", | |
"pings = get_pings(\n", | |
"    sc,\n", | |
"    channel=\"nightly\",\n", | |
"    submission_date=\"20160127\",\n", | |
"    build_id=(\"20160127000000\", \"20160127999999\")\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def gecko_activity(ping):\n", | |
"    \"\"\"Extract the Gecko thread's activity and hang histograms from one ping.\n", | |
"\n", | |
"    Scans the threadHangStats list of the ping payload for the first thread\n", | |
"    named Gecko and returns a dict holding two pandas Series, both indexed by\n", | |
"    integer bucket and sorted by bucket:\n", | |
"\n", | |
"    * gecko_activity -- the thread's activity histogram.\n", | |
"    * gecko_hangs -- the bucket-wise sum of every hang histogram of that\n", | |
"      thread (float64; empty when the thread lists no hangs).\n", | |
"\n", | |
"    Returns an empty dict when the payload has no threadHangStats entry or\n", | |
"    when no thread is named Gecko.\n", | |
"    \"\"\"\n", | |
"    def to_series(values):\n", | |
"        # Bucket keys go through int() so that sort_index() orders them numerically.\n", | |
"        # list()/comprehension keep this independent of dict views and lazy map().\n", | |
"        return pd.Series(list(values.values()), index=[int(key) for key in values.keys()])\n", | |
"\n", | |
"    for thread in ping[\"payload\"].get(\"threadHangStats\", []):\n", | |
"        if thread[\"name\"] != \"Gecko\":\n", | |
"            continue\n", | |
"\n", | |
"        # Explicit dtype: a bare pd.Series() warns on newer pandas and no longer\n", | |
"        # defaults to float64 there.\n", | |
"        hangs = pd.Series(dtype=\"float64\")\n", | |
"        for hang in thread[\"hangs\"]:\n", | |
"            hangs = hangs.add(to_series(hang[\"histogram\"][\"values\"]), fill_value=0)\n", | |
"\n", | |
"        # Only the first Gecko thread is reported.\n", | |
"        return {\n", | |
"            \"gecko_activity\": to_series(thread[\"activity\"][\"values\"]).sort_index(),\n", | |
"            \"gecko_hangs\": hangs.sort_index(),\n", | |
"        }\n", | |
"\n", | |
"    return {}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Apply gecko_activity to every ping, then take five of the results to inspect.\n", | |
"bhr_summary = pings.map(gecko_activity)\n", | |
"sample = bhr_summary.take(5)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's have a look at a few individual submissions:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Sample 0 activity:\n", | |
"0 0\n", | |
"1 825\n", | |
"3 2\n", | |
"7 1\n", | |
"15 3\n", | |
"31 13\n", | |
"63 58\n", | |
"127 10\n", | |
"255 2\n", | |
"511 3\n", | |
"1023 0\n", | |
"dtype: int64\n", | |
"Sample 0 hangs:\n", | |
"127 0\n", | |
"255 2\n", | |
"511 3\n", | |
"1023 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 1 activity:\n", | |
"0 0\n", | |
"1 8114\n", | |
"3 207\n", | |
"7 182\n", | |
"15 199\n", | |
"31 116\n", | |
"63 38\n", | |
"127 21\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 4\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 1 hangs:\n", | |
"127 0\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 4\n", | |
"2047 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 2 activity:\n", | |
"0 0\n", | |
"1 7390\n", | |
"3 207\n", | |
"7 181\n", | |
"15 199\n", | |
"31 114\n", | |
"63 35\n", | |
"127 21\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 3\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 2 hangs:\n", | |
"127 0\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 3\n", | |
"2047 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 3 activity:\n", | |
"0 0\n", | |
"1 422\n", | |
"3 5\n", | |
"7 3\n", | |
"15 5\n", | |
"31 11\n", | |
"63 9\n", | |
"127 6\n", | |
"511 1\n", | |
"1023 1\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 3 hangs:\n", | |
"255 0\n", | |
"511 1\n", | |
"1023 1\n", | |
"2047 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 4 activity:\n", | |
"0 0\n", | |
"1 5244\n", | |
"3 22\n", | |
"7 34\n", | |
"15 92\n", | |
"31 246\n", | |
"63 152\n", | |
"127 75\n", | |
"255 36\n", | |
"511 11\n", | |
"1023 10\n", | |
"2047 5\n", | |
"8191 1\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 4 hangs:\n", | |
"127 0\n", | |
"255 34\n", | |
"511 13\n", | |
"1023 10\n", | |
"2047 5\n", | |
"4095 0\n", | |
"8191 1\n", | |
"16383 1\n", | |
"32767 0\n", | |
"dtype: float64\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Parenthesised single-argument prints produce the same output under the\n", | |
"# Python 2 print statement.\n", | |
"for position, summary in enumerate(sample):\n", | |
"    print(\"Sample {} activity:\".format(position))\n", | |
"    print(summary[\"gecko_activity\"])\n", | |
"    print(\"Sample {} hangs:\".format(position))\n", | |
"    print(summary[\"gecko_hangs\"])\n", | |
"    print(\"\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"How many submissions have mismatching BHR reports? The cells below print the fraction of submissions whose activity and hang histograms match." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def compare(ping, min_bucket=255):\n", | |
"    \"\"\"Check that the activity and hang histograms agree on the larger buckets.\n", | |
"\n", | |
"    ping is a dict with two bucket-indexed pandas Series stored under the\n", | |
"    keys gecko_activity and gecko_hangs. The two are aligned on the union of\n", | |
"    their buckets, a bucket absent from either side counts as 0, and only\n", | |
"    buckets >= min_bucket are compared.\n", | |
"\n", | |
"    Returns True when every compared bucket holds the same count (also when\n", | |
"    there is nothing to compare), otherwise False. Raises KeyError if either\n", | |
"    key is missing from ping.\n", | |
"    \"\"\"\n", | |
"    activity = ping[\"gecko_activity\"]\n", | |
"    hangs = ping[\"gecko_hangs\"]\n", | |
"\n", | |
"    buckets = sorted(b for b in set(activity.index).union(hangs.index) if b >= min_bucket)\n", | |
"\n", | |
"    # Zero-fill BOTH sides: leaving NaN in one series makes a bucket that is\n", | |
"    # simply absent there compare unequal to an explicit 0 count in the other.\n", | |
"    activity = activity.reindex(buckets).fillna(0)\n", | |
"    hangs = hangs.reindex(buckets).fillna(0)\n", | |
"\n", | |
"    return bool(np.all(activity.values == hangs.values))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.527938517179\n" | |
] | |
} | |
], | |
"source": [ | |
"# Fraction of summaries for which compare() reports matching histograms.\n", | |
"total = bhr_summary.count()\n", | |
"equal = bhr_summary.filter(compare).count()\n", | |
"# float() makes this a true division on its own, without relying on the\n", | |
"# __future__ division import executed in another cell.\n", | |
"print(float(equal) / total)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's have a look at a few individual submissions with mismatching BHR reports:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Keep only the summaries that compare() rejects and take five of them.\n", | |
"sample = bhr_summary.filter(lambda summary: not compare(summary)).take(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Sample 0 activity:\n", | |
"0 0\n", | |
"1 5244\n", | |
"3 22\n", | |
"7 34\n", | |
"15 92\n", | |
"31 246\n", | |
"63 152\n", | |
"127 75\n", | |
"255 36\n", | |
"511 11\n", | |
"1023 10\n", | |
"2047 5\n", | |
"8191 1\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 0 hangs:\n", | |
"127 0\n", | |
"255 34\n", | |
"511 13\n", | |
"1023 10\n", | |
"2047 5\n", | |
"4095 0\n", | |
"8191 1\n", | |
"16383 1\n", | |
"32767 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 1 activity:\n", | |
"0 0\n", | |
"1 68162\n", | |
"3 2223\n", | |
"7 609\n", | |
"15 667\n", | |
"31 1620\n", | |
"63 3040\n", | |
"127 364\n", | |
"255 232\n", | |
"511 85\n", | |
"1023 70\n", | |
"2047 5\n", | |
"4095 4\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 1 hangs:\n", | |
"127 0\n", | |
"255 229\n", | |
"511 87\n", | |
"1023 71\n", | |
"2047 5\n", | |
"4095 4\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 2 activity:\n", | |
"0 0\n", | |
"1 66004\n", | |
"3 2201\n", | |
"7 585\n", | |
"15 643\n", | |
"31 1595\n", | |
"63 3009\n", | |
"127 358\n", | |
"255 227\n", | |
"511 85\n", | |
"1023 69\n", | |
"2047 5\n", | |
"4095 3\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 2 hangs:\n", | |
"127 0\n", | |
"255 224\n", | |
"511 87\n", | |
"1023 70\n", | |
"2047 5\n", | |
"4095 3\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 3 activity:\n", | |
"0 0\n", | |
"1 33775\n", | |
"3 945\n", | |
"7 691\n", | |
"15 1026\n", | |
"31 879\n", | |
"63 398\n", | |
"127 106\n", | |
"255 46\n", | |
"511 24\n", | |
"1023 8\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 3 hangs:\n", | |
"127 0\n", | |
"255 46\n", | |
"511 21\n", | |
"1023 9\n", | |
"2047 1\n", | |
"4095 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 4 activity:\n", | |
"0 0\n", | |
"1 68201\n", | |
"3 1648\n", | |
"7 1456\n", | |
"15 1707\n", | |
"31 2454\n", | |
"63 1016\n", | |
"127 232\n", | |
"255 83\n", | |
"511 46\n", | |
"1023 15\n", | |
"2047 1\n", | |
"4095 0\n", | |
"dtype: int64\n", | |
"Sample 4 hangs:\n", | |
"127 0\n", | |
"255 79\n", | |
"511 46\n", | |
"1023 16\n", | |
"2047 2\n", | |
"4095 0\n", | |
"dtype: float64\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Parenthesised single-argument prints produce the same output under the\n", | |
"# Python 2 print statement.\n", | |
"for position, summary in enumerate(sample):\n", | |
"    print(\"Sample {} activity:\".format(position))\n", | |
"    print(summary[\"gecko_activity\"])\n", | |
"    print(\"Sample {} hangs:\".format(position))\n", | |
"    print(summary[\"gecko_hangs\"])\n", | |
"    print(\"\")" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment