Skip to content

Instantly share code, notes, and snippets.

@gregglind
Created November 3, 2015 21:45
Show Gist options
  • Save gregglind/9dcac65c228e7ab09d1f to your computer and use it in GitHub Desktop.
Save gregglind/9dcac65c228e7ab09d1f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flash updates in a month\n",
"1. ayup.\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is a very brief introduction to Spark and Telemetry in Python. You should have a look at the [tutorial](https://gist.github.com/vitillo/25a20b7c8685c0c82422) in Scala and the associated [talk](http://www.slideshare.net/RobertoAgostinoVitil/spark-meets-telemetry) if you are interested in learning more about Spark."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"import calendar\n",
"import collections\n",
"import time\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.plotly as py\n",
"import ujson as json\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n",
"\n",
"%pylab inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of Flash upgrade events in the last month\n",
"\n",
"(for rebecca weiss, who is amazing)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-34-aa309596518a>, line 12)",
"output_type": "error",
"traceback": [
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-34-aa309596518a>\"\u001b[1;36m, line \u001b[1;32m12\u001b[0m\n\u001b[1;33m flash == .... whatever\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"pings = get_pings(sc, app=\"Firefox\", channel=\"release\", submission_date=(\"20150901\", \"20150930\"), fraction=0.001)\n",
"\n",
"# Cohort window as whole days since the Unix epoch (UTC); the bounds are compared with a plugin's updateDay.\n",
"# calendar.timegm reads the parsed date as UTC, so the result does not depend on the machine's timezone.\n",
"cohortStart = calendar.timegm(time.strptime(\"01 Sep 15\", \"%d %b %y\")) // 86400\n",
"cohortEnd = calendar.timegm(time.strptime(\"30 Sep 15\", \"%d %b %y\")) // 86400\n",
"\n",
"\n",
"def hasFlash(ping):\n",
"    \"\"\"Return the ping's 'Shockwave Flash' plugin record, or None when there is none.\n",
"\n",
"    The record is looked up under ping['environment']['addons']['activePlugins'].\n",
"    A missing key anywhere along that path (or a plugin without a 'name') yields None.\n",
"    \"\"\"\n",
"    try:\n",
"        plugins = ping['environment']['addons']['activePlugins']\n",
"        matches = [plugin for plugin in plugins if plugin['name'] == 'Shockwave Flash']\n",
"    except KeyError:\n",
"        return None\n",
"    return matches[0] if matches else None\n",
"\n",
"\n",
"def updatedInTheMonth(ping):\n",
"    \"\"\"Return True when the ping's Flash plugin has an updateDay inside [cohortStart, cohortEnd].\"\"\"\n",
"    flash = hasFlash(ping)\n",
"    if not flash:\n",
"        return False\n",
"    # ping['creationDate']: client time, made the record\n",
"    # ping['meta/submissionDate']: server received\n",
"    # Rescaling the client clock against the server clock is not done here:\n",
"    ## pretend the clocks are right.\n",
"    return cohortStart <= flash['updateDay'] <= cohortEnd\n",
"\n",
"\n",
"def maybeFlashUpdate(ping):\n",
"    \"\"\"Return (clientId, updateDay) for a Flash update inside the cohort window, else None.\"\"\"\n",
"    if not updatedInTheMonth(ping):\n",
"        return None\n",
"\n",
"    #return (ping['clientId'], flash['version'])\n",
"    return (ping['clientId'], hasFlash(ping)['updateDay'])\n",
"\n",
"\n",
"# filter(bool) drops the None results so they are not counted as an event.\n",
"numEvents = pings.map(maybeFlashUpdate).filter(bool).distinct().count()  # numerator\n",
"numClients = get_one_ping_per_client(pings).count()  # denominator\n",
"\n",
"# The plan, in pseudo-code:\n",
"#   for ping in pings:\n",
"#       if has flash\n",
"#           if flash update is in this month\n",
"#               inc!\n",
"(numEvents, numClients)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"16"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sc.defaultParallelism"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[{u'blocklisted': False,\n",
" u'clicktoplay': False,\n",
" u'description': u'Shockwave Flash 18.0 r0',\n",
" u'disabled': False,\n",
" u'mimeTypes': [u'application/x-shockwave-flash',\n",
" u'application/futuresplash'],\n",
" u'name': u'Shockwave Flash',\n",
" u'updateDay': 16660,\n",
" u'version': u'18.0.0.232'}]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the Flash plugin record(s) of the first sampled ping to see which fields are available.\n",
"[plugin for plugin in pings.first()['environment']['addons']['activePlugins'] if plugin['name'] == 'Shockwave Flash']\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Release-channel Firefox pings submitted between 2015-09-22 and 2015-10-01, fetched with fraction=0.001.\n",
"pings = get_pings(\n",
"    sc,\n",
"    app=\"Firefox\",\n",
"    channel=\"release\",\n",
"    submission_date=(\"20150922\", \"20151001\"),\n",
"    fraction=0.001\n",
")\n",
"\n",
"def dayNumber(dateText):\n",
"    \"\"\"Convert a '%d %b %y' date string to whole days since the Unix epoch (UTC).\n",
"\n",
"    calendar.timegm reads the parsed date as UTC. time.mktime would apply the\n",
"    machine's local timezone instead, which can shift the result by a day.\n",
"    \"\"\"\n",
"    return calendar.timegm(time.strptime(dateText, \"%d %b %y\")) // 86400\n",
"\n",
"\n",
"# 41 release\n",
"# The bounds are day numbers so they compare directly with a plugin's updateDay.\n",
"cohortStart = dayNumber(\"22 Sep 15\")\n",
"cohortEnd = dayNumber(\"1 Oct 15\")\n",
"\n",
"\n",
"def hasFlash(ping):\n",
"    \"\"\"Return the ping's first 'Shockwave Flash' plugin record, or None.\n",
"\n",
"    Plugins are read from ping['environment']['addons']['activePlugins'].\n",
"    None is returned when no plugin has that name, or when any key on the\n",
"    way (including a plugin's 'name') is missing.\n",
"    \"\"\"\n",
"    try:\n",
"        plugins = ping['environment']['addons']['activePlugins']\n",
"        matches = [plugin for plugin in plugins if plugin['name'] == 'Shockwave Flash']\n",
"    except KeyError:\n",
"        return None\n",
"\n",
"    return matches[0] if matches else None\n",
"\n",
"\n",
"def maybeFlashUpdate(ping):\n",
"    \"\"\"Return (clientId, updateDay) when the ping's Flash plugin was updated in the cohort window.\n",
"\n",
"    Returns None when hasFlash finds no plugin record, or when the record's\n",
"    updateDay lies outside [cohortStart, cohortEnd].\n",
"    \"\"\"\n",
"    plugin = hasFlash(ping)\n",
"    if not plugin:\n",
"        return None\n",
"\n",
"    ## pretend the clocks are right.\n",
"    # correct would be creationTime, meta/submission\n",
"    updateDay = plugin['updateDay']\n",
"    if cohortStart <= updateDay <= cohortEnd:\n",
"        #return (ping['clientId'], flash['version'])\n",
"        return (ping['clientId'], updateDay)\n",
"\n",
"    return None\n",
"\n",
" \n",
"# Distinct (clientId, updateDay) pairs whose Flash update falls inside the cohort window.\n",
"# maybeFlashUpdate already returns None for pings without Flash, so filter(bool) alone drops them.\n",
"numEvents = pings.map(maybeFlashUpdate).filter(bool).distinct().count()  # numerator\n",
"\n",
"# Distinct clients among the sampled pings.\n",
"numClients = pings.map(lambda ping: ping['clientId']).distinct().count()  # denominator\n",
"\n",
"# Print counts for both lines; the original collected and printed the full list of events.\n",
"print('numevents %d' % numEvents)\n",
"print('numclients %d' % numClients)\n",
"\n",
"#print get_one_per_client(pings).count() # denominator\n",
"\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment