Created
November 3, 2015 21:45
-
-
Save gregglind/9dcac65c228e7ab09d1f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Flash updates in a month\n", | |
"1. ayup.\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This is a very a brief introduction to Spark and Telemetry in Python. You should have a look at the [tutorial](https://gist.github.com/vitillo/25a20b7c8685c0c82422) in Scala and the associated [talk](http://www.slideshare.net/RobertoAgostinoVitil/spark-meets-telemetry) if you are interested to learn more about Spark." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"import ujson as json\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import plotly.plotly as py\n", | |
"\n", | |
"import time\n", | |
"import collections\n", | |
"\n", | |
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n", | |
"\n", | |
"%pylab inline" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## number of flash upgrade events in the last month\n", | |
"\n", | |
"(for rebecca weiss, who is amazing)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "SyntaxError", | |
"evalue": "invalid syntax (<ipython-input-34-aa309596518a>, line 12)", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-34-aa309596518a>\"\u001b[1;36m, line \u001b[1;32m12\u001b[0m\n\u001b[1;33m flash == .... whatever\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" | |
] | |
} | |
], | |
"source": [ | |
"pings = get_pings(sc, app=\"Firefox\", channel=\"release\", submission_date=(\"20150901\", \"20150930\"), fraction=0.001)\n", | |
"\n", | |
"#cohortStart = math.floor(time.mktime(time.strptime(\"01 Sep 15\", \"%d %b %y\"))/86400)\n", | |
"#cohortEnd = math.floor(time.mktime(time.strptime(\"30 Sep 15\", \"%d %b %y\"))/86400)\n", | |
"# \n", | |
"def hasFlash(ping):\n", | |
" ##\n", | |
" ## \n", | |
" return true\n", | |
"\n", | |
"def updatedInTheMonth(ping):\n", | |
" flash == .... whatever\n", | |
" ping['creationDate'] # client time, made the record\n", | |
" # rescale to...\n", | |
" ping['meta/submissionDate'] # server received\n", | |
" \n", | |
" ## pretend the clocs are right.\n", | |
" return (cohortStart <= flash['update'] <= cohortEnd)\n", | |
" \n", | |
"def maybeFlashUpdate(ping):\n", | |
" flash = ping['env']....\n", | |
" if not (cohortStart <= flash['update'] <= cohortEnd):\n", | |
" return \n", | |
" \n", | |
" #return (ping['clientId'], flash['version'])\n", | |
" return (ping['clientId'], flash['updateDay'])\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"pings.filter(hasFlash).map(maybeFlashUpdate).distinct().count() # numerator\n", | |
"get_one_per_client(pings).count() # demoninator\n", | |
"\n", | |
"#\n", | |
"for ping in pings:\n", | |
" if has flash\n", | |
" if flash update is in this month\n", | |
" inc!\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"16" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sc.defaultParallelism" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{u'blocklisted': False,\n", | |
" u'clicktoplay': False,\n", | |
" u'description': u'Shockwave Flash 18.0 r0',\n", | |
" u'disabled': False,\n", | |
" u'mimeTypes': [u'application/x-shockwave-flash',\n", | |
" u'application/futuresplash'],\n", | |
" u'name': u'Shockwave Flash',\n", | |
" u'updateDay': 16660,\n", | |
" u'version': u'18.0.0.232'}]" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"[x for x in (pings.first()['environment']['addons']['activePlugins']) if x['name']=='Shockwave Flash']\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"pings = get_pings(sc, app=\"Firefox\", channel=\"release\", submission_date=(\"20150922\", \"20151001\"), fraction=0.001)\n", | |
"\n", | |
"# 41 release\n", | |
"cohortStart = math.floor(time.mktime(time.strptime(\"22 Sep 15\", \"%d %b %y\"))/86400)\n", | |
"cohortEnd = math.floor(time.mktime(time.strptime(\"1 Oct 15\", \"%d %b %y\"))/86400)\n", | |
"\n", | |
"\n", | |
"def hasFlash(ping):\n", | |
" try:\n", | |
" maybe = [x for x in (ping['environment']['addons']['activePlugins']) if x['name']=='Shockwave Flash']\n", | |
" if maybe:\n", | |
" return maybe[0]\n", | |
" except KeyError:\n", | |
" return \n", | |
" \n", | |
" return \n", | |
"\n", | |
"\n", | |
"def maybeFlashUpdate(ping):\n", | |
" flash = hasFlash(ping)\n", | |
" if not flash: return None\n", | |
" \n", | |
" ## pretend the clocks are right.\n", | |
" # correct would be creationTime, meta/submission\n", | |
" if not (cohortStart <= flash['updateDay'] <= cohortEnd):\n", | |
" return None\n", | |
" \n", | |
" #return (ping['clientId'], flash['version'])\n", | |
" return (ping['clientId'], flash['updateDay'])\n", | |
"\n", | |
" \n", | |
"print 'numevents', pings.filter(hasFlash).map(maybeFlashUpdate).filter(bool).distinct().collect() # numerator\n", | |
"\n", | |
"print 'numclients', pings.map(lambda x: x['clientId']).distinct().count()\n", | |
"\n", | |
"#print get_one_per_client(pings).count() # denominator\n", | |
"\n", | |
"\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment