Created
September 23, 2015 21:19
-
-
Save SamPenrose/34369a647be2e286753c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# How long after creation are Firefox beta pings submitted?\n",
    "\n",
    "For v4 `main` pings from the Firefox beta channel whose `creationDate` falls in August 2015, this notebook measures the number of days between `creationDate` and `meta.submissionDate` and prints the cumulative share of pings per day of delay.\n",
    "\n",
    "The pings are loaded with `fraction=0.1` (presumably a 10% sample; confirm against the `get_pings` documentation). The notebook needs a Spark context `sc` supplied by the environment and the `moztelemetry` package, and is written for the Python 2 kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "\n",
    "import bisect\n",
    "import datetime as DT\n",
    "\n",
    "from moztelemetry import get_pings\n",
    "\n",
    "pings = get_pings(sc, app=\"Firefox\", channel=\"beta\",\n",
    "                  schema=\"v4\", doc_type=\"main\", fraction=0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract creation and submission dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "def extract_dates(d):\n",
    "    \"\"\"Reduce a ping to its creation and submission dates.\n",
    "\n",
    "    Returns {'creation': <first 10 characters of creationDate>,\n",
    "    'submission': <meta.submissionDate>}, or None when either value is\n",
    "    missing or empty.\n",
    "    \"\"\"\n",
    "    cd = d.get('creationDate', '')[:10]\n",
    "    sd = d.get('meta', {}).get('submissionDate', '')\n",
    "    if not (cd and sd):\n",
    "        return None\n",
    "    return {'creation': cd, 'submission': sd}\n",
    "\n",
    "# Keep only pings that have both dates and were created in August 2015.\n",
    "august = pings.map(extract_dates).filter(\n",
    "    lambda d: d and d['creation'].startswith('2015-08'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": "{'creation': u'2015-08-02', 'submission': u'20150719'}"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "august.first()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Days between creation and submission\n",
    "\n",
    "The delta is `submission - creation` in whole days. Deltas of 0-4 days are tallied in Spark accumulators; every other delta (negative, or 5 days and more) is collected to the driver.\n",
    "\n",
    "Negative deltas do occur: the sample record above was created on 2015-08-02 but carries a submission date of 2015-07-19."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "bounds = range(5)\n",
    "\n",
    "def setup():\n",
    "    \"\"\"Create the per-day accumulators and the mapper that feeds them.\n",
    "\n",
    "    Returns (counters, extract_delta). counters[n] is a Spark accumulator\n",
    "    counting records whose delta is exactly n days, for n in bounds.\n",
    "    extract_delta maps a {'creation', 'submission'} record to its delta in\n",
    "    days (submission minus creation); it returns None for deltas it has\n",
    "    counted in an accumulator and the delta itself otherwise.\n",
    "    \"\"\"\n",
    "    # Deltas of 0-4 days are counted in place; all other deltas are returned.\n",
    "    counters = [sc.accumulator(0) for i in bounds]\n",
    "    def extract_delta(d):\n",
    "        cd = DT.datetime.strptime(d['creation'], \"%Y-%m-%d\").date()\n",
    "        sd = DT.datetime.strptime(d['submission'], \"%Y%m%d\").date()\n",
    "        delta = (sd - cd).days\n",
    "        if delta in bounds:\n",
    "            counters[delta].add(1)\n",
    "            return None\n",
    "        return delta\n",
    "    return counters, extract_delta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "counters, extractor = setup()\n",
    "deltas = august.map(extractor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# NOTE: collect() is the action that actually runs extract_delta, so it is\n",
    "# also what fills `counters`. Running it a second time adds to the same\n",
    "# accumulators; re-run the previous cell first to start from fresh counters.\n",
    "results = deltas.filter(lambda i: i is not None).collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "[Accumulator<id=0, value=16492159>, Accumulator<id=1, value=8686657>, Accumulator<id=2, value=2821738>, Accumulator<id=3, value=1442018>, Accumulator<id=4, value=798156>]\n2946868\n[6, 6, 6, 6, 6, 5, 5, -1, -1, -1]\n"
    }
   ],
   "source": [
    "print(counters)\n",
    "print(len(results))\n",
    "print(results[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "[-93, -93, -93, -93, -93, -92, -92, -92, -92, -92] [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]\n"
    }
   ],
   "source": [
    "# The cells below rely on `results` being sorted in ascending order.\n",
    "results.sort()\n",
    "print(results[:10], results[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": "560352"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Position of the first 5 in the sorted list. Deltas 0-4 never reach\n",
    "# `results`, so everything before that position is a negative delta.\n",
    "results.index(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "33187596\n"
    }
   ],
   "source": [
    "# All August pings: those counted by the accumulators plus those collected.\n",
    "total = sum([a.value for a in counters] + [len(results)])\n",
    "print(total)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cumulative share of pings by delay\n",
    "\n",
    "Each line gives the share of all August pings with `0 <= delta <= N` days. Pings with a negative delta are part of the denominator but are never counted as submitted; their share is printed separately at the end."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "0 days old: 0.496937\n1 days old: 0.758682\n2 days old: 0.843705\n3 days old: 0.887156\n4 days old: 0.911206\n"
    }
   ],
   "source": [
    "total = float(total)  # force true division under Python 2\n",
    "within = 0\n",
    "for i in bounds:\n",
    "    within += counters[i].value\n",
    "    print(\"%d days old: %.6f\" % (i, within / total))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Continue the series above for 5..21 days with the same definition:\n",
    "# the share of all pings with 0 <= delta <= i.\n",
    "in_place = sum(a.value for a in counters)   # deltas 0-4, from the accumulators\n",
    "negative = bisect.bisect_left(results, 0)   # negative deltas sort first\n",
    "for i in range(5, 22):\n",
    "    # bisect_right counts every collected delta <= i. list.index(i) would\n",
    "    # count only deltas < i and raises ValueError when no ping has a delta\n",
    "    # of exactly i days.\n",
    "    upto = bisect.bisect_right(results, i) - negative\n",
    "    print(\"%d days old: %.6f\" % (i, (in_place + upto) / float(total)))\n",
    "print(\"negative delta: %.6f\" % (negative / float(total)))"
   ]
  }
 ]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment