Created
November 17, 2016 00:40
-
-
Save saptarshiguha/db5747e321d21adafc692abb25b3a466 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Import data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "mainpingspq = sqlContext.read.load(\"s3://telemetry-parquet/main_summary/v3\", \"parquet\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pyspark.sql.functions as F\n", | |
| "import operator\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Create relevant variables" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 183, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# filter the data by dates that we want\n", | |
| "import datetime\n", | |
| "DATE_1970_01_01 = datetime.datetime(1970, 1, 1)\n", | |
| "START_profile = \"2016-08-14\"\n", | |
| "END_profile = \"2016-08-28\"\n", | |
| "DAYS_START_profile = (datetime.datetime.strptime(START_profile, \"%Y-%m-%d\") - DATE_1970_01_01).days #in days since Jan 1, 1970\n", | |
| "DAYS_END_profile = (datetime.datetime.strptime(END_profile, \"%Y-%m-%d\") - DATE_1970_01_01).days #in days since Jan 1, 1970\n", | |
| "START_ping = '2016-08-14'\n", | |
| "END_ping = '2016-10-15'\n", | |
| "START_s3 = '20160813'\n", | |
| "END_s3 = '20161025'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Subset data by applying filters and selecting columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "u0 = mainpingspq.selectExpr(\"submission_date_s3\",\n", | |
| " \"app_name\",\n", | |
| " \"vendor\",\n", | |
| " \"normalized_channel\",\n", | |
| " \"profile_creation_date\",\n", | |
| " \"subsession_start_date\",\n", | |
| " \"e10s_cohort\",\n", | |
| " \"client_id\",\n", | |
| " \"e10s_enabled\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "u1 = u0.filter(u0.submission_date_s3 >= START_s3)\\\n", | |
| " .filter(u0.submission_date_s3 <= END_s3)\\\n", | |
| " .filter(u0.app_name == 'Firefox')\\\n", | |
| " .filter(u0.vendor == 'Mozilla')\\\n", | |
| " .filter(u0.normalized_channel == 'release')\\\n", | |
| " .filter(u0.profile_creation_date >= DAYS_START_profile)\\\n", | |
| " .filter(u0.profile_creation_date <= DAYS_END_profile)\\\n", | |
| " .filter(u0.subsession_start_date.substr(1,10) >= START_ping)\\\n", | |
| " .filter(u0.subsession_start_date.substr(1,10) <= END_ping)\\\n", | |
| " .filter(u0.e10s_cohort.isin('test','control'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "u2 = u1.selectExpr(\"client_id\",\n", | |
| " \"profile_creation_date\",\n", | |
| " \"substr(subsession_start_date, 1, 10) as date\",\n", | |
| " \"e10s_enabled\",\n", | |
| " \"e10s_cohort\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "202206844" | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# count total rows (may change slightly)\n", | |
| "u2.count() #202,206,844" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[Row(count(client_id)=8643850)]" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# count unique user_ids (may change slightly)\n", | |
| "u2.agg(countDistinct(u2.client_id)).collect() #8,643,850" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Reshape data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# For each client_id, subsession_start_date (as date), get max(profile_creation_date), max(e10s_cohort), min(e10s_cohort), max(e10s_enabled), min(e10s_enabled)\n", | |
| "u3 = u2.groupBy([u2.client_id, u2.date]).agg(F.max(\"profile_creation_date\").alias(\"profile_creation_date\"),\n", | |
| " F.max(\"e10s_cohort\").alias(\"e10s_cohort_max\"),\n", | |
| " F.min(\"e10s_cohort\").alias(\"e10s_cohort_min\"),\n", | |
| " F.max(\"e10s_enabled\").alias(\"e10s_enabled_max\"),\n", | |
| " F.min(\"e10s_enabled\").alias(\"e10s_enabled_min\"))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Get distribution of data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Rows per client_id\n", | |
| "u4 = u3.groupBy(u3.client_id).agg(F.count(\"*\").alias(\"num_days_active\"))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Distribution per number of days active (better)\n", | |
| "u5 = u4.groupBy(u4.num_days_active).agg(F.count(\"*\").alias(\"cnt\"))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[Row(num_days_active=1, cnt=4204801),\n", | |
| " Row(num_days_active=2, cnt=890262),\n", | |
| " Row(num_days_active=3, cnt=453709),\n", | |
| " Row(num_days_active=4, cnt=313680),\n", | |
| " Row(num_days_active=5, cnt=241930),\n", | |
| " Row(num_days_active=6, cnt=196693),\n", | |
| " Row(num_days_active=7, cnt=165900),\n", | |
| " Row(num_days_active=8, cnt=143558),\n", | |
| " Row(num_days_active=9, cnt=126036),\n", | |
| " Row(num_days_active=10, cnt=111326),\n", | |
| " Row(num_days_active=11, cnt=100943),\n", | |
| " Row(num_days_active=12, cnt=91051),\n", | |
| " Row(num_days_active=13, cnt=83713),\n", | |
| " Row(num_days_active=14, cnt=78057),\n", | |
| " Row(num_days_active=15, cnt=72417),\n", | |
| " Row(num_days_active=16, cnt=67222),\n", | |
| " Row(num_days_active=17, cnt=63339),\n", | |
| " Row(num_days_active=18, cnt=59691),\n", | |
| " Row(num_days_active=19, cnt=56503),\n", | |
| " Row(num_days_active=20, cnt=53782),\n", | |
| " Row(num_days_active=21, cnt=51441),\n", | |
| " Row(num_days_active=22, cnt=50137),\n", | |
| " Row(num_days_active=23, cnt=47469),\n", | |
| " Row(num_days_active=24, cnt=45686),\n", | |
| " Row(num_days_active=25, cnt=44261),\n", | |
| " Row(num_days_active=26, cnt=43156),\n", | |
| " Row(num_days_active=27, cnt=42095),\n", | |
| " Row(num_days_active=28, cnt=40619),\n", | |
| " Row(num_days_active=29, cnt=40561),\n", | |
| " Row(num_days_active=30, cnt=39174),\n", | |
| " Row(num_days_active=31, cnt=37877),\n", | |
| " Row(num_days_active=32, cnt=37045),\n", | |
| " Row(num_days_active=33, cnt=36022),\n", | |
| " Row(num_days_active=34, cnt=35887),\n", | |
| " Row(num_days_active=35, cnt=34965),\n", | |
| " Row(num_days_active=36, cnt=32984),\n", | |
| " Row(num_days_active=37, cnt=31514),\n", | |
| " Row(num_days_active=38, cnt=29389),\n", | |
| " Row(num_days_active=39, cnt=27648),\n", | |
| " Row(num_days_active=40, cnt=26283),\n", | |
| " Row(num_days_active=41, cnt=25255),\n", | |
| " Row(num_days_active=42, cnt=24126),\n", | |
| " Row(num_days_active=43, cnt=22536),\n", | |
| " Row(num_days_active=44, cnt=21242),\n", | |
| " Row(num_days_active=45, cnt=19508),\n", | |
| " Row(num_days_active=46, cnt=17694),\n", | |
| " Row(num_days_active=47, cnt=17120),\n", | |
| " Row(num_days_active=48, cnt=17024),\n", | |
| " Row(num_days_active=49, cnt=17629),\n", | |
| " Row(num_days_active=50, cnt=16640),\n", | |
| " Row(num_days_active=51, cnt=15685),\n", | |
| " Row(num_days_active=52, cnt=13770),\n", | |
| " Row(num_days_active=53, cnt=11460),\n", | |
| " Row(num_days_active=54, cnt=9984),\n", | |
| " Row(num_days_active=55, cnt=8890),\n", | |
| " Row(num_days_active=56, cnt=7619),\n", | |
| " Row(num_days_active=57, cnt=6794),\n", | |
| " Row(num_days_active=58, cnt=5898),\n", | |
| " Row(num_days_active=59, cnt=5092),\n", | |
| " Row(num_days_active=60, cnt=4031),\n", | |
| " Row(num_days_active=61, cnt=3270),\n", | |
| " Row(num_days_active=62, cnt=2343),\n", | |
| " Row(num_days_active=63, cnt=1415)]" | |
| ] | |
| }, | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "u5.orderBy(u5.num_days_active).collect()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Take a small sample to see what it looks like" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[Row(client_id=u'000068d1-0677-4db0-8b7e-9b97c597c486', date=u'2016-08-23', profile_creation_date=17036, e10s_cohort_max=u'control', e10s_cohort_min=u'control', e10s_enabled_max=False, e10s_enabled_min=False),\n", | |
| " Row(client_id=u'00007aab-2946-4279-8774-15d8d94289c3', date=u'2016-09-23', profile_creation_date=17041, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'0000b7d5-d45b-467c-a0d8-a4f2f264be2a', date=u'2016-08-29', profile_creation_date=17029, e10s_cohort_max=u'control', e10s_cohort_min=u'control', e10s_enabled_max=False, e10s_enabled_min=False)]" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "u3.take(3)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Make into RDD" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "r3 = u3.rdd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Group by client_id" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 131, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# mapValues(list) necessary to convert the iterable object into an actual list\n", | |
| "\n", | |
| "# for each client, get a list of rows containing all attributes\n", | |
| "r6 = r3.groupBy(lambda x: x.client_id).mapValues(list)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "x = r6.take(2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(u'ec8ea196-d2a6-400b-8c01-da0daeae6a23',\n", | |
| " [Row(client_id=u'ec8ea196-d2a6-400b-8c01-da0daeae6a23', date=u'2016-08-25', profile_creation_date=17038, e10s_cohort_max=u'control', e10s_cohort_min=u'control', e10s_enabled_max=False, e10s_enabled_min=False)]),\n", | |
| " (u'566e988a-0dc1-4aa3-bc28-5a8754843c2f',\n", | |
| " [Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-09-22', profile_creation_date=17028, e10s_cohort_max=u'control', e10s_cohort_min=u'control', e10s_enabled_max=False, e10s_enabled_min=False),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-10-12', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-09-25', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-10-13', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-10-10', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-09-23', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-10-14', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-10-09', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-09-24', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-09-28', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-10-11', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-09-27', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True),\n", | |
| " Row(client_id=u'566e988a-0dc1-4aa3-bc28-5a8754843c2f', date=u'2016-10-08', profile_creation_date=17028, e10s_cohort_max=u'test', e10s_cohort_min=u'test', e10s_enabled_max=True, e10s_enabled_min=True)])]" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# For each client_id, get profile_creation date (single number) and all attributes as lists\n", | |
| "## Note: the lists are sorted by the ordering of date (subsession_start_date)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 156, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "\"\"\"\n", | |
| "Helper function for the map\n", | |
| "\n", | |
| "For each client_id, get a dictionary with:\n", | |
| "-client_id\n", | |
| "-profile_creation_date\n", | |
| "-a list of subsession_start_dates\n", | |
| "-a list of e10s_cohort_max\n", | |
| "-a list of e10s_cohort_min\n", | |
| "-a list of e10s_enabled_max\n", | |
| "-a list of e10s_enabled_min\n", | |
| "\n", | |
| "These lists are also ordered chronologically from the first date (order is preserved for all lists)\n", | |
| "\"\"\"\n", | |
| "def transform_rows_into_ordered_lists(row):\n", | |
| " d = dict(zip( #dictionary of lists\n", | |
| " [\"date\",\"e10s_cohort_max\",\"e10s_cohort_min\",\"e10s_enabled_max\",\"e10s_enabled_min\"],\n", | |
| " zip(*sorted(zip( #sort the following lists\n", | |
| " [d.date for d in row[1]], #date (subsession_start_date)\n", | |
| " [d.e10s_cohort_max for d in row[1]], #e10s_cohort_max\n", | |
| " [d.e10s_cohort_min for d in row[1]], #e10s_cohort_min\n", | |
| " [d.e10s_enabled_max for d in row[1]], #e10s_enabled_max\n", | |
| " [d.e10s_enabled_min for d in row[1]]), #e10s_enabled_min\n", | |
| " key=operator.itemgetter(0)))))\n", | |
| " d['client_id'] = row[0]\n", | |
| " d['profile_creation_date'] = row[1][0][2]\n", | |
| " return d" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 157, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "r7 = r6.map(transform_rows_into_ordered_lists) #by the ordering of the first list (date)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 158, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'client_id': u'93a7731c-f26b-4aa4-82d4-0f95e5ffb6c5',\n", | |
| " 'date': (u'2016-08-18',\n", | |
| " u'2016-08-19',\n", | |
| " u'2016-08-23',\n", | |
| " u'2016-08-29',\n", | |
| " u'2016-09-01',\n", | |
| " u'2016-09-12',\n", | |
| " u'2016-09-15',\n", | |
| " u'2016-09-16',\n", | |
| " u'2016-09-19',\n", | |
| " u'2016-09-22',\n", | |
| " u'2016-09-23',\n", | |
| " u'2016-09-26',\n", | |
| " u'2016-09-28',\n", | |
| " u'2016-10-06',\n", | |
| " u'2016-10-10',\n", | |
| " u'2016-10-13'),\n", | |
| " 'e10s_cohort_max': (u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test'),\n", | |
| " 'e10s_cohort_min': (u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'control',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test',\n", | |
| " u'test'),\n", | |
| " 'e10s_enabled_max': (False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True),\n", | |
| " 'e10s_enabled_min': (False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " False,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True,\n", | |
| " True),\n", | |
| " 'profile_creation_date': 17031}]" | |
| ] | |
| }, | |
| "execution_count": 158, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "r7.take(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Each `client_id` in **r7** now has elements stored in a dictionary:\n", | |
| "\n", | |
| "- row['client_id'] -> client_id\n", | |
| "- row['profile_creation_date'] -> profile_creation_date\n", | |
| "- row['date'] -> list of date (subsession_start_date)\n", | |
| "- row['e10s_cohort_max'] -> list of e10s_cohort_max\n", | |
| "- row['e10s_cohort_min'] -> list of e10s_cohort_min\n", | |
| "- row['e10s_enabled_max'] -> list of e10s_enabled_max\n", | |
| "- row['e10s_enabled_min'] -> list of e10s_enabled_min\n", | |
| "\n", | |
| "**Note**: the 5 lists in this dictionary are *ordered* chronologically since the first subsession_start_date\n", | |
| "\n", | |
| "A `client_id` is in 'ignore' cohort iff\n", | |
| "\n", | |
| "- there was a switch in cohorts throughout the experiment\n", | |
| "\n", | |
| "A `client_id` is in 'control' cohort iff\n", | |
| "\n", | |
| "- there was no switch in cohorts\n", | |
| "- e10s_cohort == 'control' for all entries\n", | |
| "- e10s_enabled stays False (seems to follow suit)\n", | |
| "\n", | |
| "A `client_id` is in 'variation' cohort iff\n", | |
| "\n", | |
| "- there was no switch in cohorts\n", | |
| "- e10s_enabled == True within 3 days of profile_creation_date\n", | |
| "- e10s_enabled stays True for the rest of the list (since ordered by date)\n", | |
| "- e10s_cohort stays 'test' for the rest of the list (seems to follow suit)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 177, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "\"\"\"\n", | |
| "Determine whether a client_id has switched cohorts\n", | |
| "We want to keep client_ids where, throughout the experiment, the e10s_cohort has remained the same\n", | |
| "This is equivalent to saying that the set of e10s_cohort_max/min contains only one value\n", | |
| "\"\"\"\n", | |
| "def client_one_cohort(client):\n", | |
| " if len(set(client['e10s_cohort_max']+client['e10s_cohort_min']))<=1:\n", | |
| " return True\n", | |
| " else:\n", | |
| " return False\n", | |
| "\n", | |
| "\"\"\"\n", | |
| "Determine whether a client_id is 'variation' or 'control'\n", | |
| "If client_id passes the client_one_cohort() test, then all values in the lists e10s_cohort_max/min are the same\n", | |
| " We therefore only need to check the first to determine the e10s_cohort\n", | |
| "Also, we need that e10s_enabled_max/min has not varied throughout the experiment\n", | |
| " We therefore check that the set of e10s_enabled_max/min contains only one value\n", | |
| " For 'control', we also need to check that e10s_enabled is False, so we check e10s_enabled_max\n", | |
| " For 'variation', we also need to check that e10s_enabled is True, so we check e10s_enabled_min\n", | |
| "To determine whether the profile in 'test' switched e10s_enabled to True within 3 days of profile creation,\n", | |
| "we check that the last e10s_enabled is True, and then get the date of the last switch in e10s_enabled\n", | |
| " If this switch was within 3 days of profile creation, then the client_id is in 'variation'\n", | |
| "\"\"\"\n", | |
| "# np.where(np.array(client['e10s_enabled_min'][:-1]) != np.array(client['e10s_enabled_min'][1:]))[0][-1] is the last index of the change in e10s_enabled\n", | |
| "def variation_or_control(client):\n", | |
| " if client_one_cohort(client):\n", | |
| " if client['e10s_cohort_max'][0] == 'control' and client['e10s_enabled_max'][0] == False and len(set(client['e10s_enabled_max']+client['e10s_enabled_min'])) <= 1:\n", | |
| " return \"control\"\n", | |
| " elif client['e10s_cohort_max'][0] == 'test' and client['e10s_enabled_min'][0] == True and len(set(client['e10s_enabled_max'] +client['e10s_enabled_min'])) <= 1:\n", | |
| " return \"variation\"\n", | |
| " elif client['e10s_cohort_max'][-1] == 'test' and client['e10s_enabled_min'][-1] == True and ((datetime.datetime.strptime(client['date'][np.where(np.array(client['e10s_enabled_min'][:-1]) != np.array(client['e10s_enabled_min'][1:]))[0][-1]], \"%Y-%m-%d\") - datetime.datetime(1970, 1, 1)).days - client['profile_creation_date']) <= 3:\n", | |
| " return\"variation\"\n", | |
| " else:\n", | |
| " return \"changed_e10s_enabled\"\n", | |
| " else:\n", | |
| " return \"changed_e10s_cohort\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Get some counts" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "8643851" | |
| ] | |
| }, | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of unique client_ids at this point\n", | |
| "r7.count() #8,643,851" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 178, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "r8 = r7.map(variation_or_control)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 332, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5673668" | |
| ] | |
| }, | |
| "execution_count": 332, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of client_ids in 'control'\n", | |
| "r8.filter(lambda x: x=='control').count() #5,673,668" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 333, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "420410" | |
| ] | |
| }, | |
| "execution_count": 333, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of client_ids in 'variation'\n", | |
| "r8.filter(lambda x: x=='variation').count() #420,410" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 334, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "214278" | |
| ] | |
| }, | |
| "execution_count": 334, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of client_ids that changed e10s_enabled outside 3 days limit\n", | |
| "r8.filter(lambda x: x=='changed_e10s_enabled').count() #214,278" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 335, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2335495" | |
| ] | |
| }, | |
| "execution_count": 335, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of client_ids that changed e10s_cohort\n", | |
| "r8.filter(lambda x: x=='changed_e10s_cohort').count() #2,335,495" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Get the counts per day" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "1) Get the group/cohort\n", | |
| "\n", | |
| "2) If it's 'control' or 'variation':\n", | |
| " - (cohort, profile_creation_date, date) -> 1/0 (if user was active or not that **week**)\n", | |
| "3) If not:\n", | |
| " - ignore" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 336, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def generate_counts(client):\n", | |
| " cohort = variation_or_control(client)\n", | |
| " if cohort in [\"control\", \"variation\"]:\n", | |
| " # profile_creation_date in YYYY-MM-DD format\n", | |
| " pcd = DATE_1970_01_01 + datetime.timedelta(days=client['profile_creation_date'])\n", | |
| " \n", | |
| " week = range(11)\n", | |
| " active = [0]*10\n", | |
| " \n", | |
| " for d in client['date']:\n", | |
| " # indicate that profile was created (week0 counts profile creations)\n", | |
| " active[0] = 1\n", | |
| " # find week that contains the date\n", | |
| " w = 1\n", | |
| " while d > (pcd+datetime.timedelta(days=w*7-1)).strftime(\"%Y-%m-%d\"):\n", | |
| " w+=1\n", | |
| " # if the entry is 0, change it to 1\n", | |
| " if active[w] == 0:\n", | |
| " active[w] = 1\n", | |
| " \n", | |
| " for w, a in zip(week, active):\n", | |
| " yield (pcd.strftime(\"%Y-%m-%d\"), cohort, w), a\n", | |
| " else:\n", | |
| " pass" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 337, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "r9 = r7.flatMap(generate_counts).reduceByKey(operator.add)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 338, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "res = r9.collect()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 324, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "300" | |
| ] | |
| }, | |
| "execution_count": 324, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "\"\"\"\n", | |
| "We should have 2*10*15 = 300 entries\n", | |
| " 2 cohorts\n", | |
| " 9 weeks of experiment\n", | |
| " 15 days for profile creation\n", | |
| "\"\"\"\n", | |
| "len(res)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 339, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(('2016-08-22', 'variation', 0), 41155),\n", | |
| " (('2016-08-21', 'control', 2), 29898),\n", | |
| " (('2016-08-22', 'control', 4), 20613),\n", | |
| " (('2016-08-28', 'variation', 0), 25539),\n", | |
| " (('2016-08-27', 'variation', 8), 4446),\n", | |
| " (('2016-08-24', 'variation', 0), 43549),\n", | |
| " (('2016-08-15', 'variation', 7), 5939),\n", | |
| " (('2016-08-17', 'variation', 0), 16006),\n", | |
| " (('2016-08-23', 'variation', 1), 29198),\n", | |
| " (('2016-08-14', 'control', 6), 7754),\n", | |
| " (('2016-08-26', 'variation', 1), 27451),\n", | |
| " (('2016-08-19', 'control', 8), 8526),\n", | |
| " (('2016-08-20', 'variation', 2), 10226),\n", | |
| " (('2016-08-25', 'variation', 1), 30276),\n", | |
| " (('2016-08-25', 'control', 0), 436623),\n", | |
| " (('2016-08-16', 'variation', 1), 3946),\n", | |
| " (('2016-08-27', 'control', 5), 6689),\n", | |
| " (('2016-08-20', 'control', 1), 301287),\n", | |
| " (('2016-08-18', 'variation', 6), 8422),\n", | |
| " (('2016-08-22', 'variation', 8), 14611),\n", | |
| " (('2016-08-17', 'control', 4), 21677),\n", | |
| " (('2016-08-27', 'variation', 0), 27666),\n", | |
| " (('2016-08-21', 'variation', 3), 8747),\n", | |
| " (('2016-08-24', 'variation', 8), 12483),\n", | |
| " (('2016-08-28', 'control', 4), 7943),\n", | |
| " (('2016-08-23', 'variation', 9), 0),\n", | |
| " (('2016-08-26', 'control', 6), 9365),\n", | |
| " (('2016-08-18', 'control', 7), 11118),\n", | |
| " (('2016-08-26', 'variation', 9), 0),\n", | |
| " (('2016-08-19', 'control', 0), 395311),\n", | |
| " (('2016-08-25', 'control', 8), 5577),\n", | |
| " (('2016-08-15', 'control', 1), 374949),\n", | |
| " (('2016-08-19', 'variation', 6), 12536),\n", | |
| " (('2016-08-23', 'control', 0), 438756),\n", | |
| " (('2016-08-24', 'variation', 7), 15858),\n", | |
| " (('2016-08-14', 'variation', 7), 3376),\n", | |
| " (('2016-08-24', 'control', 7), 12788),\n", | |
| " (('2016-08-20', 'control', 8), 5262),\n", | |
| " (('2016-08-16', 'control', 4), 21267),\n", | |
| " (('2016-08-27', 'variation', 9), 0),\n", | |
| " (('2016-08-27', 'control', 2), 17976),\n", | |
| " (('2016-08-15', 'variation', 6), 5224),\n", | |
| " (('2016-08-22', 'variation', 3), 15433),\n", | |
| " (('2016-08-21', 'control', 3), 14618),\n", | |
| " (('2016-08-15', 'control', 9), 7771),\n", | |
| " (('2016-08-28', 'variation', 7), 8175),\n", | |
| " (('2016-08-22', 'control', 3), 24681),\n", | |
| " (('2016-08-23', 'control', 8), 8784),\n", | |
| " (('2016-08-26', 'variation', 0), 38394),\n", | |
| " (('2016-08-17', 'variation', 3), 4249),\n", | |
| " (('2016-08-25', 'control', 1), 422695),\n", | |
| " (('2016-08-23', 'variation', 2), 16666),\n", | |
| " (('2016-08-14', 'control', 7), 6018),\n", | |
| " (('2016-08-18', 'control', 0), 424235),\n", | |
| " (('2016-08-18', 'variation', 7), 8944),\n", | |
| " (('2016-08-20', 'control', 0), 307812),\n", | |
| " (('2016-08-20', 'variation', 1), 18886),\n", | |
| " (('2016-08-25', 'variation', 6), 15865),\n", | |
| " (('2016-08-27', 'variation', 1), 20974),\n", | |
| " (('2016-08-16', 'variation', 2), 3411),\n", | |
| " (('2016-08-17', 'control', 3), 30914),\n", | |
| " (('2016-08-21', 'variation', 0), 23270),\n", | |
| " (('2016-08-26', 'variation', 8), 8443),\n", | |
| " (('2016-08-28', 'control', 7), 4340),\n", | |
| " (('2016-08-25', 'control', 9), 0),\n", | |
| " (('2016-08-26', 'control', 5), 11445),\n", | |
| " (('2016-08-18', 'control', 8), 9595),\n", | |
| " (('2016-08-15', 'control', 0), 389692),\n", | |
| " (('2016-08-20', 'variation', 9), 3837),\n", | |
| " (('2016-08-19', 'control', 3), 23552),\n", | |
| " (('2016-08-24', 'variation', 6), 16507),\n", | |
| " (('2016-08-21', 'control', 4), 11034),\n", | |
| " (('2016-08-19', 'variation', 5), 12081),\n", | |
| " (('2016-08-23', 'control', 1), 424462),\n", | |
| " (('2016-08-21', 'variation', 8), 7334),\n", | |
| " (('2016-08-14', 'variation', 4), 2290),\n", | |
| " (('2016-08-24', 'control', 6), 15173),\n", | |
| " (('2016-08-14', 'control', 8), 5085),\n", | |
| " (('2016-08-27', 'control', 3), 12014),\n", | |
| " (('2016-08-16', 'control', 5), 18464),\n", | |
| " (('2016-08-22', 'control', 2), 47248),\n", | |
| " (('2016-08-28', 'variation', 6), 8653),\n", | |
| " (('2016-08-15', 'control', 8), 9077),\n", | |
| " (('2016-08-15', 'variation', 5), 4558),\n", | |
| " (('2016-08-22', 'variation', 2), 16378),\n", | |
| " (('2016-08-23', 'control', 9), 0),\n", | |
| " (('2016-08-18', 'control', 1), 407629),\n", | |
| " (('2016-08-26', 'variation', 3), 14560),\n", | |
| " (('2016-08-17', 'variation', 2), 3509),\n", | |
| " (('2016-08-24', 'control', 9), 0),\n", | |
| " (('2016-08-25', 'variation', 7), 15014),\n", | |
| " (('2016-08-25', 'control', 2), 34793),\n", | |
| " (('2016-08-23', 'variation', 3), 16000),\n", | |
| " (('2016-08-14', 'control', 0), 269373),\n", | |
| " (('2016-08-18', 'variation', 4), 7471),\n", | |
| " (('2016-08-20', 'control', 3), 16185),\n", | |
| " (('2016-08-20', 'variation', 0), 25228),\n", | |
| " (('2016-08-17', 'control', 2), 53099),\n", | |
| " (('2016-08-27', 'variation', 2), 11497),\n", | |
| " (('2016-08-16', 'variation', 3), 4122),\n", | |
| " (('2016-08-23', 'control', 6), 13053),\n", | |
| " (('2016-08-21', 'variation', 1), 17882),\n", | |
| " (('2016-08-18', 'control', 9), 5422),\n", | |
| " (('2016-08-28', 'control', 6), 4695),\n", | |
| " (('2016-08-24', 'control', 1), 421743),\n", | |
| " (('2016-08-26', 'control', 4), 14748),\n", | |
| " (('2016-08-21', 'control', 5), 8566),\n", | |
| " (('2016-08-22', 'control', 9), 0),\n", | |
| " (('2016-08-15', 'control', 3), 32137),\n", | |
| " (('2016-08-19', 'control', 2), 45753),\n", | |
| " (('2016-08-20', 'variation', 8), 7951),\n", | |
| " (('2016-08-24', 'variation', 5), 16410),\n", | |
| " (('2016-08-19', 'variation', 4), 12161),\n", | |
| " (('2016-08-14', 'control', 9), 4707),\n", | |
| " (('2016-08-21', 'variation', 9), 0),\n", | |
| " (('2016-08-14', 'variation', 5), 2501),\n", | |
| " (('2016-08-23', 'variation', 4), 15542),\n", | |
| " (('2016-08-27', 'control', 0), 298538),\n", | |
| " (('2016-08-16', 'control', 2), 55126),\n", | |
| " (('2016-08-28', 'variation', 5), 9252),\n", | |
| " (('2016-08-22', 'control', 1), 419578),\n", | |
| " (('2016-08-16', 'variation', 4), 4932),\n", | |
| " (('2016-08-15', 'variation', 4), 4112),\n", | |
| " (('2016-08-22', 'variation', 5), 15654),\n", | |
| " (('2016-08-24', 'control', 8), 8659),\n", | |
| " (('2016-08-21', 'variation', 6), 8185),\n", | |
| " (('2016-08-14', 'control', 1), 260949),\n", | |
| " (('2016-08-18', 'control', 2), 49677),\n", | |
| " (('2016-08-26', 'variation', 2), 15458),\n", | |
| " (('2016-08-17', 'variation', 5), 5325),\n", | |
| " (('2016-08-25', 'variation', 4), 15730),\n", | |
| " (('2016-08-25', 'control', 3), 22060),\n", | |
| " (('2016-08-26', 'control', 3), 19069),\n", | |
| " (('2016-08-27', 'control', 8), 1350),\n", | |
| " (('2016-08-18', 'variation', 5), 7703),\n", | |
| " (('2016-08-20', 'control', 2), 33312),\n", | |
| " (('2016-08-20', 'variation', 7), 8526),\n", | |
| " (('2016-08-23', 'control', 7), 11075),\n", | |
| " (('2016-08-17', 'control', 1), 417002),\n", | |
| " (('2016-08-27', 'variation', 3), 10212),\n", | |
| " (('2016-08-24', 'control', 0), 445407),\n", | |
| " (('2016-08-28', 'control', 1), 275460),\n", | |
| " (('2016-08-21', 'control', 6), 6480),\n", | |
| " (('2016-08-22', 'control', 8), 9592),\n", | |
| " (('2016-08-15', 'control', 2), 50845),\n", | |
| " (('2016-08-19', 'control', 5), 15306),\n", | |
| " (('2016-08-17', 'control', 9), 7238),\n", | |
| " (('2016-08-24', 'variation', 4), 15764),\n", | |
| " (('2016-08-15', 'variation', 3), 3290),\n", | |
| " (('2016-08-19', 'variation', 3), 12234),\n", | |
| " (('2016-08-23', 'variation', 5), 16171),\n", | |
| " (('2016-08-26', 'variation', 5), 14524),\n", | |
| " (('2016-08-16', 'control', 3), 33160),\n", | |
| " (('2016-08-28', 'control', 9), 0),\n", | |
| " (('2016-08-14', 'variation', 2), 1790),\n", | |
| " (('2016-08-16', 'variation', 5), 5380),\n", | |
| " (('2016-08-27', 'control', 1), 293382),\n", | |
| " (('2016-08-18', 'variation', 2), 6816),\n", | |
| " (('2016-08-20', 'control', 5), 9908),\n", | |
| " (('2016-08-22', 'variation', 4), 15151),\n", | |
| " (('2016-08-28', 'variation', 4), 9284),\n", | |
| " (('2016-08-22', 'control', 0), 434761),\n", | |
| " (('2016-08-21', 'variation', 7), 7796),\n", | |
| " (('2016-08-17', 'variation', 4), 4817),\n", | |
| " (('2016-08-26', 'control', 2), 27355),\n", | |
| " (('2016-08-14', 'control', 2), 35682),\n", | |
| " (('2016-08-18', 'control', 3), 27252),\n", | |
| " (('2016-08-20', 'variation', 6), 8853),\n", | |
| " (('2016-08-25', 'variation', 5), 15978),\n", | |
| " (('2016-08-25', 'control', 4), 17592),\n", | |
| " (('2016-08-27', 'control', 9), 0),\n", | |
| " (('2016-08-15', 'control', 5), 16465),\n", | |
| " (('2016-08-23', 'control', 4), 20910),\n", | |
| " (('2016-08-17', 'control', 0), 435088),\n", | |
| " (('2016-08-27', 'variation', 4), 10116),\n", | |
| " (('2016-08-28', 'control', 0), 279793),\n", | |
| " (('2016-08-24', 'control', 3), 25293),\n", | |
| " (('2016-08-16', 'control', 8), 10247),\n", | |
| " (('2016-08-19', 'control', 4), 18709),\n", | |
| " (('2016-08-15', 'variation', 2), 2735),\n", | |
| " (('2016-08-21', 'control', 7), 5345),\n", | |
| " (('2016-08-22', 'control', 7), 11047),\n", | |
| " (('2016-08-28', 'variation', 3), 9626),\n", | |
| " (('2016-08-19', 'variation', 2), 13003),\n", | |
| " (('2016-08-17', 'control', 8), 10716),\n", | |
| " (('2016-08-24', 'variation', 3), 16002),\n", | |
| " (('2016-08-26', 'variation', 4), 14174),\n", | |
| " (('2016-08-14', 'variation', 3), 1939),\n", | |
| " (('2016-08-28', 'control', 8), 0),\n", | |
| " (('2016-08-26', 'control', 9), 0),\n", | |
| " (('2016-08-23', 'variation', 6), 16409),\n", | |
| " (('2016-08-20', 'control', 4), 12376),\n", | |
| " (('2016-08-18', 'variation', 3), 7081),\n", | |
| " (('2016-08-16', 'control', 0), 428364),\n", | |
| " (('2016-08-25', 'variation', 2), 17136),\n", | |
| " (('2016-08-16', 'variation', 6), 6164),\n", | |
| " (('2016-08-27', 'control', 6), 5388),\n", | |
| " (('2016-08-22', 'variation', 7), 15460),\n", | |
| " (('2016-08-17', 'control', 7), 12328),\n", | |
| " (('2016-08-21', 'variation', 4), 8119),\n", | |
| " (('2016-08-17', 'variation', 7), 6770),\n", | |
| " (('2016-08-25', 'control', 5), 13664),\n", | |
| " (('2016-08-26', 'control', 1), 393355),\n", | |
| " (('2016-08-14', 'control', 3), 21276),\n", | |
| " (('2016-08-18', 'control', 4), 20381),\n", | |
| " (('2016-08-15', 'control', 4), 19276),\n", | |
| " (('2016-08-20', 'variation', 5), 8721),\n", | |
| " (('2016-08-27', 'variation', 5), 10172),\n", | |
| " (('2016-08-15', 'variation', 9), 6134),\n", | |
| " (('2016-08-23', 'control', 5), 16433),\n", | |
| " (('2016-08-14', 'variation', 8), 3372),\n", | |
| " (('2016-08-28', 'control', 3), 10786),\n", | |
| " (('2016-08-24', 'control', 2), 40670),\n", | |
| " (('2016-08-18', 'variation', 8), 8750),\n", | |
| " (('2016-08-16', 'control', 9), 8008),\n", | |
| " (('2016-08-28', 'variation', 2), 10854),\n", | |
| " (('2016-08-22', 'control', 6), 13139),\n", | |
| " (('2016-08-19', 'control', 7), 9883),\n", | |
| " (('2016-08-24', 'variation', 2), 16575),\n", | |
| " (('2016-08-15', 'variation', 1), 3377),\n", | |
| " (('2016-08-21', 'control', 0), 285779),\n", | |
| " (('2016-08-19', 'variation', 1), 22889),\n", | |
| " (('2016-08-26', 'variation', 7), 13761),\n", | |
| " (('2016-08-25', 'variation', 3), 16013),\n", | |
| " (('2016-08-14', 'variation', 0), 8656),\n", | |
| " (('2016-08-26', 'control', 8), 3577),\n", | |
| " (('2016-08-23', 'variation', 7), 16022),\n", | |
| " (('2016-08-14', 'control', 4), 12492),\n", | |
| " (('2016-08-27', 'control', 7), 4824),\n", | |
| " (('2016-08-20', 'control', 7), 6038),\n", | |
| " (('2016-08-18', 'variation', 0), 22234),\n", | |
| " (('2016-08-16', 'control', 1), 411311),\n", | |
| " (('2016-08-17', 'control', 6), 14779),\n", | |
| " (('2016-08-16', 'variation', 7), 7001),\n", | |
| " (('2016-08-22', 'variation', 6), 15991),\n", | |
| " (('2016-08-21', 'control', 8), 4898),\n", | |
| " (('2016-08-19', 'variation', 9), 7303),\n", | |
| " (('2016-08-21', 'variation', 5), 8158),\n", | |
| " (('2016-08-18', 'control', 5), 16859),\n", | |
| " (('2016-08-17', 'variation', 6), 6181),\n", | |
| " (('2016-08-25', 'control', 6), 11224),\n", | |
| " (('2016-08-26', 'control', 0), 404136),\n", | |
| " (('2016-08-28', 'variation', 9), 0),\n", | |
| " (('2016-08-15', 'control', 7), 10903),\n", | |
| " (('2016-08-20', 'variation', 4), 8877),\n", | |
| " (('2016-08-27', 'variation', 6), 9674),\n", | |
| " (('2016-08-15', 'variation', 8), 6211),\n", | |
| " (('2016-08-23', 'control', 2), 43786),\n", | |
| " (('2016-08-17', 'variation', 9), 5752),\n", | |
| " (('2016-08-25', 'variation', 8), 11089),\n", | |
| " (('2016-08-14', 'variation', 9), 3327),\n", | |
| " (('2016-08-28', 'control', 2), 16178),\n", | |
| " (('2016-08-24', 'control', 5), 18306),\n", | |
| " (('2016-08-18', 'variation', 9), 6532),\n", | |
| " (('2016-08-16', 'control', 6), 14707),\n", | |
| " (('2016-08-21', 'control', 1), 280516),\n", | |
| " (('2016-08-28', 'variation', 1), 19914),\n", | |
| " (('2016-08-22', 'control', 5), 16832),\n", | |
| " (('2016-08-19', 'control', 6), 11877),\n", | |
| " (('2016-08-16', 'variation', 8), 7131),\n", | |
| " (('2016-08-24', 'variation', 1), 29457),\n", | |
| " (('2016-08-15', 'variation', 0), 14357),\n", | |
| " (('2016-08-22', 'variation', 1), 27730),\n", | |
| " (('2016-08-19', 'variation', 0), 33669),\n", | |
| " (('2016-08-14', 'control', 5), 10013),\n", | |
| " (('2016-08-26', 'variation', 6), 14251),\n", | |
| " (('2016-08-17', 'variation', 1), 4135),\n", | |
| " (('2016-08-25', 'variation', 0), 42308),\n", | |
| " (('2016-08-14', 'variation', 1), 2836),\n", | |
| " (('2016-08-23', 'variation', 0), 42177),\n", | |
| " (('2016-08-27', 'control', 4), 8824),\n", | |
| " (('2016-08-20', 'control', 6), 7048),\n", | |
| " (('2016-08-18', 'variation', 1), 10439),\n", | |
| " (('2016-08-20', 'variation', 3), 9281),\n", | |
| " (('2016-08-19', 'control', 9), 3600),\n", | |
| " (('2016-08-21', 'control', 9), 0),\n", | |
| " (('2016-08-17', 'control', 5), 18733),\n", | |
| " (('2016-08-16', 'variation', 0), 16202),\n", | |
| " (('2016-08-24', 'variation', 9), 0),\n", | |
| " (('2016-08-22', 'variation', 9), 0),\n", | |
| " (('2016-08-19', 'variation', 8), 11857),\n", | |
| " (('2016-08-21', 'variation', 2), 9571),\n", | |
| " (('2016-08-18', 'control', 6), 13158),\n", | |
| " (('2016-08-28', 'control', 5), 5724),\n", | |
| " (('2016-08-25', 'control', 7), 9709),\n", | |
| " (('2016-08-26', 'control', 7), 8353),\n", | |
| " (('2016-08-23', 'variation', 8), 14030),\n", | |
| " (('2016-08-15', 'control', 6), 13417),\n", | |
| " (('2016-08-28', 'variation', 8), 0),\n", | |
| " (('2016-08-19', 'control', 1), 382059),\n", | |
| " (('2016-08-23', 'control', 3), 25100),\n", | |
| " (('2016-08-27', 'variation', 7), 9221),\n", | |
| " (('2016-08-17', 'variation', 8), 6925),\n", | |
| " (('2016-08-19', 'variation', 7), 12356),\n", | |
| " (('2016-08-24', 'control', 4), 22173),\n", | |
| " (('2016-08-16', 'control', 7), 12203),\n", | |
| " (('2016-08-25', 'variation', 9), 0),\n", | |
| " (('2016-08-14', 'variation', 6), 2844),\n", | |
| " (('2016-08-16', 'variation', 9), 6460),\n", | |
| " (('2016-08-20', 'control', 9), 1520)]" | |
| ] | |
| }, | |
| "execution_count": 339, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "res" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Transform into CSV format" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 340, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "l_res = [[elem for elem in row[0]]+[row[1]] for row in res]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 341, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 342, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.DataFrame(l_res, columns=[\"Profile_Creation_Date\",\"Cohort\",\"Week\",\"Count\"])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 343, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df = df.sort_values(['Profile_Creation_Date','Week','Cohort'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 344, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "pivoted = df.set_index(['Profile_Creation_Date','Week','Cohort']).unstack([\"Week\",\"Cohort\"])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 345, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"20\" halign=\"left\">Count</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Week</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">0</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">1</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">2</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">3</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">4</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">5</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">6</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">7</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">8</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">9</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Cohort</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " <th>control</th>\n", | |
| " <th>variation</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Profile_Creation_Date</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2016-08-14</th>\n", | |
| " <td>269373</td>\n", | |
| " <td>8656</td>\n", | |
| " <td>260949</td>\n", | |
| " <td>2836</td>\n", | |
| " <td>35682</td>\n", | |
| " <td>1790</td>\n", | |
| " <td>21276</td>\n", | |
| " <td>1939</td>\n", | |
| " <td>12492</td>\n", | |
| " <td>2290</td>\n", | |
| " <td>10013</td>\n", | |
| " <td>2501</td>\n", | |
| " <td>7754</td>\n", | |
| " <td>2844</td>\n", | |
| " <td>6018</td>\n", | |
| " <td>3376</td>\n", | |
| " <td>5085</td>\n", | |
| " <td>3372</td>\n", | |
| " <td>4707</td>\n", | |
| " <td>3327</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-15</th>\n", | |
| " <td>389692</td>\n", | |
| " <td>14357</td>\n", | |
| " <td>374949</td>\n", | |
| " <td>3377</td>\n", | |
| " <td>50845</td>\n", | |
| " <td>2735</td>\n", | |
| " <td>32137</td>\n", | |
| " <td>3290</td>\n", | |
| " <td>19276</td>\n", | |
| " <td>4112</td>\n", | |
| " <td>16465</td>\n", | |
| " <td>4558</td>\n", | |
| " <td>13417</td>\n", | |
| " <td>5224</td>\n", | |
| " <td>10903</td>\n", | |
| " <td>5939</td>\n", | |
| " <td>9077</td>\n", | |
| " <td>6211</td>\n", | |
| " <td>7771</td>\n", | |
| " <td>6134</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-16</th>\n", | |
| " <td>428364</td>\n", | |
| " <td>16202</td>\n", | |
| " <td>411311</td>\n", | |
| " <td>3946</td>\n", | |
| " <td>55126</td>\n", | |
| " <td>3411</td>\n", | |
| " <td>33160</td>\n", | |
| " <td>4122</td>\n", | |
| " <td>21267</td>\n", | |
| " <td>4932</td>\n", | |
| " <td>18464</td>\n", | |
| " <td>5380</td>\n", | |
| " <td>14707</td>\n", | |
| " <td>6164</td>\n", | |
| " <td>12203</td>\n", | |
| " <td>7001</td>\n", | |
| " <td>10247</td>\n", | |
| " <td>7131</td>\n", | |
| " <td>8008</td>\n", | |
| " <td>6460</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-17</th>\n", | |
| " <td>435088</td>\n", | |
| " <td>16006</td>\n", | |
| " <td>417002</td>\n", | |
| " <td>4135</td>\n", | |
| " <td>53099</td>\n", | |
| " <td>3509</td>\n", | |
| " <td>30914</td>\n", | |
| " <td>4249</td>\n", | |
| " <td>21677</td>\n", | |
| " <td>4817</td>\n", | |
| " <td>18733</td>\n", | |
| " <td>5325</td>\n", | |
| " <td>14779</td>\n", | |
| " <td>6181</td>\n", | |
| " <td>12328</td>\n", | |
| " <td>6770</td>\n", | |
| " <td>10716</td>\n", | |
| " <td>6925</td>\n", | |
| " <td>7238</td>\n", | |
| " <td>5752</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-18</th>\n", | |
| " <td>424235</td>\n", | |
| " <td>22234</td>\n", | |
| " <td>407629</td>\n", | |
| " <td>10439</td>\n", | |
| " <td>49677</td>\n", | |
| " <td>6816</td>\n", | |
| " <td>27252</td>\n", | |
| " <td>7081</td>\n", | |
| " <td>20381</td>\n", | |
| " <td>7471</td>\n", | |
| " <td>16859</td>\n", | |
| " <td>7703</td>\n", | |
| " <td>13158</td>\n", | |
| " <td>8422</td>\n", | |
| " <td>11118</td>\n", | |
| " <td>8944</td>\n", | |
| " <td>9595</td>\n", | |
| " <td>8750</td>\n", | |
| " <td>5422</td>\n", | |
| " <td>6532</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-19</th>\n", | |
| " <td>395311</td>\n", | |
| " <td>33669</td>\n", | |
| " <td>382059</td>\n", | |
| " <td>22889</td>\n", | |
| " <td>45753</td>\n", | |
| " <td>13003</td>\n", | |
| " <td>23552</td>\n", | |
| " <td>12234</td>\n", | |
| " <td>18709</td>\n", | |
| " <td>12161</td>\n", | |
| " <td>15306</td>\n", | |
| " <td>12081</td>\n", | |
| " <td>11877</td>\n", | |
| " <td>12536</td>\n", | |
| " <td>9883</td>\n", | |
| " <td>12356</td>\n", | |
| " <td>8526</td>\n", | |
| " <td>11857</td>\n", | |
| " <td>3600</td>\n", | |
| " <td>7303</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-20</th>\n", | |
| " <td>307812</td>\n", | |
| " <td>25228</td>\n", | |
| " <td>301287</td>\n", | |
| " <td>18886</td>\n", | |
| " <td>33312</td>\n", | |
| " <td>10226</td>\n", | |
| " <td>16185</td>\n", | |
| " <td>9281</td>\n", | |
| " <td>12376</td>\n", | |
| " <td>8877</td>\n", | |
| " <td>9908</td>\n", | |
| " <td>8721</td>\n", | |
| " <td>7048</td>\n", | |
| " <td>8853</td>\n", | |
| " <td>6038</td>\n", | |
| " <td>8526</td>\n", | |
| " <td>5262</td>\n", | |
| " <td>7951</td>\n", | |
| " <td>1520</td>\n", | |
| " <td>3837</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-21</th>\n", | |
| " <td>285779</td>\n", | |
| " <td>23270</td>\n", | |
| " <td>280516</td>\n", | |
| " <td>17882</td>\n", | |
| " <td>29898</td>\n", | |
| " <td>9571</td>\n", | |
| " <td>14618</td>\n", | |
| " <td>8747</td>\n", | |
| " <td>11034</td>\n", | |
| " <td>8119</td>\n", | |
| " <td>8566</td>\n", | |
| " <td>8158</td>\n", | |
| " <td>6480</td>\n", | |
| " <td>8185</td>\n", | |
| " <td>5345</td>\n", | |
| " <td>7796</td>\n", | |
| " <td>4898</td>\n", | |
| " <td>7334</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-22</th>\n", | |
| " <td>434761</td>\n", | |
| " <td>41155</td>\n", | |
| " <td>419578</td>\n", | |
| " <td>27730</td>\n", | |
| " <td>47248</td>\n", | |
| " <td>16378</td>\n", | |
| " <td>24681</td>\n", | |
| " <td>15433</td>\n", | |
| " <td>20613</td>\n", | |
| " <td>15151</td>\n", | |
| " <td>16832</td>\n", | |
| " <td>15654</td>\n", | |
| " <td>13139</td>\n", | |
| " <td>15991</td>\n", | |
| " <td>11047</td>\n", | |
| " <td>15460</td>\n", | |
| " <td>9592</td>\n", | |
| " <td>14611</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-23</th>\n", | |
| " <td>438756</td>\n", | |
| " <td>42177</td>\n", | |
| " <td>424462</td>\n", | |
| " <td>29198</td>\n", | |
| " <td>43786</td>\n", | |
| " <td>16666</td>\n", | |
| " <td>25100</td>\n", | |
| " <td>16000</td>\n", | |
| " <td>20910</td>\n", | |
| " <td>15542</td>\n", | |
| " <td>16433</td>\n", | |
| " <td>16171</td>\n", | |
| " <td>13053</td>\n", | |
| " <td>16409</td>\n", | |
| " <td>11075</td>\n", | |
| " <td>16022</td>\n", | |
| " <td>8784</td>\n", | |
| " <td>14030</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-24</th>\n", | |
| " <td>445407</td>\n", | |
| " <td>43549</td>\n", | |
| " <td>421743</td>\n", | |
| " <td>29457</td>\n", | |
| " <td>40670</td>\n", | |
| " <td>16575</td>\n", | |
| " <td>25293</td>\n", | |
| " <td>16002</td>\n", | |
| " <td>22173</td>\n", | |
| " <td>15764</td>\n", | |
| " <td>18306</td>\n", | |
| " <td>16410</td>\n", | |
| " <td>15173</td>\n", | |
| " <td>16507</td>\n", | |
| " <td>12788</td>\n", | |
| " <td>15858</td>\n", | |
| " <td>8659</td>\n", | |
| " <td>12483</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-25</th>\n", | |
| " <td>436623</td>\n", | |
| " <td>42308</td>\n", | |
| " <td>422695</td>\n", | |
| " <td>30276</td>\n", | |
| " <td>34793</td>\n", | |
| " <td>17136</td>\n", | |
| " <td>22060</td>\n", | |
| " <td>16013</td>\n", | |
| " <td>17592</td>\n", | |
| " <td>15730</td>\n", | |
| " <td>13664</td>\n", | |
| " <td>15978</td>\n", | |
| " <td>11224</td>\n", | |
| " <td>15865</td>\n", | |
| " <td>9709</td>\n", | |
| " <td>15014</td>\n", | |
| " <td>5577</td>\n", | |
| " <td>11089</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-26</th>\n", | |
| " <td>404136</td>\n", | |
| " <td>38394</td>\n", | |
| " <td>393355</td>\n", | |
| " <td>27451</td>\n", | |
| " <td>27355</td>\n", | |
| " <td>15458</td>\n", | |
| " <td>19069</td>\n", | |
| " <td>14560</td>\n", | |
| " <td>14748</td>\n", | |
| " <td>14174</td>\n", | |
| " <td>11445</td>\n", | |
| " <td>14524</td>\n", | |
| " <td>9365</td>\n", | |
| " <td>14251</td>\n", | |
| " <td>8353</td>\n", | |
| " <td>13761</td>\n", | |
| " <td>3577</td>\n", | |
| " <td>8443</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-27</th>\n", | |
| " <td>298538</td>\n", | |
| " <td>27666</td>\n", | |
| " <td>293382</td>\n", | |
| " <td>20974</td>\n", | |
| " <td>17976</td>\n", | |
| " <td>11497</td>\n", | |
| " <td>12014</td>\n", | |
| " <td>10212</td>\n", | |
| " <td>8824</td>\n", | |
| " <td>10116</td>\n", | |
| " <td>6689</td>\n", | |
| " <td>10172</td>\n", | |
| " <td>5388</td>\n", | |
| " <td>9674</td>\n", | |
| " <td>4824</td>\n", | |
| " <td>9221</td>\n", | |
| " <td>1350</td>\n", | |
| " <td>4446</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2016-08-28</th>\n", | |
| " <td>279793</td>\n", | |
| " <td>25539</td>\n", | |
| " <td>275460</td>\n", | |
| " <td>19914</td>\n", | |
| " <td>16178</td>\n", | |
| " <td>10854</td>\n", | |
| " <td>10786</td>\n", | |
| " <td>9626</td>\n", | |
| " <td>7943</td>\n", | |
| " <td>9284</td>\n", | |
| " <td>5724</td>\n", | |
| " <td>9252</td>\n", | |
| " <td>4695</td>\n", | |
| " <td>8653</td>\n", | |
| " <td>4340</td>\n", | |
| " <td>8175</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Count \\\n", | |
| "Week 0 1 2 \n", | |
| "Cohort control variation control variation control variation \n", | |
| "Profile_Creation_Date \n", | |
| "2016-08-14 269373 8656 260949 2836 35682 1790 \n", | |
| "2016-08-15 389692 14357 374949 3377 50845 2735 \n", | |
| "2016-08-16 428364 16202 411311 3946 55126 3411 \n", | |
| "2016-08-17 435088 16006 417002 4135 53099 3509 \n", | |
| "2016-08-18 424235 22234 407629 10439 49677 6816 \n", | |
| "2016-08-19 395311 33669 382059 22889 45753 13003 \n", | |
| "2016-08-20 307812 25228 301287 18886 33312 10226 \n", | |
| "2016-08-21 285779 23270 280516 17882 29898 9571 \n", | |
| "2016-08-22 434761 41155 419578 27730 47248 16378 \n", | |
| "2016-08-23 438756 42177 424462 29198 43786 16666 \n", | |
| "2016-08-24 445407 43549 421743 29457 40670 16575 \n", | |
| "2016-08-25 436623 42308 422695 30276 34793 17136 \n", | |
| "2016-08-26 404136 38394 393355 27451 27355 15458 \n", | |
| "2016-08-27 298538 27666 293382 20974 17976 11497 \n", | |
| "2016-08-28 279793 25539 275460 19914 16178 10854 \n", | |
| "\n", | |
| " \\\n", | |
| "Week 3 4 5 \n", | |
| "Cohort control variation control variation control variation \n", | |
| "Profile_Creation_Date \n", | |
| "2016-08-14 21276 1939 12492 2290 10013 2501 \n", | |
| "2016-08-15 32137 3290 19276 4112 16465 4558 \n", | |
| "2016-08-16 33160 4122 21267 4932 18464 5380 \n", | |
| "2016-08-17 30914 4249 21677 4817 18733 5325 \n", | |
| "2016-08-18 27252 7081 20381 7471 16859 7703 \n", | |
| "2016-08-19 23552 12234 18709 12161 15306 12081 \n", | |
| "2016-08-20 16185 9281 12376 8877 9908 8721 \n", | |
| "2016-08-21 14618 8747 11034 8119 8566 8158 \n", | |
| "2016-08-22 24681 15433 20613 15151 16832 15654 \n", | |
| "2016-08-23 25100 16000 20910 15542 16433 16171 \n", | |
| "2016-08-24 25293 16002 22173 15764 18306 16410 \n", | |
| "2016-08-25 22060 16013 17592 15730 13664 15978 \n", | |
| "2016-08-26 19069 14560 14748 14174 11445 14524 \n", | |
| "2016-08-27 12014 10212 8824 10116 6689 10172 \n", | |
| "2016-08-28 10786 9626 7943 9284 5724 9252 \n", | |
| "\n", | |
| " \\\n", | |
| "Week 6 7 8 \n", | |
| "Cohort control variation control variation control variation \n", | |
| "Profile_Creation_Date \n", | |
| "2016-08-14 7754 2844 6018 3376 5085 3372 \n", | |
| "2016-08-15 13417 5224 10903 5939 9077 6211 \n", | |
| "2016-08-16 14707 6164 12203 7001 10247 7131 \n", | |
| "2016-08-17 14779 6181 12328 6770 10716 6925 \n", | |
| "2016-08-18 13158 8422 11118 8944 9595 8750 \n", | |
| "2016-08-19 11877 12536 9883 12356 8526 11857 \n", | |
| "2016-08-20 7048 8853 6038 8526 5262 7951 \n", | |
| "2016-08-21 6480 8185 5345 7796 4898 7334 \n", | |
| "2016-08-22 13139 15991 11047 15460 9592 14611 \n", | |
| "2016-08-23 13053 16409 11075 16022 8784 14030 \n", | |
| "2016-08-24 15173 16507 12788 15858 8659 12483 \n", | |
| "2016-08-25 11224 15865 9709 15014 5577 11089 \n", | |
| "2016-08-26 9365 14251 8353 13761 3577 8443 \n", | |
| "2016-08-27 5388 9674 4824 9221 1350 4446 \n", | |
| "2016-08-28 4695 8653 4340 8175 0 0 \n", | |
| "\n", | |
| " \n", | |
| "Week 9 \n", | |
| "Cohort control variation \n", | |
| "Profile_Creation_Date \n", | |
| "2016-08-14 4707 3327 \n", | |
| "2016-08-15 7771 6134 \n", | |
| "2016-08-16 8008 6460 \n", | |
| "2016-08-17 7238 5752 \n", | |
| "2016-08-18 5422 6532 \n", | |
| "2016-08-19 3600 7303 \n", | |
| "2016-08-20 1520 3837 \n", | |
| "2016-08-21 0 0 \n", | |
| "2016-08-22 0 0 \n", | |
| "2016-08-23 0 0 \n", | |
| "2016-08-24 0 0 \n", | |
| "2016-08-25 0 0 \n", | |
| "2016-08-26 0 0 \n", | |
| "2016-08-27 0 0 \n", | |
| "2016-08-28 0 0 " | |
| ] | |
| }, | |
| "execution_count": 345, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pivoted" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Export table to spreadsheet if necessary" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "anaconda-cloud": {}, | |
| "kernelspec": { | |
| "display_name": "Python [conda root]", | |
| "language": "python", | |
| "name": "conda-root-py" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.12" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 1 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment