Skip to content

Instantly share code, notes, and snippets.

@saptarshiguha
Created March 25, 2016 00:06
Show Gist options
  • Select an option

  • Save saptarshiguha/12a13144e14eb6c286cd to your computer and use it in GitHub Desktop.

Select an option

Save saptarshiguha/12a13144e14eb6c286cd to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
# Label every Spark job started from this session so it can be identified
# (and cancelled) as a group.
sc.setJobGroup(groupId="sguha", description="interactive")

# Load the longitudinal dataset snapshot.
frame = sqlContext.read.load("s3://telemetry-parquet/longitudinal/v20160314")

# Pull a handful of beta-channel rows to the driver for interactive inspection.
first = (
    frame
    .filter("normalized_channel = 'beta'")
    .limit(5)
    .rdd
    .collect()
)

# Driver-side counters incremented from inside smry() on the executors.
profilePassed = {
    counter: sc.accumulator(0)
    for counter in ('total', 'hasSubmissions', 'didPass', 'nulle10s')
}
def smry(p):
    """Summarise one profile row into per-e10s CSV lines (for ``flatMap``).

    Goal: for beta profiles on version "46.0", collect per profile and per
    e10s setting the total session time, total content crashes and total
    plugin crashes over the submission window.

    Only submissions dated 2016-03-01 through 2016-03-11 (inclusive, compared
    on the first 10 characters of the submission date) with build version
    "46.0" contribute to the totals.
    NOTE(review): the original comment gave the window as 20160305-20160311
    while the code filtered from 2016-03-01; the code's value is kept here.
    Confirm which start date is intended.

    Args:
        p: a row exposing ``client_id``, ``submission_date``,
            ``subsession_length``, ``subprocess_crashes_with_dump``,
            ``settings`` and ``build``. The list-valued fields are walked in
            lockstep, one entry per submission.

    Yields:
        Strings ``client_id,e10s,total_time,content_crashes,plugin_crashes``:
        one with ``e10s=True`` if any submission had e10s enabled, and one
        with ``e10s=False`` if any submission had it disabled or unknown.
        These conditions look at *all* submissions, not only those inside
        the window, so a yielded line may carry all-zero totals.

    Side effects:
        Increments the module-level ``profilePassed`` accumulators:
        ``total`` (every profile), ``hasSubmissions`` (profile has at least
        one submission), ``didPass`` (profile has at least one submission
        inside the window on 46.0) and ``nulle10s`` (once per submission
        whose e10s flag is ``None``).
    """
    clientid = p.client_id
    # Guard against null array columns so one odd profile cannot kill the job.
    submissiondates = [x[0:10] for x in (p.submission_date or [])]
    zeros = [0] * len(submissiondates)
    slength = p.subsession_length or zeros

    crashes = p.subprocess_crashes_with_dump
    if crashes is None:
        crashes = {}
    # Per-submission crash counts; default to zeros when a key is absent.
    crashplugin = crashes.get("plugin", zeros)
    crashcontent = crashes.get("content", zeros)

    version = [x.version for x in (p.build or [])]
    e10s = [x.e10s_enabled for x in (p.settings or [])]

    addOnce = True
    # Running totals keyed by the e10s flag of the contributing submission.
    plugincrashes = {True: 0, False: 0}
    contentcrashes = {True: 0, False: 0}
    totaltime = {True: 0, False: 0}

    profilePassed['total'].add(1)
    if submissiondates:
        profilePassed['hasSubmissions'].add(1)
        # zip() stops at the shortest sequence, so every list must be a
        # per-submission list (not one of the two-key totals dicts above).
        for sdate, ver, pc, cc, tt, e10 in zip(submissiondates, version,
                                               crashplugin, crashcontent,
                                               slength, e10s):
            if e10 is None:
                # Unknown e10s state: count it, but do not attribute the
                # submission to either cohort.
                profilePassed['nulle10s'].add(1)
                continue
            if "2016-03-01" <= sdate <= "2016-03-11" and ver == "46.0":
                if addOnce:
                    # Count each qualifying profile only once.
                    profilePassed['didPass'].add(1)
                    addOnce = False
                plugincrashes[e10] += pc
                contentcrashes[e10] += cc
                totaltime[e10] += tt

    if any(e10s):
        yield ",".join([str(x) for x in [clientid, True, totaltime[True],
                                         contentcrashes[True],
                                         plugincrashes[True]]])
    if not all(e10s):
        yield ",".join([str(x) for x in [clientid, False, totaltime[False],
                                         contentcrashes[False],
                                         plugincrashes[False]]])
# Restrict to beta-channel profiles and keep only the columns smry() reads.
sf = (
    frame
    .filter("normalized_channel = 'beta'")
    .select(
        "client_id",
        "submission_date",
        "subsession_length",
        "subprocess_crashes_with_dump",
        "settings",
        "build",
    )
)

# Summarise every profile, shrink to 100 partitions, and write the CSV lines.
sf3 = sf.flatMap(smry).coalesce(100)
sf3.saveAsTextFile("s3://telemetry-test-bucket/sguhatmp/crashhours11")

# Accumulator values are read here, after the save action has run the job.
# This is a bare expression: its value is only shown in an interactive
# session; nothing is printed when run as a plain script.
"Total Profiles = {}, PassedWithSubmission = {} , isOfInterest = {} empty Nulls = {}".format(
    profilePassed['total'].value,
    profilePassed['hasSubmissions'].value,
    profilePassed['didPass'].value,
    profilePassed['nulle10s'].value,
)

# Peek at one output line (also a bare expression).
# NOTE(review): sf3 is not cached, so this presumably re-runs smry() on some
# rows and bumps the accumulators again after they were reported above.
sf3.take(1)
## Different versions present
from operator import add
def versionMap(p):
    """Map a profile row to ``(version, 1)`` for counting with ``reduceByKey``.

    The version is read from the first entry of ``p.build``. Profiles whose
    ``build`` is ``None`` or empty are keyed under ``None`` instead of
    raising, so a single profile without build data cannot abort the whole
    counting job.

    Args:
        p: a row exposing ``build``, a sequence of objects with a
            ``version`` attribute.

    Returns:
        A ``(version, 1)`` tuple; ``version`` is ``None`` when no build
        entry is available.
    """
    builds = p.build
    if not builds:
        return (None, 1)
    # Only the first entry is needed; no need to materialise every version.
    return (builds[0].version, 1)
# Count profiles per version and bring the (version, count) pairs back to
# the driver.
versions = (
    sf
    .map(versionMap)
    .reduceByKey(add)
    .collect()
)
# x.fx_page_load_ms
# first_paint
# subprocess_crashes_with_dump
# subsession_length
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment