Skip to content

Instantly share code, notes, and snippets.

@saptarshiguha
Created April 26, 2016 06:45
Show Gist options
  • Select an option

  • Save saptarshiguha/583f3ba9f9503db1ca0587509dad8662 to your computer and use it in GitHub Desktop.

Select an option

Save saptarshiguha/583f3ba9f9503db1ca0587509dad8662 to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
# Label the Spark jobs started from this session.
sc.setJobGroup(description="interactive", groupId="sguha")

# Longitudinal telemetry snapshot that the rest of the script works from.
frame = sqlContext.read.load(
    "s3://telemetry-parquet/longitudinal/v20160321"
)

# Pull a handful of beta-channel rows to the driver for manual inspection.
first = (
    frame.filter("normalized_channel = 'beta'")
    .limit(5)
    .rdd
    .collect()
)

# Counters that smry() bumps while the job runs; each starts at zero.
profilePassed = {
    counter: sc.accumulator(0)
    for counter in ('total', 'hasSubmissions', 'didPass', 'nulle10s')
}
def smry(p):
    """Summarise one profile's version-46.0 sessions, split by e10s state.

    Written to be handed to ``flatMap``: it is a generator that yields zero,
    one or two text lines per input row.

    Args:
        p: a row exposing ``client_id``, ``submission_date``,
            ``subsession_length``, ``subprocess_crashes_with_dump``,
            ``settings`` and ``build``.  The list-valued fields are walked in
            lockstep with ``zip`` (one entry per session), so the shortest
            list bounds how many sessions are looked at.

    Yields:
        For each e10s state (True first, then False) that occurs on at least
        one 46.0 session, the comma-joined string
        ``client_id,e10s,total session time,content crashes,plugin crashes``.

    Side effects:
        Increments the module-level ``profilePassed`` accumulators, each at
        most once per profile:
        ``total`` (every profile), ``hasSubmissions`` (non-empty
        ``submission_date``), ``didPass`` (at least one 46.0 session) and
        ``nulle10s`` (at least one 46.0 session whose e10s flag is null).
        NOTE(review): these are updated inside a transformation, so Spark may
        apply them more than once if a task is re-executed -- treat the
        totals as approximate.
    """
    clientid = p.client_id
    submissiondates = [x[0:10] for x in (p.submission_date or [])]
    zeros = [0] * len(submissiondates)

    # Null list columns are treated as "no sessions" instead of raising.
    builds = p.build or []
    settings = p.settings or []
    slength = p.subsession_length or zeros

    crashes = p.subprocess_crashes_with_dump or {}
    # The key must match the variable: "plugin" feeds the plugin totals and
    # "content" the content totals, otherwise the two output columns swap.
    crashplugin = crashes.get("plugin")
    if crashplugin is None:
        crashplugin = zeros
    crashcontent = crashes.get("content")
    if crashcontent is None:
        crashcontent = zeros

    version = [x.version for x in builds]
    e10s = [x.e10s_enabled for x in settings]

    ## We want to collect
    ## during the period 20160305-20160311
    ## for beta, version 46.0
    ## profile id, e10s, total session time, total contentcrash, total plugincrash
    ## and profile must be on beta 46
    # NOTE(review): only the version test is applied below; no build-date or
    # submission-date window is enforced in this function.
    plugincrashes = {True: 0, False: 0}
    contentcrashes = {True: 0, False: 0}
    totaltime = {True: 0, False: 0}

    profilePassed['total'].add(1)
    if submissiondates:
        profilePassed['hasSubmissions'].add(1)

    onTarget = False      # profile has at least one 46.0 session
    sawNullE10s = False   # a 46.0 session carried no e10s flag
    statesSeen = set()    # e10s states that actually occurred on 46.0
    for ver, pc, cc, tt, e10 in zip(version, crashplugin, crashcontent,
                                    slength, e10s):
        if ver != "46.0":
            continue
        if not onTarget:
            profilePassed['didPass'].add(1)
            onTarget = True
        if e10 is None:
            # Cannot be attributed to either state; count the profile once
            # and leave the session out of the totals.
            if not sawNullE10s:
                profilePassed['nulle10s'].add(1)
                sawNullE10s = True
            continue
        e10 = bool(e10)
        statesSeen.add(e10)
        # Null per-session values count as zero.
        plugincrashes[e10] += pc or 0
        contentcrashes[e10] += cc or 0
        totaltime[e10] += tt or 0

    # Only report states observed on 46.0, so sessions on other versions
    # cannot produce an all-zero row.
    for state in (True, False):
        if state in statesSeen:
            yield ",".join(str(x) for x in [clientid, state,
                                            totaltime[state],
                                            contentcrashes[state],
                                            plugincrashes[state]])
# Keep beta-channel profiles and only the columns that smry() reads.
sf = frame.filter("normalized_channel = 'beta'").select(
    "client_id",
    "submission_date",
    "subsession_length",
    "subprocess_crashes_with_dump",
    "settings",
    "build",
)
# Go through .rdd explicitly: DataFrame.flatMap is only available in
# PySpark 1.x, where it simply forwards to .rdd.flatMap, so this form gives
# the same result there and keeps working on Spark 2.0 and later.
# coalesce(100) caps the number of partitions written out.
sf3 = sf.rdd.flatMap(smry).coalesce(100)
sf3.saveAsTextFile("s3://telemetry-test-bucket/sguhatmp/crashhours14")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment