Saptarshi Guha (saptarshiguha), public gists
## Download Protobuf Packages (2.5) and install
wget http://cbs.centos.org/kojifiles/packages/protobuf/2.5.0/10.el7.centos/x86_64/protobuf-2.5.0-10.el7.centos.x86_64.rpm
wget http://cbs.centos.org/kojifiles/packages/protobuf/2.5.0/10.el7.centos/x86_64/protobuf-devel-2.5.0-10.el7.centos.x86_64.rpm
wget http://cbs.centos.org/kojifiles/packages/protobuf/2.5.0/10.el7.centos/x86_64/protobuf-compiler-2.5.0-10.el7.centos.x86_64.rpm
sudo yum -y install protobuf-2.5.0-10.el7.centos.x86_64.rpm protobuf-compiler-2.5.0-10.el7.centos.x86_64.rpm protobuf-devel-2.5.0-10.el7.centos.x86_64.rpm
## Set Hadoop config variables that RHIPE requires
echo "export HADOOP_LIBS=/usr/lib/hadoop/client:/usr/lib/hadoop/lib:/usr/lib/hadoop:/usr/lib/hadoop-hdfs/:/usr/lib/hadoop-yarn/:/usr/lib/hadoop-mapreduce/:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/:/usr/share/aws/emr/emrfs/auxlib/" >> /home/hadoop/.bash_profile
ms.py (created October 24, 2016): computing counts from main_summary
import findspark
findspark.init()  # locate the local Spark installation

import pyspark
sc = pyspark.SparkContext(appName="myAppName")
sqlContext = pyspark.sql.SQLContext(sc)

import pyspark.sql.functions as fun
from pyspark.sql.window import Window
from pyspark.sql import Row
from operator import add
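Before running the Spark job itself, the shape of the computation can be sketched locally with plain Python; the rows below are hypothetical stand-ins for main_summary records, not real data:

```python
from collections import defaultdict

# Hypothetical (client_id, submission_date) pairs standing in for main_summary rows
rows = [
    ("a", "2016-10-01"), ("a", "2016-10-01"),
    ("b", "2016-10-01"), ("a", "2016-10-02"),
]

# Distinct clients per day, the kind of count the pyspark job computes
# (e.g. with fun.countDistinct over a grouped DataFrame)
clients_per_day = defaultdict(set)
for client_id, date in rows:
    clients_per_day[date].add(client_id)

counts = {d: len(s) for d, s in clients_per_day.items()}
print(counts)  # {'2016-10-01': 2, '2016-10-02': 1}
```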
hll.org (created October 24, 2016): HLL queries for all, channel and country

Generate queries using Redash

  1. Compare average WAU counts across the months of June, July, and August for Firefox desktop. These queries must be run on https://sql.telemetry.mozilla.org and the results downloaded as CSV files.
WITH sample AS
  ( SELECT *
   FROM client_count
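Once the Redash results are downloaded as CSV, the month-over-month averages can be compared with a short script; the column names and numbers below are hypothetical, not from the actual export:

```python
import csv, io
from collections import defaultdict

# Hypothetical CSV export: one WAU count per week, labeled by month
csv_text = """month,wau
June,1000
June,1200
July,1100
July,1300
August,900
August,1100
"""

totals = defaultdict(list)
for row in csv.DictReader(io.StringIO(csv_text)):
    totals[row["month"]].append(int(row["wau"]))

# Average WAU per month
avg_wau = {m: sum(v) / len(v) for m, v in totals.items()}
print(avg_wau)  # {'June': 1100.0, 'July': 1200.0, 'August': 1000.0}
```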
x0 = sqlContext.sql("""
select
  client_id,
  substr(subsession_start_date, 0, 10) as date,
  case when (sum(plugin_hangs) + sum(crashes_detected_plugin) + sum(crashes_detected_gmplugin)) > 0 then 1 else 0 end as cpq,
  case when sum(crashes_detected_content) > 0 then 1 else 0 end as ccq
from frame
where channel = 'release' and app_name = 'Firefox' and vendor = 'Mozilla' and sample_id in (1,2,3,4,5,6,7,8,9,10)
  and substr(subsession_start_date, 0, 10) >= '2016-07-25' and substr(subsession_start_date, 0, 10) <= '2016-08-30'
  and client_id is not null
group by client_id, substr(subsession_start_date, 0, 10)
""")
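The case-when flags above reduce to a simple threshold per (client, day): cpq marks any plugin-related hang or crash, ccq any content crash. A local sketch of the same logic with made-up numbers:

```python
# Per-(client, day) crash sums; hypothetical values mirroring the query's columns
rows = [
    {"plugin_hangs": 0, "crashes_detected_plugin": 2,
     "crashes_detected_gmplugin": 0, "crashes_detected_content": 0},
    {"plugin_hangs": 0, "crashes_detected_plugin": 0,
     "crashes_detected_gmplugin": 0, "crashes_detected_content": 1},
]

def flags(r):
    # cpq: any plugin hang or plugin/GMP crash; ccq: any content-process crash
    cpq = 1 if (r["plugin_hangs"] + r["crashes_detected_plugin"]
                + r["crashes_detected_gmplugin"]) > 0 else 0
    ccq = 1 if r["crashes_detected_content"] > 0 else 0
    return cpq, ccq

print([flags(r) for r in rows])  # [(1, 0), (0, 1)]
```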
# Add-on IDs of interest (GUID-style and Jetpack-style identifiers)
addonID = ["{e4a8a97b-f2ed-450b-b12d-ee082ba24781}",
"{b9bfaf1c-a63f-47cd-8b9a-29526ced9060}",
"{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
"jid1-F9UJ2thwoAm5gQ@jetpack",
"{3d7eb24f-2740-49df-8937-200b1cc08f8a}",
"{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}",
"[email protected]",
"jid1-Xo5SuA6qc1DFpw@jetpack",
"jid0-GXjLLfbCoAx0LcltEdFrEkQdQPI@jetpack"]
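A typical use of such a list is filtering a client's active add-ons down to the IDs of interest. A minimal local sketch; the active_addons value is made up for illustration:

```python
# Subset of the IDs above, held as a set for O(1) membership tests
addonID = {"{e4a8a97b-f2ed-450b-b12d-ee082ba24781}",
           "jid1-F9UJ2thwoAm5gQ@jetpack"}

# Hypothetical active_addons field for one client
active_addons = ["jid1-F9UJ2thwoAm5gQ@jetpack", "someother@addon"]

matched = [a for a in active_addons if a in addonID]
print(matched)  # ['jid1-F9UJ2thwoAm5gQ@jetpack']
```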

tmux cheatsheet

As configured in my dotfiles.

start new:

tmux

start new with session name:

tmux new -s myname

import mozillametricstools.common.functions as cf
from mozillametricstools.common.functions import dateRangeFromTo
import datetime, time

# Load the most recent longitudinal dataset
frame = sqlContext.read.load(cf.latest_longitudinal_path())
first = frame.limit(5).rdd.collect()  # peek at a few rows
def dateDiff(d, delta=0):
    # Shift a "YYYY-MM-DD" date string by delta days. The original body was
    # truncated, so this is a reconstruction of the likely intent.
    t = datetime.datetime.strptime(d, "%Y-%m-%d") + datetime.timedelta(days=delta)
    return t.strftime("%Y-%m-%d")