- Compare average WAU counts across the months of June, July, and August for Firefox desktop. These queries need to be run on https://sql.telemetry.mozilla.org and their results then downloaded as CSV files.
WITH sample AS ( SELECT * FROM client_count
| ## Download Protobuf Packages (2.5) and install | |
| wget http://cbs.centos.org/kojifiles/packages/protobuf/2.5.0/10.el7.centos/x86_64/protobuf-2.5.0-10.el7.centos.x86_64.rpm | |
| wget http://cbs.centos.org/kojifiles/packages/protobuf/2.5.0/10.el7.centos/x86_64/protobuf-devel-2.5.0-10.el7.centos.x86_64.rpm | |
| wget http://cbs.centos.org/kojifiles/packages/protobuf/2.5.0/10.el7.centos/x86_64/protobuf-compiler-2.5.0-10.el7.centos.x86_64.rpm | |
| sudo yum -y install protobuf-2.5.0-10.el7.centos.x86_64.rpm protobuf-compiler-2.5.0-10.el7.centos.x86_64.rpm protobuf-devel-2.5.0-10.el7.centos.x86_64.rpm | |
| ## Set Hadoop Config Variables that RHIPE requires | |
| echo "export HADOOP_LIBS=/usr/lib/hadoop/client:/usr/lib/hadoop/lib:/usr/lib/hadoop:/usr/lib/hadoop-hdfs/:/usr/lib/hadoop-yarn/:/usr/lib/hadoop-mapreduce/:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/:/usr/share/aws/emr/emrfs/auxlib/" >> /home/hadoop/.bash_profile |
| import findspark | |
| findspark.init() | |
| import pyspark | |
| sc = pyspark.SparkContext(appName="myAppName") | |
| sqlContext = pyspark.sql.SQLContext(sc) | |
| import pyspark.sql.functions as fun | |
| from pyspark.sql.window import Window | |
| from pyspark.sql import Row | |
| from operator import add |
WITH sample AS ( SELECT * FROM client_count
| x0 = sqlContext.sql(""" | |
| select | |
| client_id, | |
| substr(subsession_start_date,0,10) as date, | |
| case when (sum(plugin_hangs) + sum(crashes_detected_plugin) + sum(crashes_detected_gmplugin )) > 0 then 1 else 0 end as cpq, | |
| case when sum(crashes_detected_content)>0 then 1 else 0 end as ccq | |
| from frame | |
| where channel='release' and app_name = 'Firefox' and vendor='Mozilla' and sample_id in (1,2,3,4,5,6,7,8,9,10) | |
| and substr(subsession_start_date,0,10) >='2016-07-25' and substr(subsession_start_date,0,10) <= '2016-08-30' | |
| and client_id is not null |
| addonID = ["{e4a8a97b-f2ed-450b-b12d-ee082ba24781}", | |
| "{b9bfaf1c-a63f-47cd-8b9a-29526ced9060}", | |
| "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", | |
| "jid1-F9UJ2thwoAm5gQ@jetpack", | |
| "{3d7eb24f-2740-49df-8937-200b1cc08f8a}", | |
| "{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}", | |
| "[email protected]", | |
| "jid1-Xo5SuA6qc1DFpw@jetpack", | |
| "jid0-GXjLLfbCoAx0LcltEdFrEkQdQPI@jetpack"] |
As configured in my dotfiles.
start new:
tmux
start new with session name:
| import mozillametricstools.common.functions as cf | |
| from mozillametricstools.common.functions import dateRangeFromTo | |
| import datetime, time | |
| frame = sqlContext.read.load(cf.latest_longitudinal_path()) | |
| first = frame.limit(5).rdd.collect() | |
| def dateDiff(d,delta=0): |