Skip to content

Instantly share code, notes, and snippets.

@saptarshiguha
Created November 22, 2016 21:50
Show Gist options
  • Save saptarshiguha/365fdf7eafa64684865d4fec9b9ece6c to your computer and use it in GitHub Desktop.
Save saptarshiguha/365fdf7eafa64684865d4fec9b9ece6c to your computer and use it in GitHub Desktop.
# Load the main_summary v3 dataset (Parquet on S3) through the session's
# sqlContext; nothing is read until an action runs on the DataFrame.
mainpingspq = sqlContext.read.format("parquet").load(
    "s3://telemetry-parquet/main_summary/v3"
)
import datetime
# Reference point used to express calendar dates as "days since Jan 1, 1970".
DATE_1970_01_01 = datetime.datetime(1970, 1, 1)


def _days_since_epoch(day):
    """Return the number of whole days from 1970-01-01 to *day* ("YYYY-MM-DD")."""
    parsed = datetime.datetime.strptime(day, "%Y-%m-%d")
    return (parsed - DATE_1970_01_01).days


# Profile-creation window, as date strings and as days since Jan 1, 1970
# (the DAYS_* values are what gets compared against profile_creation_date).
START_profile = "2016-08-14"
END_profile = "2016-08-28"
DAYS_START_profile = _days_since_epoch(START_profile)
DAYS_END_profile = _days_since_epoch(END_profile)

# Window compared against the first 10 characters of subsession_start_date.
START_ping = '2016-08-14'
END_ping = '2016-10-15'

# Window compared against submission_date_s3 (YYYYMMDD strings).
START_s3 = '20160813'
END_s3 = '20161125'
# Project the main_summary frame down to the columns the filters below need.
_u0_columns = [
    "submission_date_s3",
    "app_name",
    "vendor",
    "normalized_channel",
    "profile_creation_date",
    "subsession_start_date",
    "e10s_cohort",
    "client_id",
    "e10s_enabled",
]
u0 = mainpingspq.selectExpr(*_u0_columns)
# First 10 characters of the subsession start value, compared as a string
# against START_ping / END_ping.
# NOTE(review): this presumes subsession_start_date begins with YYYY-MM-DD so
# that string ordering matches date ordering -- confirm against the schema.
ping_day = u0.subsession_start_date.substr(1, 10)

# Keep release-channel Mozilla Firefox pings from the e10s test/control
# cohorts whose submission date, profile creation date and subsession start
# day all fall inside their (inclusive) windows.
u1 = (
    u0.filter(u0.submission_date_s3.between(START_s3, END_s3))
    .filter(u0.app_name == 'Firefox')
    .filter(u0.vendor == 'Mozilla')
    .filter(u0.normalized_channel == 'release')
    .filter(u0.profile_creation_date.between(DAYS_START_profile, DAYS_END_profile))
    .filter(ping_day.between(START_ping, END_ping))
    .filter(u0.e10s_cohort.isin('test', 'control'))
)
# Both exports contain the same rows. Writing u1 twice directly would run the
# full S3 scan + filter lineage once per write (and the two outputs could
# differ if the source changes in between), so materialize the coalesced
# result once and write both copies from it.
export_df = u1.coalesce(400).persist()
try:
    export_df.write.csv("s3://mozilla-metrics/user/sguha/tmp/e10sqthis")
    export_df.write.csv("s3://mozilla-metrics/user/sguha/tmp/e10anax")
finally:
    # Release the cached partitions even if one of the writes fails.
    export_df.unpersist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment