Skip to content

Instantly share code, notes, and snippets.

@saptarshiguha
Created December 16, 2016 20:30
Show Gist options
  • Select an option

  • Save saptarshiguha/bbf7fdf143d5ba7b9afd1155b8c99f46 to your computer and use it in GitHub Desktop.

Select an option

Save saptarshiguha/bbf7fdf143d5ba7b9afd1155b8c99f46 to your computer and use it in GitHub Desktop.
# Load the main_summary v3 Parquet dataset from S3 and expose it to SQL as the
# temp table "ms".
mp = sqlContext.read.load("s3://telemetry-parquet/main_summary/v3", format="parquet")
mp.registerTempTable("ms")
# Calendar months to analyse, as (label 'YYYY-MM', first day of that month,
# first day of the following month).  The integers are day counts since
# 1970-01-01 (16892 is 2016-04-01); presumably that is the unit in which
# profile_creation_date is stored -- TODO confirm against the dataset schema.
_MONTHS = [
    ('2016-04', 16892, 16922),
    ('2016-05', 16922, 16953),
    ('2016-06', 16953, 16983),
    ('2016-07', 16983, 17014),
    ('2016-08', 17014, 17045),
    ('2016-09', 17045, 17075),
    ('2016-10', 17075, 17106),
    ('2016-11', 17106, 17136),
]

# Per-client number of distinct active days inside one calendar month.  The
# {profile_filter} slot is the only part that differs between the two cohorts.
# NOTE(review): sample_id is compared against quoted literals.  If the column
# is a string the comparison is lexicographic, so '10'..'49' also fall inside
# the range, not just '1'..'5' -- confirm which sample was intended.
_ACTIVE_DAYS_SQL = """
select client_id, count(distinct( substring(subsession_start_date,1,10))) as ndayactive
from ms where app_name = 'Firefox'
and substring(subsession_start_date,1,7)=='{month}'
and sample_id >= '1' and sample_id <='5' and {profile_filter}
group by 1
"""

# Histogram over the "res2" temp table: how many clients had each
# active-day count in the month.
_HISTOGRAM_SQL = """ select '{month}-01' as month,ndayactive, count(distinct(client_id)) as freq from res2 group by ndayactive"""


def _active_day_histograms(profile_filter):
    """Collect the monthly active-day histogram for one cohort of profiles.

    For every entry of ``_MONTHS`` the per-client active-day counts are
    computed from the "ms" table, registered as the temp table "res2"
    (overwriting the previous month's registration), and then reduced to
    (month, ndayactive, freq) rows.

    Args:
        profile_filter: SQL predicate on ``profile_creation_date`` that
            selects the cohort.  It may contain the placeholders ``{start}``
            (first day of the month) and ``{end}`` (first day of the next
            month), which are filled in for each month.

    Returns:
        A list with one element per month, in ``_MONTHS`` order; each element
        is the result of ``collect()`` on that month's histogram query.
    """
    histograms = []
    for whatdate in _MONTHS:
        # Progress output: each query runs a full Spark job.
        print(whatdate)
        month, month_start, next_month_start = whatdate
        cohort_predicate = profile_filter.format(
            start=month_start, end=next_month_start)
        per_client = sqlContext.sql(_ACTIVE_DAYS_SQL.format(
            month=month, profile_filter=cohort_predicate))
        per_client.registerTempTable("res2")
        histograms.append(
            sqlContext.sql(_HISTOGRAM_SQL.format(month=month)).collect())
    return histograms


# Profiles created during the month being measured.
newusers = _active_day_histograms(
    "profile_creation_date >= {start} and profile_creation_date< {end}")

# Profiles that already existed before the month being measured started.
properusers = _active_day_histograms("profile_creation_date < {start}")
# NOTE(review): awsDF and cl are not defined anywhere in this snippet, and this
# assignment rebinds newusers, replacing whatever it held before -- confirm
# that the overwrite is intended.
newusers = awsDF(cl)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment