Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save saptarshiguha/1a9b67615e82d0d26fdf06506cb07d00 to your computer and use it in GitHub Desktop.
Save saptarshiguha/1a9b67615e82d0d26fdf06506cb07d00 to your computer and use it in GitHub Desktop.
```{r}
## Get the JSON file of releases
library(rjson)
library(data.table)
library(infuser)
## Build `releases`: one row per Firefox major release (version, date),
## restricted to releases dated on or after 2016-07-01.
## The JSON maps version string -> release date string, so names(x) supplies
## the versions and unlist(x) the dates.
releases <- local({
  ## Download to a session-private temp file rather than a fixed /tmp path,
  ## so the script is portable and concurrent runs cannot clobber each other.
  json_path <- tempfile(fileext = ".json")
  download.file(
    "https://product-details.mozilla.org/1.0/firefox_history_major_releases.json",
    destfile = json_path
  )
  x <- fromJSON(file = json_path)
  unlink(json_path)
  data.table(version = names(x), date = as.Date(unlist(x)))
})[date >= as.Date("2016-07-01"), ]
## Daily active users: for every submission day from 2016-07-01 onwards,
## count the distinct client_ids in main_summary rows matching
## app_name 'Firefox', channel 'release' and sample_id '42'.
## NOTE(review): sql()/collect() are presumably SparkR's - confirm the
## session/context is set up before this chunk runs.
dau4everQ <- sql(" select
from_unixtime(unix_timestamp(submission_date_s3, 'yyyyMMdd'), 'yyyy-MM-dd') as date,
count(distinct(client_id)) as dau
from main_summary
where app_name='Firefox' and normalized_channel = 'release' and sample_id='42'
and submission_date_s3>='20160701'
group by 1 order by 1")
## Pull the aggregated result into a local data.table.
dau4ever <- data.table(collect(dau4everQ))
## The query returns the day as a 'yyyy-MM-dd' string; store it as a Date so
## it can be joined against the per-release tables.
dau4ever[, date := as.Date(date)]
## For each release, count per day the distinct clients whose
## profile_creation_date is on/after the release date ("np"), for submission
## days on/after the release, then express that count as a fraction of the
## overall DAU on the same day (pNewOnDAU). Only the first 365 days after the
## release are kept. The per-release tables are stacked into `y`.
## NOTE(review): dateSinceEpoch is as.integer() of a Date, i.e. days since
## 1970-01-01 - presumably the unit profile_creation_date uses; confirm.
collector <- vector("list", nrow(releases))
## seq_len() (not 1:nrow) so an empty `releases` table runs zero iterations.
for (i in seq_len(nrow(releases))) {
  print(releases[i, ])
  ## Pull the scalars out first so they are not looked up inside another
  ## data.table's scope (x also has a `date` column).
  release_version <- releases[i, version]
  release_date <- as.Date(releases[i, date])
  q1 <- sql(as.character(infuse(
"select
from_unixtime(unix_timestamp(submission_date_s3, 'yyyyMMdd'), 'yyyy-MM-dd') as date,
count(distinct(client_id)) as np
from main_summary
where app_name = 'Firefox' and normalized_channel = 'release' and sample_id='42'
and profile_creation_date >= {{dateSinceEpoch}}
and submission_date_s3 >='{{releasedateYYMMDD}}'
group by 1 order by 1
",
    dateSinceEpoch = as.integer(release_date),
    releasedateYYMMDD = strftime(release_date, "%Y%m%d")
  )))
  x <- data.table(collect(q1))
  ## A release with no matching rows yet contributes nothing; skipping avoids
  ## assigning length-1 values into a zero-row table. rbindlist() ignores the
  ## NULL slot left behind.
  if (nrow(x) == 0L) next
  x[, `:=`(
    version = release_version,
    releasedate = release_date,
    date = as.Date(date)
  )]
  ## Date - Date is a difftime in days; keep it as a plain number.
  x[, daysSince := as.numeric(date - releasedate)]
  x <- x[daysSince < 365, ]
  ## Left join: keep every per-release day even if DAU is missing for it
  ## (pNewOnDAU is then NA).
  x <- merge(x, dau4ever, by = "date", all.x = TRUE)
  x[, pNewOnDAU := np / dau]
  collector[[i]] <- x
}
y <- rbindlist(collector)
## One translucent line per release: share of DAU coming from profiles created
## since that release, against days since the release.
## xyplot is called via its namespace because lattice is not attached by the
## library() calls in this file.
lattice::xyplot(pNewOnDAU ~ daysSince, type = c("g", "l"), col = "#00000030",
                groups = version, data = y)
## Across releases, per day-since-release: mean share and a normal-approximation
## 95% interval (mean +/- 1.96 * sd / sqrt(n)), where n is the number of rows
## for that day.
z <- y[, {
  avg <- mean(pNewOnDAU)
  half_width <- 1.96 * sd(pNewOnDAU) / sqrt(.N)
  list(mean = avg, low = avg - half_width, high = avg + half_width)
}, by = daysSince]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment