Skip to content

Instantly share code, notes, and snippets.

@saptarshiguha
Created April 26, 2016 21:24
Show Gist options
  • Select an option

  • Save saptarshiguha/05ba68a6eb9ff88f4002081833af805a to your computer and use it in GitHub Desktop.

Select an option

Save saptarshiguha/05ba68a6eb9ff88f4002081833af805a to your computer and use it in GitHub Desktop.
## as of D,
## active on day i between D-7,D-1
## active on day i between D-28,D-1
## was active in last 7 days
## was active in last 28 days
# number of active days in last 7
# number of active days in last 28
m <- function(a,b,paramDate){
b <- fromJSON(b)
dydate <- seq( from=as.Date(paramDate) - 90,to=as.Date(paramDate)-14, by=1)
dy <- b$data$days
for(i in seq_along(dydate)){
cutDate <- dydate[[i]]
d7Range <- as.character(seq(from=cutDate-7,to=cutDate-1, by=1))
d28Range <- as.character(seq(from=cutDate-28,to=cutDate-1, by=1))
wasActive <- FALSE;s <- 0
for(d in d7Range){
en <- dy[[ d ]]
rhcollect(list(f = paramDate,fd=cutDate,date=d,type='daily7'), (1-is.null(en)))
wasActive <- wasActive || !is.null(en)
s <- s+(1-is.null(en))
}
rhcollect(list(f = paramDate,fd=cutDate,type='wasActive7'),1*wasActive)
rhcollect(list(f = paramDate,fd=cutDate,type='d7dist', x = s),1)
wasActive <- FALSE;s <- 0
for(d in d28Range){
en <- dy[[ d ]]
rhcollect(list(f = paramDate,fd=cutDate,date=d,type='daily28'), (1-is.null(en)))
wasActive <- wasActive || !is.null(en)
s <- s+(1-is.null(en))
}
rhcollect(list(f = paramDate,fd=cutDate,type='wasActive28'),1*wasActive)
rhcollect(list(f = paramDate,fd=cutDate,type='d28dist', x = s),1)
}
}
whichsamples <- local({
u <- sort(rhls("/user/sguha/fhr/samples/backup/")$file)
mon <- unlist(lapply(strsplit(u,"/"),function(s) as.POSIXlt(tail(s,1))$mon))
as.character(tapply(u, mon, function(s) max(s)))
})
whichsamples <- c("/user/sguha/fhr/samples/backup/2015-01-26",tail(whichsamples,-1))
cc <- list()
for(i in seq_along(whichsamples)){
s <- whichsamples[i]
cc[[ length(cc) +1 ]] <- rhwatch(map=function(a,b) m(a,b,filename),
reduce=rhoptions()$temp$colsummer,
input=sqtxt(sprintf("%s/1pct",s)),
jobname= sprintf("%s of %s: %s", i, length(whichsamples),s),
setup=expression({
library(rjson)
suppressPackageStartupMessages(library(data.table))
})
,param=list(m=m,filename=as.Date(tail(strsplit(s,"/")[[1]],1)))
,debug='collect')
}
cc2 <- unlist(cc,rec=FALSE)
wasActive <- make.dt(Filter(function(s) s[[1]]$type %in% c("wasActive7",'wasActive28'), cc2),c("pd","asOf","type","n"))
wasActive[, ":="(pd=as.Date(pd,origin="1970-01-01"), asOf=as.Date(asOf,origin='1970-01-01'))]
wasActive <- wasActive[, {
x <- tail(.SD[ (pd - .BY$asOf) %between% c(0,170),][order(pd),],1)
x[, del:=(pd - .BY$asOf)]
x
},by=list(asOf,type)][order(asOf),]
dailyActive <- make.dt(Filter(function(s) s[[1]]$type %in% c("daily7",'daily28'), cc2),c("pd","asOf","dayBefore","type","n"))
dailyActive[, ":="(pd=as.Date(pd,origin="1970-01-01"), asOf=as.Date(asOf,origin='1970-01-01'),dayBefore = as.Date(dayBefore,origin="1970-01-01"))]
dailyActive <- dailyActive[, list(avgAdi = mean(n)),by=list(pd, asOf,type)]
dailyActive <- dailyActive[, {
x <- tail(.SD[ (pd - .BY$asOf) %between% c(0,170),][order(pd),],1)
x[, del:=(pd - .BY$asOf)]
x
},by=list(asOf,type)][order(asOf),]
period7 <- merge(dailyActive[type=="daily7", list(asOf,avgAdi7=avgAdi)], wasActive[type=="wasActive7", list(asOf, tActive7 = n)], by="asOf")
period28 <- merge(dailyActive[type=="daily28", list(asOf,avgAdi28=avgAdi)], wasActive[type=="wasActive28", list(asOf, tActive28 = n)], by="asOf")
periods <- merge(period7,period28)[, ":="(er1 = avgAdi7/tActive28, er2 = tActive7/tActive28, er3=avgAdi7/avgAdi28)]
periods[,":="(doy=as.POSIXlt(asOf)$yday, year=as.POSIXlt(asOf)$year, label=strftime(asOf,"%b\n%d"))]
period2 <- periods
period2[, ":="(er1 = er1 * mean(er2)/mean(er1), er3 = er3*mean(er2)/mean(er3))]
pdf("k.pdf",width=15)
u <- xyplot( er1+er2+er3 ~ doy|factor(year+1900)
, layout=c(1,2)
, xlim=c(1,365)
, type='b'
, cex=0.4
, strip = FALSE
, strip.left = TRUE
, data=periods
, scale=list(y=list(tick.number=10,cex=0.7,alternating=c(3,3)),x=list(tick.number=25,cex=0.6,alternating=c(3,3)))
, lwd=1.5
, panel=function(x,y,...){
panel.superpose(x,y,...,
panel.groups=function(x,y,subscripts, groups,col,col.symbol,...){
panel.grid(v=20)
panel.xyplot( x,y, col=col.symbol, ...)
})
}
, ylab=''
, xlab='time of year'
, main="Engagement Ratios\n(means of ER1 and ER3 scaled to ER2's mean)",auto.key=list(columns=3)
, xscale.components= function (lim, logsc = FALSE, at = NULL, ...)
{
ans <- xscale.components.default(lim = lim, logsc = logsc, at = at, ...)
side <- "bottom"
ans[[side]]$labels$labels <- strftime(as.Date("2014-01-01")+ans[[side]]$labels$at,"%b\n%d")
ans[[side]]$labels$check.overlap <- FALSE
ans$top <- ans$bottom; side <- "top"
ans[[side]]$labels$labels <- strftime(as.Date("2015-01-01")+ans[[side]]$labels$at,"%b\n%d")
ans[[side]]$labels$check.overlap <- FALSE
ans
}
)
print(u)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment