Created
March 6, 2012 19:00
-
-
Save jebyrnes/1988217 to your computer and use it in GitHub Desktop.
Analysis code for looking at the relationship between page views and dollars raised for #SciFund
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source("./sciFundFunctions.r") | |
library(gridExtra) | |
# ------------------------------------------------------------------
# Raw scatterplots of total dollars raised against page views.
# Left panel: every row of `projects`.
# Right panel: `projects` with the row(s) indexed by `outIDX` dropped.
# ------------------------------------------------------------------

# All projects; x-axis ticks every 10,000 views up to 40,000.
viewsAll <- qplot(Pageviews, total, data = projects) +
  theme_bw(base_size = 24) +
  ylab("Total Dollars Raised\n") +
  xlab("\nPage Views") +
  scale_x_continuous(breaks = seq(0, 40000, 10000))

# Same plot without the outlier. The y label is blank because this panel
# sits to the right of `viewsAll`; a text label marks the panel instead.
viewsNoOut <- qplot(Pageviews, total, data = projects[-outIDX, ]) +
  theme_bw(base_size = 24) +
  ylab("") +
  xlab("\nPage Views") +
  geom_text(aes(x = 2000, y = 8500, label = "No Outlier"), size = 7)

# Draw the two panels side by side.
grid.arrange(viewsAll, viewsNoOut, ncol = 2)
# Model total dollars as a function of page views, with no intercept
# (the `+0` term), using a Poisson error distribution. Two link functions
# are fitted so that they can be compared.
viewGlm <- glm(
  total ~ Pageviews + 0,
  data = projects,
  family = poisson(link = "identity")
)
viewGlmLog <- glm(
  total ~ Pageviews + 0,
  data = projects,
  family = poisson(link = "log")
)

# Identity link vs. log link: a negative difference means the identity-link
# model has the lower AIC.
AIC(viewGlm) - AIC(viewGlmLog)

# Fit statistics for the identity-link model.
# NOTE(review): `Anova` and `r2.corr` are not defined in this file --
# presumably car::Anova and a helper from sciFundFunctions.r; confirm.
Anova(viewGlm)
summary(viewGlm)
r2.corr(viewGlm)
# ------------------------------------------------------------------
# Repeat the page-view models with the row(s) indexed by `outIDX` removed.
# ------------------------------------------------------------------
viewGLM2 <- glm(
  total ~ Pageviews + 0,
  data = projects[-outIDX, ],
  family = poisson(link = "identity")
)
viewGLM2Log <- glm(
  total ~ Pageviews + 0,
  data = projects[-outIDX, ],
  family = poisson(link = "log")
)

# Identity link vs. log link again, this time on the reduced data.
AIC(viewGLM2) - AIC(viewGLM2Log)

# Fit statistics for the identity-link model without the outlier.
summary(viewGLM2)
r2.corr(viewGLM2)
############
# Plot the fitted models
############
# Overlay a Poisson/identity-link glm smoother on both scatterplots.
#
# The family must be handed to glm() through `method.args`. Passing
# `family=` directly to stat_smooth() is ignored as an unknown parameter by
# current ggplot2 releases (>= 2.0.0), which would silently draw a default
# gaussian glm instead of the model analysed in this script.
#
# NOTE(review): stat_smooth() uses its default formula (y ~ x, with an
# intercept), whereas the glm() fits in this script use `+0`. Confirm
# whether the plotted line is meant to be the no-intercept fit.
grid.arrange(
  viewsAll +
    stat_smooth(
      method = "glm",
      method.args = list(family = poisson(link = "identity"))
    ),
  viewsNoOut +
    stat_smooth(
      method = "glm",
      method.args = list(family = poisson(link = "identity"))
    ),
  ncol = 2
)
############
# Split pre and post success page views
############
# Total dollars raised against page views received before the goal was
# reached, with point size mapped to page views received after the goal.

# All projects.
viewTotalSplit <- qplot(preGoalPageviews, total, data = projects,
                        size = postGoalPageviews) +
  theme_bw(base_size = 24) +
  ylab("Total Dollars Raised") +
  xlab("\nPage Views Before\nReaching Goal") +
  scale_size("Views After\n Goal") +
  scale_x_continuous(breaks = seq(0, 40000, 10000))

# Same plot with the row(s) indexed by `outIDX` removed; finer x-axis
# breaks and a text label marking the panel.
viewTotalSplitNoOut <- qplot(preGoalPageviews, total,
                             data = projects[-outIDX, ],
                             size = postGoalPageviews) +
  theme_bw(base_size = 24) +
  ylab("") +
  xlab("\nPage Views Before\nReaching Goal") +
  geom_text(aes(x = 2000, y = 8500, label = "No Outlier"), size = 7) +
  scale_size("Views After\n Goal") +
  scale_x_continuous(breaks = seq(0, 10000, 2000))

# Arrange only after BOTH plots exist. The earlier version also called
# grid.arrange() before viewTotalSplitNoOut had been created, which fails
# with "object not found" in a fresh session.
grid.arrange(viewTotalSplit, viewTotalSplitNoOut, ncol = 2)
############
# Analysis
############
# Model total dollars on pre-goal and post-goal page views separately,
# with no intercept (`+0`), Poisson errors and an identity link.
viewGLMSplit <- glm(
  total ~ preGoalPageviews + postGoalPageviews + 0,
  data = projects,
  family = poisson(link = "identity")
)
Anova(viewGLMSplit)
summary(viewGLMSplit)
r2.corr(viewGLMSplit)

# Without the outlier
viewGLMSplitNoOut <- glm(
  total ~ preGoalPageviews + postGoalPageviews + 0,
  data = projects[-outIDX, ],
  family = poisson(link = "identity")
)
# Use Anova() here as well, matching the full-data model above. Base
# anova() gives a sequential table whose result for each term depends on
# the order of the predictors, so it would not be comparable with the
# Anova() output for viewGLMSplit.
Anova(viewGLMSplitNoOut)
summary(viewGLMSplitNoOut)
r2.corr(viewGLMSplitNoOut)
# Correlation between pre-goal and post-goal page views:
# first across all projects, then with the row(s) indexed by `outIDX` removed.
with(projects, cor(preGoalPageviews, postGoalPageviews))
with(projects[-outIDX, ], cor(preGoalPageviews, postGoalPageviews))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment