Skip to content

Instantly share code, notes, and snippets.

@jebyrnes
Created March 6, 2012 19:00
Show Gist options
  • Save jebyrnes/1988217 to your computer and use it in GitHub Desktop.
Save jebyrnes/1988217 to your computer and use it in GitHub Desktop.
Analysis code for looking at the relationship between page views and dollars raised for #SciFund
source("./sciFundFunctions.r")
library(gridExtra)
############
#Raw scatterplots of dollars raised against page views,
#drawn once with every project and once with the outlier row(s) dropped
############

#all rows of `projects`; x-axis ticks every 10,000 views from 0 to 40,000
viewsAll <- qplot(Pageviews, total, data = projects) +
  theme_bw(base_size = 24) +
  ylab("Total Dollars Raised\n") +
  xlab("\nPage Views") +
  scale_x_continuous(breaks = seq(0, 40000, by = 10000))

#same plot minus the rows indexed by outIDX (defined outside this script;
#presumably in sciFundFunctions.r). The y label is blanked because this panel
#sits to the right of viewsAll, and a "No Outlier" tag is placed at (2000, 8500).
viewsNoOut <- qplot(Pageviews, total, data = projects[-outIDX, ]) +
  theme_bw(base_size = 24) +
  ylab("") +
  xlab("\nPage Views") +
  geom_text(aes(x = 2000, y = 8500, label = "No Outlier"), size = 7)

#show the two panels side by side
grid.arrange(viewsAll, viewsNoOut, ncol = 2)
#Model total dollars raised from page views, using every project.
#Both fits are Poisson GLMs with the intercept removed (`+0`);
#the only difference between them is the link function.
viewGlm <- glm(
  total ~ Pageviews + 0,
  data = projects,
  family = poisson(link = "identity")
)
viewGlmLog <- glm(
  total ~ Pageviews + 0,
  data = projects,
  family = poisson(link = "log")
)

#identity vs. log link: a negative difference means the identity-link
#(linear) model has the lower AIC
AIC(viewGlm) - AIC(viewGlmLog)

#fit statistics for the identity-link model
#(Anova and r2.corr are not defined in this script; presumably they come
#from sciFundFunctions.r or a package it loads)
Anova(viewGlm)
summary(viewGlm)
r2.corr(viewGlm)
############
#Repeat the page-view models with the outlier row(s) removed
############
#same two no-intercept Poisson GLMs as above, fitted to projects[-outIDX, ]
viewGLM2 <- glm(
  total ~ Pageviews + 0,
  data = projects[-outIDX, ],
  family = poisson(link = "identity")
)
viewGLM2Log <- glm(
  total ~ Pageviews + 0,
  data = projects[-outIDX, ],
  family = poisson(link = "log")
)

#linearity check again: negative difference favours the identity link
AIC(viewGLM2) - AIC(viewGLM2Log)

#coefficients and fit of the identity-link model
summary(viewGLM2)
r2.corr(viewGLM2)
############
#Overlay a fitted GLM smoother on both scatterplots
############
#NOTE(review): `family` is handed straight to stat_smooth here. Newer ggplot2
#releases expect model arguments via `method.args = list(family = ...)` and
#ignore a bare `family` -- confirm which ggplot2 version this script targets.
grid.arrange(
  viewsAll +
    stat_smooth(method = "glm", family = poisson(link = "identity")),
  viewsNoOut +
    stat_smooth(method = "glm", family = poisson(link = "identity")),
  ncol = 2
)
############
#Split pre and post success page views
############
#All projects: dollars raised against page views received before the goal
#was reached, with point size mapped to page views received afterwards.
viewTotalSplit <- qplot(preGoalPageviews, total, data = projects,
                        size = postGoalPageviews) +
  theme_bw(base_size = 24) +
  ylab("Total Dollars Raised") +
  xlab("\nPage Views Before\nReaching Goal") +
  scale_size("Views After\n Goal") +
  scale_x_continuous(breaks = seq(0, 40000, by = 10000))

#Same plot without the rows indexed by outIDX. The y label is blanked because
#this panel is drawn to the right of viewTotalSplit; x-axis ticks are every
#2,000 views up to 10,000.
viewTotalSplitNoOut <- qplot(preGoalPageviews, total,
                             data = projects[-outIDX, ],
                             size = postGoalPageviews) +
  theme_bw(base_size = 24) +
  ylab("") +
  xlab("\nPage Views Before\nReaching Goal") +
  geom_text(aes(x = 2000, y = 8500, label = "No Outlier"), size = 7) +
  scale_size("Views After\n Goal") +
  scale_x_continuous(breaks = seq(0, 10000, by = 2000))

#Draw the figure once, and only after BOTH panels exist: calling grid.arrange
#before viewTotalSplitNoOut is assigned fails with "object not found" in a
#fresh session.
grid.arrange(viewTotalSplit, viewTotalSplitNoOut, ncol = 2)
############
#Analysis
############
#No-intercept, identity-link Poisson GLM of dollars raised on page views
#before and after the funding goal was reached (all projects).
viewGLMSplit <- glm(
  total ~ preGoalPageviews + postGoalPageviews + 0,
  data = projects,
  family = poisson(link = "identity")
)
Anova(viewGLMSplit)
summary(viewGLMSplit)
r2.corr(viewGLMSplit)

#without outlier
viewGLMSplitNoOut <- glm(
  total ~ preGoalPageviews + postGoalPageviews + 0,
  data = projects[-outIDX, ],
  family = poisson(link = "identity")
)
#Use Anova() here as well, so the with- and without-outlier tables come from
#the same test; base anova() gives a sequential table that depends on term
#order and is not comparable with the Anova() output above.
Anova(viewGLMSplitNoOut)
summary(viewGLMSplitNoOut)
r2.corr(viewGLMSplitNoOut)
#How strongly do pre- and post-goal page views correlate?
#First across all projects, then with the outlier row(s) dropped.
with(projects, cor(preGoalPageviews, postGoalPageviews))
with(projects[-outIDX, ], cor(preGoalPageviews, postGoalPageviews))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment