Created
April 12, 2016 03:33
-
-
Save githoov/032dce67f36032932cf2231713e81897 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# preliminaries ----
# Plotting packages used by the visualisation sections further down.
library(ggplot2)
library(gridExtra)

# Source queries for the exported CSVs:
# using pinger
#   2014 (control): https://metanew.looker.com/sql/cb77dv2thvbrxd
#   2015 (treat):   https://metanew.looker.com/sql/xqttksqxp3qgrn
# using license
#   2014 (control): https://metanew.looker.com/sql/fm8hgny7f9yrkh
#   2015 (treat):   https://metanew.looker.com/sql/rq2sxqkwqmc7z2

# read in 2014 and 2015 data sets
# (the file names carry the ids of the "license" queries listed above)
Y2014 <- read.csv(
  file = "~/Downloads/sql_runner_fm8hgny7f9yrkh_2016-04-09_05-37-37.csv",
  header = TRUE
)
Y2015 <- read.csv(
  file = "~/Downloads/sql_runner_rq2sxqkwqmc7z2_2016-04-09_05-37-41.csv",
  header = TRUE
)

# Stack both cohorts into one long data frame; rbind() requires the two
# exports to have identical column names.
df <- rbind(Y2014, Y2015)
# calculate average number of users pre and post for control and treat

# Mean of `number_of_users` over the rows of `data` whose `state` equals
# `period` and whose `year` equals `cohort_year`.
# which() drops rows where the filter evaluates to NA, matching subset().
mean_users <- function(data, period, cohort_year) {
  keep <- which(data$state == period & data$year == cohort_year)
  mean(data$number_of_users[keep])
}

# 2014 rows are the control cohort, 2015 rows the treated cohort.
pre_control <- mean_users(df, "pre", 2014)
post_control <- mean_users(df, "post", 2014)
pre_treat <- mean_users(df, "pre", 2015)
post_treat <- mean_users(df, "post", 2015)

# simple diff-in-diff: change in the treated cohort minus change in control
(post_treat - pre_treat) - (post_control - pre_control)
# diff-in-diff regression
# Indicator columns: treat is 1 for 2015 rows, post is 1 for "post" rows.
df$treat <- as.integer(df$year == 2015)
df$post <- as.integer(df$state == "post")

# `post * treat` expands to post + treat + post:treat, so the main effects do
# not need to be listed separately. The post:treat coefficient is the
# diff-in-diff estimate.
did_fit <- lm(number_of_users ~ post * treat, data = df)
summary(did_fit)
# prep average treatment effect
# One row per (cohort, period) cell holding that cell's mean user count.
# unname() keeps the column clean whether or not the means carry names.
small.df <- data.frame(
  number_of_users = unname(
    c(pre_treat, post_treat, pre_control, post_control)
  ),
  post = rep(c(0, 1), times = 2),   # 0 = pre, 1 = post
  treat = rep(c(1, 0), each = 2),   # 1 = treated (2015), 0 = control (2014)
  row.names = c("pre_treat", "post_treat", "pre_control", "post_control")
)
# visualize average treatment effect

# Dashed reference line: the treated cohort's pre-period mean extended with
# the control cohort's pre-to-post change.
# NOTE(review): the original referenced an undefined `df.intercept` and a
# hard-coded slope of 30.390. Presumably these were meant to be the treated
# pre-period mean and the control trend -- confirm against the regression
# output.
is_treated <- small.df$treat == 1
is_post <- small.df$post == 1
counterfactual_intercept <- small.df$number_of_users[is_treated & !is_post]
counterfactual_slope <-
  small.df$number_of_users[!is_treated & is_post] -
  small.df$number_of_users[!is_treated & !is_post]

# `treat` is numeric, so it is mapped through factor() to get two discrete
# colours, and the legend is configured on the colour scale (no shape
# aesthetic is mapped, so a shape scale would have no effect).
q <- ggplot(
  small.df,
  aes(x = post, y = number_of_users, colour = factor(treat), group = treat)
) +
  geom_line() +
  geom_abline(
    intercept = counterfactual_intercept,
    slope = counterfactual_slope,
    linetype = 2,
    alpha = 0.2
  ) +
  xlab("Pre/Post") +
  ylab("Number of Users") +
  scale_colour_discrete(
    name = "Treat/Control",
    breaks = c("0", "1"),
    labels = c("Control", "Treat")
  ) +
  scale_x_continuous(
    breaks = c(0, 1),
    labels = c("Deal Close", "6 Months Out")
  ) +
  ggtitle("Average Treatment Effect")
# visualize paths
# One line per client_id from pre (post = 0) to post (post = 1), coloured by
# cohort. `treat` is numeric, so it goes through factor() to get two discrete
# colours, and the legend is configured on the colour scale (no shape
# aesthetic is mapped, so a shape scale would have no effect).
ggplot(
  df,
  aes(
    x = post,
    y = number_of_users,
    group = client_id,
    colour = factor(treat)
  )
) +
  geom_line(alpha = 0.2) +
  xlab("Pre/Post") +
  ylab("Number of Users") +
  scale_colour_discrete(
    name = "Treat/Control",
    breaks = c("0", "1"),
    labels = c("Control", "Treat")
  )
# remove outliers
# Same per-client paths, restricted to rows with fewer than 250 users.
# The filter is row-wise, so a client with only one period under the cutoff
# keeps a single point and draws no line.
# `treat` is numeric, so it goes through factor() to get two discrete
# colours, and the legend is configured on the colour scale (no shape
# aesthetic is mapped, so a shape scale would have no effect).
k <- ggplot(
  subset(df, number_of_users < 250),
  aes(
    x = post,
    y = number_of_users,
    group = client_id,
    colour = factor(treat)
  )
) +
  geom_line(alpha = 0.2) +
  xlab("Pre/Post") +
  ylab("Number of Users") +
  scale_colour_discrete(
    name = "Treat/Control",
    breaks = c("0", "1"),
    labels = c("Control", "Treat")
  ) +
  scale_x_continuous(
    breaks = c(0, 1),
    labels = c("Deal Close", "6 Months Out")
  ) +
  ggtitle("6-Month User Growth Paths")

# Show the per-client paths next to the average treatment effect plot.
grid.arrange(k, q, ncol = 2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment