Skip to content

Instantly share code, notes, and snippets.

@githoov
Created April 12, 2016 03:33
Show Gist options
  • Save githoov/032dce67f36032932cf2231713e81897 to your computer and use it in GitHub Desktop.
Save githoov/032dce67f36032932cf2231713e81897 to your computer and use it in GitHub Desktop.
# preliminaries
# using pinger
# 2014 (control): https://metanew.looker.com/sql/cb77dv2thvbrxd
# 2015 (treat): https://metanew.looker.com/sql/xqttksqxp3qgrn
# using license
# 2014 (control): https://metanew.looker.com/sql/fm8hgny7f9yrkh
# 2015 (treat): https://metanew.looker.com/sql/rq2sxqkwqmc7z2
# Load the 2014 (control) and 2015 (treat) license exports, then stack them
# row-wise into one data frame.
path_2014 <- "~/Downloads/sql_runner_fm8hgny7f9yrkh_2016-04-09_05-37-37.csv"
path_2015 <- "~/Downloads/sql_runner_rq2sxqkwqmc7z2_2016-04-09_05-37-41.csv"
Y2014 <- read.csv(path_2014, header = TRUE)
Y2015 <- read.csv(path_2015, header = TRUE)
df <- do.call(rbind, list(Y2014, Y2015))
# Calculate the average number of users before ("pre") and after ("post") for
# the control (2014) and treat (2015) cohorts.

# Mean of `number_of_users` over the rows of `df` in one state/year cell.
# vapply() with numeric(1) keeps the result type-stable: always a length-one
# numeric named "number_of_users", whatever the subset contains.
mean_users <- function(which_state, which_year) {
  cell <- subset(
    df,
    state == which_state & year == which_year,
    select = number_of_users
  )
  vapply(cell, mean, numeric(1))
}

pre_control <- mean_users("pre", 2014)
post_control <- mean_users("post", 2014)
pre_treat <- mean_users("pre", 2015)
post_treat <- mean_users("post", 2015)

# Simple diff-in-diff: change in the treat cohort minus change in the control
# cohort.
(post_treat - pre_treat) - (post_control - pre_control)
# Diff-in-diff regression.
# Indicator columns: treat is 1 for 2015 rows, post is 1 for "post" rows.
df[["treat"]] <- as.integer(df[["year"]] == 2015)
df[["post"]] <- as.integer(df[["state"]] == "post")

# The formula is kept exactly as written; `post * treat` expands to both main
# effects plus the post:treat interaction term.
did_fit <- lm(number_of_users ~ post + treat + (post * treat), data = df)
summary(did_fit)
# Prep and visualize the average treatment effect.
library(ggplot2)

# One row per (cohort, period) cell mean. post = 0/1 marks the period and
# treat = 0/1 marks the cohort; row names match the source variables.
cell_means <- c(pre_treat, post_treat, pre_control, post_control)
small.df <- data.frame(
  number_of_users = unname(cell_means),
  post = c(0, 1, 0, 1),
  treat = c(1, 1, 0, 0),
  row.names = c("pre_treat", "post_treat", "pre_control", "post_control")
)

# Dashed reference line. The original referenced an undefined `df.intercept`
# and a hard-coded slope of 30.390, so the plot could not be built from this
# script alone; both values are now derived from the cell means.
# NOTE(review): assumes the line is the treat cohort's counterfactual, i.e. it
# starts at the treat "pre" mean and follows the control cohort's change --
# confirm against the intended figure.
counterfactual_intercept <- unname(pre_treat)
control_slope <- unname(post_control - pre_control)

# `treat` is mapped as a labelled factor so the legend shows two discrete
# colours named Control/Treat. The original used scale_shape_discrete(), which
# has no effect because no shape aesthetic is mapped.
q <- ggplot(
  small.df,
  aes(
    x = post,
    y = number_of_users,
    colour = factor(treat, levels = c(0, 1), labels = c("Control", "Treat")),
    group = treat
  )
) +
  geom_line() +
  geom_abline(
    intercept = counterfactual_intercept,
    slope = control_slope,
    linetype = 2,
    alpha = 0.2
  ) +
  xlab("Pre/Post") +
  ylab("Number of Users") +
  scale_colour_discrete(name = "Treat/Control") +
  scale_x_continuous(
    breaks = c(0, 1),
    labels = c("Deal Close", "6 Months Out")
  ) +
  ggtitle("Average Treatment Effect")
# Visualize paths: one line per client_id across post = 0/1, coloured by
# cohort. `treat` is mapped as a labelled factor so the legend is discrete and
# reads Control/Treat; the original scale_shape_discrete() had no effect
# because no shape aesthetic is mapped.
ggplot(
  df,
  aes(
    x = post,
    y = number_of_users,
    group = client_id,
    colour = factor(treat, levels = c(0, 1), labels = c("Control", "Treat"))
  )
) +
  geom_line(alpha = 0.2) +
  xlab("Pre/Post") +
  ylab("Number of Users") +
  scale_colour_discrete(name = "Treat/Control")
# Remove outliers, re-plot the per-client paths, and show them next to the
# average-treatment-effect plot `q`.
library(gridExtra)

# Rows at or above this user count are excluded from the paths plot.
# NOTE(review): the filter drops individual rows, not whole clients, so a
# client left with a single observation may have no line drawn -- confirm
# whether entire clients should be excluded instead.
outlier_cutoff <- 250

# `treat` is mapped as a labelled factor so the legend is discrete and reads
# Control/Treat; the original scale_shape_discrete() had no effect because no
# shape aesthetic is mapped.
k <- ggplot(
  subset(df, number_of_users < outlier_cutoff),
  aes(
    x = post,
    y = number_of_users,
    group = client_id,
    colour = factor(treat, levels = c(0, 1), labels = c("Control", "Treat"))
  )
) +
  geom_line(alpha = 0.2) +
  xlab("Pre/Post") +
  ylab("Number of Users") +
  scale_colour_discrete(name = "Treat/Control") +
  scale_x_continuous(
    breaks = c(0, 1),
    labels = c("Deal Close", "6 Months Out")
  ) +
  ggtitle("6-Month User Growth Paths")

grid.arrange(k, q, ncol = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment