Created
August 3, 2020 06:56
-
-
Save vincentarelbundock/3ff82c86191bf7dfcaa9a4e4cbb47d6e to your computer and use it in GitHub Desktop.
rowwise data.table hack
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used below: data.table for make_dt(), the tidyverse for
# make_tibble(), and microbenchmark for the timing comparison.
library(data.table)
library(tidyverse)
library(microbenchmark)
# Simulate an unbalanced unit-year panel with staggered treatment (tidyverse).
#
# Arguments:
#   ...         Ignored; kept so that existing calls keep working.
#   n_units     Number of units to simulate.
#   n_treated   Number of units drawn without replacement to be treated.
#   start_year  First calendar year of the simulation window.
#   end_year    Last calendar year of the simulation window.
#
# Returns a tibble with one row per unit-year for which
# first_year <= year <= last_year, sorted by unit and year, with columns
# unit, unit_fe, treat, first_year, last_year, treat_year, year, year_fe,
# error, posttreat, rel_year, tau, firm_age, log_age and cumtau.
make_tibble <- function(..., n_units = 20000L, n_treated = 4000L,
                        start_year = 1981L, end_year = 2010L) {
  n_units <- as.integer(n_units)
  n_treated <- as.integer(n_treated)
  start_year <- as.integer(start_year)
  end_year <- as.integer(end_year)
  stopifnot(
    n_units >= 1L,
    n_treated >= 0L,
    n_treated <= n_units,
    start_year <= end_year
  )
  n_years <- end_year - start_year + 1L

  treated_units <- sample.int(n_units, n_treated)

  # year fixed effects
  year <- tibble(
    year = start_year:end_year,
    year_fe = rnorm(n_years, 0, 1)
  )

  # Unit-level table. The year draws depend on each row's own bounds, so they
  # are evaluated row by row. sample.int() plus an offset is used instead of
  # sample(seq(a, b), 1): when a == b the latter is sample(a, 1), which draws
  # from 1:a rather than returning a.
  unit <- tibble(
    unit = seq_len(n_units),
    unit_fe = rnorm(n_units, 0, 1),
    treat = if_else(unit %in% treated_units, 1, 0)
  ) %>%
    rowwise() %>%
    mutate(
      first_year = start_year + sample.int(n_years, 1L) - 1L,
      # last year is uniform on first_year..end_year
      last_year = first_year + sample.int(end_year - first_year + 1L, 1L) - 1L,
      # treatment year is uniform on first_year..last_year if treated, else 0;
      # each row is a single value here, so a scalar if/else is sufficient
      treat_year = if (treat == 1) {
        first_year + sample.int(last_year - first_year + 1L, 1L) - 1L
      } else {
        0L
      }
    ) %>%
    ungroup()

  # make panel
  crossing(unit, year) %>%
    arrange(unit, year) %>%
    # keep only the years in which the unit exists; the comparison is
    # vectorised, so no row-wise evaluation is needed on the full cross
    filter(year >= first_year, year <= last_year) %>%
    # make error term, treat term and log age term
    mutate(
      error = rnorm(n(), 0, 1),
      posttreat = if_else(treat == 1 & year >= treat_year, 1, 0),
      rel_year = if_else(treat == 1, year - treat_year, NA_integer_),
      tau = if_else(posttreat == 1, .2, 0),
      firm_age = year - first_year,
      log_age = log(firm_age + 1)
    ) %>%
    # make cumulative treatment effects
    group_by(unit) %>%
    mutate(cumtau = cumsum(tau)) %>%
    ungroup()
  # omitting the dummy_cols call
}
# make data
#
# Simulate an unbalanced unit-year panel with staggered treatment (data.table).
#
# Arguments:
#   n_units     Number of units to simulate.
#   n_treated   Number of units drawn without replacement to be treated.
#   start_year  First calendar year of the simulation window.
#   end_year    Last calendar year of the simulation window.
#
# Returns a data.table with one row per unit-year for which
# first_year <= year <= last_year, sorted by unit and year, with columns
# year, unit, unit_fe, treat, first_year, last_year, treat_year, year_fe,
# error, posttreat, rel_year, tau, firm_age, log_age and cumtau.
make_dt <- function(n_units = 20000L, n_treated = 4000L,
                    start_year = 1981L, end_year = 2010L) {
  n_units <- as.integer(n_units)
  n_treated <- as.integer(n_treated)
  start_year <- as.integer(start_year)
  end_year <- as.integer(end_year)
  stopifnot(
    n_units >= 1L,
    n_treated >= 0L,
    n_treated <= n_units,
    start_year <= end_year
  )
  n_years <- end_year - start_year + 1L

  # Element-wise draw, uniform on lo[i]..hi[i]. sample.int() plus an offset is
  # used instead of sample(seq(lo, hi), 1): when lo == hi the latter is
  # sample(lo, 1), which draws from 1:lo rather than returning lo.
  draw_year <- function(lo, hi) {
    lo + vapply(hi - lo + 1L, sample.int, integer(1), size = 1L) - 1L
  }

  treated_units <- sample.int(n_units, n_treated)

  years <- data.table(
    year = start_year:end_year,
    year_fe = rnorm(n_years, 0, 1)
  )

  units <- data.table(
    unit = seq_len(n_units),
    unit_fe = rnorm(n_units, 0, 1)
  )
  units[, treat := fifelse(unit %in% treated_units, 1, 0)]
  units[, first_year := start_year + sample.int(n_years, .N, replace = TRUE) - 1L]

  # Year columns are kept integer throughout so that the arithmetic below stays
  # integer and both fifelse() branches for rel_year have the same type.
  units[, last_year := end_year]
  units[first_year < end_year, last_year := draw_year(first_year, end_year)]

  # Untreated units get 0. Treated units default to first_year, which is the
  # only possible value when the unit exists for a single year; otherwise the
  # treatment year is drawn between first_year and last_year.
  units[, treat_year := 0L]
  units[treat == 1, treat_year := first_year]
  units[
    treat == 1 & first_year != last_year,
    treat_year := draw_year(first_year, last_year)
  ]

  # make panel: full unit x year cross, then keep the years the unit exists
  panel <- CJ(unit = units$unit, year = years$year)
  panel <- merge(panel, units, by = "unit")
  panel <- merge(panel, years, by = "year")
  panel <- panel[year >= first_year & year <= last_year]
  # merge() leaves the rows sorted by its `by` column (year), so the
  # unit/year ordering is applied here, after the merges
  setorder(panel, unit, year)

  # make error term, treat term and log age term
  panel[, error := rnorm(.N, 0, 1)]
  panel[, posttreat := fifelse(treat == 1 & year >= treat_year, 1, 0)]
  panel[, rel_year := fifelse(treat == 1, year - treat_year, NA_integer_)]
  panel[, tau := fifelse(posttreat == 1, .2, 0)]
  panel[, firm_age := year - first_year]
  panel[, log_age := log(firm_age + 1)]

  # make cumulative treatment effects (rows are in year order within unit)
  panel[, cumtau := cumsum(tau), by = "unit"]
  # omitting the dummy_cols call

  # trailing [] so the result prints normally after the := calls above
  panel[]
}
# Uncomment to build one data set from each implementation under the same seed.
# set.seed(74792766)
# dt <- make_dt()
# set.seed(74792766)
# ti <- make_tibble()

# Time both generators, three evaluations each.
microbenchmark(make_tibble(), make_dt(), times = 3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment