Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vincentarelbundock/3ff82c86191bf7dfcaa9a4e4cbb47d6e to your computer and use it in GitHub Desktop.
Save vincentarelbundock/3ff82c86191bf7dfcaa9a4e4cbb47d6e to your computer and use it in GitHub Desktop.
# rowwise data.table hack
library(data.table)
library(tidyverse)
library(microbenchmark)
# Simulate an unbalanced unit-year panel using tidyverse verbs.
#
# 20000 units are created, 4000 of which are flagged as treated. Every unit
# draws a first year in 1981-2010, a last year between its first year and
# 2010, and (if treated) a treatment year between its first and last year.
# The unit table is crossed with the year table, trimmed to each unit's
# active window, and augmented with an error draw, treatment indicators,
# age terms and a cumulative treatment effect.
#
# `...` is accepted but never used.
# Returns an ungrouped tibble with one row per active unit-year.
make_tibble <- function(...) {
  treated_ids <- sample(seq_len(20000), 4000)

  # year fixed effects
  year_tbl <- tibble(
    year = 1981:2010,
    year_fe = rnorm(30)
  )

  # unit fixed effects and treatment flag
  unit_base <- tibble(
    unit = 1:20000,
    unit_fe = rnorm(20000),
    treat = if_else(unit %in% treated_ids, 1, 0)
  )

  # Row by row: draw the first year, the last year and the treatment year.
  # if_else() evaluates both branches on every row, so sample() is always
  # called even when its draw is discarded.
  unit_tbl <- unit_base %>%
    rowwise() %>%
    mutate(
      first_year = sample(1981:2010, 1),
      # last year: random year between first_year and 2010
      last_year = if_else(
        first_year < 2010,
        sample(first_year:2010, 1),
        2010L
      ),
      # treatment year: random year between first and last year if treated,
      # 0 for never-treated units
      treat_year = if_else(
        treat == 1,
        if_else(
          first_year != last_year,
          sample(seq(first_year, last_year), 1),
          as.integer(first_year)
        ),
        0L
      )
    ) %>%
    ungroup()

  # full unit x year grid, restricted to each unit's active window
  panel <- crossing(unit_tbl, year_tbl) %>%
    arrange(unit, year) %>%
    rowwise() %>%
    filter(between(year, first_year, last_year)) %>%
    ungroup()

  # error term, treatment indicators and (log) age
  panel <- mutate(
    panel,
    error = rnorm(nrow(panel)),
    posttreat = if_else(treat == 1 & year >= treat_year, 1, 0),
    rel_year = if_else(treat == 1, year - treat_year, NA_integer_),
    tau = if_else(posttreat == 1, .2, 0),
    firm_age = year - first_year,
    log_age = log(firm_age + 1)
  )

  # cumulative treatment effect within each unit
  panel %>%
    group_by(unit) %>%
    mutate(cumtau = cumsum(tau)) %>%
    ungroup()
  # omitting the dummy_cols call
}
# make data
# Simulate an unbalanced unit-year panel using data.table.
#
# Arguments (all optional; the defaults reproduce the original hard-coded
# setup):
#   n_units    number of units to simulate
#   n_treated  number of units flagged as treated (must be <= n_units)
#   min_year   first calendar year of the panel
#   max_year   last calendar year of the panel
#
# Returns a data.table sorted by unit and year, with one row per unit-year
# inside the unit's [first_year, last_year] window. All year-like columns
# (first_year, last_year, treat_year, rel_year, firm_age) are integer.
make_dt <- function(n_units = 20000L, n_treated = 4000L,
                    min_year = 1981L, max_year = 2010L) {
  n_units <- as.integer(n_units)
  n_treated <- as.integer(n_treated)
  min_year <- as.integer(min_year)
  max_year <- as.integer(max_year)
  stopifnot(n_units >= 1L, n_treated >= 0L, n_treated <= n_units,
            min_year <= max_year)

  years <- min_year:max_year
  treated_units <- sample(seq_len(n_units), n_treated)

  # year fixed effects
  year2 <- data.table(year = years,
                      year_fe = rnorm(length(years), 0, 1))

  # Draw one year uniformly from each closed interval [lo[i], hi[i]].
  # sample() treats a length-one numeric `x` as 1:x, so the candidates are
  # indexed with sample.int() instead; this stays correct when lo == hi.
  draw_year <- function(lo, hi) {
    hi <- rep_len(hi, length(lo))
    vapply(seq_along(lo), function(i) {
      candidates <- seq.int(lo[i], hi[i])
      candidates[sample.int(length(candidates), 1L)]
    }, integer(1))
  }

  # Unit-level table. Year columns start as integers so that later
  # arithmetic (year - treat_year) stays integer and matches NA_integer_.
  unit2 <- data.table(
    unit = seq_len(n_units),
    unit_fe = rnorm(n_units, 0, 1),
    treat = fifelse(seq_len(n_units) %in% treated_units, 1, 0),
    first_year = years[sample.int(length(years), n_units, replace = TRUE)],
    last_year = max_year,
    treat_year = 0L
  )
  # last year: random year between first_year and max_year
  unit2[first_year < max_year,
        last_year := draw_year(first_year, max_year)]
  # treatment year: random year between first and last year for treated
  # units; never-treated units keep 0
  unit2[treat == 1 & first_year != last_year,
        treat_year := draw_year(first_year, last_year)]
  unit2[treat == 1 & first_year == last_year, treat_year := first_year]

  # full unit x year grid, restricted to each unit's active window
  out <- CJ(unit = unit2$unit, year = year2$year)
  out <- merge(out, unit2, by = "unit")
  out <- merge(out, year2, by = "year")
  out <- out[year >= first_year & year <= last_year]

  # error term, treatment indicators and (log) age
  out[, error := rnorm(.N, 0, 1)]
  out[, posttreat := fifelse(treat == 1 & year >= treat_year, 1, 0)]
  out[, rel_year := fifelse(treat == 1, year - treat_year, NA_integer_)]
  out[, tau := fifelse(posttreat == 1, .2, 0)]
  out[, firm_age := year - first_year]
  out[, log_age := log(firm_age + 1)]

  # Cumulative treatment effect per unit. Rows are in ascending year order
  # within each unit here because the last merge sorted by year.
  out[, cumtau := cumsum(tau), by = "unit"]

  # merge(by = "year") left the table in year-major order; restore the
  # unit-year ordering once, at the end, so the random draws above are
  # assigned to rows in the same order as before.
  setorder(out, unit, year)
  # omitting the dummy_cols call
  out[]
}
# Optional manual check: uncomment to build one dataset with each
# implementation, resetting the RNG to the same seed before each call.
# set.seed(74792766)
# dt <- make_dt()
# set.seed(74792766)
# ti <- make_tibble()
# Time the two generators against each other; times = 3 requests three
# evaluations of each expression.
microbenchmark(make_tibble(), make_dt(), times = 3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment