Skip to content

Instantly share code, notes, and snippets.

@alekrutkowski
Last active June 2, 2022 13:33
Show Gist options
  • Save alekrutkowski/e26bf28f9f7a6bc638c7b3ffb27c20c5 to your computer and use it in GitHub Desktop.
Save alekrutkowski/e26bf28f9f7a6bc638c7b3ffb27c20c5 to your computer and use it in GitHub Desktop.
R functions for panel data extending packages `data.table` and `collapse`
# ## Example: ⮦ time=3 missing
# library(magrittr)
# data.table(time = c(1,2, 4,5,6,
# 1,2),
# my_x = c(11:15,
# 11,12),
# my_y = c(101,103,105,NA,109,
# 111,121),
# group = c(rep.int('a',5),
# rep.int('b',2))) %>%
# setPanel(xsection_dims='group', # xsection_dims can be a char vector of colnames
# time_dim='time') %>%
# addLags(c('my_x','my_y')) %>% # similarly addDiffs(); in both, argument n = lag or diff order, can be used
# addLag('my_x',2) %>%
# addDiff('my_y') %>%
# print()
# ## time my_x my_y group L.my_x L.my_y L2.my_x D.my_y
# ## 1: 1 11 101 a NA NA NA NA
# ## 2: 2 12 103 a 11 101 NA 2
# ## 3: 4 13 105 a NA NA 12 NA
# ## 4: 5 14 NA a 13 105 NA NA
# ## 5: 6 15 109 a 14 NA 13 NA
# ## 6: 1 11 111 b NA NA NA NA
# ## 7: 2 12 121 b 11 111 NA 10
#
# ## Non-standard evaluation versions (no quoting of names as strings needed)
# ## with dots at the end of the function names are available, e.g.:
# ## setPanel.(group,time) or setPanel.(c(groupvar1,groupvar2),timevar)
# ## addLag.(my_x) or addDiff.(my_y)
# ## No nse versions for the "plurals": addLags or addDiffs
library(data.table)
stopifnot('collapse' %in% dimnames(installed.packages())[[1]])
setPanel <- function(dt, xsection_dims, time_dim,
xsection_name='xsection_id') {
stopifnot(is.data.table(dt),
is.character(xsection_dims),
length(xsection_dims)>=1,
is.character(time_dim),
length(time_dim)==1,
all(c(xsection_dims,time_dim) %in% colnames(dt)),
is.character(xsection_name),
length(xsection_name)==1)
if (xsection_name!='xsection_id' && length(xsection_dims)==1)
warning('xsection_name="',xsection_name,'"" ignored,\n',
'used "',xsection_dims,'" instead.')
if (length(xsection_dims)>1) {
dt[, (xsection_name) := do.call(paste,
lapply(xsection_dims,
function(x) dt[[x]]))]
setattr(dt,'panel_dims',
c(xsection_id=xsection_name, time=time_dim))
} else
setattr(dt,'panel_dims',
c(xsection_id=xsection_dims, time=time_dim))
dt
}
unsetPanel <- function(dt, rm.LD=FALSE, rm.xs=FALSE, rm.t=FALSE) {
stopifnot(!is.null(attr(dt,'panel_dims')),
is.logical(rm.xs),
length(rm.xs)==1,
is.logical(rm.LD),
length(rm.LD)==1)
panel_dims <- attr(dt,'panel_dims')
if (rm.LD)
dt[, (grep('^L\\.|^D\\.|^L[0-9]{1,2}\\.|^D[0-9]{1,2}\\.',
colnames(dt), value=TRUE)) := NULL] # remove the generated lags or diffs
if (rm.xs) dt[, (panel_dims[1]) := NULL] # remove the cross-sectional id
if (rm.t) dt[, (panel_dims[2]) := NULL] # remove the time variable
setattr(dt,'panel_dims', NULL)
dt
}
addLag <- function(dt, x, n=1, fill=NA, stubs=TRUE,
pfix = ifelse(n==1,'L.',paste0('L',n,'.')), ...) {
stopifnot(is.data.table(dt),
!is.null(attr(dt,'panel_dims')),
is.character(x),
length(x)==1,
x %in% colnames(dt),
is.character(pfix),
length(pfix)==1)
panel_dims <- attr(dt,'panel_dims')
listOfArgs <- list(x=x,n=n, fill=fill, stubs=stubs) # needed to avoid interpretation of x, n, fill, stubs as dt's columns if such column names happen to be in dt
dt[, (paste0(pfix,x)) :=
collapse::L(dt[[listOfArgs$x]], n=listOfArgs$n,
g=dt[[panel_dims['xsection_id']]], t=dt[[panel_dims['time']]],
fill=listOfArgs$fill, stubs=listOfArgs$stubs, ...)]
dt
}
addDiff <- function(dt, x, n=1, fill=NA, stubs=TRUE,
pfix = ifelse(n==1,'D.',paste0('D',n,'.')), ...) {
stopifnot(is.data.table(dt),
!is.null(attr(dt,'panel_dims')),
is.character(x),
length(x)==1,
x %in% colnames(dt),
is.character(pfix),
length(pfix)==1)
panel_dims <- attr(dt,'panel_dims')
listOfArgs <- list(x=x,n=n, fill=fill, stubs=stubs) # needed to avoid interpretation of x, n, fill, stubs as dt's columns if such column names happen to be in dt
dt[, (paste0(pfix,x)) :=
dt[[listOfArgs$x]] -
collapse::L(dt[[listOfArgs$x]], n=listOfArgs$n,
g=dt[[panel_dims['xsection_id']]], t=dt[[panel_dims['time']]],
fill=listOfArgs$fill, stubs=listOfArgs$stubs, ...)]
dt
}
addLags <- function(dt, xs, ...)
Reduce(function(.dt, x, ...) addLag(.dt, x, ...),
init=dt,
x=xs)
addDiffs <- function(dt, xs, ...)
Reduce(function(.dt, x, ...) addDiff(.dt, x, ...),
init=dt,
x=xs)
modifyNames <- function(dt, f) {
cn <- colnames(dt)
setnames(dt, cn, f(cn))
dt
}
realGrowthPerc <- function(nominal_growth_perc, inflat_perc) {
n <- nominal_growth_perc/100
i <- inflat_perc/100
100*(n - i)/(1 + i) # == 100*{(1 + n)/(1 + i) - 1}
}
### Non-standard eval versions:
setPanel. <- function(dt, xsection_dims, time_dim, ...)
eval(bquote(
setPanel(
dt,
xsection_dims=.({
. <- bquote(.( as.character( substitute(xsection_dims))))
if (length(.)==1) . else .[-1]
}),
time=.(as.character(substitute(time_dim))),
...
)
))
addLag. <- function(dt, x, ...)
eval(bquote(
addLag(
dt,
.(as.character(substitute(x))),
...
)
))
addDiff. <- function(dt, x, ...)
eval(bquote(
addDiff(
dt,
.(as.character(substitute(x))),
...
)
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment