Last active
May 31, 2023 05:30
-
-
Save nanxstats/43ef32acbff6e946637fdda24a052710 to your computer and use it in GitHub Desktop.
Sparse index tracking with a two-stage procedure using {msaenet} and {CVXR}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load data and split training/test set ---------------------------------------- | |
library("xts") | |
index2010 <- sparseIndexTracking::INDEX_2010 | |
x_tr <- index2010$X[1:126] | |
x_te <- index2010$X[127:252] | |
r_tr <- index2010$SP500[1:126] | |
r_te <- index2010$SP500[127:252] | |
# Wrapper functions for P&L plotting ------------------------------------------- | |
df_pnl <- function(x_te, beta, y_test, title) { | |
df <- cbind( | |
cumprod(1 + x_te %*% beta), | |
cumprod(1 + y_test) | |
) | |
names(df)[1] <- title | |
df | |
} | |
plot_pnl <- function(object, title = "Cumulative P&L") { | |
ggplot2::autoplot(object, facets = NULL) + | |
ggplot2::ggtitle(title) + | |
cowplot::theme_minimal_hgrid() + | |
ggsci::scale_color_d3() + | |
ggplot2::scale_y_continuous(breaks = seq(1, 1.5, .05), limits = c(1, 1.4)) + | |
ggplot2::scale_x_date(date_breaks = "1 months", date_labels = "%b") + | |
ggplot2::theme( | |
axis.title.x = ggplot2::element_blank(), | |
legend.position = "bottom" | |
) | |
} | |
# L0 + empirical tracking error ------------------------------------------------ | |
w_ete <- sparseIndexTracking::spIndexTrack( | |
x_tr, | |
r = r_tr, lambda = 1e-7, u = 0.5, measure = "ete" | |
) | |
names(w_ete[w_ete > 1e-6]) | |
data.frame(sort(w_ete, decreasing = TRUE), fix.empty.names = FALSE) | |
df_pnl(x_te, w_ete, r_te, "PortfolioETE") |> plot_pnl() | |
# Wrapper functions for extracting {msaenet} coefficients ---------------------- | |
get_beta <- function(object) { | |
beta <- coef(object) | |
names(beta) <- if (inherits(object, "msaenet.msaenet")) { | |
rownames(object$beta) | |
} else { | |
colnames(object$model$X) | |
} | |
beta | |
} | |
get_nzv <- function(object) { | |
beta <- get_beta(object) | |
names(beta[which(beta != 0)]) | |
} | |
# Constrained OLS with coefficients sum to one using {CVXR} -------------------- | |
library("CVXR") | |
storeg <- function(x, y) { | |
x <- as.matrix(x) | |
y <- as.vector(y) | |
p <- ncol(x) | |
beta <- Variable(p) | |
obj <- sum((y - x %*% beta)^2) | |
constr <- list(beta >= 0, sum(beta) == 1) | |
prob <- Problem(Minimize(obj), constr) | |
result <- solve(prob) | |
structure( | |
list( | |
result = result, | |
beta = result$getValue(beta) | |
), | |
class = "storeg" | |
) | |
} | |
# Get asset names and coefficients in the portfolio ---------------------------- | |
get_portfolio <- function(beta, nzv) { | |
as.vector(beta) |> | |
setNames(nzv) |> | |
sort(decreasing = TRUE) |> | |
data.frame(fix.empty.names = FALSE) | |
} | |
# Fit msaenet-storeg hybrid model ---------------------------------------------- | |
fit_enet <- msaenet::msaenet( | |
x = as.matrix(x_tr), | |
y = as.vector(r_tr), | |
family = "gaussian", | |
init = "ridge", | |
tune = "cv", | |
nsteps = 10, | |
lower.limits = 0, | |
verbose = FALSE, | |
seed = 42 | |
) | |
fit_enet |> get_nzv() | |
fit_enet_sto <- storeg(x_tr[, get_nzv(fit_enet)], r_tr) | |
get_portfolio(fit_enet_sto$beta, get_nzv(fit_enet)) | |
df_pnl(x_te[, get_nzv(fit_enet)], fit_enet_sto$beta, r_te, "Portfolio.msaenet") |> | |
plot_pnl() | |
# Stability of solution -------------------------------------------------------- | |
library("doParallel") | |
registerDoParallel(detectCores()) | |
fit_rep <- foreach::foreach(seed = 1:100) %dopar% { | |
msaenet::msaenet( | |
x = as.matrix(x_tr), | |
y = as.vector(r_tr), | |
family = "gaussian", | |
init = "ridge", | |
tune = "cv", | |
nsteps = 10, | |
lower.limits = 0, | |
verbose = FALSE, | |
seed = seed | |
) | |
} | |
as.data.frame(table(unlist(sapply(fit_rep, get_nzv)))) |> | |
ggplot2::ggplot(ggplot2::aes(x = Freq, y = Var1)) + | |
ggplot2::geom_point(size = 3, color = ggsci::pal_d3()(1)) + | |
ggplot2::scale_x_continuous( | |
name = "Selection frequency out of 100 experiments", | |
limits = c(0, 100), | |
expand = c(0, 5) | |
) + | |
ggplot2::scale_y_discrete(name = NULL, expand = c(0, 0.5)) + | |
cowplot::theme_minimal_grid() | |
# Fit msasnet-storeg hybrid model ---------------------------------------------- | |
fit_snet <- suppressWarnings(msaenet::msasnet( | |
x = as.matrix(x_tr), | |
y = as.vector(r_tr), | |
family = "gaussian", | |
init = "ridge", | |
tune = "cv", | |
nsteps = 10, | |
verbose = FALSE, | |
seed = 42 | |
)) | |
fit_snet |> get_nzv() | |
fit_snet_sto <- storeg(x_tr[, get_nzv(fit_snet)], r_tr) | |
get_portfolio(fit_snet_sto$beta, get_nzv(fit_snet)) | |
df_pnl(x_te[, get_nzv(fit_snet)], fit_snet_sto$beta, r_te, "Portfolio.msasnet") |> | |
plot_pnl() | |
# Fit msamnet-storeg hybrid model ---------------------------------------------- | |
fit_mnet <- suppressWarnings(msaenet::msamnet( | |
x = as.matrix(x_tr), | |
y = as.vector(r_tr), | |
family = "gaussian", | |
init = "ridge", | |
tune = "cv", | |
nsteps = 10, | |
verbose = FALSE, | |
seed = 42 | |
)) | |
fit_mnet |> get_nzv() | |
fit_mnet_sto <- storeg(x_tr[, get_nzv(fit_mnet)], r_tr) | |
get_portfolio(fit_mnet_sto$beta, get_nzv(fit_mnet)) | |
df_pnl(x_te[, get_nzv(fit_mnet)], fit_mnet_sto$beta, r_te, "Portfolio.msamnet") |> | |
plot_pnl() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment