Skip to content

Instantly share code, notes, and snippets.

@jrosell
Last active July 10, 2025 17:12
Show Gist options
  • Select an option

  • Save jrosell/bb14e902a466c66e5460cb7d9db310b2 to your computer and use it in GitHub Desktop.

Select an option

Save jrosell/bb14e902a466c66e5460cb7d9db310b2 to your computer and use it in GitHub Desktop.
Visualization of the predictor correlation matrix, significance and clustering.
load(url("https://github.com/topepo/FES/raw/refs/heads/master/Data_Sets/Ischemic_Stroke/stroke_data.RData"))
rlang::check_installed(c("tidyverse", "tidymodels", "corrplot"))
library(tidyverse)
library(tidymodels)
VC_preds <-
c("CALCVol", "CALCVolProp", "MATXVol", "MATXVolProp", "LRNCVol",
"LRNCVolProp", "MaxCALCArea", "MaxCALCAreaProp", "MaxDilationByArea",
"MaxMATXArea", "MaxMATXAreaProp", "MaxLRNCArea", "MaxLRNCAreaProp",
"MaxMaxWallThickness", "MaxRemodelingRatio", "MaxStenosisByArea",
"MaxWallArea", "WallVol", "MaxStenosisByDiameter")
risk_preds <-
c("age", "sex", "SmokingHistory", "AtrialFibrillation", "CoronaryArteryDisease",
"DiabetesHistory", "HypercholesterolemiaHistory", "HypertensionHistory")
# ------------------------------------------------------------------------------
stroke_train %>%
count(Stroke) %>%
mutate(Data = "Training") %>%
bind_rows(
stroke_test %>%
count(Stroke) %>%
mutate(Data = "Testing")
) %>%
spread(Stroke, n)
# ------------------------------------------------------------------------------
# https://bookdown.org/max/FES/numeric-one-to-one.html#numeric-one-to-one
fig_2_2_a <-
bind_rows(stroke_train, stroke_test) %>%
ggplot(aes(x = MaxLRNCArea)) +
geom_histogram(bins = 15, col = "#D53E4F", fill = "#D53E4F", alpha = .5) +
xlab("MaxLRNCArea") +
ylab("Frequency") +
ggtitle("(a)") +
theme_bw()
fig_2_2_b <-
recipe(Stroke ~ ., data = bind_rows(stroke_train, stroke_test)) %>%
step_YeoJohnson(all_predictors()) %>%
prep() %>%
juice() %>%
ggplot(aes(x = MaxLRNCArea)) +
geom_histogram(bins = 15, col = "#D53E4F", fill = "#D53E4F", alpha = .5) +
xlab("Transformed MaxLRNCArea") +
ylab("Frequency") +
ggtitle("(b)") +
theme_bw()
# ------------------------------------------------------------------------------
risk_train <-
recipe(Stroke ~ ., data = stroke_train) %>%
step_center(VC_preds) %>%
step_scale(VC_preds) %>%
step_YeoJohnson(VC_preds) %>%
prep() %>%
juice() %>%
select(-one_of(c("Stroke", "NASCET", risk_preds)))
risk_corr <- cor(risk_train)
# https://bookdown.org/max/FES/stroke-preprocessing.html#fig:stroke-corrMatrix
# corrplot(risk_corr, addgrid.col = rgb(0, 0, 0, .05), order = "hclust")
corrplot::corrplot(risk_corr, addgrid.col = rgb(0, 0, 0, .05), order = "hclust", method = 'ellipse', diag = FALSE, type = 'upper')
testRes <- corrplot::cor.mtest(risk_corr, conf.level = 0.95)
corrplot::corrplot(risk_corr, p.mat = testRes$p, sig.level = 0.10, addgrid.col = rgb(0, 0, 0, .05), order = 'hclust', method = 'ellipse', diag = FALSE, addrect = 10)
corrplot::corrplot(risk_corr, p.mat = testRes$p, sig.level = 0.10, addgrid.col = rgb(0, 0, 0, .05), order = "hclust", method = 'ellipse', diag = FALSE, type = 'upper')
@jrosell
Copy link
Author

jrosell commented Jul 10, 2025

correlation-plot clustering-correlation correlation-confidence

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment