Last active
July 10, 2025 17:12
-
-
Save jrosell/bb14e902a466c66e5460cb7d9db310b2 to your computer and use it in GitHub Desktop.
Visualization of the predictor correlation matrix, significance and clustering.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| load(url("https://github.com/topepo/FES/raw/refs/heads/master/Data_Sets/Ischemic_Stroke/stroke_data.RData")) | |
| rlang::check_installed(c("tidyverse", "tidymodels", "corrplot")) | |
| library(tidyverse) | |
| library(tidymodels) | |
| VC_preds <- | |
| c("CALCVol", "CALCVolProp", "MATXVol", "MATXVolProp", "LRNCVol", | |
| "LRNCVolProp", "MaxCALCArea", "MaxCALCAreaProp", "MaxDilationByArea", | |
| "MaxMATXArea", "MaxMATXAreaProp", "MaxLRNCArea", "MaxLRNCAreaProp", | |
| "MaxMaxWallThickness", "MaxRemodelingRatio", "MaxStenosisByArea", | |
| "MaxWallArea", "WallVol", "MaxStenosisByDiameter") | |
| risk_preds <- | |
| c("age", "sex", "SmokingHistory", "AtrialFibrillation", "CoronaryArteryDisease", | |
| "DiabetesHistory", "HypercholesterolemiaHistory", "HypertensionHistory") | |
| # ------------------------------------------------------------------------------ | |
| stroke_train %>% | |
| count(Stroke) %>% | |
| mutate(Data = "Training") %>% | |
| bind_rows( | |
| stroke_test %>% | |
| count(Stroke) %>% | |
| mutate(Data = "Testing") | |
| ) %>% | |
| spread(Stroke, n) | |
| # ------------------------------------------------------------------------------ | |
| # https://bookdown.org/max/FES/numeric-one-to-one.html#numeric-one-to-one | |
| fig_2_2_a <- | |
| bind_rows(stroke_train, stroke_test) %>% | |
| ggplot(aes(x = MaxLRNCArea)) + | |
| geom_histogram(bins = 15, col = "#D53E4F", fill = "#D53E4F", alpha = .5) + | |
| xlab("MaxLRNCArea") + | |
| ylab("Frequency") + | |
| ggtitle("(a)") + | |
| theme_bw() | |
| fig_2_2_b <- | |
| recipe(Stroke ~ ., data = bind_rows(stroke_train, stroke_test)) %>% | |
| step_YeoJohnson(all_predictors()) %>% | |
| prep() %>% | |
| juice() %>% | |
| ggplot(aes(x = MaxLRNCArea)) + | |
| geom_histogram(bins = 15, col = "#D53E4F", fill = "#D53E4F", alpha = .5) + | |
| xlab("Transformed MaxLRNCArea") + | |
| ylab("Frequency") + | |
| ggtitle("(b)") + | |
| theme_bw() | |
| # ------------------------------------------------------------------------------ | |
| risk_train <- | |
| recipe(Stroke ~ ., data = stroke_train) %>% | |
| step_center(VC_preds) %>% | |
| step_scale(VC_preds) %>% | |
| step_YeoJohnson(VC_preds) %>% | |
| prep() %>% | |
| juice() %>% | |
| select(-one_of(c("Stroke", "NASCET", risk_preds))) | |
| risk_corr <- cor(risk_train) | |
| # https://bookdown.org/max/FES/stroke-preprocessing.html#fig:stroke-corrMatrix | |
| # corrplot(risk_corr, addgrid.col = rgb(0, 0, 0, .05), order = "hclust") | |
| corrplot::corrplot(risk_corr, addgrid.col = rgb(0, 0, 0, .05), order = "hclust", method = 'ellipse', diag = FALSE, type = 'upper') | |
| testRes <- corrplot::cor.mtest(risk_corr, conf.level = 0.95) | |
| corrplot::corrplot(risk_corr, p.mat = testRes$p, sig.level = 0.10, addgrid.col = rgb(0, 0, 0, .05), order = 'hclust', method = 'ellipse', diag = FALSE, addrect = 10) | |
| corrplot::corrplot(risk_corr, p.mat = testRes$p, sig.level = 0.10, addgrid.col = rgb(0, 0, 0, .05), order = "hclust", method = 'ellipse', diag = FALSE, type = 'upper') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.