# t-test vs wilcox vs ordinal

library(tidyverse)
library(multidplyr) # parallelize
library(rms)        # ordinal regression

sample_size = 500 # N

# generate paired sets and calculate p-values with different techniques
# reduce the number of iterations to reduce run time
simulate = function(iterations, seed = NULL) {
  set.seed(seed)
  replicate(iterations, (function() {

    #### data with a skewed distribution that changes after treatment
    data = tibble(
      subject = paste0("S", 1:sample_size),
      # latent "before" values drawn from a standard normal
      before = rnorm(sample_size),
      # "after" depends on "before", a skew term (before^2), and noise
      after = sqrt(0.7)*before + sqrt(.1)*scale(before^2, center = T) + sqrt(0.2)*rnorm(sample_size)
    ) %>%
      # cut into ordinal categories (asymmetric breaks)
      mutate(after = as.numeric(cut(after, breaks = c(-5, -.5, 0.25, 0.75, 1.5, 2, 2.5, 5))),
             before = as.numeric(cut(before, breaks = c(-5, -.5, 0.25, 0.75, 1.5, 2, 2.5, 5))))

    #### NULL effect
    data_null = tibble(
      subject = paste0("S", 1:sample_size),
      # latent "before" values drawn from a standard normal, as above
      before = rnorm(sample_size),
      # "after" is generated the same way as above
      after = sqrt(0.7)*before + sqrt(.1)*scale(before^2, center = T) + sqrt(0.2)*rnorm(sample_size)
    ) %>%
      # cut into symmetric ordinal categories
      mutate(after = as.numeric(cut(after, breaks = c(-5, -2.25, -1.5, -.5, .5, 1.5, 2.25, 5))),
             before = as.numeric(cut(before, breaks = c(-5, -2.25, -1.5, -.5, .5, 1.5, 2.25, 5))))

    # long format for the ordinal regressions
    data_long = pivot_longer(data, before:after, names_to = "time", values_to = "measurement")
    data_long_null = pivot_longer(data_null, before:after, names_to = "time", values_to = "measurement")

    tibble(
      # paired t-test and paired Wilcoxon signed-rank test
      p_t = t.test(data$before, data$after, paired = TRUE)$p.value,
      p_w = wilcox.test(data$before, data$after, paired = TRUE)$p.value,
      # ordinal regression (rms::orm) with cluster-robust SEs, clustered by subject
      p_o = rms::orm(measurement ~ time, data_long, x = TRUE, y = TRUE) %>%
        rms::robcov(cluster = data_long$subject) %>%
        anova() %>%
        as_tibble() %>% mutate(p = as.numeric(P)) %>% pull(p) %>% pluck(1),
      # the same three tests on the null-effect data
      p_t_null = t.test(data_null$before, data_null$after, paired = TRUE)$p.value,
      p_w_null = wilcox.test(data_null$before, data_null$after, paired = TRUE)$p.value,
      p_o_null = rms::orm(measurement ~ time, data_long_null, x = TRUE, y = TRUE) %>%
        rms::robcov(cluster = data_long_null$subject) %>%
        anova() %>%
        as_tibble() %>% mutate(p = as.numeric(P)) %>% pull(p) %>% pluck(1)
    )
  })(), simplify = FALSE) %>% bind_rows()
}
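
# Quick sanity check (illustrative, not part of the original script): a few
# serial iterations to confirm simulate() returns one row of p-values per
# iteration before launching the full parallel run.
smoke_test = simulate(iterations = 3, seed = 1)
print(smoke_test)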

# setup for the parallel run
my_cluster = multidplyr::new_cluster(parallel::detectCores() - 1)
cluster_library(my_cluster, "rms")
cluster_library(my_cluster, "magrittr")
cluster_library(my_cluster, "dplyr")
cluster_library(my_cluster, "tidyr")
cluster_library(my_cluster, "purrr")

# copy variables to every worker and run in parallel
start_time = Sys.time()
cluster_copy(my_cluster, "sample_size")
cluster_copy(my_cluster, "simulate")
cluster_assign(my_cluster, iterations = round(10000 / length(my_cluster)))
cluster_assign_each(my_cluster, seed = 8675309 + 1:length(my_cluster))
ps = cluster_call(my_cluster, simulate(iterations, seed))
ps = ps %>% bind_rows()
Sys.time() - start_time
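
# Alternative (sketch, assuming multidplyr is unavailable): the same 10,000
# iterations can run serially on one core, just much slower; uncomment to use
# this instead of the parallel run above.
# ps = simulate(iterations = 10000, seed = 8675309)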

##### results #####

# what percent of p-values are below 0.05
ps %>%
  summarise(
    t_test = mean(p_t < 0.05) %>% scales::label_percent(.1)(),
    wilcox_test = mean(p_w < 0.05) %>% scales::label_percent(.1)(),
    ordinal_regression = mean(p_o < 0.05) %>% scales::label_percent(.1)()
  )
# t_test wilcox_test ordinal_regression

# t-test vs ordinal
ps %>%
  mutate(t_yes_o_no = (p_t < 0.05) & (p_o > 0.05)) %>%
  mutate(t_no_o_yes = (p_t > 0.05) & (p_o < 0.05)) %>%
  summarise(
    t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(),
    t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)()
  )
# t_yes_o_no t_no_o_yes

# t-test vs wilcox
ps %>%
  mutate(t_yes_w_no = (p_t < 0.05) & (p_w > 0.05)) %>%
  mutate(t_no_w_yes = (p_t > 0.05) & (p_w < 0.05)) %>%
  summarise(
    t_yes_w_no = mean(t_yes_w_no) %>% scales::label_percent(.1)(),
    t_no_w_yes = mean(t_no_w_yes) %>% scales::label_percent(.1)()
  )
# t_yes_w_no t_no_w_yes

# wilcox vs ordinal
ps %>%
  mutate(w_yes_o_no = (p_w < 0.05) & (p_o > 0.05)) %>%
  mutate(w_no_o_yes = (p_w > 0.05) & (p_o < 0.05)) %>%
  summarise(
    w_yes_o_no = mean(w_yes_o_no) %>% scales::label_percent(.1)(),
    w_no_o_yes = mean(w_no_o_yes) %>% scales::label_percent(.1)()
  )
# w_yes_o_no w_no_o_yes

# t-test vs ordinal, extreme disagreements (p < 0.01 on one test, p > 0.05 on the other)
ps %>%
  mutate(t_yes_o_no = (p_t < 0.01) & (p_o > 0.05)) %>%
  mutate(t_no_o_yes = (p_t > 0.05) & (p_o < 0.01)) %>%
  summarise(
    t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(),
    t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)()
  )
# t_yes_o_no t_no_o_yes

##### results for the null-effect data (symmetric ordinal cut) #####

# NULL: what percent of p-values are below 0.05
ps %>%
  summarise(
    t_test = mean(p_t_null < 0.05) %>% scales::label_percent(.1)(),
    wilcox_test = mean(p_w_null < 0.05) %>% scales::label_percent(.1)(),
    ordinal_regression = mean(p_o_null < 0.05) %>% scales::label_percent(.1)()
  )
# t_test wilcox_test ordinal_regression

# NULL: t-test vs ordinal
ps %>%
  mutate(t_yes_o_no = (p_t_null < 0.05) & (p_o_null > 0.05)) %>%
  mutate(t_no_o_yes = (p_t_null > 0.05) & (p_o_null < 0.05)) %>%
  summarise(
    t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(),
    t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)()
  )
# t_yes_o_no t_no_o_yes
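
# Optional check (a sketch, not in the original script): under the null, the
# p-values should be roughly uniform on [0, 1], so a quick histogram per test
# makes any miscalibration easy to spot.
ps %>%
  pivot_longer(c(p_t_null, p_w_null, p_o_null), names_to = "test", values_to = "p") %>%
  ggplot(aes(p)) +
  geom_histogram(binwidth = 0.05, boundary = 0) +
  facet_wrap(~ test)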