Last active
April 8, 2025 19:56
-
-
Save seabbs/e1fcd58f0e749d48a3e98df23396dd98 to your computer and use it in GitHub Desktop.
The role of double censoring in discretising distributions. This script is adapted from the getting started vignette in primarycensored to show the difference between accounting and not accounting for primary censoring.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(primarycensored) | |
library(ggplot2) | |
n <- 1e4 | |
mean <- 10 | |
sd <- 0.5 | |
obs_time <- 30 | |
pwindow <- 1 | |
# Random samples without primary event censoring | |
samples <- rprimarycensored( | |
n, | |
rdist = rnorm, rprimary = runif, | |
pwindow = 0, swindow = 1, D = obs_time, | |
mean = mean, sd = sd | |
) | |
# Random samples with primary event censoring | |
samples_pc <- rprimarycensored( | |
n, | |
rdist = rnorm, rprimary = runif, | |
pwindow = 1, swindow = 1, D = obs_time, | |
mean = mean, sd = sd | |
) | |
# Calculate the PMF for the samples with secondary event censoring | |
samples_pmf <- data.frame( | |
pmf = table(samples) / sum(table(samples)) | |
) | |
# Calculate the PMF for the samples with primary event censoring | |
samples_pc_pmf <- data.frame( | |
pmf = table(samples_pc) / sum(table(samples_pc)) | |
) | |
# Compare the samples with and without secondary event censoring | |
# to the true distribution | |
ggplot() + | |
geom_col( | |
data = samples_pmf, | |
aes( | |
x = as.numeric(as.character(pmf.samples)), | |
y = pmf.Freq | |
), | |
fill = "#20b986", | |
col = "#252525", | |
alpha = 0.5, | |
width = 0.9, | |
just = 0 | |
) + | |
geom_col( | |
data = samples_pc_pmf, | |
aes( | |
x = as.numeric(as.character(pmf.samples_pc)), | |
y = pmf.Freq | |
), | |
fill = "#4e20b9", | |
col = "#252525", | |
alpha = 0.5, | |
width = 0.9, | |
just = 0 | |
) + | |
geom_function( | |
fun = dnorm, | |
args = list(mean = mean, sd = sd), | |
color = "#252525", | |
linewidth = 1 | |
) + | |
labs( | |
title = "Comparison of samples from a distribution", | |
x = "Delay", | |
y = "Density", | |
caption = paste0( | |
"Green bars: Samples without primary event censoring\n", | |
"Blue bars: Samples with primary event censoring\n", | |
"Black line: True distribution without truncation or censoring" | |
) | |
) + | |
scale_x_continuous(limits = c(0, 15)) + | |
scale_y_continuous( | |
expand = expansion(mult = c(0, 0.05)) | |
) + | |
theme_minimal() + | |
theme( | |
panel.grid.minor = element_blank(), | |
plot.title = element_text(hjust = 0.5) | |
) | |
# Calculate the theoretical PMF using dprimarycensored | |
theoretical_pmf <- dprimarycensored( | |
0:(obs_time - 1), | |
pdist = pnorm, dprimary = dunif, | |
pwindow = pwindow, swindow = 1, D = obs_time, | |
mean = mean, sd = sd | |
) | |
pmf_df <- data.frame( | |
x = 0:obs_time, | |
pmf = c(theoretical_pmf, 0) | |
) | |
# Calculate the theoretical PMF without primary internal censoring | |
theoretical_pmf_no_pc <- pnorm(1:obs_time, mean = mean, sd = sd) - | |
pnorm(0:(obs_time-1), mean = mean, sd = sd) | |
pmf_df_no_pc <- data.frame( | |
x = 0:obs_time, | |
pmf = c(theoretical_pmf_no_pc, 0) | |
) | |
# Plot the empirical and theoretical PMFs | |
ggplot() + | |
geom_col( | |
data = samples_pc_pmf, | |
aes( | |
x = as.numeric(as.character(pmf.samples_pc)), | |
y = pmf.Freq | |
), | |
fill = "#20b986", | |
col = "#252525", | |
alpha = 0.5, | |
width = 0.9, | |
just = 0 | |
) + | |
geom_step( | |
data = pmf_df, | |
aes(x = x, y = pmf), | |
color = "black", | |
linewidth = 1 | |
) + | |
geom_step( | |
data = pmf_df_no_pc, | |
aes(x = x, y = pmf), | |
color = "blue", | |
linewidth = 1, | |
linetype = "dashed" | |
) + | |
labs( | |
title = "Comparison of samples from a distribution", | |
x = "Delay", | |
y = "Density", | |
caption = paste0( | |
"Green bars: Empirical PMF from samples with secondary and primary event censoring\n", | |
"Black line: Theoretical PMF with primary censoring\n", | |
"Blue dashed line: Theoretical PMF without primary censoring" | |
) | |
) + | |
scale_y_continuous(expand = expansion(mult = c(0, 0.05))) + | |
theme_minimal() + | |
theme( | |
panel.grid.minor = element_blank(), | |
plot.title = element_text(hjust = 0.5) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This produces the following two plots that show that the PMF you get when you discretise with and without accounting for primary censoring is not the same.