Skip to content

Instantly share code, notes, and snippets.

@seabbs
Last active April 8, 2025 19:56
Show Gist options
  • Save seabbs/e1fcd58f0e749d48a3e98df23396dd98 to your computer and use it in GitHub Desktop.
Save seabbs/e1fcd58f0e749d48a3e98df23396dd98 to your computer and use it in GitHub Desktop.
The role of double censoring in discretising distributions. This script is adapted from the getting started vignette in primarycensored to show the difference between accounting and not accounting for primary censoring.
library(primarycensored)
library(ggplot2)
n <- 1e4
mean <- 10
sd <- 0.5
obs_time <- 30
pwindow <- 1
# Random samples without primary event censoring
samples <- rprimarycensored(
n,
rdist = rnorm, rprimary = runif,
pwindow = 0, swindow = 1, D = obs_time,
mean = mean, sd = sd
)
# Random samples with primary event censoring
samples_pc <- rprimarycensored(
n,
rdist = rnorm, rprimary = runif,
pwindow = 1, swindow = 1, D = obs_time,
mean = mean, sd = sd
)
# Calculate the PMF for the samples with secondary event censoring
samples_pmf <- data.frame(
pmf = table(samples) / sum(table(samples))
)
# Calculate the PMF for the samples with primary event censoring
samples_pc_pmf <- data.frame(
pmf = table(samples_pc) / sum(table(samples_pc))
)
# Compare the samples with and without secondary event censoring
# to the true distribution
ggplot() +
geom_col(
data = samples_pmf,
aes(
x = as.numeric(as.character(pmf.samples)),
y = pmf.Freq
),
fill = "#20b986",
col = "#252525",
alpha = 0.5,
width = 0.9,
just = 0
) +
geom_col(
data = samples_pc_pmf,
aes(
x = as.numeric(as.character(pmf.samples_pc)),
y = pmf.Freq
),
fill = "#4e20b9",
col = "#252525",
alpha = 0.5,
width = 0.9,
just = 0
) +
geom_function(
fun = dnorm,
args = list(mean = mean, sd = sd),
color = "#252525",
linewidth = 1
) +
labs(
title = "Comparison of samples from a distribution",
x = "Delay",
y = "Density",
caption = paste0(
"Green bars: Samples without primary event censoring\n",
"Blue bars: Samples with primary event censoring\n",
"Black line: True distribution without truncation or censoring"
)
) +
scale_x_continuous(limits = c(0, 15)) +
scale_y_continuous(
expand = expansion(mult = c(0, 0.05))
) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5)
)
# Calculate the theoretical PMF using dprimarycensored
theoretical_pmf <- dprimarycensored(
0:(obs_time - 1),
pdist = pnorm, dprimary = dunif,
pwindow = pwindow, swindow = 1, D = obs_time,
mean = mean, sd = sd
)
pmf_df <- data.frame(
x = 0:obs_time,
pmf = c(theoretical_pmf, 0)
)
# Calculate the theoretical PMF without primary internal censoring
theoretical_pmf_no_pc <- pnorm(1:obs_time, mean = mean, sd = sd) -
pnorm(0:(obs_time-1), mean = mean, sd = sd)
pmf_df_no_pc <- data.frame(
x = 0:obs_time,
pmf = c(theoretical_pmf_no_pc, 0)
)
# Plot the empirical and theoretical PMFs
ggplot() +
geom_col(
data = samples_pc_pmf,
aes(
x = as.numeric(as.character(pmf.samples_pc)),
y = pmf.Freq
),
fill = "#20b986",
col = "#252525",
alpha = 0.5,
width = 0.9,
just = 0
) +
geom_step(
data = pmf_df,
aes(x = x, y = pmf),
color = "black",
linewidth = 1
) +
geom_step(
data = pmf_df_no_pc,
aes(x = x, y = pmf),
color = "blue",
linewidth = 1,
linetype = "dashed"
) +
labs(
title = "Comparison of samples from a distribution",
x = "Delay",
y = "Density",
caption = paste0(
"Green bars: Empirical PMF from samples with secondary and primary event censoring\n",
"Black line: Theoretical PMF with primary censoring\n",
"Blue dashed line: Theoretical PMF without primary censoring"
)
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5)
)
@seabbs
Copy link
Author

seabbs commented Feb 21, 2025

This produces the following two plots that show that the PMF you get when you discretise with and without accounting for primary censoring is not the same.

plot

plot2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment