seabbs · April 8, 2025 19:56 · seabbs · Feb 21, 2025
diff --git a/primary-censoring-what-does-it-do.R b/primary-censoring-what-does-it-do.R
 library(primarycensored)
 library(ggplot2)

 n <- 1e4
 mean <- 10
 sd <- 0.5
 obs_time <- 30
 pwindow <- 1

 # Random samples without primary event censoring
 samples <- rprimarycensored(
  n,
  rdist = rnorm, rprimary = runif,
  pwindow = 0, swindow = 1, D = obs_time,
  mean = mean, sd = sd
 )
 # Random samples with primary event censoring
 samples_pc <- rprimarycensored(
  n,
  rdist = rnorm, rprimary = runif,
  pwindow = 1, swindow = 1, D = obs_time,
  mean = mean, sd = sd
 )

 # Calculate the PMF for the samples with secondary event censoring
 samples_pmf <- data.frame(
  pmf = table(samples) / sum(table(samples))
 )

 # Calculate the PMF for the samples with primary event censoring
 samples_pc_pmf <- data.frame(
  pmf = table(samples_pc) / sum(table(samples_pc))
 )

 # Compare the samples with and without secondary event censoring
 # to the true distribution
 ggplot() +
  geom_col(
    data = samples_pmf,
    aes(
      x = as.numeric(as.character(pmf.samples)),
      y = pmf.Freq
    ),
    fill = "#20b986",
    col = "#252525",
    alpha = 0.5,
    width = 0.9,
    just = 0
  ) +
  geom_col(
    data = samples_pc_pmf,
    aes(
      x = as.numeric(as.character(pmf.samples_pc)),
      y = pmf.Freq
    ),
    fill = "#4e20b9",
    col = "#252525",
    alpha = 0.5,
    width = 0.9,
    just = 0
  ) +
  geom_function(
    fun = dnorm,
    args = list(mean = mean, sd = sd),
    color = "#252525",
    linewidth = 1
  ) +
  labs(
    title = "Comparison of samples from a distribution",
    x = "Delay",
    y = "Density",
    caption = paste0(
      "Green bars: Samples without primary event censoring\n",
      "Blue bars: Samples with primary event censoring\n",
      "Black line: True distribution without truncation or censoring"
    )
  ) +
  scale_x_continuous(limits = c(0, 15)) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

 # Calculate the theoretical PMF using dprimarycensored
 theoretical_pmf <- dprimarycensored(
  0:(obs_time - 1),
  pdist = pnorm, dprimary = dunif,
  pwindow = pwindow, swindow = 1, D = obs_time,
  mean = mean, sd = sd
 )

 pmf_df <- data.frame(
  x = 0:obs_time,
  pmf = c(theoretical_pmf, 0)
 )

 # Calculate the theoretical PMF without primary internal censoring
 theoretical_pmf_no_pc <- pnorm(1:obs_time, mean = mean, sd = sd) - 
  pnorm(0:(obs_time-1), mean = mean, sd = sd)

 pmf_df_no_pc <- data.frame(
  x = 0:obs_time,
  pmf = c(theoretical_pmf_no_pc, 0)
 )

 # Plot the empirical and theoretical PMFs
 ggplot() +
  geom_col(
    data = samples_pc_pmf,
    aes(
      x = as.numeric(as.character(pmf.samples_pc)),
      y = pmf.Freq
    ),
    fill = "#20b986",
    col = "#252525",
    alpha = 0.5,
    width = 0.9,
    just = 0
  ) +
  geom_step(
    data = pmf_df,
    aes(x = x, y = pmf),
    color = "black",
    linewidth = 1
  ) +
  geom_step(
    data = pmf_df_no_pc,
    aes(x = x, y = pmf),
    color = "blue",
    linewidth = 1,
    linetype = "dashed"
  ) +
  labs(
    title = "Comparison of samples from a distribution",
    x = "Delay", 
    y = "Density",
    caption = paste0(
      "Green bars: Empirical PMF from samples with secondary and primary event censoring\n",
      "Black line: Theoretical PMF with primary censoring\n",
      "Blue dashed line: Theoretical PMF without primary censoring"
    )
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
	library(primarycensored)
	library(ggplot2)

	n <- 1e4
	mean <- 10
	sd <- 0.5
	obs_time <- 30
	pwindow <- 1

	# Random samples without primary event censoring
	samples <- rprimarycensored(
	n,
	rdist = rnorm, rprimary = runif,
	pwindow = 0, swindow = 1, D = obs_time,
	mean = mean, sd = sd
	)
	# Random samples with primary event censoring
	samples_pc <- rprimarycensored(
	n,
	rdist = rnorm, rprimary = runif,
	pwindow = 1, swindow = 1, D = obs_time,
	mean = mean, sd = sd
	)

	# Calculate the PMF for the samples with secondary event censoring
	samples_pmf <- data.frame(
	pmf = table(samples) / sum(table(samples))
	)

	# Calculate the PMF for the samples with primary event censoring
	samples_pc_pmf <- data.frame(
	pmf = table(samples_pc) / sum(table(samples_pc))
	)

	# Compare the samples with and without secondary event censoring
	# to the true distribution
	ggplot() +
	geom_col(
	data = samples_pmf,
	aes(
	x = as.numeric(as.character(pmf.samples)),
	y = pmf.Freq
	),
	fill = "#20b986",
	col = "#252525",
	alpha = 0.5,
	width = 0.9,
	just = 0
	) +
	geom_col(
	data = samples_pc_pmf,
	aes(
	x = as.numeric(as.character(pmf.samples_pc)),
	y = pmf.Freq
	),
	fill = "#4e20b9",
	col = "#252525",
	alpha = 0.5,
	width = 0.9,
	just = 0
	) +
	geom_function(
	fun = dnorm,
	args = list(mean = mean, sd = sd),
	color = "#252525",
	linewidth = 1
	) +
	labs(
	title = "Comparison of samples from a distribution",
	x = "Delay",
	y = "Density",
	caption = paste0(
	"Green bars: Samples without primary event censoring\n",
	"Blue bars: Samples with primary event censoring\n",
	"Black line: True distribution without truncation or censoring"
	)
	) +
	scale_x_continuous(limits = c(0, 15)) +
	scale_y_continuous(
	expand = expansion(mult = c(0, 0.05))
	) +
	theme_minimal() +
	theme(
	panel.grid.minor = element_blank(),
	plot.title = element_text(hjust = 0.5)
	)

	# Calculate the theoretical PMF using dprimarycensored
	theoretical_pmf <- dprimarycensored(
	0:(obs_time - 1),
	pdist = pnorm, dprimary = dunif,
	pwindow = pwindow, swindow = 1, D = obs_time,
	mean = mean, sd = sd
	)

	pmf_df <- data.frame(
	x = 0:obs_time,
	pmf = c(theoretical_pmf, 0)
	)

	# Calculate the theoretical PMF without primary internal censoring
	theoretical_pmf_no_pc <- pnorm(1:obs_time, mean = mean, sd = sd) -
	pnorm(0:(obs_time-1), mean = mean, sd = sd)

	pmf_df_no_pc <- data.frame(
	x = 0:obs_time,
	pmf = c(theoretical_pmf_no_pc, 0)
	)

	# Plot the empirical and theoretical PMFs
	ggplot() +
	geom_col(
	data = samples_pc_pmf,
	aes(
	x = as.numeric(as.character(pmf.samples_pc)),
	y = pmf.Freq
	),
	fill = "#20b986",
	col = "#252525",
	alpha = 0.5,
	width = 0.9,
	just = 0
	) +
	geom_step(
	data = pmf_df,
	aes(x = x, y = pmf),
	color = "black",
	linewidth = 1
	) +
	geom_step(
	data = pmf_df_no_pc,
	aes(x = x, y = pmf),
	color = "blue",
	linewidth = 1,
	linetype = "dashed"
	) +
	labs(
	title = "Comparison of samples from a distribution",
	x = "Delay",
	y = "Density",
	caption = paste0(
	"Green bars: Empirical PMF from samples with secondary and primary event censoring\n",
	"Black line: Theoretical PMF with primary censoring\n",
	"Blue dashed line: Theoretical PMF without primary censoring"
	)
	) +
	scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
	theme_minimal() +
	theme(
	panel.grid.minor = element_blank(),
	plot.title = element_text(hjust = 0.5)
	)