fdabl · October 18, 2024 13:08
diff --git a/gistfile1.txt b/gistfile1.txt
 library(ggplot2)
 library(RColorBrewer)

 # Roughly estimates the number of Covid infectious people at an ADE party(N = 900)
 # Main uncertainties:
 #   (1) Number of people infectious
 #   (2) Number of people a single infectious person infects (R_eff)
 #
 # For (1), we rely on the number of positive tests from last week. This was 1.5%. Generally, this is an underestimate
 # of the true number. Similarly, many people are coming to ADE abroad. We use a distribution over different values below.
 # For (2), we assume a uniform distribution ranging from 2 to 8. In poorly ventilated spaces, this could be much higher.

 # Assumptions:
 #     (1) Independence of people
 #     (2) Relationship between proportion of positive tests and proportion of infectious people
 #
 # Regarding (1), this yields a binomial distribution for the number of people X out of N that are infected.
 # Independence doesn't hold (party goers no each other but this might not be a huge deal).
 # Regarding (2), this depends on which population is being tested. If testing is done at random, then the proportion of
 # positive tests should be close to the prevalence of Covid infections. However, most people who do a test probable have
 # some kind of reason to do it, hence are not representative of the population. ChatGPT suggests a multiplier of between
 # 0.3 and 0.7 times the positivity rate when testing is targeted to get the prevalence. However, not everybody who has Covid
 # does a test, so the reported positivity rate likely underestimates the true positivity rate

 custom_theme <- theme_minimal() +
  theme(
    legend.position = 'top',
    plot.title = element_text(size = 14, hjust = 0.50)
  )

 get_number_infectious <- function(prevalence) {
  N <- 900
  x <- seq(0, 900, 1)
  probs <- dbinom(x, N, prevalence)
  data.frame(x = x, p = probs, prevalence = prevalence)
 }

 # In unventilated, crowded spaces R_eff can be very high
 get_number_infected <- function(prevalence, R_lo = 2, R_hi = 8) {
  N <- 900
  iter <- 10000
  
  number_infectious <- rbinom(iter, n, prevalence)
  R_eff <- runif(iter, R_lo, R_hi)
  data.frame(x = number_infectious * R_eff)
 }

 positivity_rate <- 0.015 # 1.5% were reported to test positive last week
 prevalence <- positivity_rate * 0.50 # mean of 0.3 and 0.7

 df_inf <- rbind(
  get_number_infectious(prevalence),
  # To account for underestimation of the positivity rate
  get_number_infectious(prevalence * 3),
  get_number_infectious(prevalence * 5)
 )

 cols <- brewer.pal(3, 'Set1')
 ggplot(df_inf, aes(x = x, y = p, fill = factor(prevalence))) +
  geom_bar(stat = 'identity') + 
  scale_fill_manual(name = 'Covid prevalence', values = cols) +
  xlim(c(0, 100)) +
  ggtitle('Number of people who could be infectious') +
  facet_wrap(~ prevalence) +
  xlab('Number of people') +
  ylab('Density') +
  custom_theme

 # Let's assume that 2.225% are infectious. How many might they infect?
 # We integrate this uncertainty over the uncertainty in R_eff
 set.seed(1)
 prevalence <- reported_positive * 0.50 * 3
 num_infected <- get_number_infected(prevalence)

 ggplot(num_infected, aes(x = x)) +
  geom_histogram() +
  xlab('Number of people getting infected') +
  ggtitle('Number of people getting infected') +
  ylab('Frequency') +
  custom_theme

 # Pretty high
 mean(num_infected$x)

 # Let's be more advanced and integrate uncertainty about the number of infectious people in there
 set.seed(1)
 iter <- 10000
 prevalence <- runif(iter, reported_positive * 0.50, reported_positive * 0.50 * 5)
 num_infected2 <- get_number_infected(prevalence)

 ggplot(num_infected2, aes(x = x)) +
  geom_histogram() +
  xlab('Number of people getting infected') +
  ggtitle('Number of people getting infected') +
  ylab('Frequency') +
  custom_theme

 # Same mean but fatter tails
 mean(num_infected2$x)

 # Probability that you get infected yourself (assuming equal mixing)
 mean(num_infected2$x) / N
	library(ggplot2)
	library(RColorBrewer)

	# Roughly estimates the number of Covid infectious people at an ADE party(N = 900)
	# Main uncertainties:
	# (1) Number of people infectious
	# (2) Number of people a single infectious person infects (R_eff)
	#
	# For (1), we rely on the number of positive tests from last week. This was 1.5%. Generally, this is an underestimate
	# of the true number. Similarly, many people are coming to ADE abroad. We use a distribution over different values below.
	# For (2), we assume a uniform distribution ranging from 2 to 8. In poorly ventilated spaces, this could be much higher.

	# Assumptions:
	# (1) Independence of people
	# (2) Relationship between proportion of positive tests and proportion of infectious people
	#
	# Regarding (1), this yields a binomial distribution for the number of people X out of N that are infected.
	# Independence doesn't hold (party goers no each other but this might not be a huge deal).
	# Regarding (2), this depends on which population is being tested. If testing is done at random, then the proportion of
	# positive tests should be close to the prevalence of Covid infections. However, most people who do a test probable have
	# some kind of reason to do it, hence are not representative of the population. ChatGPT suggests a multiplier of between
	# 0.3 and 0.7 times the positivity rate when testing is targeted to get the prevalence. However, not everybody who has Covid
	# does a test, so the reported positivity rate likely underestimates the true positivity rate

	custom_theme <- theme_minimal() +
	theme(
	legend.position = 'top',
	plot.title = element_text(size = 14, hjust = 0.50)
	)

	get_number_infectious <- function(prevalence) {
	N <- 900
	x <- seq(0, 900, 1)
	probs <- dbinom(x, N, prevalence)
	data.frame(x = x, p = probs, prevalence = prevalence)
	}

	# In unventilated, crowded spaces R_eff can be very high
	get_number_infected <- function(prevalence, R_lo = 2, R_hi = 8) {
	N <- 900
	iter <- 10000

	number_infectious <- rbinom(iter, n, prevalence)
	R_eff <- runif(iter, R_lo, R_hi)
	data.frame(x = number_infectious * R_eff)
	}

	positivity_rate <- 0.015 # 1.5% were reported to test positive last week
	prevalence <- positivity_rate * 0.50 # mean of 0.3 and 0.7

	df_inf <- rbind(
	get_number_infectious(prevalence),
	# To account for underestimation of the positivity rate
	get_number_infectious(prevalence * 3),
	get_number_infectious(prevalence * 5)
	)

	cols <- brewer.pal(3, 'Set1')
	ggplot(df_inf, aes(x = x, y = p, fill = factor(prevalence))) +
	geom_bar(stat = 'identity') +
	scale_fill_manual(name = 'Covid prevalence', values = cols) +
	xlim(c(0, 100)) +
	ggtitle('Number of people who could be infectious') +
	facet_wrap(~ prevalence) +
	xlab('Number of people') +
	ylab('Density') +
	custom_theme

	# Let's assume that 2.225% are infectious. How many might they infect?
	# We integrate this uncertainty over the uncertainty in R_eff
	set.seed(1)
	prevalence <- reported_positive * 0.50 * 3
	num_infected <- get_number_infected(prevalence)

	ggplot(num_infected, aes(x = x)) +
	geom_histogram() +
	xlab('Number of people getting infected') +
	ggtitle('Number of people getting infected') +
	ylab('Frequency') +
	custom_theme

	# Pretty high
	mean(num_infected$x)

	# Let's be more advanced and integrate uncertainty about the number of infectious people in there
	set.seed(1)
	iter <- 10000
	prevalence <- runif(iter, reported_positive * 0.50, reported_positive * 0.50 * 5)
	num_infected2 <- get_number_infected(prevalence)

	ggplot(num_infected2, aes(x = x)) +
	geom_histogram() +
	xlab('Number of people getting infected') +
	ggtitle('Number of people getting infected') +
	ylab('Frequency') +
	custom_theme

	# Same mean but fatter tails
	mean(num_infected2$x)

	# Probability that you get infected yourself (assuming equal mixing)
	mean(num_infected2$x) / N