flodebarre · May 18, 2024 21:23
diff --git a/gistfile1.txt b/gistfile1.txt
 #
 # Reply to Weissman 2024
 #
 # Code to draw the spatial locations of source-linked infections and 
 # secondary++ infections, and estimate median distance to the source (origin)
 #
 # Updates: 
 # ========
 #
 # This TOY MODEL is just meant to rebut Weissman's claim that  
 # >  "The mean-square displacement (MSD) from the HSM of the unlinked cases would 
 #     then be approximately the sum of the MSD of the linked cases and the MSD from 
 #     the linked cases of the remaining steps in which traceability was lost. Barring
 #     some unknown contrivance, the unlinked cases would then on average be farther
 #     away from HSM than the linked cases."
 # This toy model is NOT meant to simulate proper epidemiological dynamics in Wuhan; 
 #  it is missing the main point developed in our paper: that infections could happen
 #  around the source. This is not an appropriate model for this purpose. 
 # Our use of median distances may have contributed to the confusion. To be closer
 #  to Weissman's original description, and hopefully avoid further confusion, 
 #  we switched to plotting MSDs instead of medians.  
 # ---  


 # Initializations ####

 # Random seed for reproducibility
 seed <- 123456

 # Whether to rerun the simulations
 # (if FALSE, we just plot them)
 runsims <- TRUE
 if(!file.exists("props_NB.csv") & !runsims) stop("Error: Output file absent. 
 You need to run the simulations at least once to generate the output files, 
 or to be in the correct working directory to load the files") 

 # Function to draw the number of secondary infection for each primary infection
 # (or infection in the current generation)
 drawSecondaryCases <- function(n, kappa, R){
  # n: number of infections in the current generation
  # kappa: dispersion parameter of the NB
  # R: average numnber of secondary cases
  rnbinom(n = n, size = kappa, prob = kappa / (kappa + R))
 }

 # Function to draw spatial displacement from the parent
 drawDisplacement2D <- function(n, meanDist){
  # n: number of infections
  # meanDist: mean distance (1/rate)
  
  # Draw distances following an exponential distribution
  dd <- rexp(n, rate = 1/meanDist)
  # Draw angles
  ag <- runif(n = n, min = 0, max = 2*pi)
  
  # Return cartesian coordinates
  cbind(dd * cos(ag), dd * sin(ag)) 
 }

 # Function to add the offspring's displacement to the parent's original position
 addDisplacement2D <- function(posParent, nOffspring, meanDist){
  # posParent: position of the parent, vector of length 2
  # nOffspring: number of secondary infections
  # meanDist: average displacement from the parent
  
  stopifnot(length(posParent) == 2)
  stopifnot(meanDist >= 0)
  
  # Draw offspring relative positions
  tmp <- drawDisplacement2D(n = nOffspring, meanDist = meanDist)
  # Add them to the parent's position
  cbind(posParent[1] + tmp[, 1], posParent[2] + tmp[, 2])
 }

 # Function to run a simulation: 
 #   Starting at (0, 0) [source], 
 #   draw source-linked infections then generations of secondary infections
 runSim <- function(nGen = 2, n0 = 50, mD0 = 5, mD1 = 5, kappa = 0.1, R = 3, 
                   plot = TRUE){
  # nGen: total number of generations, including source-linked infections
  # n0: number of primary infections
  # mD0: mean displacement for the primary infections (from the source at (0, 0))
  # mD1: mean displaement for the secondary++ infections
  # kappa: dispersion parameter of the negative binomial for secondary infections
  # R: average number of secondary infections
  # plot: whether to plot the results
  
  # Initialize population list
  pop <- list()
  # Primary infections (will give source-linked cases)
  pop[[1]] <- drawDisplacement2D(n = n0, meanDist = mD0)
  if(plot){
    plot(pop[[1]], xlim = 10*c(-mD0, mD0), ylim = 10*c(-mD0, mD0))
    points(0, 0, col = "red", pch = "+")
  }
  # Next generations
  for(i in 2:nGen){
    # Draw numbers of offspring
    off <- drawSecondaryCases(n = nrow(pop[[i-1]]), kappa = kappa, R = R)
    # Identify those who reproduce
    irepro <- which(off > 0) # Indices of individuals who reproduced
    
    if(plot){
      # Individuals who reproduce
      points(pop[[i-1]][irepro, ], col = i, pch = 16)
    }
    
    # Draw positions of the offspring
    #  Initialize output vector with dummy line (later removed)
    newv <- matrix(c(0, 0), ncol = 2)
    for(io in irepro){
      # Compute displacements
      newpos <- addDisplacement2D(posParent = pop[[i-1]][io, ], 
                                  nOffspring = off[io], meanDist = mD1)
      # Add them to the new population vector
      newv <- rbind(newv, newpos)
      if(plot){ 
        # Links between parents and offspring
        arrows(x0 = rep(pop[[i-1]][io, 1], off[io]), 
               y0 = rep(pop[[i-1]][io, 2], off[io]), 
               x1 = newpos[, 1], y1 = newpos[, 2], code = 0)
      }
    }
    newv <- newv[-1, ] # Remove dummy line 
    pop[[i]] <- newv
    if(plot){
      # Plot new generation
      points(pop[[i]], col = i)
      Sys.sleep(1)
    }
  }
  # Return population list
  pop
 }

 # Function to replicate the simulation and compute median distances
 # (function extended to include other potential metrics)
 drawDistances <- function(nrep, metric = "median", squared = TRUE,...){
  # nrep: number of replicates 
  # metric: function to be applied (default: median)
  # squared: whether to square distances
  # ...: parameters of the runSim function
  
  # Define wrapper with our parameters
  medWrapper <- function(){
    # Draw population
    pop <- runSim(..., plot = FALSE)
    if(metric == "centrepoint"){
      # Compute position of the centrepoints and distances to source
      unlist(parallel::mclapply(pop, function(m){
        tmp <- apply(m, 2, median)
        tmp[1]^2 + tmp[2]^2
      }))
    }else{
      # Compute distances to the source
      exponent <- ifelse(squared, 1, 1/2) # Whether to square the distances or not
      alldistances <- parallel::mclapply(pop, function(m) apply(m, 1, function(v) (v[1]^2 + v[2]^2)^(exponent)))
      # Compute median distances for each generation
      unlist(lapply(alldistances, metric))
    }
  }
  # Replicate nrep times
  t(replicate(nrep, medWrapper()))
 }

 # Define values of mD1/mD0 to test (Global variables)
 bby <- 0.05 # interval for values of mD1/mD0
 xx0 <- 0.01 # smallest value (not 0)
 xx <- seq(0, 1, by = bby) # values of mD1/mD0 to test
 xx[1] <- xx0
 dx <- bby/20 # x shift for generations

 # Repeat for different values of mD1
 # nrep = 1000, nGen = 4, mD0 = 1, mD1 = x, kappa = 0.1, R = 2.5
 computeMetric <- function(nrep, ...){
  t0 <- Sys.time()
  # Set seed for reproducibility
  set.seed(seed)
  # Compute median distances
  vmed <- lapply(xx, function(x) 
    drawDistances(nrep = nrep, mD1 = x, ...))
  vCompare <- list()
  for(igen in 2:ncol(vmed[[1]])){
    vCompare[[igen]] <- unlist(lapply(vmed, function(m) mean(m[, igen] < m[, 1], na.rm = TRUE)))
  }
  t1 <- Sys.time()
  cat("The calculation for ", nrep, " replicates took", t1 - t0, "\n")
  # Return list of results
  vCompare
 }

 # Function to generate the plots from a given list of results
 plotDistances <- function(vCompare, nrep, saveplot = TRUE, filename, 
                                prefix = ""){
  # vCompare: output list of distances to the source
  # nrep: number of replicates behind each point (for error bars)
  # saveplot: whether to save the plot as PDF
  # filename: name of the output file
  # prefix: prefix to be added to the output file name
  
  # Initialize plot
  if(saveplot){
    pdf(paste0(prefix, filename, ".pdf"), width = 4, height = 4)
  }
  par(xpd = FALSE, las = 1)
  par(mar = c(3.5, 3.5, 0.5, 0.5), mgp = c(2.25, 0.75, 0))
  plot(0, xlim = c(0, 1 + dx * (1 + length(vCompare))), ylim = c(0, 0.65), 
       xaxs = "i", yaxs = "i", 
       xlab = "Relative distance of secondary infections", 
       ylab = "Proportion of draws", 
       type = "n", frame.plot = FALSE)
  # add horizontal line at 5%
  abline(h = 0.05, col = gray(0.6), lty = 2)
  par(xpd = TRUE)
  # Define colors for the different generations of secondary cases
  colors <- MetBrewer::met.brewer("Hokusai3", n = length(vCompare)-1, direction = -1)
  # Plot results for each generation
  for(igen in 2:length(vCompare)){
    v <- vCompare[[igen]] # Subset of results for this generation
    # Plot proportion of simulations with unlinked closer than linked
    points(xx + (igen - 2) * dx, v, pch = 16, col = colors[igen - 1], type = "o", lwd = 0.5)
    # Add error bars
    arrows(x0 = xx + (igen - 2) * dx, 
           x1 = xx + (igen - 2) * dx, 
           y0 = v + 1.96 * sqrt(v * (1 - v) / nrep), 
           y1 = v - 1.96 * sqrt(v * (1 - v) / nrep), code = 0, 
           col = colors[igen - 1])
  }
  # Legend
  legend("topright", col = colors, legend = seq_along(colors), title = "Generation", 
         pch = 16, box.lwd = -1)
  par(xpd = FALSE)
  if(saveplot) dev.off()
 }

 # Function to save output as csv
 saveOutput <- function(v, filename, prefix = ""){
  # Reformat output as data frame
  tmp <- as.data.frame(cbind(xx, matrix(unlist(v), nrow = length(v[[2]]))))
  names(tmp) <- c("relDist", 2:ncol(tmp))
  # Export it as csv
  write.csv(tmp, file = paste0(prefix, filename, ".csv"), row.names = FALSE)
 }

 # Run simulations ####

 # |- Parameters ####

 # Number of replicates
 nreps <- 1000

 # Other parameters
 ngen <- 4 # Number of unlinked generations
 md0 <- 1 # Mean displacement for the primary infections
 kappaNB <- 0.4 # Overdispersion parameter, Negative Binomial
 kappaPoisson <- 100 # Overdispersion parameter, approx Poisson
 Re <- 2.5 # Average number of secondary infections
 sq <- TRUE # Whether to square the distances (TRUE for MSD)
 mtc <- "mean" # Metric ("mean" for MSD)

 # |- With Poisson-like distribution of offspring ####

 # Mean square displacement
 if(runsims){
  v0 <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaPoisson, R = Re, metric = mtc, squared = sq)
  saveOutput(v0, "props_Poisson")
 }else{
  v0 <- read.csv("props_Poisson.csv")
 }

 # |- With negative binomial distribution of offspring number ####

 # Mean square displacement
 if(runsims){
  vNB <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaNB, R = Re, metric = mtc, squared = sq)
  saveOutput(vNB, "props_NB")
 }else{
  vNB <- read.csv("props_NB.csv")
 }

 # Plot ####

 plotDistances(v0, nrep = nreps, filename = "props_Poisson")
 plotDistances(vNB, nrep = nreps, filename = "props_NB")
	#
	# Reply to Weissman 2024
	#
	# Code to draw the spatial locations of source-linked infections and
	# secondary++ infections, and estimate median distance to the source (origin)
	#
	# Updates:
	# ========
	#
	# This TOY MODEL is just meant to rebut Weissman's claim that
	# > "The mean-square displacement (MSD) from the HSM of the unlinked cases would
	# then be approximately the sum of the MSD of the linked cases and the MSD from
	# the linked cases of the remaining steps in which traceability was lost. Barring
	# some unknown contrivance, the unlinked cases would then on average be farther
	# away from HSM than the linked cases."
	# This toy model is NOT meant to simulate proper epidemiological dynamics in Wuhan;
	# it is missing the main point developed in our paper: that infections could happen
	# around the source. This is not an appropriate model for this purpose.
	# Our use of median distances may have contributed to the confusion. To be closer
	# to Weissman's original description, and hopefully avoid further confusion,
	# we switched to plotting MSDs instead of medians.
	# ---


	# Initializations ####

	# Random seed for reproducibility
	seed <- 123456

	# Whether to rerun the simulations
	# (if FALSE, we just plot them)
	runsims <- TRUE
	if(!file.exists("props_NB.csv") & !runsims) stop("Error: Output file absent.
	You need to run the simulations at least once to generate the output files,
	or to be in the correct working directory to load the files")

	# Function to draw the number of secondary infection for each primary infection
	# (or infection in the current generation)
	drawSecondaryCases <- function(n, kappa, R){
	# n: number of infections in the current generation
	# kappa: dispersion parameter of the NB
	# R: average numnber of secondary cases
	rnbinom(n = n, size = kappa, prob = kappa / (kappa + R))
	}

	# Function to draw spatial displacement from the parent
	drawDisplacement2D <- function(n, meanDist){
	# n: number of infections
	# meanDist: mean distance (1/rate)

	# Draw distances following an exponential distribution
	dd <- rexp(n, rate = 1/meanDist)
	# Draw angles
	ag <- runif(n = n, min = 0, max = 2*pi)

	# Return cartesian coordinates
	cbind(dd * cos(ag), dd * sin(ag))
	}

	# Function to add the offspring's displacement to the parent's original position
	addDisplacement2D <- function(posParent, nOffspring, meanDist){
	# posParent: position of the parent, vector of length 2
	# nOffspring: number of secondary infections
	# meanDist: average displacement from the parent

	stopifnot(length(posParent) == 2)
	stopifnot(meanDist >= 0)

	# Draw offspring relative positions
	tmp <- drawDisplacement2D(n = nOffspring, meanDist = meanDist)
	# Add them to the parent's position
	cbind(posParent[1] + tmp[, 1], posParent[2] + tmp[, 2])
	}

	# Function to run a simulation:
	# Starting at (0, 0) [source],
	# draw source-linked infections then generations of secondary infections
	runSim <- function(nGen = 2, n0 = 50, mD0 = 5, mD1 = 5, kappa = 0.1, R = 3,
	plot = TRUE){
	# nGen: total number of generations, including source-linked infections
	# n0: number of primary infections
	# mD0: mean displacement for the primary infections (from the source at (0, 0))
	# mD1: mean displaement for the secondary++ infections
	# kappa: dispersion parameter of the negative binomial for secondary infections
	# R: average number of secondary infections
	# plot: whether to plot the results

	# Initialize population list
	pop <- list()
	# Primary infections (will give source-linked cases)
	pop[[1]] <- drawDisplacement2D(n = n0, meanDist = mD0)
	if(plot){
	plot(pop[[1]], xlim = 10c(-mD0, mD0), ylim = 10c(-mD0, mD0))
	points(0, 0, col = "red", pch = "+")
	}
	# Next generations
	for(i in 2:nGen){
	# Draw numbers of offspring
	off <- drawSecondaryCases(n = nrow(pop[[i-1]]), kappa = kappa, R = R)
	# Identify those who reproduce
	irepro <- which(off > 0) # Indices of individuals who reproduced

	if(plot){
	# Individuals who reproduce
	points(pop[[i-1]][irepro, ], col = i, pch = 16)
	}

	# Draw positions of the offspring
	# Initialize output vector with dummy line (later removed)
	newv <- matrix(c(0, 0), ncol = 2)
	for(io in irepro){
	# Compute displacements
	newpos <- addDisplacement2D(posParent = pop[[i-1]][io, ],
	nOffspring = off[io], meanDist = mD1)
	# Add them to the new population vector
	newv <- rbind(newv, newpos)
	if(plot){
	# Links between parents and offspring
	arrows(x0 = rep(pop[[i-1]][io, 1], off[io]),
	y0 = rep(pop[[i-1]][io, 2], off[io]),
	x1 = newpos[, 1], y1 = newpos[, 2], code = 0)
	}
	}
	newv <- newv[-1, ] # Remove dummy line
	pop[[i]] <- newv
	if(plot){
	# Plot new generation
	points(pop[[i]], col = i)
	Sys.sleep(1)
	}
	}
	# Return population list
	pop
	}

	# Function to replicate the simulation and compute median distances
	# (function extended to include other potential metrics)
	drawDistances <- function(nrep, metric = "median", squared = TRUE,...){
	# nrep: number of replicates
	# metric: function to be applied (default: median)
	# squared: whether to square distances
	# ...: parameters of the runSim function

	# Define wrapper with our parameters
	medWrapper <- function(){
	# Draw population
	pop <- runSim(..., plot = FALSE)
	if(metric == "centrepoint"){
	# Compute position of the centrepoints and distances to source
	unlist(parallel::mclapply(pop, function(m){
	tmp <- apply(m, 2, median)
	tmp[1]^2 + tmp[2]^2
	}))
	}else{
	# Compute distances to the source
	exponent <- ifelse(squared, 1, 1/2) # Whether to square the distances or not
	alldistances <- parallel::mclapply(pop, function(m) apply(m, 1, function(v) (v[1]^2 + v[2]^2)^(exponent)))
	# Compute median distances for each generation
	unlist(lapply(alldistances, metric))
	}
	}
	# Replicate nrep times
	t(replicate(nrep, medWrapper()))
	}

	# Define values of mD1/mD0 to test (Global variables)
	bby <- 0.05 # interval for values of mD1/mD0
	xx0 <- 0.01 # smallest value (not 0)
	xx <- seq(0, 1, by = bby) # values of mD1/mD0 to test
	xx[1] <- xx0
	dx <- bby/20 # x shift for generations

	# Repeat for different values of mD1
	# nrep = 1000, nGen = 4, mD0 = 1, mD1 = x, kappa = 0.1, R = 2.5
	computeMetric <- function(nrep, ...){
	t0 <- Sys.time()
	# Set seed for reproducibility
	set.seed(seed)
	# Compute median distances
	vmed <- lapply(xx, function(x)
	drawDistances(nrep = nrep, mD1 = x, ...))
	vCompare <- list()
	for(igen in 2:ncol(vmed[[1]])){
	vCompare[[igen]] <- unlist(lapply(vmed, function(m) mean(m[, igen] < m[, 1], na.rm = TRUE)))
	}
	t1 <- Sys.time()
	cat("The calculation for ", nrep, " replicates took", t1 - t0, "\n")
	# Return list of results
	vCompare
	}

	# Function to generate the plots from a given list of results
	plotDistances <- function(vCompare, nrep, saveplot = TRUE, filename,
	prefix = ""){
	# vCompare: output list of distances to the source
	# nrep: number of replicates behind each point (for error bars)
	# saveplot: whether to save the plot as PDF
	# filename: name of the output file
	# prefix: prefix to be added to the output file name

	# Initialize plot
	if(saveplot){
	pdf(paste0(prefix, filename, ".pdf"), width = 4, height = 4)
	}
	par(xpd = FALSE, las = 1)
	par(mar = c(3.5, 3.5, 0.5, 0.5), mgp = c(2.25, 0.75, 0))
	plot(0, xlim = c(0, 1 + dx * (1 + length(vCompare))), ylim = c(0, 0.65),
	xaxs = "i", yaxs = "i",
	xlab = "Relative distance of secondary infections",
	ylab = "Proportion of draws",
	type = "n", frame.plot = FALSE)
	# add horizontal line at 5%
	abline(h = 0.05, col = gray(0.6), lty = 2)
	par(xpd = TRUE)
	# Define colors for the different generations of secondary cases
	colors <- MetBrewer::met.brewer("Hokusai3", n = length(vCompare)-1, direction = -1)
	# Plot results for each generation
	for(igen in 2:length(vCompare)){
	v <- vCompare[[igen]] # Subset of results for this generation
	# Plot proportion of simulations with unlinked closer than linked
	points(xx + (igen - 2) * dx, v, pch = 16, col = colors[igen - 1], type = "o", lwd = 0.5)
	# Add error bars
	arrows(x0 = xx + (igen - 2) * dx,
	x1 = xx + (igen - 2) * dx,
	y0 = v + 1.96 * sqrt(v * (1 - v) / nrep),
	y1 = v - 1.96 * sqrt(v * (1 - v) / nrep), code = 0,
	col = colors[igen - 1])
	}
	# Legend
	legend("topright", col = colors, legend = seq_along(colors), title = "Generation",
	pch = 16, box.lwd = -1)
	par(xpd = FALSE)
	if(saveplot) dev.off()
	}

	# Function to save output as csv
	saveOutput <- function(v, filename, prefix = ""){
	# Reformat output as data frame
	tmp <- as.data.frame(cbind(xx, matrix(unlist(v), nrow = length(v[[2]]))))
	names(tmp) <- c("relDist", 2:ncol(tmp))
	# Export it as csv
	write.csv(tmp, file = paste0(prefix, filename, ".csv"), row.names = FALSE)
	}

	# Run simulations ####

	# \|- Parameters ####

	# Number of replicates
	nreps <- 1000

	# Other parameters
	ngen <- 4 # Number of unlinked generations
	md0 <- 1 # Mean displacement for the primary infections
	kappaNB <- 0.4 # Overdispersion parameter, Negative Binomial
	kappaPoisson <- 100 # Overdispersion parameter, approx Poisson
	Re <- 2.5 # Average number of secondary infections
	sq <- TRUE # Whether to square the distances (TRUE for MSD)
	mtc <- "mean" # Metric ("mean" for MSD)

	# \|- With Poisson-like distribution of offspring ####

	# Mean square displacement
	if(runsims){
	v0 <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaPoisson, R = Re, metric = mtc, squared = sq)
	saveOutput(v0, "props_Poisson")
	}else{
	v0 <- read.csv("props_Poisson.csv")
	}

	# \|- With negative binomial distribution of offspring number ####

	# Mean square displacement
	if(runsims){
	vNB <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaNB, R = Re, metric = mtc, squared = sq)
	saveOutput(vNB, "props_NB")
	}else{
	vNB <- read.csv("props_NB.csv")
	}

	# Plot ####

	plotDistances(v0, nrep = nreps, filename = "props_Poisson")
	plotDistances(vNB, nrep = nreps, filename = "props_NB")