Skip to content

Instantly share code, notes, and snippets.

@flodebarre
Last active May 18, 2024 21:23
Show Gist options
  • Save flodebarre/0af4ff60d375a322c1ede9c15bb5ed14 to your computer and use it in GitHub Desktop.
Save flodebarre/0af4ff60d375a322c1ede9c15bb5ed14 to your computer and use it in GitHub Desktop.
Weissman_reply
#
# Reply to Weissman 2024
#
# Code to draw the spatial locations of source-linked infections and
# secondary++ infections, and estimate median distance to the source (origin)
#
# Updates:
# ========
#
# This TOY MODEL is just meant to rebut Weissman's claim that
# > "The mean-square displacement (MSD) from the HSM of the unlinked cases would
# then be approximately the sum of the MSD of the linked cases and the MSD from
# the linked cases of the remaining steps in which traceability was lost. Barring
# some unknown contrivance, the unlinked cases would then on average be farther
# away from HSM than the linked cases."
# This toy model is NOT meant to simulate proper epidemiological dynamics in Wuhan;
# it is missing the main point developed in our paper: that infections could happen
# around the source. This is not an appropriate model for this purpose.
# Our use of median distances may have contributed to the confusion. To be closer
# to Weissman's original description, and hopefully avoid further confusion,
# we switched to plotting MSDs instead of medians.
# ---
# Initializations ####
# Random seed for reproducibility
seed <- 123456
# Whether to rerun the simulations
# (if FALSE, we just plot them)
runsims <- TRUE
if(!file.exists("props_NB.csv") & !runsims) stop("Error: Output file absent.
You need to run the simulations at least once to generate the output files,
or to be in the correct working directory to load the files")
# Function to draw the number of secondary infection for each primary infection
# (or infection in the current generation)
drawSecondaryCases <- function(n, kappa, R){
# n: number of infections in the current generation
# kappa: dispersion parameter of the NB
# R: average numnber of secondary cases
rnbinom(n = n, size = kappa, prob = kappa / (kappa + R))
}
# Function to draw spatial displacement from the parent
drawDisplacement2D <- function(n, meanDist){
# n: number of infections
# meanDist: mean distance (1/rate)
# Draw distances following an exponential distribution
dd <- rexp(n, rate = 1/meanDist)
# Draw angles
ag <- runif(n = n, min = 0, max = 2*pi)
# Return cartesian coordinates
cbind(dd * cos(ag), dd * sin(ag))
}
# Function to add the offspring's displacement to the parent's original position
addDisplacement2D <- function(posParent, nOffspring, meanDist){
# posParent: position of the parent, vector of length 2
# nOffspring: number of secondary infections
# meanDist: average displacement from the parent
stopifnot(length(posParent) == 2)
stopifnot(meanDist >= 0)
# Draw offspring relative positions
tmp <- drawDisplacement2D(n = nOffspring, meanDist = meanDist)
# Add them to the parent's position
cbind(posParent[1] + tmp[, 1], posParent[2] + tmp[, 2])
}
# Function to run a simulation:
# Starting at (0, 0) [source],
# draw source-linked infections then generations of secondary infections
runSim <- function(nGen = 2, n0 = 50, mD0 = 5, mD1 = 5, kappa = 0.1, R = 3,
plot = TRUE){
# nGen: total number of generations, including source-linked infections
# n0: number of primary infections
# mD0: mean displacement for the primary infections (from the source at (0, 0))
# mD1: mean displaement for the secondary++ infections
# kappa: dispersion parameter of the negative binomial for secondary infections
# R: average number of secondary infections
# plot: whether to plot the results
# Initialize population list
pop <- list()
# Primary infections (will give source-linked cases)
pop[[1]] <- drawDisplacement2D(n = n0, meanDist = mD0)
if(plot){
plot(pop[[1]], xlim = 10*c(-mD0, mD0), ylim = 10*c(-mD0, mD0))
points(0, 0, col = "red", pch = "+")
}
# Next generations
for(i in 2:nGen){
# Draw numbers of offspring
off <- drawSecondaryCases(n = nrow(pop[[i-1]]), kappa = kappa, R = R)
# Identify those who reproduce
irepro <- which(off > 0) # Indices of individuals who reproduced
if(plot){
# Individuals who reproduce
points(pop[[i-1]][irepro, ], col = i, pch = 16)
}
# Draw positions of the offspring
# Initialize output vector with dummy line (later removed)
newv <- matrix(c(0, 0), ncol = 2)
for(io in irepro){
# Compute displacements
newpos <- addDisplacement2D(posParent = pop[[i-1]][io, ],
nOffspring = off[io], meanDist = mD1)
# Add them to the new population vector
newv <- rbind(newv, newpos)
if(plot){
# Links between parents and offspring
arrows(x0 = rep(pop[[i-1]][io, 1], off[io]),
y0 = rep(pop[[i-1]][io, 2], off[io]),
x1 = newpos[, 1], y1 = newpos[, 2], code = 0)
}
}
newv <- newv[-1, ] # Remove dummy line
pop[[i]] <- newv
if(plot){
# Plot new generation
points(pop[[i]], col = i)
Sys.sleep(1)
}
}
# Return population list
pop
}
# Function to replicate the simulation and compute median distances
# (function extended to include other potential metrics)
drawDistances <- function(nrep, metric = "median", squared = TRUE,...){
# nrep: number of replicates
# metric: function to be applied (default: median)
# squared: whether to square distances
# ...: parameters of the runSim function
# Define wrapper with our parameters
medWrapper <- function(){
# Draw population
pop <- runSim(..., plot = FALSE)
if(metric == "centrepoint"){
# Compute position of the centrepoints and distances to source
unlist(parallel::mclapply(pop, function(m){
tmp <- apply(m, 2, median)
tmp[1]^2 + tmp[2]^2
}))
}else{
# Compute distances to the source
exponent <- ifelse(squared, 1, 1/2) # Whether to square the distances or not
alldistances <- parallel::mclapply(pop, function(m) apply(m, 1, function(v) (v[1]^2 + v[2]^2)^(exponent)))
# Compute median distances for each generation
unlist(lapply(alldistances, metric))
}
}
# Replicate nrep times
t(replicate(nrep, medWrapper()))
}
# Define values of mD1/mD0 to test (Global variables)
bby <- 0.05 # interval for values of mD1/mD0
xx0 <- 0.01 # smallest value (not 0)
xx <- seq(0, 1, by = bby) # values of mD1/mD0 to test
xx[1] <- xx0
dx <- bby/20 # x shift for generations
# Repeat for different values of mD1
# nrep = 1000, nGen = 4, mD0 = 1, mD1 = x, kappa = 0.1, R = 2.5
computeMetric <- function(nrep, ...){
t0 <- Sys.time()
# Set seed for reproducibility
set.seed(seed)
# Compute median distances
vmed <- lapply(xx, function(x)
drawDistances(nrep = nrep, mD1 = x, ...))
vCompare <- list()
for(igen in 2:ncol(vmed[[1]])){
vCompare[[igen]] <- unlist(lapply(vmed, function(m) mean(m[, igen] < m[, 1], na.rm = TRUE)))
}
t1 <- Sys.time()
cat("The calculation for ", nrep, " replicates took", t1 - t0, "\n")
# Return list of results
vCompare
}
# Function to generate the plots from a given list of results
plotDistances <- function(vCompare, nrep, saveplot = TRUE, filename,
prefix = ""){
# vCompare: output list of distances to the source
# nrep: number of replicates behind each point (for error bars)
# saveplot: whether to save the plot as PDF
# filename: name of the output file
# prefix: prefix to be added to the output file name
# Initialize plot
if(saveplot){
pdf(paste0(prefix, filename, ".pdf"), width = 4, height = 4)
}
par(xpd = FALSE, las = 1)
par(mar = c(3.5, 3.5, 0.5, 0.5), mgp = c(2.25, 0.75, 0))
plot(0, xlim = c(0, 1 + dx * (1 + length(vCompare))), ylim = c(0, 0.65),
xaxs = "i", yaxs = "i",
xlab = "Relative distance of secondary infections",
ylab = "Proportion of draws",
type = "n", frame.plot = FALSE)
# add horizontal line at 5%
abline(h = 0.05, col = gray(0.6), lty = 2)
par(xpd = TRUE)
# Define colors for the different generations of secondary cases
colors <- MetBrewer::met.brewer("Hokusai3", n = length(vCompare)-1, direction = -1)
# Plot results for each generation
for(igen in 2:length(vCompare)){
v <- vCompare[[igen]] # Subset of results for this generation
# Plot proportion of simulations with unlinked closer than linked
points(xx + (igen - 2) * dx, v, pch = 16, col = colors[igen - 1], type = "o", lwd = 0.5)
# Add error bars
arrows(x0 = xx + (igen - 2) * dx,
x1 = xx + (igen - 2) * dx,
y0 = v + 1.96 * sqrt(v * (1 - v) / nrep),
y1 = v - 1.96 * sqrt(v * (1 - v) / nrep), code = 0,
col = colors[igen - 1])
}
# Legend
legend("topright", col = colors, legend = seq_along(colors), title = "Generation",
pch = 16, box.lwd = -1)
par(xpd = FALSE)
if(saveplot) dev.off()
}
# Function to save output as csv
saveOutput <- function(v, filename, prefix = ""){
# Reformat output as data frame
tmp <- as.data.frame(cbind(xx, matrix(unlist(v), nrow = length(v[[2]]))))
names(tmp) <- c("relDist", 2:ncol(tmp))
# Export it as csv
write.csv(tmp, file = paste0(prefix, filename, ".csv"), row.names = FALSE)
}
# Run simulations ####
# |- Parameters ####
# Number of replicates
nreps <- 1000
# Other parameters
ngen <- 4 # Number of unlinked generations
md0 <- 1 # Mean displacement for the primary infections
kappaNB <- 0.4 # Overdispersion parameter, Negative Binomial
kappaPoisson <- 100 # Overdispersion parameter, approx Poisson
Re <- 2.5 # Average number of secondary infections
sq <- TRUE # Whether to square the distances (TRUE for MSD)
mtc <- "mean" # Metric ("mean" for MSD)
# |- With Poisson-like distribution of offspring ####
# Mean square displacement
if(runsims){
v0 <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaPoisson, R = Re, metric = mtc, squared = sq)
saveOutput(v0, "props_Poisson")
}else{
v0 <- read.csv("props_Poisson.csv")
}
# |- With negative binomial distribution of offspring number ####
# Mean square displacement
if(runsims){
vNB <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaNB, R = Re, metric = mtc, squared = sq)
saveOutput(vNB, "props_NB")
}else{
vNB <- read.csv("props_NB.csv")
}
# Plot ####
plotDistances(v0, nrep = nreps, filename = "props_Poisson")
plotDistances(vNB, nrep = nreps, filename = "props_NB")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment