Last active
May 18, 2024 21:23
-
-
Save flodebarre/0af4ff60d375a322c1ede9c15bb5ed14 to your computer and use it in GitHub Desktop.
Weissman_reply
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Reply to Weissman 2024 | |
# | |
# Code to draw the spatial locations of source-linked infections and | |
# secondary++ infections, and estimate median distance to the source (origin) | |
# | |
# Updates: | |
# ======== | |
# | |
# This TOY MODEL is just meant to rebut Weissman's claim that | |
# > "The mean-square displacement (MSD) from the HSM of the unlinked cases would | |
# then be approximately the sum of the MSD of the linked cases and the MSD from | |
# the linked cases of the remaining steps in which traceability was lost. Barring | |
# some unknown contrivance, the unlinked cases would then on average be farther | |
# away from HSM than the linked cases." | |
# This toy model is NOT meant to simulate proper epidemiological dynamics in Wuhan; | |
# it is missing the main point developed in our paper: that infections could happen | |
# around the source. This is not an appropriate model for this purpose. | |
# Our use of median distances may have contributed to the confusion. To be closer | |
# to Weissman's original description, and hopefully avoid further confusion, | |
# we switched to plotting MSDs instead of medians. | |
# --- | |
# Initializations #### | |
# Random seed for reproducibility | |
seed <- 123456 | |
# Whether to rerun the simulations | |
# (if FALSE, we just plot them) | |
runsims <- TRUE | |
if(!file.exists("props_NB.csv") & !runsims) stop("Error: Output file absent. | |
You need to run the simulations at least once to generate the output files, | |
or to be in the correct working directory to load the files") | |
# Function to draw the number of secondary infection for each primary infection | |
# (or infection in the current generation) | |
drawSecondaryCases <- function(n, kappa, R){ | |
# n: number of infections in the current generation | |
# kappa: dispersion parameter of the NB | |
# R: average numnber of secondary cases | |
rnbinom(n = n, size = kappa, prob = kappa / (kappa + R)) | |
} | |
# Function to draw spatial displacement from the parent | |
drawDisplacement2D <- function(n, meanDist){ | |
# n: number of infections | |
# meanDist: mean distance (1/rate) | |
# Draw distances following an exponential distribution | |
dd <- rexp(n, rate = 1/meanDist) | |
# Draw angles | |
ag <- runif(n = n, min = 0, max = 2*pi) | |
# Return cartesian coordinates | |
cbind(dd * cos(ag), dd * sin(ag)) | |
} | |
# Function to add the offspring's displacement to the parent's original position | |
addDisplacement2D <- function(posParent, nOffspring, meanDist){ | |
# posParent: position of the parent, vector of length 2 | |
# nOffspring: number of secondary infections | |
# meanDist: average displacement from the parent | |
stopifnot(length(posParent) == 2) | |
stopifnot(meanDist >= 0) | |
# Draw offspring relative positions | |
tmp <- drawDisplacement2D(n = nOffspring, meanDist = meanDist) | |
# Add them to the parent's position | |
cbind(posParent[1] + tmp[, 1], posParent[2] + tmp[, 2]) | |
} | |
# Function to run a simulation: | |
# Starting at (0, 0) [source], | |
# draw source-linked infections then generations of secondary infections | |
runSim <- function(nGen = 2, n0 = 50, mD0 = 5, mD1 = 5, kappa = 0.1, R = 3, | |
plot = TRUE){ | |
# nGen: total number of generations, including source-linked infections | |
# n0: number of primary infections | |
# mD0: mean displacement for the primary infections (from the source at (0, 0)) | |
# mD1: mean displaement for the secondary++ infections | |
# kappa: dispersion parameter of the negative binomial for secondary infections | |
# R: average number of secondary infections | |
# plot: whether to plot the results | |
# Initialize population list | |
pop <- list() | |
# Primary infections (will give source-linked cases) | |
pop[[1]] <- drawDisplacement2D(n = n0, meanDist = mD0) | |
if(plot){ | |
plot(pop[[1]], xlim = 10*c(-mD0, mD0), ylim = 10*c(-mD0, mD0)) | |
points(0, 0, col = "red", pch = "+") | |
} | |
# Next generations | |
for(i in 2:nGen){ | |
# Draw numbers of offspring | |
off <- drawSecondaryCases(n = nrow(pop[[i-1]]), kappa = kappa, R = R) | |
# Identify those who reproduce | |
irepro <- which(off > 0) # Indices of individuals who reproduced | |
if(plot){ | |
# Individuals who reproduce | |
points(pop[[i-1]][irepro, ], col = i, pch = 16) | |
} | |
# Draw positions of the offspring | |
# Initialize output vector with dummy line (later removed) | |
newv <- matrix(c(0, 0), ncol = 2) | |
for(io in irepro){ | |
# Compute displacements | |
newpos <- addDisplacement2D(posParent = pop[[i-1]][io, ], | |
nOffspring = off[io], meanDist = mD1) | |
# Add them to the new population vector | |
newv <- rbind(newv, newpos) | |
if(plot){ | |
# Links between parents and offspring | |
arrows(x0 = rep(pop[[i-1]][io, 1], off[io]), | |
y0 = rep(pop[[i-1]][io, 2], off[io]), | |
x1 = newpos[, 1], y1 = newpos[, 2], code = 0) | |
} | |
} | |
newv <- newv[-1, ] # Remove dummy line | |
pop[[i]] <- newv | |
if(plot){ | |
# Plot new generation | |
points(pop[[i]], col = i) | |
Sys.sleep(1) | |
} | |
} | |
# Return population list | |
pop | |
} | |
# Function to replicate the simulation and compute median distances | |
# (function extended to include other potential metrics) | |
drawDistances <- function(nrep, metric = "median", squared = TRUE,...){ | |
# nrep: number of replicates | |
# metric: function to be applied (default: median) | |
# squared: whether to square distances | |
# ...: parameters of the runSim function | |
# Define wrapper with our parameters | |
medWrapper <- function(){ | |
# Draw population | |
pop <- runSim(..., plot = FALSE) | |
if(metric == "centrepoint"){ | |
# Compute position of the centrepoints and distances to source | |
unlist(parallel::mclapply(pop, function(m){ | |
tmp <- apply(m, 2, median) | |
tmp[1]^2 + tmp[2]^2 | |
})) | |
}else{ | |
# Compute distances to the source | |
exponent <- ifelse(squared, 1, 1/2) # Whether to square the distances or not | |
alldistances <- parallel::mclapply(pop, function(m) apply(m, 1, function(v) (v[1]^2 + v[2]^2)^(exponent))) | |
# Compute median distances for each generation | |
unlist(lapply(alldistances, metric)) | |
} | |
} | |
# Replicate nrep times | |
t(replicate(nrep, medWrapper())) | |
} | |
# Define values of mD1/mD0 to test (Global variables) | |
bby <- 0.05 # interval for values of mD1/mD0 | |
xx0 <- 0.01 # smallest value (not 0) | |
xx <- seq(0, 1, by = bby) # values of mD1/mD0 to test | |
xx[1] <- xx0 | |
dx <- bby/20 # x shift for generations | |
# Repeat for different values of mD1 | |
# nrep = 1000, nGen = 4, mD0 = 1, mD1 = x, kappa = 0.1, R = 2.5 | |
computeMetric <- function(nrep, ...){ | |
t0 <- Sys.time() | |
# Set seed for reproducibility | |
set.seed(seed) | |
# Compute median distances | |
vmed <- lapply(xx, function(x) | |
drawDistances(nrep = nrep, mD1 = x, ...)) | |
vCompare <- list() | |
for(igen in 2:ncol(vmed[[1]])){ | |
vCompare[[igen]] <- unlist(lapply(vmed, function(m) mean(m[, igen] < m[, 1], na.rm = TRUE))) | |
} | |
t1 <- Sys.time() | |
cat("The calculation for ", nrep, " replicates took", t1 - t0, "\n") | |
# Return list of results | |
vCompare | |
} | |
# Function to generate the plots from a given list of results | |
plotDistances <- function(vCompare, nrep, saveplot = TRUE, filename, | |
prefix = ""){ | |
# vCompare: output list of distances to the source | |
# nrep: number of replicates behind each point (for error bars) | |
# saveplot: whether to save the plot as PDF | |
# filename: name of the output file | |
# prefix: prefix to be added to the output file name | |
# Initialize plot | |
if(saveplot){ | |
pdf(paste0(prefix, filename, ".pdf"), width = 4, height = 4) | |
} | |
par(xpd = FALSE, las = 1) | |
par(mar = c(3.5, 3.5, 0.5, 0.5), mgp = c(2.25, 0.75, 0)) | |
plot(0, xlim = c(0, 1 + dx * (1 + length(vCompare))), ylim = c(0, 0.65), | |
xaxs = "i", yaxs = "i", | |
xlab = "Relative distance of secondary infections", | |
ylab = "Proportion of draws", | |
type = "n", frame.plot = FALSE) | |
# add horizontal line at 5% | |
abline(h = 0.05, col = gray(0.6), lty = 2) | |
par(xpd = TRUE) | |
# Define colors for the different generations of secondary cases | |
colors <- MetBrewer::met.brewer("Hokusai3", n = length(vCompare)-1, direction = -1) | |
# Plot results for each generation | |
for(igen in 2:length(vCompare)){ | |
v <- vCompare[[igen]] # Subset of results for this generation | |
# Plot proportion of simulations with unlinked closer than linked | |
points(xx + (igen - 2) * dx, v, pch = 16, col = colors[igen - 1], type = "o", lwd = 0.5) | |
# Add error bars | |
arrows(x0 = xx + (igen - 2) * dx, | |
x1 = xx + (igen - 2) * dx, | |
y0 = v + 1.96 * sqrt(v * (1 - v) / nrep), | |
y1 = v - 1.96 * sqrt(v * (1 - v) / nrep), code = 0, | |
col = colors[igen - 1]) | |
} | |
# Legend | |
legend("topright", col = colors, legend = seq_along(colors), title = "Generation", | |
pch = 16, box.lwd = -1) | |
par(xpd = FALSE) | |
if(saveplot) dev.off() | |
} | |
# Function to save output as csv | |
saveOutput <- function(v, filename, prefix = ""){ | |
# Reformat output as data frame | |
tmp <- as.data.frame(cbind(xx, matrix(unlist(v), nrow = length(v[[2]])))) | |
names(tmp) <- c("relDist", 2:ncol(tmp)) | |
# Export it as csv | |
write.csv(tmp, file = paste0(prefix, filename, ".csv"), row.names = FALSE) | |
} | |
# Run simulations #### | |
# |- Parameters #### | |
# Number of replicates | |
nreps <- 1000 | |
# Other parameters | |
ngen <- 4 # Number of unlinked generations | |
md0 <- 1 # Mean displacement for the primary infections | |
kappaNB <- 0.4 # Overdispersion parameter, Negative Binomial | |
kappaPoisson <- 100 # Overdispersion parameter, approx Poisson | |
Re <- 2.5 # Average number of secondary infections | |
sq <- TRUE # Whether to square the distances (TRUE for MSD) | |
mtc <- "mean" # Metric ("mean" for MSD) | |
# |- With Poisson-like distribution of offspring #### | |
# Mean square displacement | |
if(runsims){ | |
v0 <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaPoisson, R = Re, metric = mtc, squared = sq) | |
saveOutput(v0, "props_Poisson") | |
}else{ | |
v0 <- read.csv("props_Poisson.csv") | |
} | |
# |- With negative binomial distribution of offspring number #### | |
# Mean square displacement | |
if(runsims){ | |
vNB <- computeMetric(nrep = nreps, nGen = ngen, mD0 = md0, kappa = kappaNB, R = Re, metric = mtc, squared = sq) | |
saveOutput(vNB, "props_NB") | |
}else{ | |
vNB <- read.csv("props_NB.csv") | |
} | |
# Plot #### | |
plotDistances(v0, nrep = nreps, filename = "props_Poisson") | |
plotDistances(vNB, nrep = nreps, filename = "props_NB") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment