Skip to content

Instantly share code, notes, and snippets.

@rBatt
Created July 12, 2016 17:46
Show Gist options
  • Save rBatt/0d67f3cab8279da17edd6dab593b9c14 to your computer and use it in GitHub Desktop.
Save rBatt/0d67f3cab8279da17edd6dab593b9c14 to your computer and use it in GitHub Desktop.
how to bootstrap the overlap data, and also checking out why you can't just use the probabilities
set.seed(1337)
mean_true_sppA <- 0.25
mean_true_sppB <- 0.75
grid_w <- 5
grid_h <- 5
environment <- seq(0.8, 1.2, length.out=grid_w*grid_h) # makes it so the 2 distributions are not independent
n_reps <- 20
probs_true_sppA <- plogis(rnorm(grid_w*grid_h, qlogis(mean_true_sppA*environment))) # the GAM model estimates these values
probs_true_sppB <- plogis(rnorm(grid_w*grid_h, qlogis(mean_true_sppB*environment)))
probs_true_matA <- matrix(probs_true_sppA, nrow=grid_h, ncol=grid_w) # arrange in matrix to emphasize spatial
probs_true_matB <- matrix(probs_true_sppB, nrow=grid_h, ncol=grid_w)
generate_realization <- function(p){matrix(rbinom(grid_w*grid_h, size=1, prob=c(p)), nrow=grid_h, ncol=grid_w)}
data_sppA <- generate_realization(probs_true_sppA) # geographic presence/ absence of species A
data_sppB <- generate_realization(probs_true_sppB) # p/a species B
range_true_sppA <- mean(data_sppA) # % occupancy
range_true_sppB <- mean(data_sppB)
x_in_y <- function(x, y){mean(x[which(y==1L, arr.ind=TRUE)])}
A_in_B_data <- x_in_y(data_sppA,data_sppB) # how many sites occupied by B contain A
B_in_A_data <- x_in_y(data_sppB,data_sppA)
# ==============================================================================
# = Does it matter if we calculate overlap or range from 1/0 vs probabilities? =
# ==============================================================================
replicate_dataA <- replicate(n_reps, generate_realization(probs_true_matA)) # this is essentially the bootstrapping step
replicate_dataB <- replicate(n_reps, generate_realization(probs_true_matB))
replicate_dataA_list <- unlist(apply(replicate_dataA, 3, list), rec=F) # list of matrices, instead of array
replicate_dataB_list <- unlist(apply(replicate_dataB, 3, list), rec=F)
# ---- range for each species, from 1/0 ----
range_repA <- mean(replicate_dataA) # this is what you'd do with the bootstrap/ monte carlo sims to get range
range_repB <- mean(replicate_dataB)
# range for each species, from probabilities
range_probA <- mean(probs_true_matA)
range_probB <- mean(probs_true_matB)
# ---- overlap, from 1/0 ----
A_in_B_rep <- mean(mapply(x_in_y, replicate_dataA_list, replicate_dataB_list)) # this is what you'd do with the boostrap/ monte carlo sims to get overlap
B_in_A_rep <- mean(mapply(x_in_y, replicate_dataB_list, replicate_dataA_list))
# ---- overlap, from probabilities ----
# okay, this depends on whether or not the two are independent
# Generally, the intersection of A with B (A in B) = p(A|B)*p(B)
# If we naively assume A & B are independent, then
# p(A|B) = p(A), and then (A in B) = p(A)*p(B);
# therefore, (B in A) = p(B)*p(A) = (A in B)
mean(probs_true_matA*probs_true_matB)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment