rBatt · July 12, 2016 17:46
diff --git a/bootstrap_overlap.R b/bootstrap_overlap.R
 set.seed(1337)

 mean_true_sppA <- 0.25
 mean_true_sppB <- 0.75
 grid_w <- 5
 grid_h <- 5
 environment <- seq(0.8, 1.2, length.out=grid_w*grid_h) # makes it so the 2 distributions are not independent
 n_reps <- 20

 probs_true_sppA <- plogis(rnorm(grid_w*grid_h, qlogis(mean_true_sppA*environment))) # the GAM model estimates these values
 probs_true_sppB <- plogis(rnorm(grid_w*grid_h, qlogis(mean_true_sppB*environment)))
 probs_true_matA <- matrix(probs_true_sppA, nrow=grid_h, ncol=grid_w) # arrange in matrix to emphasize spatial
 probs_true_matB <- matrix(probs_true_sppB, nrow=grid_h, ncol=grid_w)

 generate_realization <- function(p){matrix(rbinom(grid_w*grid_h, size=1, prob=c(p)), nrow=grid_h, ncol=grid_w)}
 data_sppA <- generate_realization(probs_true_sppA) # geographic presence/ absence of species A
 data_sppB <- generate_realization(probs_true_sppB) # p/a species B
 range_true_sppA <- mean(data_sppA) # % occupancy
 range_true_sppB <- mean(data_sppB)
 x_in_y <- function(x, y){mean(x[which(y==1L, arr.ind=TRUE)])}
 A_in_B_data <- x_in_y(data_sppA,data_sppB) # how many sites occupied by B contain A
 B_in_A_data <- x_in_y(data_sppB,data_sppA)


 # ==============================================================================
 # = Does it matter if we calculate overlap or range from 1/0 vs probabilities? =
 # ==============================================================================
 replicate_dataA <- replicate(n_reps, generate_realization(probs_true_matA)) # this is essentially the bootstrapping step
 replicate_dataB <- replicate(n_reps, generate_realization(probs_true_matB))

 replicate_dataA_list <- unlist(apply(replicate_dataA, 3, list), rec=F) # list of matrices, instead of array
 replicate_dataB_list <- unlist(apply(replicate_dataB, 3, list), rec=F)

 # ---- range for each species, from 1/0 ----
 range_repA <- mean(replicate_dataA) # this is what you'd do with the bootstrap/ monte carlo sims to get range
 range_repB <- mean(replicate_dataB)

 # range for each species, from probabilities
 range_probA <- mean(probs_true_matA)
 range_probB <- mean(probs_true_matB)

 # ---- overlap, from 1/0 ----
 A_in_B_rep <- mean(mapply(x_in_y, replicate_dataA_list, replicate_dataB_list)) # this is what you'd do with the boostrap/ monte carlo sims to get overlap
 B_in_A_rep <- mean(mapply(x_in_y, replicate_dataB_list, replicate_dataA_list))

 # ---- overlap, from probabilities ----
 # okay, this depends on whether or not the two are independent
 # Generally, the intersection of A with B (A in B) = p(A|B)*p(B)
 # If we naively assume A & B are independent, then
 # p(A|B) = p(A), and then (A in B) = p(A)*p(B);
 # therefore, (B in A) = p(B)*p(A) = (A in B)
 mean(probs_true_matA*probs_true_matB)
	set.seed(1337)

	mean_true_sppA <- 0.25
	mean_true_sppB <- 0.75
	grid_w <- 5
	grid_h <- 5
	environment <- seq(0.8, 1.2, length.out=grid_w*grid_h) # makes it so the 2 distributions are not independent
	n_reps <- 20

	probs_true_sppA <- plogis(rnorm(grid_wgrid_h, qlogis(mean_true_sppAenvironment))) # the GAM model estimates these values
	probs_true_sppB <- plogis(rnorm(grid_wgrid_h, qlogis(mean_true_sppBenvironment)))
	probs_true_matA <- matrix(probs_true_sppA, nrow=grid_h, ncol=grid_w) # arrange in matrix to emphasize spatial
	probs_true_matB <- matrix(probs_true_sppB, nrow=grid_h, ncol=grid_w)

	generate_realization <- function(p){matrix(rbinom(grid_w*grid_h, size=1, prob=c(p)), nrow=grid_h, ncol=grid_w)}
	data_sppA <- generate_realization(probs_true_sppA) # geographic presence/ absence of species A
	data_sppB <- generate_realization(probs_true_sppB) # p/a species B
	range_true_sppA <- mean(data_sppA) # % occupancy
	range_true_sppB <- mean(data_sppB)
	x_in_y <- function(x, y){mean(x[which(y==1L, arr.ind=TRUE)])}
	A_in_B_data <- x_in_y(data_sppA,data_sppB) # how many sites occupied by B contain A
	B_in_A_data <- x_in_y(data_sppB,data_sppA)


	# ==============================================================================
	# = Does it matter if we calculate overlap or range from 1/0 vs probabilities? =
	# ==============================================================================
	replicate_dataA <- replicate(n_reps, generate_realization(probs_true_matA)) # this is essentially the bootstrapping step
	replicate_dataB <- replicate(n_reps, generate_realization(probs_true_matB))

	replicate_dataA_list <- unlist(apply(replicate_dataA, 3, list), rec=F) # list of matrices, instead of array
	replicate_dataB_list <- unlist(apply(replicate_dataB, 3, list), rec=F)

	# ---- range for each species, from 1/0 ----
	range_repA <- mean(replicate_dataA) # this is what you'd do with the bootstrap/ monte carlo sims to get range
	range_repB <- mean(replicate_dataB)

	# range for each species, from probabilities
	range_probA <- mean(probs_true_matA)
	range_probB <- mean(probs_true_matB)

	# ---- overlap, from 1/0 ----
	A_in_B_rep <- mean(mapply(x_in_y, replicate_dataA_list, replicate_dataB_list)) # this is what you'd do with the boostrap/ monte carlo sims to get overlap
	B_in_A_rep <- mean(mapply(x_in_y, replicate_dataB_list, replicate_dataA_list))

	# ---- overlap, from probabilities ----
	# okay, this depends on whether or not the two are independent
	# Generally, the intersection of A with B (A in B) = p(A\|B)*p(B)
	# If we naively assume A & B are independent, then
	# p(A\|B) = p(A), and then (A in B) = p(A)*p(B);
	# therefore, (B in A) = p(B)*p(A) = (A in B)
	mean(probs_true_matA*probs_true_matB)