|
#' covstop: a function to implement coverage-based stopping |
|
#' |
|
#' @param comm a site (rows)-by-species (columns) data.frame |
|
#' @param type whether the output should be in number of individuals ("individuals") or in |
|
#' number of sampling units ("samples") |
|
#' @param Cn desired level of coverage |
|
#' @param se whether bootstrapped standard errors should be returned |
|
#' @param knots a number of knots of computation, default is 40 |
|
#' |
|
#' @return the number of individuals or samples required to meet the desired level of coverage |
|
#' |
|
#' @examples |
|
#' comm <- sample(100) |
|
#' |
|
#' comm[sample(1:100, 50, replace = F)] <- 0 |
|
#' |
|
#' comm <- matrix(comm, nrow = 10) |
|
#' |
|
#' covstop(comm) |
|
#' |
|
covstop <- function(comm, type = "samples", Cn = 0.90, se = FALSE, knots = 40) { |
|
|
|
if(Cn > 1) stop("Coverage level must be between (0, 1)") |
|
|
|
if(type == "samples") comm <- data.frame(N = nrow(comm), t(colSums(comm > 0))) |
|
|
|
comm <- split(comm, 1:nrow(comm)) |
|
|
|
sc <- sapply(comm, function(Spec) { |
|
|
|
Spec <- as.numeric(Spec) |
|
|
|
endpoint <- 2 * sum(Spec) |
|
|
|
if(type == "individuals") { |
|
|
|
n <- sum(Spec) |
|
|
|
if (endpoint <= n) { |
|
|
|
m <- floor(seq(1, endpoint, length = floor(knots))) |
|
|
|
} else { |
|
|
|
m <- c(floor(seq(1, sum(Spec) - 1, length.out = floor(knots/2) - 1)), sum(Spec), floor(seq(sum(Spec) + 1, to = endpoint, length.out = floor(knots/2)))) |
|
|
|
} |
|
|
|
m <- c(1, m[-1]) |
|
|
|
C <- Chat.Ind(Spec, m) |
|
|
|
# if(se == TRUE) { |
|
# |
|
# Prob.hat <- EstiBootComm.Ind(Spec) |
|
# |
|
# Abun.Mat <- rmultinom(nboot, n, Prob.hat) |
|
# |
|
# error.C <- qnorm(1 - (1 - 0.95) / 2) * apply(apply(Abun.Mat, 2, function(x) Chat.Ind(x, m)), 1, sd, na.rm = TRUE) |
|
# |
|
# } |
|
|
|
} else if(type == "samples") { |
|
|
|
nT <- Spec[1] |
|
|
|
if(endpoint <= nT) { |
|
|
|
m <- floor(seq(1, endpoint, length.out = floor(knots))) |
|
|
|
} else { |
|
|
|
m <- c(floor(seq(1, nT - 1, length.out = floor(knots/2) - 1)), nT, floor(seq(nT + 1, to = endpoint, length.out = floor(knots/2)))) |
|
|
|
} |
|
|
|
m <- c(1, m[-1]) |
|
|
|
C <- Chat.Sam(Spec, m) |
|
|
|
} |
|
|
|
|
|
m[which.min(C <= Cn)] |
|
|
|
} ) |
|
|
|
# if(se == TRUE) |
|
# |
|
# paste0("Number of ", type, " needed to reach ", Cn*100, "% coverage: ", as.numeric(sc[which.max(sc)]), "+/-", as.numeric(error.C)) |
|
|
|
x <- as.numeric(sc[which.max(sc)]) |
|
|
|
# print(paste0("Number of ", type, " needed to reach ", Cn*100, "% coverage: ", x)) |
|
|
|
x |
|
|
|
} |
|
|
|
### helper functions from iNEXT |
|
### credit: T.C. Hsieh |
|
Chat.Ind <- function (x, m) { |
|
x <- x[x > 0] |
|
n <- sum(x) |
|
f1 <- sum(x == 1) |
|
f2 <- sum(x == 2) |
|
f0.hat <- ifelse(f2 == 0, (n - 1)/n * f1 * (f1 - 1)/2, (n - |
|
1)/n * f1^2/2/f2) |
|
A <- ifelse(f1 > 0, n * f0.hat/(n * f0.hat + f1), 1) |
|
Sub <- function(m) { |
|
if (m < n) { |
|
xx <- x[(n - x) >= m] |
|
out <- 1 - sum(xx/n * exp(lgamma(n - xx + 1) - lgamma(n - |
|
xx - m + 1) - lgamma(n) + lgamma(n - m))) |
|
} |
|
if (m == n) |
|
out <- 1 - f1/n * A |
|
if (m > n) |
|
out <- 1 - f1/n * A^(m - n + 1) |
|
out |
|
} |
|
sapply(m, Sub) |
|
} |
|
|
|
Chat.Sam <- function(x, t){ |
|
nT <- x[1] |
|
y <- x[-1] |
|
y <- y[y>0] |
|
U <- sum(y) |
|
Q1 <- sum(y == 1) |
|
Q2 <- sum(y == 2) |
|
Q0.hat <- ifelse(Q2 == 0, (nT - 1) / nT * Q1 * (Q1 - 1) / 2, (nT - 1) / nT * Q1 ^ 2/ 2 / Q2) #estimation of unseen species via Chao2 |
|
A <- ifelse(Q1>0, nT*Q0.hat/(nT*Q0.hat+Q1), 1) |
|
Sub <- function(t){ |
|
if(t < nT) { |
|
yy <- y[(nT-y)>=t] |
|
out <- 1 - sum(yy / U * exp(lgamma(nT-yy+1)-lgamma(nT-yy-t+1)-lgamma(nT)+lgamma(nT-t))) |
|
} |
|
#if(t < nT) out <- 1 - sum(y / U * exp(lchoose(nT - y, t) - lchoose(nT - 1, t))) |
|
if(t == nT) out <- 1 - Q1 / U * A |
|
if(t > nT) out <- 1 - Q1 / U * A^(t - nT + 1) |
|
out |
|
} |
|
sapply(t, Sub) |
|
} |
|
|
|
EstiBootComm.Ind <- function(Spec) |
|
{ |
|
Sobs <- sum(Spec > 0) #observed species |
|
n <- sum(Spec) #sample size |
|
f1 <- sum(Spec == 1) #singleton |
|
f2 <- sum(Spec == 2) #doubleton |
|
f0.hat <- ifelse(f2 == 0, (n - 1) / n * f1 * (f1 - 1) / 2, (n - 1) / n * f1 ^ 2/ 2 / f2) #estimation of unseen species via Chao1 |
|
A <- ifelse(f1>0, n*f0.hat/(n*f0.hat+f1), 1) |
|
a <- f1/n*A |
|
b <- sum(Spec / n * (1 - Spec / n) ^ n) |
|
if(f0.hat==0){ |
|
w <- 0 |
|
if(sum(Spec>0)==1){ |
|
warning("This site has only one species. Estimation is not robust.") |
|
} |
|
}else{ |
|
w <- a / b #adjusted factor for rare species in the sample |
|
} |
|
Prob.hat <- Spec / n * (1 - w * (1 - Spec / n) ^ n) #estimation of relative abundance of observed species in the sample |
|
Prob.hat.Unse <- rep(a/ceiling(f0.hat), ceiling(f0.hat)) #estimation of relative abundance of unseen species in the sample |
|
return(sort(c(Prob.hat, Prob.hat.Unse), decreasing=TRUE)) #Output: a vector of estimated relative abundance |
|
} |
|
|
|
EstiBootComm.Sam <- function(Spec) |
|
{ |
|
nT <- Spec[1] |
|
Spec <- Spec[-1] |
|
Sobs <- sum(Spec > 0) #observed species |
|
Q1 <- sum(Spec == 1) #singleton |
|
Q2 <- sum(Spec == 2) #doubleton |
|
Q0.hat <- ifelse(Q2 == 0, (nT - 1) / nT * Q1 * (Q1 - 1) / 2, (nT - 1) / nT * Q1 ^ 2/ 2 / Q2) #estimation of unseen species via Chao2 |
|
A <- ifelse(Q1>0, nT*Q0.hat/(nT*Q0.hat+Q1), 1) |
|
a <- Q1/nT*A |
|
b <- sum(Spec / nT * (1 - Spec / nT) ^ nT) |
|
|
|
if(Q0.hat==0){ |
|
w <- 0 |
|
if(sum(Spec>0)==1){ |
|
warning("This site has only one species. Estimation is not robust.") |
|
} |
|
}else{ |
|
w <- a / b #adjusted factor for rare species in the sample |
|
} |
|
|
|
Prob.hat <- Spec / nT * (1 - w * (1 - Spec / nT) ^ nT) #estimation of detection probability of observed species in the sample |
|
Prob.hat.Unse <- rep(a/ceiling(Q0.hat), ceiling(Q0.hat)) #estimation of detection probability of unseen species in the sample |
|
return(sort(c(Prob.hat, Prob.hat.Unse), decreasing=TRUE)) #Output: a vector of estimated detection probability |
|
} |