jslefche · February 17, 2021 14:29
diff --git a/README.md b/README.md
diff --git a/covstop.R b/covstop.R
 #' covstop: a function to implement coverage-based stopping
 #' 
 #' @param comm a site (rows)-by-species (columns) data.frame
 #' @param type whether the output should be in number of individuals ("individuals") or in
 #' number of sampling units ("samples")
 #' @param Cn desired level of coverage
 #' @param se whether bootstrapped standard errors should be returned
 #' @param knots a number of knots of computation, default is 40
 #' 
 #' @return the number of individuals or samples required to meet the desired level of coverage
 #' 
 #' @examples
 #' comm <- sample(100)
 #' 
 #' comm[sample(1:100, 50, replace = F)] <- 0
 #' 
 #' comm <- matrix(comm, nrow = 10)
 #' 
 #' covstop(comm)
 #' 
 covstop <- function(comm, type = "samples", Cn = 0.90, se = FALSE, knots = 40) {
  
  if(Cn > 1) stop("Coverage level must be between (0, 1)")
  
  if(type == "samples") comm <- data.frame(N = nrow(comm), t(colSums(comm > 0)))
  
  comm <- split(comm, 1:nrow(comm))
  
  sc <- sapply(comm, function(Spec) {
    
    Spec <- as.numeric(Spec)
    
    endpoint <- 2 * sum(Spec)
  
    if(type == "individuals") {
      
      n <- sum(Spec)
      
      if (endpoint <= n) {
        
          m <- floor(seq(1, endpoint, length = floor(knots)))
          
        } else {
          
          m <- c(floor(seq(1, sum(Spec) - 1, length.out = floor(knots/2) - 1)), sum(Spec), floor(seq(sum(Spec) + 1, to = endpoint, length.out = floor(knots/2))))
          
          }
      
      m <- c(1, m[-1])
      
      C <- Chat.Ind(Spec, m)
      
      # if(se == TRUE) {
      #   
      #   Prob.hat <- EstiBootComm.Ind(Spec)
      #   
      #   Abun.Mat <- rmultinom(nboot, n, Prob.hat)
      #   
      #   error.C <- qnorm(1 - (1 - 0.95) / 2) * apply(apply(Abun.Mat, 2, function(x) Chat.Ind(x, m)), 1, sd, na.rm = TRUE)
      #   
      # }
        
    } else if(type == "samples") {
      
      nT <- Spec[1]
      
      if(endpoint <= nT) {
        
          m <- floor(seq(1, endpoint, length.out = floor(knots)))
          
          } else {
            
            m <- c(floor(seq(1, nT - 1, length.out = floor(knots/2) - 1)), nT, floor(seq(nT + 1, to = endpoint, length.out = floor(knots/2))))
            
            }
      
        m <- c(1, m[-1])
 
      C <- Chat.Sam(Spec, m)
      
    }
    
    
    m[which.min(C <= Cn)]
    
  } )
  
  # if(se == TRUE) 
  #   
  #   paste0("Number of ", type, " needed to reach ", Cn*100, "% coverage: ", as.numeric(sc[which.max(sc)]), "+/-", as.numeric(error.C))
  
  x <- as.numeric(sc[which.max(sc)])
  
  # print(paste0("Number of ", type, " needed to reach ", Cn*100, "% coverage: ", x))
  
  x

 }

 ### helper functions from iNEXT 
 ### credit: T.C. Hsieh
 Chat.Ind <- function (x, m) {
  x <- x[x > 0]
  n <- sum(x)
  f1 <- sum(x == 1)
  f2 <- sum(x == 2)
  f0.hat <- ifelse(f2 == 0, (n - 1)/n * f1 * (f1 - 1)/2, (n - 
                                                            1)/n * f1^2/2/f2)
  A <- ifelse(f1 > 0, n * f0.hat/(n * f0.hat + f1), 1)
  Sub <- function(m) {
    if (m < n) {
      xx <- x[(n - x) >= m]
      out <- 1 - sum(xx/n * exp(lgamma(n - xx + 1) - lgamma(n - 
                                                              xx - m + 1) - lgamma(n) + lgamma(n - m)))
    }
    if (m == n) 
      out <- 1 - f1/n * A
    if (m > n) 
      out <- 1 - f1/n * A^(m - n + 1)
    out
  }
  sapply(m, Sub)
 }

 Chat.Sam <- function(x, t){
  nT <- x[1]
  y <- x[-1]
  y <- y[y>0]
  U <- sum(y)
  Q1 <- sum(y == 1)
  Q2 <- sum(y == 2)
  Q0.hat <- ifelse(Q2 == 0, (nT - 1) / nT * Q1 * (Q1 - 1) / 2, (nT - 1) / nT * Q1 ^ 2/ 2 / Q2)  #estimation of unseen species via Chao2
  A <- ifelse(Q1>0, nT*Q0.hat/(nT*Q0.hat+Q1), 1)
  Sub <- function(t){
    if(t < nT) {
      yy <- y[(nT-y)>=t]
      out <- 1 - sum(yy / U * exp(lgamma(nT-yy+1)-lgamma(nT-yy-t+1)-lgamma(nT)+lgamma(nT-t)))     
    }
    #if(t < nT) out <- 1 - sum(y / U * exp(lchoose(nT - y, t) - lchoose(nT - 1, t)))
    if(t == nT) out <- 1 - Q1 / U * A
    if(t > nT) out <- 1 - Q1 / U * A^(t - nT + 1)
    out
  }
  sapply(t, Sub)		
 }

 EstiBootComm.Ind <- function(Spec)
 {
  Sobs <- sum(Spec > 0)   #observed species
  n <- sum(Spec)        #sample size
  f1 <- sum(Spec == 1)   #singleton 
  f2 <- sum(Spec == 2)   #doubleton
  f0.hat <- ifelse(f2 == 0, (n - 1) / n * f1 * (f1 - 1) / 2, (n - 1) / n * f1 ^ 2/ 2 / f2)  #estimation of unseen species via Chao1
  A <- ifelse(f1>0, n*f0.hat/(n*f0.hat+f1), 1)
  a <- f1/n*A
  b <- sum(Spec / n * (1 - Spec / n) ^ n)
  if(f0.hat==0){
    w <- 0
    if(sum(Spec>0)==1){
      warning("This site has only one species. Estimation is not robust.")
    }
  }else{
    w <- a / b      	#adjusted factor for rare species in the sample
  }
  Prob.hat <- Spec / n * (1 - w * (1 - Spec / n) ^ n)					#estimation of relative abundance of observed species in the sample
  Prob.hat.Unse <- rep(a/ceiling(f0.hat), ceiling(f0.hat))  	#estimation of relative abundance of unseen species in the sample
  return(sort(c(Prob.hat, Prob.hat.Unse), decreasing=TRUE))		  							#Output: a vector of estimated relative abundance
 }

 EstiBootComm.Sam <- function(Spec)
 {
  nT <- Spec[1]
  Spec <- Spec[-1]
  Sobs <- sum(Spec > 0)   #observed species
  Q1 <- sum(Spec == 1) 	#singleton 
  Q2 <- sum(Spec == 2) 	#doubleton
  Q0.hat <- ifelse(Q2 == 0, (nT - 1) / nT * Q1 * (Q1 - 1) / 2, (nT - 1) / nT * Q1 ^ 2/ 2 / Q2)	#estimation of unseen species via Chao2
  A <- ifelse(Q1>0, nT*Q0.hat/(nT*Q0.hat+Q1), 1)
  a <- Q1/nT*A
  b <- sum(Spec / nT * (1 - Spec / nT) ^ nT)
  
  if(Q0.hat==0){
    w <- 0
    if(sum(Spec>0)==1){
      warning("This site has only one species. Estimation is not robust.")
    }
  }else{
    w <- a / b      	#adjusted factor for rare species in the sample
  }
  
  Prob.hat <- Spec / nT * (1 - w * (1 - Spec / nT) ^ nT)					#estimation of detection probability of observed species in the sample
  Prob.hat.Unse <- rep(a/ceiling(Q0.hat), ceiling(Q0.hat))  	#estimation of detection probability of unseen species in the sample
  return(sort(c(Prob.hat, Prob.hat.Unse), decreasing=TRUE))									#Output: a vector of estimated detection probability
 }
	#' covstop: a function to implement coverage-based stopping
	#'
	#' @param comm a site (rows)-by-species (columns) data.frame
	#' @param type whether the output should be in number of individuals ("individuals") or in
	#' number of sampling units ("samples")
	#' @param Cn desired level of coverage
	#' @param se whether bootstrapped standard errors should be returned
	#' @param knots a number of knots of computation, default is 40
	#'
	#' @return the number of individuals or samples required to meet the desired level of coverage
	#'
	#' @examples
	#' comm <- sample(100)
	#'
	#' comm[sample(1:100, 50, replace = F)] <- 0
	#'
	#' comm <- matrix(comm, nrow = 10)
	#'
	#' covstop(comm)
	#'
	covstop <- function(comm, type = "samples", Cn = 0.90, se = FALSE, knots = 40) {

	if(Cn > 1) stop("Coverage level must be between (0, 1)")

	if(type == "samples") comm <- data.frame(N = nrow(comm), t(colSums(comm > 0)))

	comm <- split(comm, 1:nrow(comm))

	sc <- sapply(comm, function(Spec) {

	Spec <- as.numeric(Spec)

	endpoint <- 2 * sum(Spec)

	if(type == "individuals") {

	n <- sum(Spec)

	if (endpoint <= n) {

	m <- floor(seq(1, endpoint, length = floor(knots)))

	} else {

	m <- c(floor(seq(1, sum(Spec) - 1, length.out = floor(knots/2) - 1)), sum(Spec), floor(seq(sum(Spec) + 1, to = endpoint, length.out = floor(knots/2))))

	}

	m <- c(1, m[-1])

	C <- Chat.Ind(Spec, m)

	# if(se == TRUE) {
	#
	# Prob.hat <- EstiBootComm.Ind(Spec)
	#
	# Abun.Mat <- rmultinom(nboot, n, Prob.hat)
	#
	# error.C <- qnorm(1 - (1 - 0.95) / 2) * apply(apply(Abun.Mat, 2, function(x) Chat.Ind(x, m)), 1, sd, na.rm = TRUE)
	#
	# }

	} else if(type == "samples") {

	nT <- Spec[1]

	if(endpoint <= nT) {

	m <- floor(seq(1, endpoint, length.out = floor(knots)))

	} else {

	m <- c(floor(seq(1, nT - 1, length.out = floor(knots/2) - 1)), nT, floor(seq(nT + 1, to = endpoint, length.out = floor(knots/2))))

	}

	m <- c(1, m[-1])

	C <- Chat.Sam(Spec, m)

	}


	m[which.min(C <= Cn)]

	} )

	# if(se == TRUE)
	#
	# paste0("Number of ", type, " needed to reach ", Cn*100, "% coverage: ", as.numeric(sc[which.max(sc)]), "+/-", as.numeric(error.C))

	x <- as.numeric(sc[which.max(sc)])

	# print(paste0("Number of ", type, " needed to reach ", Cn*100, "% coverage: ", x))

	x

	}

	### helper functions from iNEXT
	### credit: T.C. Hsieh
	Chat.Ind <- function (x, m) {
	x <- x[x > 0]
	n <- sum(x)
	f1 <- sum(x == 1)
	f2 <- sum(x == 2)
	f0.hat <- ifelse(f2 == 0, (n - 1)/n * f1 * (f1 - 1)/2, (n -
	1)/n * f1^2/2/f2)
	A <- ifelse(f1 > 0, n * f0.hat/(n * f0.hat + f1), 1)
	Sub <- function(m) {
	if (m < n) {
	xx <- x[(n - x) >= m]
	out <- 1 - sum(xx/n * exp(lgamma(n - xx + 1) - lgamma(n -
	xx - m + 1) - lgamma(n) + lgamma(n - m)))
	}
	if (m == n)
	out <- 1 - f1/n * A
	if (m > n)
	out <- 1 - f1/n * A^(m - n + 1)
	out
	}
	sapply(m, Sub)
	}

	Chat.Sam <- function(x, t){
	nT <- x[1]
	y <- x[-1]
	y <- y[y>0]
	U <- sum(y)
	Q1 <- sum(y == 1)
	Q2 <- sum(y == 2)
	Q0.hat <- ifelse(Q2 == 0, (nT - 1) / nT * Q1 * (Q1 - 1) / 2, (nT - 1) / nT * Q1 ^ 2/ 2 / Q2) #estimation of unseen species via Chao2
	A <- ifelse(Q1>0, nTQ0.hat/(nTQ0.hat+Q1), 1)
	Sub <- function(t){
	if(t < nT) {
	yy <- y[(nT-y)>=t]
	out <- 1 - sum(yy / U * exp(lgamma(nT-yy+1)-lgamma(nT-yy-t+1)-lgamma(nT)+lgamma(nT-t)))
	}
	#if(t < nT) out <- 1 - sum(y / U * exp(lchoose(nT - y, t) - lchoose(nT - 1, t)))
	if(t == nT) out <- 1 - Q1 / U * A
	if(t > nT) out <- 1 - Q1 / U * A^(t - nT + 1)
	out
	}
	sapply(t, Sub)
	}

	EstiBootComm.Ind <- function(Spec)
	{
	Sobs <- sum(Spec > 0) #observed species
	n <- sum(Spec) #sample size
	f1 <- sum(Spec == 1) #singleton
	f2 <- sum(Spec == 2) #doubleton
	f0.hat <- ifelse(f2 == 0, (n - 1) / n * f1 * (f1 - 1) / 2, (n - 1) / n * f1 ^ 2/ 2 / f2) #estimation of unseen species via Chao1
	A <- ifelse(f1>0, nf0.hat/(nf0.hat+f1), 1)
	a <- f1/n*A
	b <- sum(Spec / n * (1 - Spec / n) ^ n)
	if(f0.hat==0){
	w <- 0
	if(sum(Spec>0)==1){
	warning("This site has only one species. Estimation is not robust.")
	}
	}else{
	w <- a / b #adjusted factor for rare species in the sample
	}
	Prob.hat <- Spec / n * (1 - w * (1 - Spec / n) ^ n) #estimation of relative abundance of observed species in the sample
	Prob.hat.Unse <- rep(a/ceiling(f0.hat), ceiling(f0.hat)) #estimation of relative abundance of unseen species in the sample
	return(sort(c(Prob.hat, Prob.hat.Unse), decreasing=TRUE)) #Output: a vector of estimated relative abundance
	}

	EstiBootComm.Sam <- function(Spec)
	{
	nT <- Spec[1]
	Spec <- Spec[-1]
	Sobs <- sum(Spec > 0) #observed species
	Q1 <- sum(Spec == 1) #singleton
	Q2 <- sum(Spec == 2) #doubleton
	Q0.hat <- ifelse(Q2 == 0, (nT - 1) / nT * Q1 * (Q1 - 1) / 2, (nT - 1) / nT * Q1 ^ 2/ 2 / Q2) #estimation of unseen species via Chao2
	A <- ifelse(Q1>0, nTQ0.hat/(nTQ0.hat+Q1), 1)
	a <- Q1/nT*A
	b <- sum(Spec / nT * (1 - Spec / nT) ^ nT)

	if(Q0.hat==0){
	w <- 0
	if(sum(Spec>0)==1){
	warning("This site has only one species. Estimation is not robust.")
	}
	}else{
	w <- a / b #adjusted factor for rare species in the sample
	}

	Prob.hat <- Spec / nT * (1 - w * (1 - Spec / nT) ^ nT) #estimation of detection probability of observed species in the sample
	Prob.hat.Unse <- rep(a/ceiling(Q0.hat), ceiling(Q0.hat)) #estimation of detection probability of unseen species in the sample
	return(sort(c(Prob.hat, Prob.hat.Unse), decreasing=TRUE)) #Output: a vector of estimated detection probability
	}