MJacobs1985 · September 30, 2021 18:35
diff --git a/Full simulation b/Full simulation
 #### SIMULATION ESTIMATING THE INFLUENCE OF RANDOMLY SELECTING FISH TAKING INTO ACCOUNT THE EFFECT SIZE ####
 ## Now, lets see what happens if we increase the population mean of a diet (so the true mean, not in a sample)
 ## We keep the variation proportionate to the mean using the coefficient of variation (sd / mean)
 #  First, change columns names of population mean and population sd to make sure we do not make mistakes
 samplemean<-tapply(ARCdata$gain, ARCdata$Tank, mean, na.rm=TRUE)
 samplesd<-tapply(ARCdata$gain, ARCdata$Tank, sd, na.rm=TRUE)
 popmean<-as.array(popmean)
 popsd<-as.array(popsd)
 for (i in 1:12){
  rownames(popmean)[i]<-rownames(samplemean)[i]
  rownames(popsd)[i]<-rownames(samplesd)[i]
 }

 ### Get CV for each of the tanks based on population values
 print(popcv<-(popsd/popmean))

 #### NOW SIMULATE EVERYTHING AND PUT IT IN A CURVE ####
 ##Simulated Power Curve for Effect Size (ES) by number of fish random selected (k):
 es=seq(0,51,1)    # added gain on top of tanks in diet D (0-50 by 1) - this will affect the F-value 
 k=35              # number of maximum fish randomly selected
 n=1000            # number of maximum simulations per k
 ## The number of simulations are of the magnitude es*k*n --> 50*35*100=178500 

 x=array(data=NA, 
        dim=c(n,k,length(es)), 
        dimnames =list(NULL,NULL,es)) # 3-dimensional matrix in which to store the values

 for (q in es) {
  for (j in 1:k){
      for (i in 1:n){
        ARCdatasim<-ARCdata
        ARCdatasim$gain[ARCdatasim$Tank==121]<-rnorm(35, mean=popmean[2]+q, sd=((popmean[2]+q)*popcv[2]))
        ARCdatasim$gain[ARCdatasim$Tank==126]<-rnorm(35, mean=popmean[6]+q, sd=((popmean[6]+q)*popcv[6]))
        ARCdatasim$gain[ARCdatasim$Tank==129]<-rnorm(35, mean=popmean[9]+q, sd=((popmean[9]+q)*popcv[9]))
        ARCdatanew<-lapply(split(ARCdatasim, ARCdatasim$Tank),
                           function(subdf) 
                             subdf[sample(1:nrow(subdf), j, replace=FALSE),]) 
        new<-do.call(rbind, lapply(ARCdatanew, data.frame, stringsAsFactors=FALSE))
        fit<-lm(gain~Diet,data=new)
        x[i,j,q]<-unlist(summary(aov(fit)))[[9]]
              }
      }
 }

 # Check if analyses went well, by looking at meas at different levels
 apply(x,3, mean) # if different then okay

 ## Summarize power values
 power <- function(x){sum(x<=0.05)/n *100}
 powervalue<-as.data.frame(apply(x,c(2:3), FUN=power)) # provides for each effect size the power of each k
 powervalue<-powervalue[,c(0:51)]
 powervalue<-setDT(powervalue, keep.rownames = TRUE)[]
 powervalue<-as.data.frame(powervalue)
 colnames(powervalue)[1]<-"N"
 powervalue<-melt(powervalue)
 colnames(powervalue)[2]<-"ES"
 colnames(powervalue)[3]<-"Power"
 powervalue$ES<-as.numeric(levels(powervalue$ES))[powervalue$ES]
	#### SIMULATION ESTIMATING THE INFLUENCE OF RANDOMLY SELECTING FISH TAKING INTO ACCOUNT THE EFFECT SIZE ####
	## Now, lets see what happens if we increase the population mean of a diet (so the true mean, not in a sample)
	## We keep the variation proportionate to the mean using the coefficient of variation (sd / mean)
	# First, change columns names of population mean and population sd to make sure we do not make mistakes
	samplemean<-tapply(ARCdata$gain, ARCdata$Tank, mean, na.rm=TRUE)
	samplesd<-tapply(ARCdata$gain, ARCdata$Tank, sd, na.rm=TRUE)
	popmean<-as.array(popmean)
	popsd<-as.array(popsd)
	for (i in 1:12){
	rownames(popmean)[i]<-rownames(samplemean)[i]
	rownames(popsd)[i]<-rownames(samplesd)[i]
	}

	### Get CV for each of the tanks based on population values
	print(popcv<-(popsd/popmean))

	#### NOW SIMULATE EVERYTHING AND PUT IT IN A CURVE ####
	##Simulated Power Curve for Effect Size (ES) by number of fish random selected (k):
	es=seq(0,51,1) # added gain on top of tanks in diet D (0-50 by 1) - this will affect the F-value
	k=35 # number of maximum fish randomly selected
	n=1000 # number of maximum simulations per k
	## The number of simulations are of the magnitude eskn --> 5035100=178500

	x=array(data=NA,
	dim=c(n,k,length(es)),
	dimnames =list(NULL,NULL,es)) # 3-dimensional matrix in which to store the values

	for (q in es) {
	for (j in 1:k){
	for (i in 1:n){
	ARCdatasim<-ARCdata
	ARCdatasim$gain[ARCdatasim$Tank==121]<-rnorm(35, mean=popmean[2]+q, sd=((popmean[2]+q)*popcv[2]))
	ARCdatasim$gain[ARCdatasim$Tank==126]<-rnorm(35, mean=popmean[6]+q, sd=((popmean[6]+q)*popcv[6]))
	ARCdatasim$gain[ARCdatasim$Tank==129]<-rnorm(35, mean=popmean[9]+q, sd=((popmean[9]+q)*popcv[9]))
	ARCdatanew<-lapply(split(ARCdatasim, ARCdatasim$Tank),
	function(subdf)
	subdf[sample(1:nrow(subdf), j, replace=FALSE),])
	new<-do.call(rbind, lapply(ARCdatanew, data.frame, stringsAsFactors=FALSE))
	fit<-lm(gain~Diet,data=new)
	x[i,j,q]<-unlist(summary(aov(fit)))[[9]]
	}
	}
	}

	# Check if analyses went well, by looking at meas at different levels
	apply(x,3, mean) # if different then okay

	## Summarize power values
	power <- function(x){sum(x<=0.05)/n *100}
	powervalue<-as.data.frame(apply(x,c(2:3), FUN=power)) # provides for each effect size the power of each k
	powervalue<-powervalue[,c(0:51)]
	powervalue<-setDT(powervalue, keep.rownames = TRUE)[]
	powervalue<-as.data.frame(powervalue)
	colnames(powervalue)[1]<-"N"
	powervalue<-melt(powervalue)
	colnames(powervalue)[2]<-"ES"
	colnames(powervalue)[3]<-"Power"
	powervalue$ES<-as.numeric(levels(powervalue$ES))[powervalue$ES]