anpefi · September 22, 2015 08:33
diff --git a/AMOVA3states.R b/AMOVA3states.R
 ### AMOVA between the three methylation types obtained by the package msap


 # AMOVA requires a distance matrix between all samples. 
 # Such distance matrix could be only be built with quantitative data or with binary data. 
 # In the case of three category variables, we cannot set a distance between two samples 
 # (is distance between pattern I and III larger than between II and III?). 
 # This is why population studies of MSAP needs to join patterns II and III as methylated in order to 
 # build a binary variable to be analysed by AMOVA/PCoA. 
 # Anyway, we can made a hack *assuming that distance between different states is equal to 1 
 # (independently of which states they are)*. 
 # The interpretation of such a distance matrix would be tricky as you give the same value to I-II difference 
 # that to II-III or I-III. 
 # For this we could use the Gower (1971) dissimilarity for mixed variables and then apply the 
 # AMOVA function implmented in msap.

 ### HOW TO USE
 # 1. run msap for your data and store it in an object (i.e. test1): 
 #       test1 <- msap("YOURDATA.csv", name="TEST", do.pcoa = F, do.amova = F, do.cluster = F, no.bands = "u", do.shannon = F )
 # 2. Source this script and run the AMOVA3states() function on test1
 #	source("AMOVA3states.R")
 #	AMOVA3states(test1)



 #There is a function in the package FD to compute Gower dissimilairty (gowdis)
 require(FD)  
 # We will use the function diffAmova from msap, although it is not exported
 # in the msap NAMESPACE, so we should source it from my web:
 source(file = "http://webs.uvigo.es/anpefi/scripts/diffAmova.R" )

 #We also need an adaptation of the pcoa() function
 pcoa <- function(DM, groups, name){
  ntt <- length(levels(groups))
  #PCoA
  pcol <- cmdscale(DM, eig=T)

 	var <- pcol$eig / sum(pcol$eig) *100
 	var
 	var1 <- round(var[1], digits=1)
 	var2 <- round(var[2], digits=1)

 	pcol$points<-as.data.frame(pcol$points)
 	spcoo<-split(pcol$points, groups)
  maxX <- max(pcol$points$V1)
 	minX <- min(pcol$points$V1)
 	maxY <- max(pcol$points$V2)
 	minY <- min(pcol$points$V2)

 	par(bty = 'n')
 	plot(0,0, main=name, type = "n",xlab=paste("C1 (",var1,"%)"),
       ylab=paste("C2 (",var2,"%)"), 
       xlim=c(minX-10^floor(log10(abs(minX))),
              maxX+10^floor(log10(abs(maxX)))), 
       ylim=c(minY-10^floor(log10(abs(minY))),
              maxY+10^floor(log10(abs(maxY)))), 
       frame=TRUE, cex=1.5)
 	bgcolors<-rainbow(ntt+1)[-2] #No yellow?
 	symbs <- c(21,22,23,24,25) #What if we have > 7 groups???
 	for(i in 1:ntt){
 		points(spcoo[[i]], pch=21, col="black", bg=bgcolors[i])
 	}	
 	s.class(pcol$points, groups, cpoint=0, 
          col=bgcolors, add.plot=TRUE)
 	

 }

 # Built a function wrap to take methylation patterns 
 # and obtain the Gower similarity and the AMOVA
 AMOVA3states <- function(L){
  kk<-do.call(rbind,L$patterns)
  kk[kk=="u"]<- 1
  kk[kk=="h"]<- 2
  kk[kk=="i"]<- 3
  kk[kk=="f"]<- NA
  diffAmova(gowdis(kk), L$groups, 4, T)
  pcoa(gowdis(kk), L$groups, "PCoA" )
 }
	### AMOVA between the three methylation types obtained by the package msap


	# AMOVA requires a distance matrix between all samples.
	# Such distance matrix could be only be built with quantitative data or with binary data.
	# In the case of three category variables, we cannot set a distance between two samples
	# (is distance between pattern I and III larger than between II and III?).
	# This is why population studies of MSAP needs to join patterns II and III as methylated in order to
	# build a binary variable to be analysed by AMOVA/PCoA.
	# Anyway, we can made a hack *assuming that distance between different states is equal to 1
	# (independently of which states they are)*.
	# The interpretation of such a distance matrix would be tricky as you give the same value to I-II difference
	# that to II-III or I-III.
	# For this we could use the Gower (1971) dissimilarity for mixed variables and then apply the
	# AMOVA function implmented in msap.

	### HOW TO USE
	# 1. run msap for your data and store it in an object (i.e. test1):
	# test1 <- msap("YOURDATA.csv", name="TEST", do.pcoa = F, do.amova = F, do.cluster = F, no.bands = "u", do.shannon = F )
	# 2. Source this script and run the AMOVA3states() function on test1
	# source("AMOVA3states.R")
	# AMOVA3states(test1)



	#There is a function in the package FD to compute Gower dissimilairty (gowdis)
	require(FD)
	# We will use the function diffAmova from msap, although it is not exported
	# in the msap NAMESPACE, so we should source it from my web:
	source(file = "http://webs.uvigo.es/anpefi/scripts/diffAmova.R" )

	#We also need an adaptation of the pcoa() function
	pcoa <- function(DM, groups, name){
	ntt <- length(levels(groups))
	#PCoA
	pcol <- cmdscale(DM, eig=T)

	var <- pcol$eig / sum(pcol$eig) *100
	var
	var1 <- round(var[1], digits=1)
	var2 <- round(var[2], digits=1)

	pcol$points<-as.data.frame(pcol$points)
	spcoo<-split(pcol$points, groups)
	maxX <- max(pcol$points$V1)
	minX <- min(pcol$points$V1)
	maxY <- max(pcol$points$V2)
	minY <- min(pcol$points$V2)

	par(bty = 'n')
	plot(0,0, main=name, type = "n",xlab=paste("C1 (",var1,"%)"),
	ylab=paste("C2 (",var2,"%)"),
	xlim=c(minX-10^floor(log10(abs(minX))),
	maxX+10^floor(log10(abs(maxX)))),
	ylim=c(minY-10^floor(log10(abs(minY))),
	maxY+10^floor(log10(abs(maxY)))),
	frame=TRUE, cex=1.5)
	bgcolors<-rainbow(ntt+1)[-2] #No yellow?
	symbs <- c(21,22,23,24,25) #What if we have > 7 groups???
	for(i in 1:ntt){
	points(spcoo[[i]], pch=21, col="black", bg=bgcolors[i])
	}
	s.class(pcol$points, groups, cpoint=0,
	col=bgcolors, add.plot=TRUE)


	}

	# Built a function wrap to take methylation patterns
	# and obtain the Gower similarity and the AMOVA
	AMOVA3states <- function(L){
	kk<-do.call(rbind,L$patterns)
	kk[kk=="u"]<- 1
	kk[kk=="h"]<- 2
	kk[kk=="i"]<- 3
	kk[kk=="f"]<- NA
	diffAmova(gowdis(kk), L$groups, 4, T)
	pcoa(gowdis(kk), L$groups, "PCoA" )
	}