willpearse · June 20, 2019 19:29
diff --git a/macroevol_classic_intro.R b/macroevol_classic_intro.R
 # Packages
 library(geiger)
 library(caper)
 library(phytools)

 # Simulate phylogeny and some traits
 tree <- sim.bdtree(n=100)
 traits <- sim.char(tree, .01, nsim=7)[,1,]

 # Fuss around to make a discrete character
 traits[,7] <- as.numeric(traits[,7] > 1)
 traits <- as.data.frame(traits)
 names(traits) <- letters[seq_len(ncol(traits))]
 traits$g <- ifelse(traits$g==1, "present", "absent")

 # Make a comparative.data object to house everything
 # - I do this because you absolutely do not want to get your phylogeny and trait data out of alignment. I realise that it makes the code below look a little worse at times (c.data$data$a instead of data$a), but is that really a big price to pay for being sure of your results?...
 traits$species <- rownames(traits)
 c.data <- comparative.data(tree, traits, species)

 # Run a phylogenetic and a non-phylogenetic PCA
 # - Notice we're dropping the seventh column, which is not numeric
 pca <- prcomp(c.data$data[,-7], center=TRUE, scale=TRUE)
 p.pca <- phyl.pca(c.data$phy, c.data$data[,-7], method="lambda")

 par(mfrow=c(1,2))
 biplot(pca)
 biplot(p.pca)
 #...Remember that absolute values don't matter here, but the relative
 #   loadings do (i.e., reversed signs of arrows don't matter, it's the
 #   correlations between traits that matter)

 # Estimate phylogenetic signal in individual traits in lots of ways
 white.noise <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="white") # (No signal)
 brownian.motion <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="BM") # Random drift
 pagel.lam <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="lambda") # As BM, but allows for intermediate between no signal and random
 pagel.delta <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="delta") # Change through time
 pagel.kappa <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="kappa") # Adaptive radiation (requires good sampling to phylogeny)
 orn.uhlen <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="OU") # Ornstein Uhlenbeck; could be adpative evolution, or could be constraint
 early.burst <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="EB") # Sort of like a Delta
 trend <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="trend") # Also sort of like a Delta...
 #... you can look at the outputs yourself by typing their names
 #... you can also extract the OPTimised model parameters like this
 white.noise
 white.noise$opt

 sort(setNames(sapply(list(white.noise, brownian.motion, pagel.lam, pagel.delta, pagel.kappa, orn.uhlen), function(x) x$opt$aic), c("white.noise", "brownian.motion", "pagel.lam", "pagel.delta", "pagel.kappa", "orn.uhlen")))
 #...and use AIC to find the best-fitting model for your traits (lowest AIC value)
 #...note that, here it's Brownian motion, because that's how I simulated the traits!

 # Estimate signal in discrete traits
 fitDiscrete(tree, setNames(c.data$data$g, rownames(c.data$data)), transform="lambda")
 #...gives you Pagel's Lambda (...or delta, or...) values to understand whether phylogeny matters
 #...the Q matrix is the rate of transitions between states. You can fit more complex versions of this; check the help file (and check among them with AIC as above)

 # Incorporating standard errors in trait estimates using fitContinuous and fitDiscrete
 imaginary.se <- runif(nrow(c.data$data))
 fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), SE=setNames(imaginary.se,rownames(c.data$data)), model="BM")
 #...remember that R has no SE function, so here is one:
 calc.se <- function(x) x / sqrt(length(x))
 #...or something similar.

 # Calculating Blomberg's K (and Pagel's Lambda) in phytools
 # - because these aren't models, but rather metrics, fitContinuous won't do this for you
 phylosig(c.data$phy, c.data$data$a, "K", test=TRUE)
 phylosig(c.data$phy, c.data$data$a, "lambda", se=imaginary.se)

 # Use Fritz's D as essentially Pagel's Lambda but for binary traits
 phylo.d(c.data, binvar=g)
 #...0 -- > BM, 1 --> random. Surprise, it's indistinguishable from BM (Felsenstein's threshold model) because... that's how I simulated it :D

 # Advanced: multivariate models of trait evolution
 library(mvMORPH)
 mvBM(c.data$phy, c.data$data[,-7])
 #...this gives the rates of independent (diagonals) and correlated (off-diagonals) of traits. Complex, but fun to play with (and takes a while to fit!!!).
	# Packages
	library(geiger)
	library(caper)
	library(phytools)

	# Simulate phylogeny and some traits
	tree <- sim.bdtree(n=100)
	traits <- sim.char(tree, .01, nsim=7)[,1,]

	# Fuss around to make a discrete character
	traits[,7] <- as.numeric(traits[,7] > 1)
	traits <- as.data.frame(traits)
	names(traits) <- letters[seq_len(ncol(traits))]
	traits$g <- ifelse(traits$g==1, "present", "absent")

	# Make a comparative.data object to house everything
	# - I do this because you absolutely do not want to get your phylogeny and trait data out of alignment. I realise that it makes the code below look a little worse at times (c.data$data$a instead of data$a), but is that really a big price to pay for being sure of your results?...
	traits$species <- rownames(traits)
	c.data <- comparative.data(tree, traits, species)

	# Run a phylogenetic and a non-phylogenetic PCA
	# - Notice we're dropping the seventh column, which is not numeric
	pca <- prcomp(c.data$data[,-7], center=TRUE, scale=TRUE)
	p.pca <- phyl.pca(c.data$phy, c.data$data[,-7], method="lambda")

	par(mfrow=c(1,2))
	biplot(pca)
	biplot(p.pca)
	#...Remember that absolute values don't matter here, but the relative
	# loadings do (i.e., reversed signs of arrows don't matter, it's the
	# correlations between traits that matter)

	# Estimate phylogenetic signal in individual traits in lots of ways
	white.noise <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="white") # (No signal)
	brownian.motion <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="BM") # Random drift
	pagel.lam <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="lambda") # As BM, but allows for intermediate between no signal and random
	pagel.delta <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="delta") # Change through time
	pagel.kappa <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="kappa") # Adaptive radiation (requires good sampling to phylogeny)
	orn.uhlen <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="OU") # Ornstein Uhlenbeck; could be adpative evolution, or could be constraint
	early.burst <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="EB") # Sort of like a Delta
	trend <- fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), model="trend") # Also sort of like a Delta...
	#... you can look at the outputs yourself by typing their names
	#... you can also extract the OPTimised model parameters like this
	white.noise
	white.noise$opt

	sort(setNames(sapply(list(white.noise, brownian.motion, pagel.lam, pagel.delta, pagel.kappa, orn.uhlen), function(x) x$opt$aic), c("white.noise", "brownian.motion", "pagel.lam", "pagel.delta", "pagel.kappa", "orn.uhlen")))
	#...and use AIC to find the best-fitting model for your traits (lowest AIC value)
	#...note that, here it's Brownian motion, because that's how I simulated the traits!

	# Estimate signal in discrete traits
	fitDiscrete(tree, setNames(c.data$data$g, rownames(c.data$data)), transform="lambda")
	#...gives you Pagel's Lambda (...or delta, or...) values to understand whether phylogeny matters
	#...the Q matrix is the rate of transitions between states. You can fit more complex versions of this; check the help file (and check among them with AIC as above)

	# Incorporating standard errors in trait estimates using fitContinuous and fitDiscrete
	imaginary.se <- runif(nrow(c.data$data))
	fitContinuous(c.data$phy, setNames(c.data$data$a,rownames(c.data$data)), SE=setNames(imaginary.se,rownames(c.data$data)), model="BM")
	#...remember that R has no SE function, so here is one:
	calc.se <- function(x) x / sqrt(length(x))
	#...or something similar.

	# Calculating Blomberg's K (and Pagel's Lambda) in phytools
	# - because these aren't models, but rather metrics, fitContinuous won't do this for you
	phylosig(c.data$phy, c.data$data$a, "K", test=TRUE)
	phylosig(c.data$phy, c.data$data$a, "lambda", se=imaginary.se)

	# Use Fritz's D as essentially Pagel's Lambda but for binary traits
	phylo.d(c.data, binvar=g)
	#...0 -- > BM, 1 --> random. Surprise, it's indistinguishable from BM (Felsenstein's threshold model) because... that's how I simulated it :D

	# Advanced: multivariate models of trait evolution
	library(mvMORPH)
	mvBM(c.data$phy, c.data$data[,-7])
	#...this gives the rates of independent (diagonals) and correlated (off-diagonals) of traits. Complex, but fun to play with (and takes a while to fit!!!).