richfitz · August 16, 2013 03:25
diff --git a/asr.R b/asr.R
 # There are a number of problems here:

 # 1. I don't see how to convert from states above 50% probability to
 # number of events

 # 2. What ace returns are conditional likelihoods, not the marginal
 # likelihoods (which is what you need for ASR).  The conditional
 # likelihood is just the probability of the data descended from that
 # node, not the probability that the node is in that state, taking
 # into account the *other* nodes in the tree.  I've not managed to
 # cook up a really convincing case of why they're different, but
 # you'll see a quantitative difference, if not a qualitative one,
 # here.

 # 3. You should not do reconstructions conditional on the ML
 # parameters (or if you *do* do that, recognise that's what you're
 # doing).  Far better is to average over the paramter uncertainty.

 library(diversitree) # can install off CRAN

 set.seed(1)
 # Simulate a tree under a yule (pure birth) model where trait
 # evolution from 0->1 is more likely than the reverse.  100 species in
 # the tree.
 phy <- tree.bisse(c(.1, .1, 0, 0, .02, .01), max.taxa=100)

 # Extract and plot the true history:
 h <- history.from.sim.discrete(phy, 0:1)
 plot(h, phy)

 # Little function to process the history to count the changes.
 count.changes <- function(obj) {
  ret <- table(unlist(lapply(obj$history, function(x) diff(x[,2]))))
  ret <- ret[c("-1", "1")]
  ret[is.na(ret)] <- 0
  names(ret) <- c("1to0", "0to1")
  ret[2:1]
 }

 # 9 gains and 2 losses of state 1 (red in plot)
 count.changes(h)

 # Fit the model with ACE
 fit.ace <- ace(phy$tip.state, phy, "discrete", model="ARD")

 # Reconstructed states:
 plot(h, phy)
 nodelabels(thermo=fit.ace$lik.anc[,1], piecol=c(1, 2), cex=.5, adj=0)

 # Run an actual ASR.  First, fit the model with diversitree's
 # functions (should give basically the same answer)
 lik <- make.mk2(phy, phy$tip.state)
 fit <- find.mle(lik, fit.ace$rates, root=ROOT.FLAT)

 # Estimated ancestral states:
 st.est <- asr.marginal(lik, coef(fit))[1,]

 # The numbers that ace produces are on the left, that marginal
 # ancestral state reconstruction produces are on the right.
 plot(h, phy)
 nodelabels(thermo=fit.ace$lik.anc[,1], piecol=c(1, 2), cex=.5, adj=0)
 nodelabels(thermo=st.est, piecol=c(1, 2), cex=.5, adj=1)

 plot(st.est ~ fit.ace$lik.anc[,1], xlab="Conditional likelihood",
     ylab="Marginal likelihood")
 abline(0, 1)

 # What you probably want to do to is stochastic character mapping, if
 # you really want to try and count changes.  This function will do
 # stochastic ancestral state reconstruction for the model defined
 # above:
 asr <- make.asr.stoch(lik)

 # This gives an estimated number of gains and losses that is not
 # terrible
 set.seed(1)
 count.changes(asr(coef(fit)))

 # Repeat a bunch of times -- this is the estimated number of
 # gains/losses *conditional on the ML parameters*.
 ans <- replicate(1000, count.changes(asr(coef(fit))))
 # Averaged over a number of trials, this actually looks quite good:
 rowMeans(ans)

 # Things get more awkward if you average over parameter uncertainty.
 # I'm using an exponential prior on the rate of trait evolution, and a
 # proposal width w that won't actually affect the rate of chain mixing
 # much.
 prior <- make.prior.exponential(1 / 0.05)
 samples <- mcmc(lik, coef(fit), 10000, prior=prior, w=.05,
                print.every=500)
 ans.mc <- apply(coef(samples), 1, function(x) count.changes(asr(x)))
 # We're guessing more 1->0 transitions now, because there is
 # uncertainty about the rate estimation.
 rowMeans(ans.mc)

 ## Quick squizz at the estimated number of changes
 nn <- apply(ans.mc, 1, table)
 plot(nn[[1]], xlim=c(0, 20),
     panel.last=abline(v=count.changes(h), col=c(1, 2)))
 lines(nn[[1]], type="l", lwd=1, lty=3)
 lines(nn[[2]], col="red")
 lines(nn[[2]], col="red", type="l", lwd=1, lty=3)
	# There are a number of problems here:

	# 1. I don't see how to convert from states above 50% probability to
	# number of events

	# 2. What ace returns are conditional likelihoods, not the marginal
	# likelihoods (which is what you need for ASR). The conditional
	# likelihood is just the probability of the data descended from that
	# node, not the probability that the node is in that state, taking
	# into account the other nodes in the tree. I've not managed to
	# cook up a really convincing case of why they're different, but
	# you'll see a quantitative difference, if not a qualitative one,
	# here.

	# 3. You should not do reconstructions conditional on the ML
	# parameters (or if you do do that, recognise that's what you're
	# doing). Far better is to average over the paramter uncertainty.

	library(diversitree) # can install off CRAN

	set.seed(1)
	# Simulate a tree under a yule (pure birth) model where trait
	# evolution from 0->1 is more likely than the reverse. 100 species in
	# the tree.
	phy <- tree.bisse(c(.1, .1, 0, 0, .02, .01), max.taxa=100)

	# Extract and plot the true history:
	h <- history.from.sim.discrete(phy, 0:1)
	plot(h, phy)

	# Little function to process the history to count the changes.
	count.changes <- function(obj) {
	ret <- table(unlist(lapply(obj$history, function(x) diff(x[,2]))))
	ret <- ret[c("-1", "1")]
	ret[is.na(ret)] <- 0
	names(ret) <- c("1to0", "0to1")
	ret[2:1]
	}

	# 9 gains and 2 losses of state 1 (red in plot)
	count.changes(h)

	# Fit the model with ACE
	fit.ace <- ace(phy$tip.state, phy, "discrete", model="ARD")

	# Reconstructed states:
	plot(h, phy)
	nodelabels(thermo=fit.ace$lik.anc[,1], piecol=c(1, 2), cex=.5, adj=0)

	# Run an actual ASR. First, fit the model with diversitree's
	# functions (should give basically the same answer)
	lik <- make.mk2(phy, phy$tip.state)
	fit <- find.mle(lik, fit.ace$rates, root=ROOT.FLAT)

	# Estimated ancestral states:
	st.est <- asr.marginal(lik, coef(fit))[1,]

	# The numbers that ace produces are on the left, that marginal
	# ancestral state reconstruction produces are on the right.
	plot(h, phy)
	nodelabels(thermo=fit.ace$lik.anc[,1], piecol=c(1, 2), cex=.5, adj=0)
	nodelabels(thermo=st.est, piecol=c(1, 2), cex=.5, adj=1)

	plot(st.est ~ fit.ace$lik.anc[,1], xlab="Conditional likelihood",
	ylab="Marginal likelihood")
	abline(0, 1)

	# What you probably want to do to is stochastic character mapping, if
	# you really want to try and count changes. This function will do
	# stochastic ancestral state reconstruction for the model defined
	# above:
	asr <- make.asr.stoch(lik)

	# This gives an estimated number of gains and losses that is not
	# terrible
	set.seed(1)
	count.changes(asr(coef(fit)))

	# Repeat a bunch of times -- this is the estimated number of
	# gains/losses conditional on the ML parameters.
	ans <- replicate(1000, count.changes(asr(coef(fit))))
	# Averaged over a number of trials, this actually looks quite good:
	rowMeans(ans)

	# Things get more awkward if you average over parameter uncertainty.
	# I'm using an exponential prior on the rate of trait evolution, and a
	# proposal width w that won't actually affect the rate of chain mixing
	# much.
	prior <- make.prior.exponential(1 / 0.05)
	samples <- mcmc(lik, coef(fit), 10000, prior=prior, w=.05,
	print.every=500)
	ans.mc <- apply(coef(samples), 1, function(x) count.changes(asr(x)))
	# We're guessing more 1->0 transitions now, because there is
	# uncertainty about the rate estimation.
	rowMeans(ans.mc)

	## Quick squizz at the estimated number of changes
	nn <- apply(ans.mc, 1, table)
	plot(nn[[1]], xlim=c(0, 20),
	panel.last=abline(v=count.changes(h), col=c(1, 2)))
	lines(nn[[1]], type="l", lwd=1, lty=3)
	lines(nn[[2]], col="red")
	lines(nn[[2]], col="red", type="l", lwd=1, lty=3)
No results found