Created
August 16, 2013 03:25
-
-
Save richfitz/6247033 to your computer and use it in GitHub Desktop.
Quick and dirty ancestral state reconstruction and change counting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # There are a number of problems here: | |
| # 1. I don't see how to convert from states above 50% probability to | |
| # number of events | |
| # 2. What ace returns are conditional likelihoods, not the marginal | |
| # likelihoods (which is what you need for ASR). The conditional | |
| # likelihood is just the probability of the data descended from that | |
| # node, not the probability that the node is in that state, taking | |
| # into account the *other* nodes in the tree. I've not managed to | |
| # cook up a really convincing case of why they're different, but | |
| # you'll see a quantitative difference, if not a qualitative one, | |
| # here. | |
| # 3. You should not do reconstructions conditional on the ML | |
| # parameters (or if you *do* do that, recognise that's what you're | |
| # doing). Far better is to average over the paramter uncertainty. | |
| library(diversitree) # can install off CRAN | |
| set.seed(1) | |
| # Simulate a tree under a yule (pure birth) model where trait | |
| # evolution from 0->1 is more likely than the reverse. 100 species in | |
| # the tree. | |
| phy <- tree.bisse(c(.1, .1, 0, 0, .02, .01), max.taxa=100) | |
| # Extract and plot the true history: | |
| h <- history.from.sim.discrete(phy, 0:1) | |
| plot(h, phy) | |
| # Little function to process the history to count the changes. | |
| count.changes <- function(obj) { | |
| ret <- table(unlist(lapply(obj$history, function(x) diff(x[,2])))) | |
| ret <- ret[c("-1", "1")] | |
| ret[is.na(ret)] <- 0 | |
| names(ret) <- c("1to0", "0to1") | |
| ret[2:1] | |
| } | |
| # 9 gains and 2 losses of state 1 (red in plot) | |
| count.changes(h) | |
| # Fit the model with ACE | |
| fit.ace <- ace(phy$tip.state, phy, "discrete", model="ARD") | |
| # Reconstructed states: | |
| plot(h, phy) | |
| nodelabels(thermo=fit.ace$lik.anc[,1], piecol=c(1, 2), cex=.5, adj=0) | |
| # Run an actual ASR. First, fit the model with diversitree's | |
| # functions (should give basically the same answer) | |
| lik <- make.mk2(phy, phy$tip.state) | |
| fit <- find.mle(lik, fit.ace$rates, root=ROOT.FLAT) | |
| # Estimated ancestral states: | |
| st.est <- asr.marginal(lik, coef(fit))[1,] | |
| # The numbers that ace produces are on the left, that marginal | |
| # ancestral state reconstruction produces are on the right. | |
| plot(h, phy) | |
| nodelabels(thermo=fit.ace$lik.anc[,1], piecol=c(1, 2), cex=.5, adj=0) | |
| nodelabels(thermo=st.est, piecol=c(1, 2), cex=.5, adj=1) | |
| plot(st.est ~ fit.ace$lik.anc[,1], xlab="Conditional likelihood", | |
| ylab="Marginal likelihood") | |
| abline(0, 1) | |
| # What you probably want to do to is stochastic character mapping, if | |
| # you really want to try and count changes. This function will do | |
| # stochastic ancestral state reconstruction for the model defined | |
| # above: | |
| asr <- make.asr.stoch(lik) | |
| # This gives an estimated number of gains and losses that is not | |
| # terrible | |
| set.seed(1) | |
| count.changes(asr(coef(fit))) | |
| # Repeat a bunch of times -- this is the estimated number of | |
| # gains/losses *conditional on the ML parameters*. | |
| ans <- replicate(1000, count.changes(asr(coef(fit)))) | |
| # Averaged over a number of trials, this actually looks quite good: | |
| rowMeans(ans) | |
| # Things get more awkward if you average over parameter uncertainty. | |
| # I'm using an exponential prior on the rate of trait evolution, and a | |
| # proposal width w that won't actually affect the rate of chain mixing | |
| # much. | |
| prior <- make.prior.exponential(1 / 0.05) | |
| samples <- mcmc(lik, coef(fit), 10000, prior=prior, w=.05, | |
| print.every=500) | |
| ans.mc <- apply(coef(samples), 1, function(x) count.changes(asr(x))) | |
| # We're guessing more 1->0 transitions now, because there is | |
| # uncertainty about the rate estimation. | |
| rowMeans(ans.mc) | |
| ## Quick squizz at the estimated number of changes | |
| nn <- apply(ans.mc, 1, table) | |
| plot(nn[[1]], xlim=c(0, 20), | |
| panel.last=abline(v=count.changes(h), col=c(1, 2))) | |
| lines(nn[[1]], type="l", lwd=1, lty=3) | |
| lines(nn[[2]], col="red") | |
| lines(nn[[2]], col="red", type="l", lwd=1, lty=3) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment