Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Last active March 9, 2018 20:02
Show Gist options
  • Save benmarwick/6081694 to your computer and use it in GitHub Desktop.
Save benmarwick/6081694 to your computer and use it in GitHub Desktop.
Phylogenetic Analysis of Material Culture in R
#
# preliminaries...
# get R: http://www.r-project.org/
# get RStudio: http://www.rstudio.org/
#
# useful websites for ideas and instructions...
# http://cran.r-project.org/web/views/Phylogenetics.html annotated list of relevant R packages
# http://www.r-phylo.org/wiki/Main_Page
# http://bodegaphylo.wikispot.org/Phylogenetics_and_Comparative_Methods_in_R
# https://nunn.rc.fas.harvard.edu/groups/anthrotree2011/wiki/76142/Lecture_Materials.html
# https://nunn.rc.fas.harvard.edu/groups/anthrotree2010/wiki/83f37/Lecture_materials.html
# http://fishlab.ucdavis.edu/?page_id=111
# http://www.eeb.uconn.edu/eebedia/index.php/Phylogenetics:_Syllabus
#
# Prepare a NEX file by typing the character state matrix into text editor
# and import this NEX file to the R workspace
#
# clear the workspace
rm(list = ls())
library(phangorn) # load library phangorn 'Phylogenetic analysis in R'
b <- read.nexus.data("infileb12.txt.nex") # make sure this nex file is in wd
b.pd <- phyDat(b, type="USER", levels=c(0:2)) # convert nex to phyDat format
b.pd #check what went in....
7 sequences with 17 character and 17 different site patterns.
The states are 0 1 2
#
# looks good so far
# get some basic attributes of the data and make some trees
#
# returns the parsimony score of a tree
b.pd.pscore <- pratchet(b.pd,np=100, method="fitch")
# maximum parsimony score (pscore) = 36
#
# returns all most parsimonious trees in an object of class multiPhylo
b.pd.bab <- bab(b.pd)
#
# plots all most parsimonious trees
library(ape)
plot(b.bab, type="phylogram", layout=length(b.bab))
#
# find all trees, exhaustive and branch & bound MP searches
# and returns parsimony scores
library(phytools)
b.pd.e <- exhaustiveMP(b.pd, method="exhaustive")
# searches 945 trees, keeps 3
# find length of the kept trees
b.pd.e.pscores <- NULL
for(i in 1:length(b.pd.e))
{
b.pd.e.pscores[i] <- attr(b.pd.e[[i]],"pscore")
}
b.pd.e.pscores
# all are 36 long
#
# find all trees, another method
b.pd.bb <- exhaustiveMP(b.pd, method="branch.and.bound")
# returns 3 phylogenetic trees, how long are they?
b.pd.bb.pscores <- NULL
for(i in 1:length(b.pd.bb))
{
b.pd.bb.pscores[i] <- attr(b.pd.bb[[i]],"pscore")
}
b.pd.bb.pscores
# the same, 36
#
# have a look a the trees resulting from the exhaustive search... they look good, best one is 3 because it appears to be rooted consistent with stratigraphic data
plot(b.pd.e, type="phylogram", layout=4)
#
# get parsimony score of the first tree
attr(b.pd.e[[3]],"pscore")
# 36 again
# another method to get parsimony score of the first tree
parsimony(b.pd.e[[3]],b.pd)
# still 36
#
# get consistency index for that tree
CI(b.pd.e[[3]],b.pd)
# 0.7222222
#
# get retention index for that tree
RI(b.pd.e[[3]],b.pd)
# 0.6
# get tip labels for modeling
tips <- b.pd.e[[3]]$tip.label
#NEXUS
Begin taxa;
Dimensions ntax=7;
taxlabels
taxaDV
taxaKH
taxaTH
taxaSU
taxaAY
taxaLA
taxaAX
;
end;
begin characters;
Dimensions nchar=17;
Format symbols = "012";
matrix
taxaDV 10000001001220011
taxaKH 20000011011200200
taxaTH 20110101010220010
taxaSU 12102222121212011
taxaAY 20112222120221001
taxaLA 10122112121221002
taxaAX 20212212221211222
;
End;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment