benmarwick · March 9, 2018 20:02
diff --git a/Buddha-cladistics.R b/Buddha-cladistics.R
 Phylogenetic Analysis of Material Culture in R

 #
 # preliminaries... 
 # get R: http://www.r-project.org/
 # get RStudio: http://www.rstudio.org/
 #
 # useful websites for ideas and instructions...
 # http://cran.r-project.org/web/views/Phylogenetics.html annotated list of relevant R packages
 # http://www.r-phylo.org/wiki/Main_Page
 # http://bodegaphylo.wikispot.org/Phylogenetics_and_Comparative_Methods_in_R
 # https://nunn.rc.fas.harvard.edu/groups/anthrotree2011/wiki/76142/Lecture_Materials.html
 # https://nunn.rc.fas.harvard.edu/groups/anthrotree2010/wiki/83f37/Lecture_materials.html
 # http://fishlab.ucdavis.edu/?page_id=111
 # http://www.eeb.uconn.edu/eebedia/index.php/Phylogenetics:_Syllabus
 #
 # Prepare a NEX file by typing the character state matrix into text editor 
 # and import this NEX file to the R workspace
 #
 # clear the workspace
 rm(list = ls())
 library(phangorn) # load library phangorn 'Phylogenetic analysis in R'
 b <- read.nexus.data("infileb12.txt.nex") # make sure this nex file is in wd
 b.pd <- phyDat(b, type="USER", levels=c(0:2)) # convert nex to phyDat format
 b.pd #check what went in....

 7 sequences with 17 character and 17 different site patterns.
 The states are 0 1 2
 #
 # looks good so far
 # get some basic attributes of the data and make some trees
 #
 # returns the parsimony score of a tree
 b.pd.pscore <- pratchet(b.pd,np=100, method="fitch") 
 #  maximum parsimony score (pscore) = 36
 #
 # returns all most parsimonious trees in an object of class multiPhylo
 b.pd.bab <- bab(b.pd) 
 #
 # plots all most parsimonious trees
 library(ape)
 plot(b.bab, type="phylogram", layout=length(b.bab)) 
 #
 # find all trees, exhaustive and branch & bound MP searches
 # and returns parsimony scores
 library(phytools)
 b.pd.e <- exhaustiveMP(b.pd, method="exhaustive")
 # searches 945 trees, keeps 3
 # find length of the kept trees
 b.pd.e.pscores <- NULL
 for(i in 1:length(b.pd.e))
    {
    b.pd.e.pscores[i] <- attr(b.pd.e[[i]],"pscore")
    }
 b.pd.e.pscores
 # all are 36 long
 #
 # find all trees, another method
 b.pd.bb <- exhaustiveMP(b.pd, method="branch.and.bound") 
 # returns 3 phylogenetic trees, how long are they?
 b.pd.bb.pscores <- NULL
 for(i in 1:length(b.pd.bb))
    {
    b.pd.bb.pscores[i] <- attr(b.pd.bb[[i]],"pscore")
    }
 b.pd.bb.pscores
 # the same, 36
 #
 #  have a look a the trees resulting from the exhaustive search... they look good, best one is 3 because it appears to be rooted consistent with stratigraphic data
 plot(b.pd.e, type="phylogram", layout=4) 
 #
 #  get parsimony score of the first tree
 attr(b.pd.e[[3]],"pscore") 
 # 36 again
 # another method to get parsimony score of the first tree
 parsimony(b.pd.e[[3]],b.pd) 
 # still 36
 #
 # get consistency index for that tree
 CI(b.pd.e[[3]],b.pd)
 # 0.7222222
 #
 # get retention index for that tree
 RI(b.pd.e[[3]],b.pd) 
 # 0.6

 # get tip labels for modeling
 tips <- b.pd.e[[3]]$tip.label


diff --git a/infileb12.txt.nex b/infileb12.txt.nex
 #NEXUS
 Begin taxa;
 Dimensions ntax=7;
 taxlabels

 taxaDV    
 taxaKH    
 taxaTH    
 taxaSU    
 taxaAY    
 taxaLA    
 taxaAX
 ;
 end;
 begin characters;
 Dimensions nchar=17;
 Format symbols = "012";
 matrix
 taxaDV    10000001001220011
 taxaKH    20000011011200200
 taxaTH    20110101010220010
 taxaSU    12102222121212011
 taxaAY    20112222120221001
 taxaLA    10122112121221002
 taxaAX    20212212221211222
 ;
 End;
	Phylogenetic Analysis of Material Culture in R

	#
	# preliminaries...
	# get R: http://www.r-project.org/
	# get RStudio: http://www.rstudio.org/
	#
	# useful websites for ideas and instructions...
	# http://cran.r-project.org/web/views/Phylogenetics.html annotated list of relevant R packages
	# http://www.r-phylo.org/wiki/Main_Page
	# http://bodegaphylo.wikispot.org/Phylogenetics_and_Comparative_Methods_in_R
	# https://nunn.rc.fas.harvard.edu/groups/anthrotree2011/wiki/76142/Lecture_Materials.html
	# https://nunn.rc.fas.harvard.edu/groups/anthrotree2010/wiki/83f37/Lecture_materials.html
	# http://fishlab.ucdavis.edu/?page_id=111
	# http://www.eeb.uconn.edu/eebedia/index.php/Phylogenetics:_Syllabus
	#
	# Prepare a NEX file by typing the character state matrix into text editor
	# and import this NEX file to the R workspace
	#
	# clear the workspace
	rm(list = ls())
	library(phangorn) # load library phangorn 'Phylogenetic analysis in R'
	b <- read.nexus.data("infileb12.txt.nex") # make sure this nex file is in wd
	b.pd <- phyDat(b, type="USER", levels=c(0:2)) # convert nex to phyDat format
	b.pd #check what went in....

	7 sequences with 17 character and 17 different site patterns.
	The states are 0 1 2
	#
	# looks good so far
	# get some basic attributes of the data and make some trees
	#
	# returns the parsimony score of a tree
	b.pd.pscore <- pratchet(b.pd,np=100, method="fitch")
	# maximum parsimony score (pscore) = 36
	#
	# returns all most parsimonious trees in an object of class multiPhylo
	b.pd.bab <- bab(b.pd)
	#
	# plots all most parsimonious trees
	library(ape)
	plot(b.bab, type="phylogram", layout=length(b.bab))
	#
	# find all trees, exhaustive and branch & bound MP searches
	# and returns parsimony scores
	library(phytools)
	b.pd.e <- exhaustiveMP(b.pd, method="exhaustive")
	# searches 945 trees, keeps 3
	# find length of the kept trees
	b.pd.e.pscores <- NULL
	for(i in 1:length(b.pd.e))
	{
	b.pd.e.pscores[i] <- attr(b.pd.e[[i]],"pscore")
	}
	b.pd.e.pscores
	# all are 36 long
	#
	# find all trees, another method
	b.pd.bb <- exhaustiveMP(b.pd, method="branch.and.bound")
	# returns 3 phylogenetic trees, how long are they?
	b.pd.bb.pscores <- NULL
	for(i in 1:length(b.pd.bb))
	{
	b.pd.bb.pscores[i] <- attr(b.pd.bb[[i]],"pscore")
	}
	b.pd.bb.pscores
	# the same, 36
	#
	# have a look a the trees resulting from the exhaustive search... they look good, best one is 3 because it appears to be rooted consistent with stratigraphic data
	plot(b.pd.e, type="phylogram", layout=4)
	#
	# get parsimony score of the first tree
	attr(b.pd.e[[3]],"pscore")
	# 36 again
	# another method to get parsimony score of the first tree
	parsimony(b.pd.e[[3]],b.pd)
	# still 36
	#
	# get consistency index for that tree
	CI(b.pd.e[[3]],b.pd)
	# 0.7222222
	#
	# get retention index for that tree
	RI(b.pd.e[[3]],b.pd)
	# 0.6

	# get tip labels for modeling
	tips <- b.pd.e[[3]]$tip.label
	#NEXUS
	Begin taxa;
	Dimensions ntax=7;
	taxlabels

	taxaDV
	taxaKH
	taxaTH
	taxaSU
	taxaAY
	taxaLA
	taxaAX
	;
	end;
	begin characters;
	Dimensions nchar=17;
	Format symbols = "012";
	matrix
	taxaDV 10000001001220011
	taxaKH 20000011011200200
	taxaTH 20110101010220010
	taxaSU 12102222121212011
	taxaAY 20112222120221001
	taxaLA 10122112121221002
	taxaAX 20212212221211222
	;
	End;