Created
June 9, 2016 13:27
-
-
Save philippbayer/ca403353c50710465c97cc5521d6fc60 to your computer and use it in GitHub Desktop.
A small example of how to use mash to make a wrong UPGMA tree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# download the texts from Project Gutenberg
# NOTE: wget needs upper-case -O to name the saved file; lower-case -o only
# redirects wget's log and leaves a stray pgNNNN.txt that the sketch loop
# below would pick up as an extra (duplicate) text.
wget http://www.gutenberg.org/cache/epub/2265/pg2265.txt -O Hamlet.txt
wget http://www.gutenberg.org/cache/epub/1120/pg1120.txt -O Caesar.txt
wget http://www.gutenberg.org/cache/epub/1121/pg1121.txt -O as_you_like_it.txt
wget http://www.gutenberg.org/cache/epub/1129/pg1129.txt -O macbeth.txt
wget http://www.gutenberg.org/files/5500/5500-0.txt -O Advancement_of_Learning.txt
wget http://www.gutenberg.org/cache/epub/2434/pg2434.txt -O New_Atlantis.txt
wget http://www.gutenberg.org/files/52190/52190-0.txt -O Ecce_homo.txt
wget http://www.gutenberg.org/files/52263/52263-0.txt -O Twilight_of_Idols.txt
wget http://www.gutenberg.org/files/51356/51356-0.txt -O Birth_of_Tragedy.txt
wget http://www.gutenberg.org/cache/epub/38145/pg38145.txt -O Human_All_Too_Human.txt
wget http://www.gutenberg.org/files/23044/23044-0.txt -O Merry_Wives.txt
wget http://www.gutenberg.org/files/22791/22791-0.txt -O Henry_V.txt

# path to the mash binary used by both steps below
MASH=./mash-Linux64-v1.1/mash

# make all sketches, large enough to not get distance=1
# (-a: amino acid alphabet / keep case, so plain text works; -s: sketch size)
for l in *.txt; do
  # skip the distance table left over from an earlier run; it is not a text
  [ "$l" = "Distance_table.txt" ] && continue
  "$MASH" sketch -a -s 50000 "$l"
done

# make all pairwise distances, keep only the two names and the actual distance
# (but make sure to look at p-values!)
for l in *.msh; do
  for b in *.msh; do
    "$MASH" dist "$l" "$b"
  done
done | cut -f 1,2,3 > Distance_table.txt

# maybe manually edit titles in Distance_table.txt at this point
# Then run the Rscript
library("phangorn")

# Distance_table.txt has no header and three tab-separated columns:
# V1 and V2 are the two sketch names of a pair, V3 is their mash distance.
a <- read.table("Distance_table.txt", stringsAsFactors = FALSE, sep = "\t")

# make into wide table: one row per V2 name, one "V3.<V1 name>" column per V1 name
wide <- reshape(a, direction = "wide", idvar = "V2", timevar = "V1")
labels <- wide[["V2"]]

# make into a plain numeric matrix (drop the name column)
dist_matrix <- as.matrix(wide[, -1, drop = FALSE])

# as.dist() only reads the lower triangle by position, so rows and columns
# must list the same texts in the same order and no pair may be missing;
# fail loudly here rather than draw a tree from misaligned distances
stopifnot(
  nrow(dist_matrix) == ncol(dist_matrix),
  identical(colnames(dist_matrix), paste0("V3.", labels)),
  !anyNA(dist_matrix)
)

# name both dimensions so as.dist() carries the labels through to the tree
dimnames(dist_matrix) <- list(labels, labels)
distance <- as.dist(dist_matrix, upper = FALSE, diag = FALSE)

# build the tree before opening the device, so a failure in upgma()
# does not leave a half-open tree.png behind
tree <- upgma(distance)

# draw tree
png("tree.png")
plot(tree)
dev.off()
# frolic
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment