Last active
April 27, 2021 13:39
-
-
Save josephwb/02157adac3eddfbf0f297a01b8cb768e to your computer and use it in GitHub Desktop.
like ape::keep.tip, but can also keep internal nodes (which become new tips)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ape); | |
# like keep.tip, but you can keep internal nodes toooooo | |
keep.nodes <- function (phy, nds) { | |
# stats | |
Ntip <- length(phy$tip.label); | |
NEWROOT <- Ntip + 1; | |
Nnode <- phy$Nnode; | |
hasNL <- !is.null(phy$node.label); | |
# first, determine nature of passed in nds | |
# does not support mix of indices and names. pick one! | |
# if labels are passed in rather than indices, convert | |
if (!is.numeric(nds)) { | |
mm <- match(nds, phy$tip.label); | |
if (any(is.na(mm))) { | |
idx <- which(is.na(mm)); | |
if (!hasNL) { | |
stop("Error: nds do not all map to tip.labels, and node.label not present"); | |
} else { | |
nl <- match(nds[idx], phy$node.label); | |
if (any(is.na(nl))) { | |
stop("Failed to match nds to node.label"); | |
} else { | |
# extract node index from label | |
mm[idx] <- nl + Ntip; | |
} | |
} | |
} | |
nds <- mm; | |
} | |
# get all nodes to be retained (including mrcas) | |
allnds <- mrca(phy, full=T)[nds,]; | |
# put in nicer form | |
allnds <- unique(as.numeric(allnds)); | |
# find which edges to retain | |
keep <- which(phy$edge[,1] %in% allnds & phy$edge[,2] %in% allnds); | |
phy$edge <- phy$edge[keep,]; | |
phy$edge.length <- phy$edge.length[keep]; | |
phy$tip.label <- phy$tip.label[phy$edge[,2][phy$edge[,2] <= Ntip]] | |
## find the new terminal edges | |
TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1]); | |
## get the old No. of the nodes and tips that become tips: | |
oldNo.ofNewTips <- phy$edge[TERMS, 2]; | |
n <- length(oldNo.ofNewTips); | |
phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2]); | |
# need new tip labels for internal nodes that become terminals | |
# this will use node.label if present, otherwise will be NewTip* | |
node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip]; | |
new.tip.label <- | |
if (!length(node2tip)) { | |
character(0); | |
} else { | |
if (!hasNL) { | |
paste0("NewTip", seq(1:length(node2tip))); | |
} else { | |
phy$node.label[node2tip - Ntip]; | |
} | |
} | |
phy$tip.label <- c(phy$tip.label, new.tip.label); | |
# get rid of node labels that will not exist in new tree | |
if (!is.null(phy$node.label)) { | |
phy$node.label <- phy$node.label[unique(phy$edge[,1]) - Ntip]; | |
} | |
# reindex everything | |
phy$Nnode <- dim(phy$edge)[1] - n + 1L; | |
newNb <- integer(Ntip + Nnode); | |
newNb[NEWROOT] <- n + 1L; | |
sndcol <- phy$edge[, 2] > n; | |
newNb[sort(phy$edge[sndcol, 2])] <- (n + 2):(n + phy$Nnode); | |
phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]]; | |
phy$edge[, 1] <- newNb[phy$edge[, 1]]; | |
return(phy); | |
} | |
############################################################# | |
## example data | |
# test tree string | |
tstr <- "(((((f:0.1,g:0.1):0.9,e:1.0):0.3,d:1.3):0.4,c:1.7):1.3,b:3.0);"; | |
# tree with node labels (purposely left some blank for testing) | |
tstr2 <- "(((((f:0.1,g:0.1):0.9,e:1.0)h:0.3,d:1.3)i:0.4,c:1.7)a:1.3,b:3.0);"; | |
phy <- read.tree(text=tstr); | |
phy2 <- read.tree(text=tstr2); | |
plot(phy); axisPhylo(); nodelabels(); | |
# objective: keep b, c, and 9 (making this a terminal) | |
############################################################# | |
## example 1: tree with no existing node labels | |
# get the node ids for the terminal you want to keep | |
nds <- match(c("b", "c"), phy$tip.label); | |
# add in the internal nodes to keep. you can find these indices with getMRCA | |
nds <- c(nds, getMRCA(phy, c("d", "g"))); | |
# tree with no node labels | |
fie <- keep.nodes(phy, nds); | |
plot(fie); axisPhylo(); | |
############################################################# | |
## example 2: tree HAS existing node labels ## | |
# label version | |
nds2 <- c("b", "c", "i"); | |
# now, one with node labels | |
fie2 <- keep.nodes(phy2, nds); | |
plot(fie2); axisPhylo(); | |
############################################################# | |
# finally, if your tree does not have node labels, but you do not want | |
# to mess around with MRCAs, give it node labels | |
phy3 <- phy; | |
phy3$node.label <- paste0("Node", seq(1:phy3$Nnode)); | |
# plot the tree to see labels | |
plot(phy3); axisPhylo(); nodelabels(text=phy3$node.label); | |
# let's do this | |
fie3 <- keep.nodes(phy3, c("b", "c", "Node3")); | |
plot(fie3); axisPhylo(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment