Skip to content

Instantly share code, notes, and snippets.

@avallecam
Last active September 3, 2024 12:50
Show Gist options
  • Save avallecam/4670a8430f01a3d2290a2a3f2e3f104e to your computer and use it in GitHub Desktop.
Save avallecam/4670a8430f01a3d2290a2a3f2e3f104e to your computer and use it in GitHub Desktop.
phylogenetics: get the uncertainty for each node from a most-recent-common-ancertor (MRCA) estimation with {ape} and {treedater}
# pak::pak("treedater")

library(treedater)
#> Loading required package: ape
#> Loading required package: limSolve
library(tidyverse)

# make a random tree
tre <- ape::rtree(10)
# simulate sample times based on distance from root to tip:
sts <- setNames( ape::node.depth.edgelength(tre)[1:ape::Ntip(tre)], tre$tip.label)
# modify edge length to represent evolutionary distance with rate 1e-3:
tre$edge.length <- tre$edge.length * 1e-3

ggtree::ggtree(tre)

# treedater: 
td <- dater(tre = tre, sts = sts*100, s=1000, clock='strict', omega0=.0015)
#> Note: Minimum temporal branch length  (*minblen*) set to 2.097628202755. Increase *minblen* in the event of convergence failures.
#> Warning in dater(tre = tre, sts = sts * 100, s = 1000, clock = "strict", :
#> omega0 provided incompatible with numStartConditions > 0. Setting
#> numStartConditions to zero.
#> Tree is rooted. Not estimating root position.
# note: I multiplied sample times to get integer values

datertree <- td$intree
datertree$edge.length <- td$edge.length
ggtree::ggtree(datertree)

# parametric bootstrap: 
pb <- parboot(td, nreps=25)
#> Running in quiet mode. To print progress, set quiet=FALSE.
#> NOTE: Running with overrideSearchRoot will speed up execution but may underestimate variance.
#> NOTE: Running with overrideTempConstraint will speed up execution but may underestimate variance. Bootstrap tree replicates may have negative branch lengths.
# time of common ancestor
pb
#>                            pseudo ML        2.5 %       97.5 %
#> Time of common ancestor 8.448801e+01 5.341243e+01 8.448801e+01
#> Mean substitution rate  4.832436e-05 3.760551e-05 6.209843e-05
#> 
#>  For more detailed output, $trees provides a list of each fit to each simulation
pb$timeOfMRCA_CI
#>     2.5%    97.5% 
#> 53.41243 84.48801
# plot lineages through time
plot(pb)

19 elements = 10 tips and 9 internal nodes

tre_ntip <- ape::Ntip(tre)
tre_nnode <- ape::Nnode(tre)
tre_all <- tre_ntip + tre_nnode

25 replicates

# x <- 17
# sapply( pb$trees, function(tr) tr$timeOfMRCA + node.depth.edgelength(tr)[x] ) |>
#   quantile(prob=c(.025, .975))

out <- NULL
for (x in 1:tre_all) {
  new <- sapply( pb$trees, function(tr) tr$timeOfMRCA + node.depth.edgelength(tr)[x] ) |>
    quantile(prob=c(.025, .975)) %>% as_tibble() %>% mutate(element = x)
  out <- bind_rows(out,new)
  # out %>% print()
}

out %>% 
  group_by(element) %>% 
  mutate(value_min = min(value)) %>% 
  ungroup() %>% 
  mutate(value = as.character(value)) %>% 
  filter(!magrittr::is_in(element,1:tre_ntip)) %>% 
  arrange(value_min) %>% 
  print(n=Inf)
#> # A tibble: 18 × 3
#>    value            element value_min
#>    <chr>              <int>     <dbl>
#>  1 53.4124256271064      11      53.4
#>  2 84.488007707987       11      53.4
#>  3 68.1757009803594      17      68.2
#>  4 86.585635910742       17      68.2
#>  5 70.8420537365323      18      70.8
#>  6 88.683264113497       18      70.8
#>  7 112.003506340798      19     112. 
#>  8 147.141767984722      19     112. 
#>  9 120.818052033801      12     121. 
#> 10 137.292099660262      12     121. 
#> 11 128.566465760904      15     129. 
#> 12 139.389727863017      15     129. 
#> 13 156.951521054786      16     157. 
#> 14 210.067560752628      16     157. 
#> 15 167.536981710331      13     168. 
#> 16 201.498920742888      13     168. 
#> 17 223.480924229751      14     223. 
#> 18 240.923771162052      14     223.

the uncertainty of nodes is listed after the tips for that reason, we drop all the rows from 1 to ‘number of tips’ we arrange the values in ascendant order and the first value match the MRCA

pb$timeOfMRCA_CI
#>     2.5%    97.5% 
#> 53.41243 84.48801
# end

Created on 2024-09-03 with reprex v2.1.0

@avallecam
Copy link
Author

Easy copy and paste code

# pak::pak("treedater")

library(treedater)
library(tidyverse)

# make a random tree
tre <- ape::rtree(10)
# simulate sample times based on distance from root to tip:
sts <- setNames( ape::node.depth.edgelength(tre)[1:ape::Ntip(tre)], tre$tip.label)
# modify edge length to represent evolutionary distance with rate 1e-3:
tre$edge.length <- tre$edge.length * 1e-3

ggtree::ggtree(tre)

# treedater: 
td <- dater(tre = tre, sts = sts*100, s=1000, clock='strict', omega0=.0015)
# note: I multiplied sample times to get integer values

datertree <- td$intree
datertree$edge.length <- td$edge.length
ggtree::ggtree(datertree)

# parametric bootstrap: 
pb <- parboot(td, nreps=25)
# time of common ancestor
pb
pb$timeOfMRCA_CI
# plot lineages through time
plot(pb)

#' 19 elements = 10 tips and 9 internal nodes
tre_ntip <- ape::Ntip(tre)
tre_nnode <- ape::Nnode(tre)
tre_all <- tre_ntip + tre_nnode
#' 25 replicates

# x <- 17
# sapply( pb$trees, function(tr) tr$timeOfMRCA + node.depth.edgelength(tr)[x] ) |>
#   quantile(prob=c(.025, .975))

out <- NULL
for (x in 1:tre_all) {
  new <- sapply( pb$trees, function(tr) tr$timeOfMRCA + node.depth.edgelength(tr)[x] ) |>
    quantile(prob=c(.025, .975)) %>% as_tibble() %>% mutate(element = x)
  out <- bind_rows(out,new)
  # out %>% print()
}

out %>% 
  group_by(element) %>% 
  mutate(value_min = min(value)) %>% 
  ungroup() %>% 
  mutate(value = as.character(value)) %>% 
  filter(!magrittr::is_in(element,1:tre_ntip)) %>% 
  arrange(value_min) %>% 
  print(n=Inf)

#' the uncertainty of nodes is listed after the tips
#' for that reason, we drop all the rows from 1 to 'number of tips'
#' we arrange the values in ascendant order
#' and the first value match the MRCA
pb$timeOfMRCA_CI

# end

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment