Last active
August 6, 2019 11:48
-
-
Save roey-angel/359017cb7d196315276c14423e7d1ef4 to your computer and use it in GitHub Desktop.
Like phyloseq::merge_samples() but retains chr and fct information in sample_data()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Like phyloseq::merge_samples() but retains chr and fct information in sample_data()
#'
#' Merge/agglomerate the samples of a phyloseq object according to a
#' categorical variable contained in its \code{sample_data}.
#' Unlike \code{merge_samples()}, this function applies \code{fun} (the mean by
#' default) only to the numeric variables in \code{sample_data()} and retains
#' all unique values of every non-numeric variable. In case of conflicting
#' entries among the merged samples, all of them are retained, separated by a
#' ",".
#'
#' @details
#' \itemize{
#'   \item Abundances in the \code{otu_table} are summed per group.
#'   \item The values of the grouping variable become the sample names of the
#'     result; the variable itself is dropped from \code{sample_data}.
#'   \item Variables that were factors are returned as factors whose levels are
#'     rebuilt from the merged values; other non-numeric variables are
#'     returned as character.
#'   \item \code{tax_table}, \code{phy_tree} and \code{refseq} components, when
#'     present in \code{ps}, are carried over unchanged.
#'   \item The required packages are used through their namespaces and are not
#'     attached to the search path.
#' }
#'
#' @author Roey Angel
#' @usage MergeSamples(ps = Ps_obj, grouping_name = "Description", fun = "mean")
#' @param ps (Required). A phyloseq object that has sample indices and a
#'   \code{sample_data} component. The default, \code{Ps_obj}, is kept for
#'   backward compatibility only and resolves to an object of that name
#'   visible from where the function was defined.
#'
#' @param grouping_name (Required). A single character string matching a
#'   variable name in the corresponding sample_data of \code{ps}. The variable
#'   must not contain missing values.
#'
#' @param fun (Optional). The function (or its name, or a purrr-style formula)
#'   used to merge the values of each numeric variable within a group. It is
#'   always called with \code{na.rm = TRUE} and must return a single value.
#'   Note that this is (currently) ignored for the otu_table, where the
#'   equivalent function is \code{\link[base]{sum}}, but evaluated via
#'   \code{\link[base]{rowsum}} for efficiency.
#'
#' @return A merged phyloseq object with its sample indices merged according to
#'   the variable indicated by the \code{grouping_name} argument. Its
#'   \code{otu_table} is in sample-by-taxa orientation.
#'
#' @seealso \code{\link[phyloseq]{merge_samples}}
#'
#' @examples
#' \dontrun{
#' data(GlobalPatterns, package = "phyloseq")
#' merged <- MergeSamples(GlobalPatterns, grouping_name = "SampleType")
#' }
#'
#' @export
MergeSamples <- function(ps = Ps_obj, grouping_name = "Description", fun = "mean") {
  # Fail loudly when the dependency is missing; require() would only return
  # FALSE and let the function die later with an unrelated error.
  if (!requireNamespace("phyloseq", quietly = TRUE)) {
    stop("Package 'phyloseq' is required by MergeSamples().", call. = FALSE)
  }
  if (!is.character(grouping_name) || length(grouping_name) != 1L ||
      is.na(grouping_name)) {
    stop("`grouping_name` must be a single character string.", call. = FALSE)
  }

  # Resolve the merging function once: a function, a function name, or a
  # purrr-style lambda such as ~ median(.x).
  if (inherits(fun, "formula")) {
    if (!requireNamespace("purrr", quietly = TRUE)) {
      stop("Package 'purrr' is required when `fun` is a formula.", call. = FALSE)
    }
    merge_fun <- purrr::as_mapper(fun)
  } else {
    merge_fun <- match.fun(fun)
  }

  # Grab sample_data as a plain data.frame (keeps column types and order).
  sam_df <- methods::as(phyloseq::sample_data(ps), "data.frame")
  if (!grouping_name %in% colnames(sam_df)) {
    stop(
      "`grouping_name` ('", grouping_name,
      "') is not a variable in sample_data(ps).",
      call. = FALSE
    )
  }

  # One key per sample; the same keys name the merged rows of both the OTU
  # table and the sample data, so the two components always agree.
  group_key <- as.character(sam_df[[grouping_name]])
  if (anyNA(group_key)) {
    stop(
      "The grouping variable '", grouping_name, "' contains missing values.",
      call. = FALSE
    )
  }
  group_levels <- sort(unique(group_key))
  rows_by_group <- split(
    seq_along(group_key),
    factor(group_key, levels = group_levels)
  )

  # Merge the OTU table. rowsum() needs samples as rows, so only the extracted
  # matrix is transposed when taxa are rows.
  # NOTE(review): assumes otu_table and sample_data list samples in the same
  # order, as the original implementation did - confirm for hand-built objects.
  otu_mat <- methods::as(phyloseq::otu_table(ps), "matrix")
  if (phyloseq::taxa_are_rows(ps)) {
    otu_mat <- t(otu_mat)
  }
  merged_otu <- phyloseq::otu_table(
    rowsum(otu_mat, group_key),
    taxa_are_rows = FALSE
  )

  # Merge one sample variable: `fun` for numeric columns, comma-separated
  # unique values for everything else (factors are restored as factors).
  merge_column <- function(column) {
    if (is.numeric(column)) {
      merged <- lapply(
        rows_by_group,
        function(rows) merge_fun(column[rows], na.rm = TRUE)
      )
      if (any(lengths(merged) != 1L)) {
        stop("`fun` must return a single value per group.", call. = FALSE)
      }
      return(unlist(merged, use.names = FALSE))
    }
    merged <- vapply(
      rows_by_group,
      function(rows) paste(unique(column[rows]), collapse = ","),
      character(1),
      USE.NAMES = FALSE
    )
    if (is.factor(column)) factor(merged) else merged
  }

  # Rebuild the sample data in the original variable order. The grouping
  # variable is not copied: its values are the row (sample) names.
  merged_sd <- data.frame(row.names = group_levels)
  for (var in setdiff(colnames(sam_df), grouping_name)) {
    merged_sd[[var]] <- merge_column(sam_df[[var]])
  }

  # Build the phyloseq object, carrying over every per-taxon component that
  # merging samples does not affect. access() returns NULL for absent slots.
  components <- list(merged_otu, phyloseq::sample_data(merged_sd))
  for (slot_name in c("tax_table", "phy_tree", "refseq")) {
    component <- phyloseq::access(ps, slot_name)
    if (!is.null(component)) {
      components <- c(components, list(component))
    }
  }
  do.call(phyloseq::phyloseq, components)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment