Last active
April 8, 2020 06:35
-
-
Save genomewalker/8ddc673e07eeadc7c4a535c4f258ba8c to your computer and use it in GitHub Desktop.
arctic_plants
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(DESeq2) | |
library(phyloseq) | |
library(taxonomizr) | |
# Read metadata | |
metadata <- read_csv("data/metadata_v10.csv", ) | |
names(metadata) <- c("label", "age_ka_bp", "age_type", "site_abrev", "region", "dating_lab_id", | |
"C14_age", "OSL_age", "age_errors", "group1", "lat", "lon") | |
# Read abundance tables | |
data_raw <- read_csv("data/readsNO_genus.csv") | |
data_norm <- read_csv("data/readsNO-norm_genus_richness.csv") | |
data_prop <- read_csv("data/readsNO-prop_genus_with_metadata.csv") | |
# get intersection of samples and taxa | |
data_all <- data_raw %>% | |
pivot_longer(cols = c(-taxa, -taxa_ID), names_to = "sample", values_to = "counts") %>% | |
inner_join(data_norm %>% | |
rename(taxa = genus) %>% | |
pivot_longer(cols = -taxa, names_to = "sample", values_to = "counts_scaled")) %>% | |
inner_join(data_prop %>% | |
select(-age, -region, -group1) %>% | |
pivot_longer(cols = -sample, names_to = "taxa", values_to = "prop")) | |
samples <- data_all$sample %>% unique() | |
taxas <- data_all$taxa_ID %>% unique() | |
# Get full taxonomic paths | |
tax_paths <- taxas %>% | |
map_dfr(function(X){getTaxonomy(X) %>% as_tibble(rownames = "taxid")}) | |
# Create a phyloseq object to keep everything nice and tidy | |
physeq_raw <- phyloseq(otu_table(data_raw %>% | |
select(taxa_ID, all_of(samples)) %>% | |
column_to_rownames("taxa_ID") %>% as.data.frame(), taxa_are_rows = TRUE), | |
sample_data(metadata %>% column_to_rownames("label") %>% as.data.frame()), | |
tax_table(tax_paths %>% column_to_rownames("taxid") %>% as.matrix())) | |
# Convert physeq object to DESeq2 | |
physeq_raw_ds <- phyloseq_to_deseq2(physeq_raw, ~1) | |
# We need a custom geometric function to deal with 0s | |
gm_mean = function(x, na.rm=TRUE){ | |
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) | |
} | |
geoMeans <- apply(counts(physeq_raw_ds), 1, gm_mean) | |
# We want to use poscounts as its designed for metagenomic studies | |
diagdds <- estimateSizeFactors(physeq_raw_ds, type="poscounts", locfunc = genefilter::shorth, geoMeans = geoMeans) | |
diagdds <- estimateDispersions(diagdds, fitType='local') | |
diagvst <- varianceStabilizingTransformation(diagdds, blind = FALSE) | |
diagvst <- assay(diagvst) | |
# As we will have negative values (probably correspond to “less than one count” after rescaling) | |
# we will convert them to 0 to avoid problems with distance/dissimilarity measures that have problems with | |
# negative values | |
diagvst[diagvst < 0] <- 0 | |
physeq_raw_vst <- physeq_raw | |
physeq_raw_norm <- physeq_raw | |
# Those are the variance stabilized counts | |
otu_table(physeq_raw_vst) <- otu_table(diagvst, taxa_are_rows = TRUE) | |
# This just divides each sample by the size factor. | |
otu_table(physeq_raw_norm) <- otu_table(counts(diagdds, normalized = TRUE), taxa_are_rows = TRUE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment