Skip to content

Instantly share code, notes, and snippets.

@genomewalker
Last active April 8, 2020 06:35
Show Gist options
  • Save genomewalker/8ddc673e07eeadc7c4a535c4f258ba8c to your computer and use it in GitHub Desktop.
Save genomewalker/8ddc673e07eeadc7c4a535c4f258ba8c to your computer and use it in GitHub Desktop.
arctic_plants
library(tidyverse)
library(DESeq2)
library(phyloseq)
library(taxonomizr)
# Read metadata
metadata <- read_csv("data/metadata_v10.csv", )
names(metadata) <- c("label", "age_ka_bp", "age_type", "site_abrev", "region", "dating_lab_id",
"C14_age", "OSL_age", "age_errors", "group1", "lat", "lon")
# Read abundance tables
data_raw <- read_csv("data/readsNO_genus.csv")
data_norm <- read_csv("data/readsNO-norm_genus_richness.csv")
data_prop <- read_csv("data/readsNO-prop_genus_with_metadata.csv")
# get intersection of samples and taxa
data_all <- data_raw %>%
pivot_longer(cols = c(-taxa, -taxa_ID), names_to = "sample", values_to = "counts") %>%
inner_join(data_norm %>%
rename(taxa = genus) %>%
pivot_longer(cols = -taxa, names_to = "sample", values_to = "counts_scaled")) %>%
inner_join(data_prop %>%
select(-age, -region, -group1) %>%
pivot_longer(cols = -sample, names_to = "taxa", values_to = "prop"))
samples <- data_all$sample %>% unique()
taxas <- data_all$taxa_ID %>% unique()
# Get full taxonomic paths
tax_paths <- taxas %>%
map_dfr(function(X){getTaxonomy(X) %>% as_tibble(rownames = "taxid")})
# Create a phyloseq object to keep everything nice and tidy
physeq_raw <- phyloseq(otu_table(data_raw %>%
select(taxa_ID, all_of(samples)) %>%
column_to_rownames("taxa_ID") %>% as.data.frame(), taxa_are_rows = TRUE),
sample_data(metadata %>% column_to_rownames("label") %>% as.data.frame()),
tax_table(tax_paths %>% column_to_rownames("taxid") %>% as.matrix()))
# Convert physeq object to DESeq2
physeq_raw_ds <- phyloseq_to_deseq2(physeq_raw, ~1)
# We need a custom geometric function to deal with 0s
gm_mean = function(x, na.rm=TRUE){
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
geoMeans <- apply(counts(physeq_raw_ds), 1, gm_mean)
# We want to use poscounts as its designed for metagenomic studies
diagdds <- estimateSizeFactors(physeq_raw_ds, type="poscounts", locfunc = genefilter::shorth, geoMeans = geoMeans)
diagdds <- estimateDispersions(diagdds, fitType='local')
diagvst <- varianceStabilizingTransformation(diagdds, blind = FALSE)
diagvst <- assay(diagvst)
# As we will have negative values (probably correspond to “less than one count” after rescaling)
# we will convert them to 0 to avoid problems with distance/dissimilarity measures that have problems with
# negative values
diagvst[diagvst < 0] <- 0
physeq_raw_vst <- physeq_raw
physeq_raw_norm <- physeq_raw
# Those are the variance stabilized counts
otu_table(physeq_raw_vst) <- otu_table(diagvst, taxa_are_rows = TRUE)
# This just divides each sample by the size factor.
otu_table(physeq_raw_norm) <- otu_table(counts(diagdds, normalized = TRUE), taxa_are_rows = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment