Skip to content

Instantly share code, notes, and snippets.

@padpadpadpad
Last active July 24, 2024 16:59
Show Gist options
  • Save padpadpadpad/4201dc530b18a8d36363d37286edfc7c to your computer and use it in GitHub Desktop.
Save padpadpadpad/4201dc530b18a8d36363d37286edfc7c to your computer and use it in GitHub Desktop.
Example for plotting prcomp() and betadisper() models in ggplot2
# creating pretty plots from prcomp and vegan in R
# clear workspace
mise::mise(vars = TRUE, pkgs = TRUE, console = TRUE, figs = TRUE)
# can do multiple comparisons of adonis factors
# devtools::install_github('leffj/mctoolsr')
# load in packages
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(magrittr)
library(mctoolsr)
library(broom)
# load in example data from vegan
data(varespec)
# First 16 sites grazed, remaining 8 sites ungrazed
# create a metadata dataset
# creating a column to identify each row of the varespec data with
d_vars <- data.frame(groups = as.character(c(rep('grazed',16), rep('ungrazed',8))),
row = row.names(varespec))
# plotting a PCA in ggplot2
PCA <- prcomp(varespec)
# view summary stats of PCA
plot(PCA, type = "l")
summary(PCA)
# view biplot
biplot(PCA)
# These are not very good!!!!
# can extract all the data using broom
# get PCA summary
d_PCA_sum <- broom::tidy(PCA, matrix = 'pcs')
# get PCA loadings
d_PCA_load <- broom::tidy(PCA, matrix = 'v') %>%
mutate(., PC = paste('PC_', PC, sep = '')) %>%
spread(., PC, value)
# subset for just top 15 loadings
# if you have a lot of genes this is advisable otherwise you will have so much text!
# I calculate the distance of each point in the PC1 and PC2 from 0,0 and subset those
# function to get distance from 00
dist_from_00 <- function(x, y){
return(sqrt((0 - x)^2+(0-y)^2))
}
top_15_loadings <- mutate(d_PCA_load, distance = dist_from_00(PC_1, PC_2)) %>%
top_n(., 15, distance)
# get PCA points
d_PCA_points <- broom::tidy(PCA, matrix = 'samples') %>%
mutate(., row = as.character(row),
PC = paste('PC_', PC, sep = '')) %>%
merge(., d_vars, by = 'row') %>%
spread(., PC, value)
# fancy ggplot of PCA
ggplot() +
geom_text(aes(PC_1, PC_2, label = column, hjust = 0.5*(1 - sign(PC_1)), vjust = 0.5*(1-sign(PC_2))), d_PCA_load, col = 'red4') +
geom_segment(aes(x = 0, y = 0, yend = PC_2, xend = PC_1, group = column), d_PCA_load, arrow = arrow(length = unit(0.01, "npc")), col = 'red4') +
geom_point(aes(PC_1, PC_2, col = groups), d_PCA_points) +
theme_bw(base_size = 12, base_family = 'Helvetica') +
coord_fixed(sqrt(d_PCA_sum$percent[2]/d_PCA_sum$percent[1])) +
stat_ellipse(aes(PC_1, PC_2, col = groups, group = groups), d_PCA_points, type = "t") +
ylab('PCA 2 [25.4%]') +
xlab('PCA 1 [53.8%]') +
scale_fill_manual('', values = c('black', 'grey'), labels = c('Grazed', 'Ungrazed')) +
scale_color_manual('', values = c('black', 'grey'), labels = c('Grazed', 'Ungrazed')) +
theme(legend.position = c(0.85, 0.85))
# this does not work too well for this example dataset but will work better for others!
# do adonis and multiple contrasts
# distance matrices
Euclid_matrix <- dist(varespec)
# make some character vectors factors
d_vars <- mutate_at(d_vars, c('groups'), as.factor)
row.names(d_vars) <- d_vars$row
# adonis
mod1_euclid <- adonis(Euclid_matrix ~ groups, data = d_vars)
# do pairwise contrasts - look at different ways of doing this! ####
# using mctoolsr from GitHub - this looks good
# NOTE - need to make sure that variable dataset has rownames equivalent to the distance matrix
euclid_contrasts <- mctoolsr::calc_pairwise_permanovas(Euclid_matrix, d_vars, 'groups')
# obviously a bit redundant when there are only two levels
# make a column of all unique combinations of factors and insert instead of "groups"
# betadisper() analysis and plotting in ggplot2 ####
# Looking at variance across groups ####
# can only do one factor would use every combination of id
mod1_dispers <- betadisper(Euclid_matrix, d_vars$groups)
# plot of model
plot(mod1_dispers)
boxplot(mod1_dispers)
# anova
anova(mod1_dispers)
# Permutation test for F
pmod <- permutest(mod1_dispers, pairwise = TRUE)
# Tukey's Honest Significant Differences
T_HSD <- TukeyHSD(mod1_dispers)
# get betadisper dataframes ####
# have written functions to grab the necessary data from the betadisper object
# functions ####
# getting distances from betadisper() object
betadisper_distances <- function(model){
temp <- data.frame(group = model$group)
temp2 <- data.frame(distances = unlist(model$distances))
temp2$sample <- row.names(temp2)
temp <- cbind(temp, temp2)
temp <- dplyr::select(temp, group, sample, dplyr::everything())
row.names(temp) <- NULL
return(temp)
}
# getting eigenvalues out of betadisper() object
betadisper_eigenvalue <- function(model){
temp <- data.frame(eig = unlist(model$eig))
temp$PCoA <- row.names(temp)
row.names(temp) <- NULL
return(temp)
}
# getting the eigenvectors out of a betadisper() object
betadisper_eigenvector <- function(model){
temp <- data.frame(group = model$group)
temp2 <- data.frame(unlist(model$vectors))
temp2$sample <- row.names(temp2)
temp <- cbind(temp, temp2)
temp <- dplyr::select(temp, group, sample, dplyr::everything())
row.names(temp) <- NULL
return(temp)
}
# get centroids
betadisper_centroids <- function(model){
temp <- data.frame(unlist(model$centroids))
temp$group <- row.names(temp)
temp <- dplyr::select(temp, group, dplyr::everything())
row.names(temp) <- NULL
return(temp)
}
# betadisper data
get_betadisper_data <- function(model){
temp <- list(distances = betadisper_distances(model),
eigenvalue = betadisper_eigenvalue(model),
eigenvector = betadisper_eigenvector(model),
centroids = betadisper_centroids(model))
return(temp)
}
# get betadisper data ####
betadisper_dat <- get_betadisper_data(mod1_dispers)
# do some transformations on the data
betadisper_dat$eigenvalue <- mutate(betadisper_dat$eigenvalue, percent = eig/sum(eig))
# add convex hull points ####
# this could be put in a function
betadisper_dat$chull <- group_by(betadisper_dat$eigenvector, group) %>%
do(data.frame(PCoA1 = .$PCoA1[c(chull(.$PCoA1, .$PCoA2), chull(.$PCoA1, .$PCoA2)[1])],
PCoA2 = .$PCoA2[c(chull(.$PCoA1, .$PCoA2), chull(.$PCoA1, .$PCoA2)[1])])) %>%
data.frame()
# combine centroid and eigenvector dataframes for plotting
betadisper_lines <- merge(select(betadisper_dat$centroids, group, PCoA1, PCoA2), select(betadisper_dat$eigenvector, group, PCoA1, PCoA2), by = c('group'))
# Now the dataframes are all ready to be completely customisable in ggplot
# plot betadispersion plot
ggplot() +
geom_point(aes(PCoA1, PCoA2, col = group), betadisper_dat$centroids, size = 4) +
geom_point(aes(PCoA1, PCoA2, col = group), betadisper_dat$eigenvector) +
geom_path(aes(PCoA1, PCoA2, col = group, group = group), betadisper_dat$chull ) +
geom_segment(aes(x = PCoA1.x, y = PCoA2.x, yend = PCoA2.y, xend = PCoA1.y, group = row.names(betadisper_lines), col = group), betadisper_lines) +
theme_bw(base_size = 12, base_family = 'Helvetica') +
ylab('PCoA Axis 2 [25.4%]') +
xlab('PCoA Axis 2 [53.8%]') +
scale_color_manual('', values = c('black', 'grey'), labels = c("Grazed", 'Ungrazed')) +
theme(legend.position = c(0.9, 0.3)) +
coord_fixed(sqrt(d_PCA_sum$percent[2]/d_PCA_sum$percent[1]))
# plot distances from centroid
ggplot(betadisper_dat$distances, aes(group, distances, fill = group, col = group)) +
geom_boxplot(aes(fill = group, col = group), outlier.shape = NA, width = 0.5, position = position_dodge(width = 0.55)) +
stat_summary(position = position_dodge(width = 0.55), geom = 'crossbar', fatten = 0, color = 'white', width = 0.4, fun.data = function(x){ return(c(y=median(x), ymin=median(x), ymax=median(x)))}) +
geom_point(aes(group, distances, col = group), shape = 21, fill ='white', position = position_jitterdodge(dodge.width = 0.55, jitter.width = 0.2)) +
theme_bw(base_size = 12, base_family = 'Helvetica') +
scale_color_manual('', values = c('black', 'grey'), labels = c("Grazed", 'Ungrazed')) +
scale_fill_manual('', values = c('black', 'grey'), labels = c("Grazed", 'Ungrazed')) +
ylab('Distance to centroid') +
theme(legend.position = 'none') +
xlab('') +
scale_x_discrete(labels = c('Grazed', 'Ungrazed'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment