Last active
July 24, 2024 16:59
-
-
Save padpadpadpad/4201dc530b18a8d36363d37286edfc7c to your computer and use it in GitHub Desktop.
Example for plotting prcomp() and betadisper() models in ggplot2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# creating pretty plots from prcomp and vegan in R | |
# clear workspace | |
mise::mise(vars = TRUE, pkgs = TRUE, console = TRUE, figs = TRUE) | |
# can do multiple comparisons of adonis factors | |
# devtools::install_github('leffj/mctoolsr') | |
# load in packages | |
library(vegan) | |
library(ggplot2) | |
library(dplyr) | |
library(tidyr) | |
library(magrittr) | |
library(mctoolsr) | |
library(broom) | |
# load in example data from vegan | |
data(varespec) | |
# First 16 sites grazed, remaining 8 sites ungrazed | |
# create a metadata dataset | |
# creating a column to identify each row of the varespec data with | |
d_vars <- data.frame(groups = as.character(c(rep('grazed',16), rep('ungrazed',8))), | |
row = row.names(varespec)) | |
# plotting a PCA in ggplot2 | |
PCA <- prcomp(varespec) | |
# view summary stats of PCA | |
plot(PCA, type = "l") | |
summary(PCA) | |
# view biplot | |
biplot(PCA) | |
# These are not very good!!!! | |
# can extract all the data using broom | |
# get PCA summary | |
d_PCA_sum <- broom::tidy(PCA, matrix = 'pcs') | |
# get PCA loadings | |
d_PCA_load <- broom::tidy(PCA, matrix = 'v') %>% | |
mutate(., PC = paste('PC_', PC, sep = '')) %>% | |
spread(., PC, value) | |
# subset for just top 15 loadings | |
# if you have a lot of genes this is advisable otherwise you will have so much text! | |
# I calculate the distance of each point in the PC1 and PC2 from 0,0 and subset those | |
# function to get distance from 00 | |
dist_from_00 <- function(x, y){ | |
return(sqrt((0 - x)^2+(0-y)^2)) | |
} | |
top_15_loadings <- mutate(d_PCA_load, distance = dist_from_00(PC_1, PC_2)) %>% | |
top_n(., 15, distance) | |
# get PCA points | |
d_PCA_points <- broom::tidy(PCA, matrix = 'samples') %>% | |
mutate(., row = as.character(row), | |
PC = paste('PC_', PC, sep = '')) %>% | |
merge(., d_vars, by = 'row') %>% | |
spread(., PC, value) | |
# fancy ggplot of PCA | |
ggplot() + | |
geom_text(aes(PC_1, PC_2, label = column, hjust = 0.5*(1 - sign(PC_1)), vjust = 0.5*(1-sign(PC_2))), d_PCA_load, col = 'red4') + | |
geom_segment(aes(x = 0, y = 0, yend = PC_2, xend = PC_1, group = column), d_PCA_load, arrow = arrow(length = unit(0.01, "npc")), col = 'red4') + | |
geom_point(aes(PC_1, PC_2, col = groups), d_PCA_points) + | |
theme_bw(base_size = 12, base_family = 'Helvetica') + | |
coord_fixed(sqrt(d_PCA_sum$percent[2]/d_PCA_sum$percent[1])) + | |
stat_ellipse(aes(PC_1, PC_2, col = groups, group = groups), d_PCA_points, type = "t") + | |
ylab('PCA 2 [25.4%]') + | |
xlab('PCA 1 [53.8%]') + | |
scale_fill_manual('', values = c('black', 'grey'), labels = c('Grazed', 'Ungrazed')) + | |
scale_color_manual('', values = c('black', 'grey'), labels = c('Grazed', 'Ungrazed')) + | |
theme(legend.position = c(0.85, 0.85)) | |
# this does not work too well for this example dataset but will work better for others! | |
# do adonis and multiple contrasts | |
# distance matrices | |
Euclid_matrix <- dist(varespec) | |
# make some character vectors factors | |
d_vars <- mutate_at(d_vars, c('groups'), as.factor) | |
row.names(d_vars) <- d_vars$row | |
# adonis | |
mod1_euclid <- adonis(Euclid_matrix ~ groups, data = d_vars) | |
# do pairwise contrasts - look at different ways of doing this! #### | |
# using mctoolsr from GitHub - this looks good | |
# NOTE - need to make sure that variable dataset has rownames equivalent to the distance matrix | |
euclid_contrasts <- mctoolsr::calc_pairwise_permanovas(Euclid_matrix, d_vars, 'groups') | |
# obviously a bit redundant when there are only two levels | |
# make a column of all unique combinations of factors and insert instead of "groups" | |
# betadisper() analysis and plotting in ggplot2 #### | |
# Looking at variance across groups #### | |
# can only do one factor would use every combination of id | |
mod1_dispers <- betadisper(Euclid_matrix, d_vars$groups) | |
# plot of model | |
plot(mod1_dispers) | |
boxplot(mod1_dispers) | |
# anova | |
anova(mod1_dispers) | |
# Permutation test for F | |
pmod <- permutest(mod1_dispers, pairwise = TRUE) | |
# Tukey's Honest Significant Differences | |
T_HSD <- TukeyHSD(mod1_dispers) | |
# get betadisper dataframes #### | |
# have written functions to grab the necessary data from the betadisper object | |
# functions #### | |
# getting distances from betadisper() object | |
betadisper_distances <- function(model){ | |
temp <- data.frame(group = model$group) | |
temp2 <- data.frame(distances = unlist(model$distances)) | |
temp2$sample <- row.names(temp2) | |
temp <- cbind(temp, temp2) | |
temp <- dplyr::select(temp, group, sample, dplyr::everything()) | |
row.names(temp) <- NULL | |
return(temp) | |
} | |
# getting eigenvalues out of betadisper() object | |
betadisper_eigenvalue <- function(model){ | |
temp <- data.frame(eig = unlist(model$eig)) | |
temp$PCoA <- row.names(temp) | |
row.names(temp) <- NULL | |
return(temp) | |
} | |
# getting the eigenvectors out of a betadisper() object | |
betadisper_eigenvector <- function(model){ | |
temp <- data.frame(group = model$group) | |
temp2 <- data.frame(unlist(model$vectors)) | |
temp2$sample <- row.names(temp2) | |
temp <- cbind(temp, temp2) | |
temp <- dplyr::select(temp, group, sample, dplyr::everything()) | |
row.names(temp) <- NULL | |
return(temp) | |
} | |
# get centroids | |
betadisper_centroids <- function(model){ | |
temp <- data.frame(unlist(model$centroids)) | |
temp$group <- row.names(temp) | |
temp <- dplyr::select(temp, group, dplyr::everything()) | |
row.names(temp) <- NULL | |
return(temp) | |
} | |
# betadisper data | |
get_betadisper_data <- function(model){ | |
temp <- list(distances = betadisper_distances(model), | |
eigenvalue = betadisper_eigenvalue(model), | |
eigenvector = betadisper_eigenvector(model), | |
centroids = betadisper_centroids(model)) | |
return(temp) | |
} | |
# get betadisper data #### | |
betadisper_dat <- get_betadisper_data(mod1_dispers) | |
# do some transformations on the data | |
betadisper_dat$eigenvalue <- mutate(betadisper_dat$eigenvalue, percent = eig/sum(eig)) | |
# add convex hull points #### | |
# this could be put in a function | |
betadisper_dat$chull <- group_by(betadisper_dat$eigenvector, group) %>% | |
do(data.frame(PCoA1 = .$PCoA1[c(chull(.$PCoA1, .$PCoA2), chull(.$PCoA1, .$PCoA2)[1])], | |
PCoA2 = .$PCoA2[c(chull(.$PCoA1, .$PCoA2), chull(.$PCoA1, .$PCoA2)[1])])) %>% | |
data.frame() | |
# combine centroid and eigenvector dataframes for plotting | |
betadisper_lines <- merge(select(betadisper_dat$centroids, group, PCoA1, PCoA2), select(betadisper_dat$eigenvector, group, PCoA1, PCoA2), by = c('group')) | |
# Now the dataframes are all ready to be completely customisable in ggplot | |
# plot betadispersion plot | |
ggplot() + | |
geom_point(aes(PCoA1, PCoA2, col = group), betadisper_dat$centroids, size = 4) + | |
geom_point(aes(PCoA1, PCoA2, col = group), betadisper_dat$eigenvector) + | |
geom_path(aes(PCoA1, PCoA2, col = group, group = group), betadisper_dat$chull ) + | |
geom_segment(aes(x = PCoA1.x, y = PCoA2.x, yend = PCoA2.y, xend = PCoA1.y, group = row.names(betadisper_lines), col = group), betadisper_lines) + | |
theme_bw(base_size = 12, base_family = 'Helvetica') + | |
ylab('PCoA Axis 2 [25.4%]') + | |
xlab('PCoA Axis 2 [53.8%]') + | |
scale_color_manual('', values = c('black', 'grey'), labels = c("Grazed", 'Ungrazed')) + | |
theme(legend.position = c(0.9, 0.3)) + | |
coord_fixed(sqrt(d_PCA_sum$percent[2]/d_PCA_sum$percent[1])) | |
# plot distances from centroid | |
ggplot(betadisper_dat$distances, aes(group, distances, fill = group, col = group)) + | |
geom_boxplot(aes(fill = group, col = group), outlier.shape = NA, width = 0.5, position = position_dodge(width = 0.55)) + | |
stat_summary(position = position_dodge(width = 0.55), geom = 'crossbar', fatten = 0, color = 'white', width = 0.4, fun.data = function(x){ return(c(y=median(x), ymin=median(x), ymax=median(x)))}) + | |
geom_point(aes(group, distances, col = group), shape = 21, fill ='white', position = position_jitterdodge(dodge.width = 0.55, jitter.width = 0.2)) + | |
theme_bw(base_size = 12, base_family = 'Helvetica') + | |
scale_color_manual('', values = c('black', 'grey'), labels = c("Grazed", 'Ungrazed')) + | |
scale_fill_manual('', values = c('black', 'grey'), labels = c("Grazed", 'Ungrazed')) + | |
ylab('Distance to centroid') + | |
theme(legend.position = 'none') + | |
xlab('') + | |
scale_x_discrete(labels = c('Grazed', 'Ungrazed')) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment