Created
April 2, 2017 01:29
-
-
Save pmagwene/51e79ed29477164a758b8137889372ea to your computer and use it in GitHub Desktop.
Creates a plot showing the time-series expression of genes within each cluster, overlaid with the within-cluster mean.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dendextend) | |
library(tidyr) | |
library(dplyr) | |
library(magrittr) | |
library(ggplot2) | |
# Load the data in "wide" format: genes in columns, alongside the `expt`
# and `time` identifier columns.
spellman <- read.csv("spellman-reformated.csv")

# Restructure into "long" format: every column except `expt` and `time` is
# stacked into a `gene` (column name) / `expression` (value) pair.
# pivot_longer() replaces the superseded gather(). It returns a tibble and
# emits rows input-row by input-row rather than column by column; the
# filtering, joining, grouping and plotting done further down do not depend
# on row order.
spellman.long <- pivot_longer(
  spellman,
  cols = -c(expt, time),
  names_to = "gene",
  values_to = "expression"
)
# Gene-by-gene correlation matrix computed from the alpha factor
# experiment only. The `expt` and `time` columns are dropped before calling
# cor() so that only the remaining (gene) columns are correlated;
# "pairwise.complete.obs" lets each pair of columns use the rows where both
# values are present.
alpha.cor <- cor(
  select(filter(spellman, expt == "alpha"), -expt, -time),
  use = "pairwise.complete.obs"
)
# Hierarchically cluster the alpha factor data with complete linkage,
# using (1 - correlation) as the dissimilarity between genes.
alpha.clust <- (1 - alpha.cor) %>%
  as.dist() %>%
  hclust(method = "complete")
# Cut the tree to yield 8 clusters. order_clusters_as_data = TRUE asks
# dendextend to report the assignments in the order of the original data
# rather than in dendrogram order.
clusters <- dendextend::cutree(
  alpha.clust,
  k = 8,
  order_clusters_as_data = TRUE
)

# Data frame holding the cluster membership of each gene. `cluster` is an
# explicit factor; `gene` is pinned to character (stringsAsFactors = FALSE)
# so it has the same type as the `gene` column of the long-format data on
# every R version -- before R 4.0 data.frame() converted strings to factors
# by default, which makes the later join on `gene` a factor/character
# mismatch.
clusters.df <- data.frame(
  gene = names(clusters),
  cluster = as.factor(clusters),
  stringsAsFactors = FALSE
)
# Attach cluster membership to the alpha factor measurements.
# The left join keeps every row of the filtered long-format data and adds
# the `cluster` column from clusters.df, matching rows on gene name.
alpha.long <- left_join(
  filter(spellman.long, expt == "alpha"),
  clusters.df,
  by = "gene"
)
# Mean expression at each time point within each cluster; missing
# measurements are ignored (na.rm = TRUE).
# summarize() only drops the last grouping variable, so without ungroup()
# the result would stay grouped by `cluster` and any later dplyr verb would
# silently operate per cluster.
cluster.means <-
  alpha.long %>%
  group_by(cluster, time) %>%
  summarize(mean.exp = mean(expression, na.rm = TRUE)) %>%
  ungroup()
# Figure: time-varying gene expression, one facet per cluster laid out in
# 4 columns. Every gene is drawn as a faint line, and each cluster's mean
# time series is drawn on top as a thicker line coloured by cluster.
ggplot(alpha.long, aes(x = time, y = expression, group = gene)) +
  geom_line(alpha = 0.25) +
  geom_line(
    data = cluster.means,
    # group = NULL removes the per-gene grouping inherited from ggplot();
    # cluster.means has no `gene` column.
    mapping = aes(x = time, y = mean.exp, group = NULL, color = cluster),
    size = 1.1
  ) +
  facet_wrap(~cluster, ncol = 4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment