Created
March 31, 2017 05:03
-
-
Save pmagwene/3cadd4af4c98565da0a34a7cc5901bc7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used by this script:
#   tidyr    - reshaping between wide and long layouts
#   dplyr    - filter / arrange / select / group_by / summarize verbs
#   magrittr - the %>% pipe and the %$% exposition pipe
#   ggplot2  - plotting (not used by the steps in this part of the script)
library(tidyr)
library(dplyr)
library(magrittr)
library(ggplot2)

# Read data from the Causton heat expression data set
# (see groups 4 and 9 data/focal papers).
# The path is relative, so the CSV must be in the current working directory.
causton <- read.csv("causton-2001-heat-expression.csv")

# Take a look at the data. You'll see that the genes are in rows,
# and the time points and gene names are in columns.
head(causton)
# Reshape the data from "wide" to "long" format: every column other than
# ORF and Gene is stacked into a pair of columns, `time` (the name of the
# original column) and `expression` (the value it held). The result has one
# row per gene / time-point combination.
# pivot_longer() is the current tidyr replacement for the superseded gather().
causton.long <-
  causton %>%
  pivot_longer(
    cols = -c(ORF, Gene),
    names_to = "time",
    values_to = "expression"
  ) %>%
  arrange(ORF) # sort by ORF

# Look at the data after reshaping
head(causton.long)
# Calculate the variance of expression for each gene (one row per ORF).
# Missing expression values are ignored via na.rm = TRUE.
causton.var <-
  causton.long %>%
  group_by(ORF) %>%
  summarize(var = var(expression, na.rm = TRUE))

# Find the ORFs with non-zero variance.
# A variance of NA (fewer than two non-missing values for that ORF) is also
# excluded; filter() already dropped such rows implicitly because NA != 0
# evaluates to NA, and the is.na() test just makes that visible.
# NOTE use of the %$% operator from the magrittr pkg: it exposes the columns
# of the left-hand side, so the expression below yields the ORF column as a
# plain vector rather than a data frame.
non.zero.var.ORFs <-
  causton.var %>%
  filter(!is.na(var), var != 0) %$%
  ORF
# Only keep genes with non-zero variance
causton.trim <-
  causton.long %>%
  filter(ORF %in% non.zero.var.ORFs)

# Reshape to the "wide" format appropriate for calculating correlations, with
# genes (ORFs) in columns and time points in rows, after dropping the Gene
# name column so that `time` is the only identifier left for each row.
# pivot_wider() is the current tidyr replacement for the superseded spread();
# names_sort = TRUE keeps the ORF columns in sorted order, as spread() did.
causton.wide <-
  causton.trim %>%
  select(-Gene) %>% # drop the Gene name column
  pivot_wider(
    names_from = ORF,
    values_from = expression,
    names_sort = TRUE
  )
# Calculate the correlations between every pair of genes.
# After `time` is dropped, each remaining column is one ORF, so cor() returns
# an ORF-by-ORF correlation matrix. use = "pairwise.complete.obs" computes
# each correlation from the time points where both genes have a value,
# instead of returning NA whenever either gene has a missing value.
causton.cor <-
  causton.wide %>%
  select(-time) %>% # drop the time column before calculating correlations
  cor(use = "pairwise.complete.obs")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment