Skip to content

Instantly share code, notes, and snippets.

@pmagwene
Created March 31, 2017 05:03
Show Gist options
  • Save pmagwene/3cadd4af4c98565da0a34a7cc5901bc7 to your computer and use it in GitHub Desktop.
Save pmagwene/3cadd4af4c98565da0a34a7cc5901bc7 to your computer and use it in GitHub Desktop.
library(tidyr)
library(dplyr)
library(magrittr)
library(ggplot2)
# Load the Causton heat expression data set from its CSV file
# (see the data / focal papers for groups 4 and 9).
causton <- read.csv(file = "causton-2001-heat-expression.csv")

# Preview the first few rows. Per the data layout, each row is a gene;
# the columns hold the gene identifiers (ORF, Gene) and the time points.
head(causton)
# Reshape the data from "wide" to "long" format: every column except the
# ORF and Gene identifiers is gathered into a pair of columns, `time`
# (the former column name) and `expression` (the value). The result is
# then sorted by ORF.
causton.long <- causton %>%
  gather(key = time, value = expression, -ORF, -Gene) %>%
  arrange(ORF)

# Preview the reshaped (long) data
head(causton.long)
# Per-gene variance of expression: one row per ORF, with missing
# expression values ignored when computing the variance.
causton.var <- causton.long %>%
  group_by(ORF) %>%
  summarise(var = var(expression, na.rm = TRUE))
# Collect the identifiers of the ORFs whose variance is non-zero.
# NOTE: filter() keeps only rows where the condition is TRUE, so ORFs whose
# variance is NA are excluded here as well.
# use_series() is magrittr's pipe-friendly alias for `$`; it pulls the ORF
# column out of the filtered table as a plain vector.
non.zero.var.ORFs <- causton.var %>%
  filter(var != 0) %>%
  use_series(ORF)
# Restrict the long-format data to the genes retained above, i.e. those
# whose ORF appears in the non-zero-variance list.
causton.trim <- filter(causton.long, ORF %in% non.zero.var.ORFs)
# Convert back to a "wide" layout suited to computing correlations:
# one column per gene (ORF) and one row per time point. The Gene name
# column is dropped first so that only ORF, time and expression remain
# when spreading.
causton.wide <- causton.trim %>%
  select(-Gene) %>%
  spread(key = ORF, value = expression)
# Correlation matrix between genes. The time column is removed so that only
# the per-gene expression columns are passed to cor(); each pairwise
# correlation uses the observations that are complete for that pair.
causton.cor <- cor(
  select(causton.wide, -time),
  use = "pairwise.complete.obs"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment