Last active
October 8, 2015 17:26
-
-
Save nachocab/5a9c84b68776bd23dbb7 to your computer and use it in GitHub Desktop.
Using yaml to deal with gene sets in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the yaml package only when it is missing, so that sourcing this file
# repeatedly does not trigger a fresh download and install each time.
if (!requireNamespace("yaml", quietly = TRUE)) {
  install.packages("yaml")
}
# Recursively collect the leaf gene vectors of a parsed YAML tree into a flat
# list whose names are the ":"-joined paths of the enclosing set names.
.collect_gene_sets <- function(node, path = character(0)) {
  if (!is.list(node)) {
    # Empty leaves (e.g. a set declared without genes) contribute nothing.
    if (length(node) == 0L) {
      return(list())
    }
    leaf <- list(unname(node))
    names(leaf) <- paste(path, collapse = ":")
    return(leaf)
  }
  child_names <- names(node)
  if (is.null(child_names)) {
    child_names <- rep("", length(node))
  }
  collected <- lapply(seq_along(node), function(i) {
    # Unnamed children (plain YAML sequences) stay in their parent's group.
    child_path <- if (nzchar(child_names[i])) c(path, child_names[i]) else path
    .collect_gene_sets(node[[i]], child_path)
  })
  # Seed with an empty list so the result is a list even with no children.
  do.call(c, c(list(list()), collected))
}

#' Load a gene set file
#'
#' Reads `file.path(path, filename)`. For YAML files the (possibly nested)
#' gene sets are returned both grouped by set and as one de-duplicated vector.
#' For any other format the file is read with `scan()` as
#' whitespace-separated strings.
#'
#' @param filename Name of the file inside `path`.
#' @param format `"yaml"` (default) to parse the file with
#'   `yaml::yaml.load_file()`; any other value reads it as plain text.
#' @param path Directory holding the gene set files. Change the default to
#'   your own local directory.
#' @return For `format = "yaml"`, a list with two elements: `nested`, the genes
#'   of each set keyed by the set's path (nesting levels joined with `":"`,
#'   e.g. `"isgs:antiviral"`), and `flat`, the unique genes across all sets in
#'   file order. For other formats, the character vector returned by `scan()`.
gene_list <- function(filename, format = "yaml",
                      path = "/Users/nacho/gene_info/gene_sets") {
  path <- file.path(path, filename)
  if (format == "yaml") {
    # NOTE(review): some versions of the yaml package evaluate `!expr` tags
    # while loading -- only load files you trust; confirm for your version.
    raw_genes <- yaml::yaml.load_file(path)
    # Walk the tree explicitly instead of parsing the names that unlist()
    # generates: those append a numeric suffix per element, so stripping
    # trailing digits also truncates set names such as "cluster1", and "."
    # inside a set name is indistinguishable from a nesting separator.
    # A ":" inside a set name is still ambiguous with the path separator.
    leaves <- .collect_gene_sets(raw_genes)
    if (length(leaves) == 0L) {
      return(list(nested = list(), flat = character(0)))
    }
    set_sizes <- vapply(leaves, length, integer(1))
    groups <- rep(names(leaves), times = set_sizes)
    all_genes <- unlist(leaves, use.names = FALSE)
    # simplify = FALSE keeps `nested` list-like even when every set holds a
    # single gene (the default would collapse it to a character array).
    genes <- list(nested = tapply(all_genes, groups, unname, simplify = FALSE))
    genes$flat <- unique(all_genes)
  } else {
    genes <- scan(path, what = "character")
  }
  genes
}
# example usage: load every gene in isgs.yaml (from the default directory)
# as a single de-duplicated character vector
my_isgs <- gene_list("isgs.yaml")$flat
# Printing my_isgs then shows something like:
# [1] "CXCL10" "CXCL11" "DDX58" ...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment