% Title % Name % Date
List
| # dataframetools | |
| # A few simple functions for performing simple tasks with data.frames | |
| # --- | |
| # Includes functions for: | |
| # | |
| # - reordering data.frames | |
| # - identifying invariant or blank columns | |
| # - identifying groups of columns that are redundant with each other | |
| # - converting all columns of class factor to class character |
| #' Modified version of the ggplot2 plotmatrix function that accepts additional | |
| #' variables for aesthetic mapping. | |
| #' | |
| #' example | |
| #' data(iris) | |
| #' iris.vars <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") | |
| #' ggpairs(data = iris, facet.vars = iris.vars, | |
| #' mapping = aes(color = Species, shape = Species)) | |
| ggpairs <- function (data, facet.vars = colnames(data), facet.scale = "free", |
| # Code written by brentp in response to BioStars question: | |
| # http://www.biostars.org/post/show/6544/ | |
| import random | |
| import sys | |
| def write_random_records(fqa, fqb, N=100000): | |
| """ get N random headers from a fastq file without reading the | |
| whole thing into memory""" | |
| records = sum(1 for _ in open(fqa)) / 4 |
| #' Generate data.frame of feature annotations | |
| #' | |
| #' Use bioconductor annotation packages to create a data.frame of feature/probe | |
| #' annotations. | |
| #' | |
| #' @param chip character string identifying chip model (e.g., "illuminaHumanv2") | |
| #' @param features optional character vector of chip features (i.e., probeset ids) | |
| #' @param vars character vector of desired annotations. These must match objects | |
| #' provided by the annotation package (e.g., "CHR") | |
| #' @param duplicate.values how should duplicate values be handled? The default |
| #' FTP tree mapper | |
| #' Save an FTP site's directory structure as a list. | |
| #' @author Aaron Wolen | |
| #' | |
| #' @examples | |
| #' url <- 'ftp://ftp.genboree.org/EpigenomeAtlas/Current-Release/experiment-sample' | |
| #' roadmap <- map_ftp(url = url, dirs = "Histone_H2BK120ac", recursive = TRUE) | |
| map_ftp <- function(url, dirs, recursive = FALSE) { | |
| require(RCurl, quietly = TRUE) |
| Name | URL | |
|---|---|---|
| ENCODE | ftp://encodeftp.cse.ucsc.edu/pipeline/hg19/ | |
| ENCODE (test) | http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/ | |
| RoadMap | ftp://ftp.genboree.org/EpigenomeAtlas/Current-Release |
| library(IRanges) | |
| library(GenomicRanges) | |
| library(rtracklayer) | |
| # Select a BigWig file | |
| bw.dir <- "/home/chromatin/roadmap/DNase_hypersensitivity/brain_fetal" | |
| bw.file <- dir(bw.dir, full.names = TRUE, pattern = "*.bigWig")[1] | |
| # Specify a genomic range | |
| selection <- GRanges(seqnames = "chr4", |
| install.packages(c("RCurl", "XML")) | |
| bioc.v <- tools:::.BioC_version_associated_with_R_version | |
| repos <- tools:::.read_repositories(file.path(R.home("etc"), "repositories")) | |
| bioc.repo <- repos["BioCsoft",]$URL | |
| bioc.repo <- sub("2\\.\\d+", bioc.v, bioc.repo) | |
| packages <- c("zlibbioc", "BiocGenerics", "Biobase", "IRanges", | |
| "AnnotationDbi", "GenomicRanges", "Biostrings", "Rsamtools", |
| clipboard <- function(x, sep.lines = FALSE){ | |
| clipboard <- pipe('pbcopy', 'w') | |
| if(sep.lines){ | |
| x <- unlist(strsplit(as.character(x), split = ",")) | |
| x <- sub(" ", "", x) | |
| } | |
| write.table(x, clipboard, sep = "\t", | |
| quote = FALSE, col.names = FALSE, row.names = FALSE) |