Let's start with a mapping of Ensembl gene identifiers to Entrez gene identifiers:
xs <- structure(
c("7105", "64102", "9093", "9093"),
.Names = c("ENSG00000000003", "ENSG00000000005", "ENSG00000103423", "ENSG00000276726")
)
xs
Let's start with a mapping of Ensembl gene identifiers to Entrez gene identifiers:
xs <- structure(
c("7105", "64102", "9093", "9093"),
.Names = c("ENSG00000000003", "ENSG00000000005", "ENSG00000103423", "ENSG00000276726")
)
xs
#' Convert counts to transcripts per million (TPM). | |
#' | |
#' Convert a numeric matrix of features (rows) and conditions (columns) with | |
#' raw feature counts to transcripts per million. | |
#' | |
#' Lior Pachter. Models for transcript quantification from RNA-Seq. | |
#' arXiv:1104.3889v2 | |
#' | |
#' Wagner, et al. Measurement of mRNA abundance using RNA-seq data: | |
#' RPKM measure is inconsistent among samples. Theory Biosci. 24 July 2012. |
# RPKM versus TPM | |
# | |
# RPKM and TPM are both normalized for library size and gene length. | |
# | |
# RPKM is not comparable across different samples. | |
# | |
# For more details, see: http://blog.nextgenetics.net/?e=51 | |
rpkm <- function(counts, lengths) { | |
rate <- counts / lengths |
# https://www.biostars.org/p/162853 | |
cache = {} | |
f2 = open('f2') | |
header2 = f2.next() | |
for line in f2: | |
fields = line.strip().split() | |
snp = fields[0] |
library(ggplot2) | |
library(FField) | |
# You'll have to play with repulsion, cex.x, and cex.y to get satisfactory results. | |
plot_text <- function(x, y, label, repulsion = 1, cex.x = 110, cex.y = 40) { | |
dat <- data.frame(xpos = x, ypos = y, label = label) | |
dat$label <- as.character(dat$label) | |
# Use the FField package to repel the text labels away from each other. | |
dat <- cbind( |
#!/usr/bin/env bash | |
# bigWigRegions | |
# Kamil Slowikowski | |
# | |
# Depends on bigWigToBedGraph: http://hgdownload.cse.ucsc.edu/admin/exe/ | |
if [[ $# -ne 2 ]]; then | |
echo "bigWigRegions - Print bigWig data for each region in a bed file." | |
echo -e "usage:\n bigWigRegions in.bigWig in.bed > out.bedGraph" | |
echo " bigWigRegions in.bigWig <(zcat in.bed.gz) > out.bedGraph" |
#!/usr/bin/env bash | |
# sra2srr.sh | |
# | |
# Example | |
# ------- | |
# To download all of the read archive files for SRP012001: | |
# sra2srr.sh SRP012001 | while read srr; do prefetch $srr; done | |
# | |
# For 'esearch', 'efetch', 'xtract', you must install Entrez Direct: | |
# http://www.ncbi.nlm.nih.gov/books/NBK179288/ |
#!/usr/bin/env bash | |
# reads_per_seq | |
# | |
# Run 8 jobs in parallel on many BAM files: | |
# | |
# parallel -j8 reads_per_seq ::: *.bam | |
set -eou pipefail | |
if [[ $# -ne 1 || "$1" != *.bam ]]; then |
#' Make a matrix suitable for use with RUVSeq methods such as RUVs(). | |
#' | |
#' Each row in the returned matrix corresponds to a set of replicate samples. | |
#' The number of columns is the size of the largest set of replicates; rows for | |
#' smaller sets are padded with -1 values. | |
#' | |
#' @param xs A vector indicating membership in a group. | |
#' @seealso RUVSeq::RUVs | |
#' @example | |
#' makeGroups(c("A","B","B","C","C","D","D","D","A")) |
#!/usr/bin/env bash | |
# drop_long_skipped_reads.sh | |
# Kamil Slowikowski | |
# July 19, 2015 | |
# | |
# Drop reads from a BAM file if they have long skipped regions. | |
# | |
# Example: | |
# | |
# ./drop_long_skipped_reads.sh FILE.bam 1000000 |