This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plotting dependencies: ggplot2 supplies qplot(); gridExtra is loaded with it.
library(ggplot2)
library(gridExtra)
# Read the coverage table; header=TRUE takes column names from the first row.
cov <- read.table("/Users/arq5x/Documents/Projects/HallLab/TCGA-1KG/ForKeystone/tcga_and_1kg_span_cov.txt", header=TRUE)
span <- qplot(sample, span_cov, data=cov, fill=factor(type_num), geom="bar", | |
binwidth=1, | |
xlab="Sample", | |
ylab="Spanning coverage") + | |
opts(axis.ticks = theme_blank(), | |
axis.text.x = theme_blank(), | |
axis.title.x = theme_text(size = 18, face = "bold"), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Space-separated identifier lists, one exported variable per batch.
# NOTE(review): the IDs look like sample names — confirm against the consumer.
# They stay plain strings (not arrays) because they are exported; expand them
# unquoted only where splitting into individual IDs is intended.
# A backslash-newline inside double quotes is removed by the shell, so each
# multi-line list collapses to a single space-separated line.
export BATCH1="1094PC0005 1094PC0009 1094PC0012 1094PC0013 "
export BATCH2="1094PC0016 1094PC0017 1094PC0018 1094PC0019 \
1094PC0020 1094PC0021 1094PC0022 1094PC0023 1094PC0025 "
export BATCH3="1478PC0001B 1478PC0002 1478PC0003 1478PC0004 \
1478PC0005 1478PC0006B 1478PC0007B 1478PC0008B \
1478PC0009B 1478PC0010 1478PC0011 1478PC0012 \
1478PC0013B 1478PC0014B 1478PC0015B 1478PC0016 \
1478PC0017B 1478PC0018 1478PC0019 1478PC0020 \
1478PC0021 1478PC0022B 1478PC0023B 1478PC0024B"
export BATCH4="1719PC0001 1719PC0002 1719PC0003 1719PC0004 \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################
# Pair the alignments.
# Keep proper, on-target (i.e. +/- 500 bp of a probe) pairs.
# Require mapping quality >= 20
############################################################
# Directory under the t1d project tree (value ends with a slash).
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
# Label for this step. NOTE(review): presumably the job name — confirm
# against the commands that consume it.
export STEPNAME=t1d-ex-bwa-par
# Path to the hg19 FASTA in the shared bwa/gatk genomes directory.
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download a UCSC dbSNP table dump and keep selected columns as a BED-like file.
# GENOME and SNPBUILD select the assembly directory and the snp<build> table.
export GENOME=hg19
export SNPBUILD=131
# Stream the gzipped table, decompress it, and keep fields 2-7, 10 and 16.
# Expansions are quoted so the URL and output name are never word-split.
curl -s "http://hgdownload.cse.ucsc.edu/goldenPath/${GENOME}/database/snp${SNPBUILD}.txt.gz" \
  | zcat \
  | cut -f 2,3,4,5,6,7,10,16 > "dbsnp.${SNPBUILD}.${GENOME}.bed"
# Show the first lines of the result as a quick sanity check.
head "dbsnp.${SNPBUILD}.${GENOME}.bed"
chr1 10433 10433 rs56289060 0 + -/C near-gene-5 | |
chr1 10491 10492 rs55998931 0 + C/T near-gene-5 | |
chr1 10518 10519 rs62636508 0 + C/G near-gene-5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: Get transcripts from UCSC refGene (hg19) into a BED file. | |
# Notes: | |
# the awk statement reorders the "raw" columns into BED12 format | |
# bed12ToBed6 converts the BED12 into discrete BED6 entries for each exon | |
# - the -n option is new and in the bedtools repository | |
$ curl -s http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz | \ | |
zcat | \ | |
awk '{OFS="\t"; print $3,$5,$6,$2,$9,$4,$7,$8,"0",$9,$10,$11}' | \ | |
bed12ToBed6 -n \ | |
> refGene.bed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Settings for drawing a random subset of read pairs from two FASTQ files.
# Starting FASTQ files
export FQ1=1.fq
export FQ2=2.fq
# The names of the random subsets
export FQ1SUBSET=1.rand.fq
export FQ2SUBSET=2.rand.fq
# How many random pairs do we want?
export N=100
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################################################
# Assume we have a file of BED exons for every gene and transcript.
# The exons are listed in genomic order for each gene/transcript
###################################################################
$ head -n 5 exons.bed | |
chr1 1337462 1337636 MRPL20 exon1 - | |
chr1 1340996 1341266 MRPL20 exon2 - | |
chr1 1341188 1341266 MRPL20 exon3 - | |
chr1 1342288 1342399 MRPL20 exon4 - | |
chr1 1342510 1342597 MRPL20 exon5 - |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void MultiCovBam::CollectCoverage() | |
{ | |
BamMultiReader reader; | |
if ( !reader.Open(_bam_files) ) | |
{ | |
cerr << "Could not open input BAM files." << endl; return; | |
} | |
else | |
{ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################
# Index the position-sorted BAM files.
############################################################
# Space-separated sample names; the loop below iterates over them one at a time.
export SAMPLES="T10AA T10AB T10D T10H"
# Directory the submitted job changes into before running samtools.
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
# Passed to qsub via -N as the job name.
export STEPNAME="bam-index"
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $TUMHOME; samtools index bam/$sample.*.bam" | $QSUB |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################## | |
# 1. Counting the discrete occurrences | |
# of a value in each column of a | |
# matrix. Store the count for each | |
# column in a new vector whose size | |
# is the number of columns in the | |
# matrix. | |
######################################## | |
# make a 3x3 matrix with columns | |
# having 0, 1, and 2 zeros |
OlderNewer