Created
March 28, 2012 20:09
-
-
Save taoliu/2230081 to your computer and use it in GitHub Desktop.
Convert modENCODE PET RNA-seq SAM to pileup bigWig
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
if [ $# -lt 1 ]; then | |
echo `basename $0` "<PET RNAseq SAM filename>" | |
exit | |
fi | |
F=$1 | |
# chromosome length file | |
G=ce10.len | |
# a temporary name | |
TNAME=`mktemp -u` | |
TNAME=$1`basename $TNAME` | |
# convert SAM to BAM | |
samtools view -bS $F -o ${TNAME}.bam | |
# sort BAM | |
samtools sort ${TNAME}.bam ${TNAME}.sorted | |
# keep only paired reads | |
samtools view -b -f 2 ${TNAME}.sorted.bam > ${TNAME}.sorted.paired.bam | |
# convert to bed12 | |
bamToBed -i ${TNAME}.sorted.paired.bam -bed12 > ${TNAME}.sorted.paired.bed12 | |
# fix chromosome names from wormbase/ensembl convention to UCSC style | |
perl -pi -e '$_="chr".$_;s/MtDNA/M/' ${TNAME}.sorted.paired.bed12 | |
# count total reads number and calculate scaling factor for RPM values | |
TOTAL=`wc -l ${TNAME}.sorted.paired.bed12|cut -f 1 -d " "` | |
SCALE_FACTOR=`echo "scale=5;1000000.0/"$TOTAL | bc` | |
# compute coverage file in bedGraph, for RPM values | |
genomeCoverageBed -split -scale ${SCALE_FACTOR} -i ${TNAME}.sorted.paired.bed12 -bga -g ${G} > ${F}.bdg | |
# convert bedGraph to bigWig | |
bedClip ${F}.bdg ${G} ${TNAME}.bdg.clip | |
bedGraphToBigWig ${TNAME}.bdg.clip ${G} ${F}.bw | |
# clean temporary files | |
rm -f ${TNAME}.* | |
echo "Check ${F}.bw" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment