Created
February 18, 2014 11:53
-
-
Save mtw/9069560 to your computer and use it in GitHub Desktop.
Convert unmapped BAM files to FASTQ and remove adapters for single-end experiments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
cutadapt=`which cutadapt` | |
fastqc=`which fastqc` | |
bam2fastq=`which bam2fastq` | |
gzip=`which gzip` | |
origdir="." | |
results="cutadapt" | |
fastqcdir="${results}/FastQC" | |
adapter5="AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT" | |
a3="CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC" | |
indexC1_R1="TGGTCA" # TGACCA | |
indexC1_R2="CTGATC" # GATCAG | |
indexC2_R1="ATTGGC" # GCCAAT | |
indexC2_R2="AAGCTA" # TAGCTT | |
indexC3_R1="GATCTG" # CAGATC | |
indexC3_R2="GTAGCC" # GGCTAC | |
indexC4_R1="TCAAGT" # ACTTGA | |
indexC4_R2="TACAAG" # CTTGTA | |
inprefix="C" | |
outprefix="D" | |
samples=4 | |
froms=1 | |
replicates=2 | |
threads=30 | |
if ! [ -d "$results" ]; then | |
mkdir -p $results | |
fi | |
if ! [ -d "$fastqcdir" ]; then | |
mkdir -p $fastqcdir | |
fi | |
for s in $(seq $froms $samples) | |
do | |
for r in $(seq 1 $replicates) | |
do | |
sample=${inprefix}${s}_R${r} | |
outsample=${outprefix}${s}_R${r} | |
if [ $sample == "C1_R1" ]; then | |
adapter3index=${indexC1_R1} | |
elif [ $sample == "C1_R2" ]; then | |
adapter3index=${indexC1_R2} | |
elif [ $sample == "C2_R1" ]; then | |
adapter3index=${indexC2_R1} | |
elif [ $sample == "C2_R2" ]; then | |
adapter3index=${indexC2_R2} | |
elif [ $sample == "C3_R1" ]; then | |
adapter3index=${indexC3_R1} | |
elif [ $sample == "C3_R2" ]; then | |
adapter3index=${indexC3_R2} | |
elif [ $sample == "C4_R1" ]; then | |
adapter3index=${indexC4_R1} | |
elif [ $sample == "C4_R2" ]; then | |
adapter3index=${indexC4_R2} | |
else | |
echo "Could not assign 3' index adapter, exiting..." | |
exit 555 | |
fi | |
adapter3="${adapter3index}${a3}" | |
echo "processing $sample" | |
echo "5' adapter: ${adapter5}" | |
echo "3' adapter: ${adapter3}" | |
rd="${origdir}/${sample}.fastq" | |
outrd="${results}/${outsample}.fastq" | |
# process original BAM file; convert to FASTQ and do QC | |
$bam2fastq -q ${sample}.bam -o $rd | |
$fastqc --quiet --noextract -o $fastqcdir -t $threads $rd && $gzip -c $rd > ${origdir}/${sample}.fq.gz & | |
# end procesing original files | |
echo "cutadapt infile: $rd" | |
echo "cutadapt outfile: $outrd" | |
if ! [ -f "$rd" ]; then | |
echo "ERROR: $rd not available" | |
fi | |
set -x | |
# run cutadapt in single-end mode && rm non-clipped FASTQ file && run FastQC | |
$cutadapt -q 30 -m 25 -g $adapter5 -a $adapter3 -o $outrd $rd 1> cutadapt_${sample}.out 2> cutadapt_${sample}.err && rm $rd && $fastqc --quiet --noextract -o $fastqcdir -t $threads $outrd && $gzip $outrd & | |
set +x | |
done | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment