Skip to content

Instantly share code, notes, and snippets.

@mtw
Created February 18, 2014 11:53
Show Gist options
  • Save mtw/9069560 to your computer and use it in GitHub Desktop.
Save mtw/9069560 to your computer and use it in GitHub Desktop.
Convert unmapped BAM files to FASTQ and remove adapters for single-end experiments
#!/bin/bash
cutadapt=`which cutadapt`
fastqc=`which fastqc`
bam2fastq=`which bam2fastq`
gzip=`which gzip`
origdir="."
results="cutadapt"
fastqcdir="${results}/FastQC"
adapter5="AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
a3="CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC"
indexC1_R1="TGGTCA" # TGACCA
indexC1_R2="CTGATC" # GATCAG
indexC2_R1="ATTGGC" # GCCAAT
indexC2_R2="AAGCTA" # TAGCTT
indexC3_R1="GATCTG" # CAGATC
indexC3_R2="GTAGCC" # GGCTAC
indexC4_R1="TCAAGT" # ACTTGA
indexC4_R2="TACAAG" # CTTGTA
inprefix="C"
outprefix="D"
samples=4
froms=1
replicates=2
threads=30
if ! [ -d "$results" ]; then
mkdir -p $results
fi
if ! [ -d "$fastqcdir" ]; then
mkdir -p $fastqcdir
fi
for s in $(seq $froms $samples)
do
for r in $(seq 1 $replicates)
do
sample=${inprefix}${s}_R${r}
outsample=${outprefix}${s}_R${r}
if [ $sample == "C1_R1" ]; then
adapter3index=${indexC1_R1}
elif [ $sample == "C1_R2" ]; then
adapter3index=${indexC1_R2}
elif [ $sample == "C2_R1" ]; then
adapter3index=${indexC2_R1}
elif [ $sample == "C2_R2" ]; then
adapter3index=${indexC2_R2}
elif [ $sample == "C3_R1" ]; then
adapter3index=${indexC3_R1}
elif [ $sample == "C3_R2" ]; then
adapter3index=${indexC3_R2}
elif [ $sample == "C4_R1" ]; then
adapter3index=${indexC4_R1}
elif [ $sample == "C4_R2" ]; then
adapter3index=${indexC4_R2}
else
echo "Could not assign 3' index adapter, exiting..."
exit 555
fi
adapter3="${adapter3index}${a3}"
echo "processing $sample"
echo "5' adapter: ${adapter5}"
echo "3' adapter: ${adapter3}"
rd="${origdir}/${sample}.fastq"
outrd="${results}/${outsample}.fastq"
# process original BAM file; convert to FASTQ and do QC
$bam2fastq -q ${sample}.bam -o $rd
$fastqc --quiet --noextract -o $fastqcdir -t $threads $rd && $gzip -c $rd > ${origdir}/${sample}.fq.gz &
# end procesing original files
echo "cutadapt infile: $rd"
echo "cutadapt outfile: $outrd"
if ! [ -f "$rd" ]; then
echo "ERROR: $rd not available"
fi
set -x
# run cutadapt in single-end mode && rm non-clipped FASTQ file && run FastQC
$cutadapt -q 30 -m 25 -g $adapter5 -a $adapter3 -o $outrd $rd 1> cutadapt_${sample}.out 2> cutadapt_${sample}.err && rm $rd && $fastqc --quiet --noextract -o $fastqcdir -t $threads $outrd && $gzip $outrd &
set +x
done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment