Last active
March 16, 2016 06:22
-
-
Save jerowe/17467d609e8b809d64a5 to your computer and use it in GitHub Desktop.
Reference genomes with lots of things
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Generates and submits HPC job files that build aligner/toolkit indices for
# the reference genomes found below RefGenDir.

# Timestamp taken once at start-up; submit_jobs uses it to name the log
# directory of each submission.
DATE=$(date +%Y-%m-%d--%H-%M-%S)

# Root of the reference genome tree that is searched for FASTA files.
export RefGenDir="/scratch/Reference_Genomes/Public"
# Directory that receives the generated build_<process>.sh job files.
export SoftwareDir="/scratch/gencore/software/bin/build_indices"

# Shared "#HPC" header written at the top of every generated job file.
# `read -d ''` reads up to end-of-input and therefore always returns
# non-zero; that status is expected and deliberately discarded.
read -r -d '' TEMPLATE <<'EOF' || true
#HPC queue=adserial
#HPC commands_per_node=12
#HPC cpus_per_task=12
#HPC procs=12
EOF
############################
# Exported Functions
############################
#######################################
# Print the job text that builds a Picard sequence dictionary for a
# reference FASTA. Nothing is executed here; the text goes to stdout.
# Arguments:
#   $1 - path to the reference file (uncompressed name)
# Outputs:
#   Whatever uncompress_index prints for $1, followed by a
#   CreateSequenceDictionary command and a `cd` back to the directory this
#   function was called from.
#######################################
gatk_index() {
  local ref=$1
  local base dict q_base q_dict
  base=$(basename "$ref")

  # The dictionary is named after the reference with its FASTA extension
  # removed. Only a trailing extension is stripped, so a name such as
  # "my.fasta_v2.fa" keeps everything before the final ".fa".
  case "$base" in
    *.fasta) dict=${base%.fasta} ;;
    *.fna) dict=${base%.fna} ;;
    *.fa) dict=${base%.fa} ;;
    *) dict=$base ;;
  esac
  # NOTE(review): find_refs skips a reference when "<ref>.<index>" exists;
  # for index "dict" that is "<ref>.dict", which differs from the
  # extension-less name written here - confirm which name the skip test
  # should look for.

  uncompress_index "$ref"

  # %q shell-escapes the names so the generated line stays a single word per
  # argument even when a file name contains spaces or metacharacters.
  printf -v q_base '%q' "$base"
  printf -v q_dict '%q' "${dict}.dict"
  printf '%s \\\n' \
    'java -jar /scratch/gencore/software/picard/2.0.1/picard.jar CreateSequenceDictionary' \
    "REFERENCE=${q_base}" \
    "OUTPUT=${q_dict};"
  printf 'cd %q\n' "$PWD"
}
#######################################
# Print the job text that builds a samtools FASTA index (.fai) for a
# reference. Nothing is executed here; the text goes to stdout.
# Arguments:
#   $1 - path to the reference file (uncompressed name)
# Outputs:
#   Whatever uncompress_index prints for $1, followed by a `samtools faidx`
#   command on the file's base name and a `cd` back to the directory this
#   function was called from.
#######################################
samtools_index() {
  local ref=$1
  local base
  base=$(basename "$ref")

  uncompress_index "$ref"

  # %q keeps names with spaces or shell metacharacters intact in the
  # generated command; ordinary names are printed unchanged.
  printf 'samtools faidx %q; \\\n' "$base"
  printf 'cd %q\n' "$PWD"
}
#######################################
# Print the job text that builds a Bowtie 2 index for a reference.
# Nothing is executed here; the text goes to stdout.
# Arguments:
#   $1 - path to the reference file (uncompressed name)
# Outputs:
#   Whatever uncompress_index prints for $1, followed by a `bowtie2-build`
#   command that uses the file's base name as both input and index prefix,
#   and a `cd` back to the directory this function was called from.
#######################################
bowtie_index() {
  local ref=$1
  local base
  base=$(basename "$ref")

  uncompress_index "$ref"

  # %q keeps names with spaces or shell metacharacters intact in the
  # generated command; ordinary names are printed unchanged.
  printf 'bowtie2-build %q %q; \\\n' "$base" "$base"
  printf 'cd %q\n' "$PWD"
}
#######################################
# Print the job text that builds a BWA index for a reference.
# Nothing is executed here; the text goes to stdout.
# Arguments:
#   $1 - path to the reference file (uncompressed name)
# Outputs:
#   Whatever uncompress_index prints for $1, followed by a `bwa index`
#   command on the file's base name and a `cd` back to the directory this
#   function was called from.
#######################################
bwa_index() {
  local ref=$1
  local base
  base=$(basename "$ref")

  uncompress_index "$ref"

  # %q keeps names with spaces or shell metacharacters intact in the
  # generated command; ordinary names are printed unchanged.
  printf '/scratch/gencore/software/bin/bwa/0.7.12/bin/bwa index %q; \\\n' "$base"
  printf 'cd %q\n' "$PWD"
}
#######################################
# Print the opening part of a generated job command: change into the
# reference's directory and, when a gzip-compressed copy exists, decompress
# it first. Each printed line ends in "&& \" so that the caller's own
# command continues the same shell command.
# Arguments:
#   $1 - path to the reference file (uncompressed name)
# Outputs:
#   "cd <dir> && \" and, only if "<$1>.gz" is a regular file at the time of
#   the call, "gzip -d <$1>.gz && \".
#######################################
uncompress_index() {
  local ref=$1
  local dir
  dir=$(dirname "$ref")

  # %q shell-escapes the paths so directories or files whose names contain
  # spaces survive in the generated command; ordinary paths are unchanged.
  printf 'cd %q && \\\n' "$dir"
  if [[ -f "${ref}.gz" ]]; then
    printf 'gzip -d %q && \\\n' "${ref}.gz"
  fi
}
# Export the generator helpers so that child bash processes (such as the
# `bash -c` started by xargs in find_refs) can call them by name.
export -f \
  gatk_index \
  samtools_index \
  bwa_index \
  bowtie_index \
  uncompress_index
############################
# Main Functions
############################
#######################################
# Search the current directory tree for reference files with one extension
# and append the index-building job text for each of them to the job file
# of the given process.
# Globals:
#   SoftwareDir (read) - directory holding build_<process>.sh; falls back to
#                        /scratch/gencore/software/bin/build_indices
# Arguments:
#   $1 - FASTA extension without the dot (e.g. "fa")
#   $2 - suffix of a file whose presence means the index already exists;
#        the test is made on "<reference>.<suffix>"
#   $3 - process name; "<process>_index" is called for each reference
#   $4 - optional, default 0; any other value forces a job to be written
#        even when the index file is already present
# Outputs:
#   Appends to "${SoftwareDir}/build_<process>.sh".
#######################################
find_refs() {
  local ext=$1 index=$2 process=$3 force=${4:-0}
  local script="${SoftwareDir:-/scratch/gencore/software/bin/build_indices}/build_${process}.sh"
  local found ref

  # The two -name tests are grouped so that -print0 applies to both of them,
  # and the patterns are quoted so the shell cannot expand them first.
  while IFS= read -r -d '' found; do
    # The helpers take the uncompressed name and emit the gzip -d themselves.
    ref=${found%.gz}

    # When both "x.fa" and "x.fa.gz" exist, find reports each; handle the
    # pair once, via the uncompressed entry.
    if [[ "$found" == *.gz && -f "$ref" ]]; then
      continue
    fi

    if [[ "$force" != 0 || ! -f "${ref}.${index}" ]]; then
      # Subshell: keeps any variables the helper sets out of the caller.
      ( "${process}_index" "$ref" )
    fi
  done < <(find "$PWD" \( -name "*.${ext}.gz" -o -name "*.${ext}" \) -print0) >> "$script"
}
#######################################
# Run find_refs once for every supported FASTA extension (fa, fna, fasta).
# Arguments:
#   $1 - index-file suffix, passed to find_refs as its second argument
#   $2 - process name, passed to find_refs as its third argument
#######################################
process_jobs() {
  local index=$1 process=$2
  local -a extensions=('fa' 'fna' 'fasta')
  local ext

  for ext in "${extensions[@]}"; do
    # Quoted so an argument containing whitespace stays one argument.
    find_refs "$ext" "$index" "$process"
  done
}
#######################################
# Hand the generated job file of one process to pbsrunner.pl.
# Globals:
#   DATE        (read) - timestamp used in the log directory name
#   SoftwareDir (read) - directory holding build_<process>.sh; falls back to
#                        /scratch/gencore/software/bin/build_indices
# Arguments:
#   $1 - process name (selects build_<process>.sh and names the job)
# Returns:
#   Exit status of pbsrunner.pl.
#######################################
submit_jobs() {
  local process=$1
  local script="${SoftwareDir:-/scratch/gencore/software/bin/build_indices}/build_${process}.sh"

  # A failed module purge/load is reported but, as before, does not stop the
  # submission.
  if ! { module purge && module load NYUAD/2.0 apps anyenv; }; then
    printf 'submit_jobs: warning: loading environment modules failed\n' >&2
  fi

  pbsrunner.pl \
    --infile "$script" \
    --outdir "/scratch/gencore/pbslogs/${DATE}_${process}_index" \
    --jobname "${process}_index"
}
############################
# Main Create Indices
############################
############################
# Bowtie
############################
# Start a fresh job file (shared #HPC header plus this tool's module line),
# append a job for every reference that has no "<ref>.1.bt2" yet, then
# submit the file.
# Abort if the reference tree is unreachable: the search runs from the
# current directory and would otherwise scan the wrong tree.
cd "${RefGenDir:-/scratch/Reference_Genomes/Public}" || {
  printf 'cannot cd to reference genome directory\n' >&2
  exit 1
}
process='bowtie'
index='1.bt2'
cat > "${SoftwareDir}/build_${process}.sh" <<EOF
$TEMPLATE
#HPC module=NYUAD/2.0 apps tuxedo/2.0
EOF
process_jobs "$index" "$process"
submit_jobs "$process"
############################
# BWA
############################
# Start a fresh job file (shared #HPC header plus this tool's module line),
# append a job for every reference that has no BWA index yet, then submit
# the file.
# Abort if the reference tree is unreachable: the search runs from the
# current directory and would otherwise scan the wrong tree.
cd "${RefGenDir:-/scratch/Reference_Genomes/Public}" || {
  printf 'cannot cd to reference genome directory\n' >&2
  exit 1
}
process='bwa'
# `bwa index` writes <ref>.amb/.ann/.bwt/.pac/.sa; the previous suffix "bwa"
# never matched any of them, so every reference was queued on every run.
index='bwt'
cat > "${SoftwareDir}/build_${process}.sh" <<EOF
$TEMPLATE
#HPC module=NYUAD/2.0 apps anyenv/1
EOF
process_jobs "$index" "$process"
submit_jobs "$process"
############################
# GATK
############################
# Start a fresh job file (shared #HPC header plus this tool's module line),
# append a sequence-dictionary job for every reference that has no
# "<ref>.dict" yet, then submit the file.
# NOTE(review): gatk_index chooses the dictionary's file name itself; check
# that it matches the "<ref>.dict" name tested here, otherwise references
# are queued again on every run.
# Abort if the reference tree is unreachable: the search runs from the
# current directory and would otherwise scan the wrong tree.
cd "${RefGenDir:-/scratch/Reference_Genomes/Public}" || {
  printf 'cannot cd to reference genome directory\n' >&2
  exit 1
}
process='gatk'
index='dict'
cat > "${SoftwareDir}/build_${process}.sh" <<EOF
$TEMPLATE
#HPC module=NYUAD/2.0 gcc zlib openssl anyenv/1 jdk
EOF
process_jobs "$index" "$process"
submit_jobs "$process"
############################
# Samtools
############################
# Start a fresh job file (shared #HPC header plus this tool's module line),
# append a faidx job for every reference that has no "<ref>.fai" yet, then
# submit the file.
process='samtools'
index='fai'
# Abort if the reference tree is unreachable: the search runs from the
# current directory and would otherwise scan the wrong tree.
cd "${RefGenDir:-/scratch/Reference_Genomes/Public}" || {
  printf 'cannot cd to reference genome directory\n' >&2
  exit 1
}
cat > "${SoftwareDir}/build_${process}.sh" <<EOF
$TEMPLATE
#HPC module=NYUAD/2.0 gcc zlib openssl anyenv/1 ncurses samtools
EOF
process_jobs "$index" "$process"
submit_jobs "$process"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment