Created
April 1, 2014 09:34
-
-
Save nuada/9910959 to your computer and use it in GitHub Desktop.
Parallel blasting of bacterial sequences, inspired by: http://students.washington.edu/bowmanjs/wordpress/?p=737
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Parallel BLAST of unassigned OTU representative sequences via a grid engine.
#
# Usage:
#   <script>             Driver mode: index the representative-set FASTA,
#                        collect the IDs of unassigned sequences, split them
#                        into ${number_of_jobs} parts and submit one qsub job
#                        per part (each job re-invokes this script with the
#                        part file as its argument).
#   <script> PART_FILE   Job mode: extract the sequences whose IDs are listed
#                        in PART_FILE (one per line) into PART_FILE.fasta and
#                        BLAST them, writing XML to PART_FILE.xml.
#
# Exit status: non-zero if any extraction, BLAST run or submitted job fails.
set -euo pipefail

readonly rep_set_fasta="otus/rep_set/seqs_rep_set.fasta"
readonly tax_assignments="otus/uclust_assigned_taxonomy/seqs_rep_set_tax_assignments.txt"
readonly blast_db="/resources/blast/nt"

number_of_jobs=4
# Opts for fast blasting, http://www.biostars.org/p/16425/#16431
# Kept as an array so each option reaches blastn as its own word without
# relying on unquoted word-splitting.
blastopts=(-num_alignments 20 -num_threads 4 -word_size 50)

# Job script
if [[ -n "${1:-}" ]]; then
  part_file=$1

  # Extract sequences. The output is truncated (not appended to) so that a
  # rerun after a failed attempt does not duplicate query sequences.
  while IFS= read -r otu || [[ -n "${otu}" ]]; do
    [[ -n "${otu}" ]] || continue
    # stdin is detached so the tool can never consume the ID list.
    samtools faidx "${rep_set_fasta}" "${otu}" < /dev/null
  done < "${part_file}" > "${part_file}.fasta"

  # Under `set -e` a failing blastn aborts here with a non-zero status, which
  # `qsub -sync y` hands back to the driver.
  blastn -task megablast -db "${blast_db}" -outfmt 5 \
    -query "${part_file}.fasta" -out "${part_file}.xml" "${blastopts[@]}"
  exit 0
fi

# Index original FASTA
samtools faidx "${rep_set_fasta}"

# Find unassigned sequences and split into number_of_jobs
unassigned=$(mktemp)
trap 'rm -f -- "${unassigned}"' EXIT

# Case-insensitive match on the whole line, printing the first field; a single
# awk keeps "no match" from being treated as a pipeline failure.
awk 'tolower($0) ~ /unassigned/ { print $1 }' "${tax_assignments}" > "${unassigned}"

if [[ ! -s "${unassigned}" ]]; then
  printf 'No unassigned sequences found; nothing to do.\n' >&2
  exit 0
fi

split -n "l/${number_of_jobs}" -- "${unassigned}" part

# Submit jobs
pids=()
for part in part??; do
  # With fewer IDs than jobs some parts are empty; do not BLAST an empty query.
  [[ -s "${part}" ]] || continue
  qsub -terse -sync y -b y -V -cwd "$0" "${part}" &
  pids+=("$!")
done

# Wait on each submission individually so a failed job is not silently lost.
failed=0
for pid in "${pids[@]}"; do
  wait "${pid}" || failed=1
done

# Remove the intermediate ID lists and FASTA files; the .xml results are kept.
rm -f -- part?? part??.fasta

if (( failed )); then
  printf 'One or more BLAST jobs failed.\n' >&2
  exit 1
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment