Created
November 22, 2017 22:45
-
-
Save pansapiens/f899b20f6e9f8b88315aeb082b4211eb to your computer and use it in GitHub Desktop.
Download the latest UniProt (Swiss-Prot + TrEMBL) release and build BLAST databases from it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Downloads the latest release of UniProt (Swiss-Prot + TrEMBL), putting it in
# a release-specific directory (named like 2017_10), and creates the associated
# BLAST protein databases:
#   uniprot_<release>  - Swiss-Prot and TrEMBL combined
#   uniprot_sprot      - Swiss-Prot only
#   uniprot_trembl     - TrEMBL only
#
# Requires wget, grep with -P (PCRE) support, gunzip and makeblastdb on the
# PATH, e.g. via:
#   module load blast
set -euo pipefail

readonly BASE_URL="ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete"

# Better to use a stable DOWNLOAD_TMP name (rather than mktemp) so that
# `wget -c` can resume interrupted downloads on the next run.
#DOWNLOAD_TMP=$(mktemp -d _downloading-XXXX)
DOWNLOAD_TMP=_downloading

# Prints an error message to stderr and aborts the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Prints the release name (like 2017_10 or 2017_1) found in the metalink
# file $1. Returns non-zero if no <version> element matches.
extract_release() {
  local versions
  versions=$(grep -oP '(?<=<version>)(\d\d\d\d\_\d?\d)(?=<\/version>)' "$1") || return 1
  # Only keep the first match so the result is always a single line.
  printf '%s\n' "${versions%%$'\n'*}"
}

# Fail before downloading anything if a tool needed later is missing.
for tool in wget grep gunzip makeblastdb; do
  command -v "${tool}" >/dev/null \
    || die "required tool '${tool}' not found on PATH"
done

mkdir -p -- "${DOWNLOAD_TMP}"
cd -- "${DOWNLOAD_TMP}"

# Always fetch the (small) metalink file from scratch: it tells us which
# release "current_release" points at right now.
wget -O RELEASE.metalink.latest "${BASE_URL}/RELEASE.metalink"
RELEASE=$(extract_release RELEASE.metalink.latest) \
  || die "could not find a release version in RELEASE.metalink"

# If a previous, interrupted run left files behind, make sure they belong to
# the same release; resuming with `wget -c` across releases would splice
# together bytes from two different files.
if [[ -f RELEASE.metalink ]]; then
  previous_release=$(extract_release RELEASE.metalink) || previous_release=""
  if [[ "${previous_release}" != "${RELEASE}" ]]; then
    die "partial downloads in '${DOWNLOAD_TMP}' are not from release ${RELEASE}; remove that directory and re-run"
  fi
fi
mv -f -- RELEASE.metalink.latest RELEASE.metalink

# Refuse to clobber an existing release before spending time on the large
# downloads.
if [[ -e "../${RELEASE}" ]]; then
  die "release directory '${RELEASE}' already exists"
fi

for file in uniprot_sprot.fasta.gz uniprot_trembl.fasta.gz reldate.txt README LICENSE; do
  wget -c "${BASE_URL}/${file}"
done

cd ..

# Everything downloaded successfully: move it into the release directory.
mkdir -- "${RELEASE}"
mv -- "${DOWNLOAD_TMP}"/* "${RELEASE}/"
rmdir -- "${DOWNLOAD_TMP}"

cd -- "${RELEASE}"

gunzip uniprot_sprot.fasta.gz
gunzip uniprot_trembl.fasta.gz

# Combined Swiss-Prot + TrEMBL FASTA for the all-of-UniProt database.
cat uniprot_sprot.fasta uniprot_trembl.fasta >"uniprot_${RELEASE}.fasta"

makeblastdb -in "uniprot_${RELEASE}.fasta" -out "uniprot_${RELEASE}" -dbtype prot -parse_seqids -title "uniprot_${RELEASE}"
makeblastdb -in uniprot_sprot.fasta -out uniprot_sprot -dbtype prot -parse_seqids -title uniprot_sprot
makeblastdb -in uniprot_trembl.fasta -out uniprot_trembl -dbtype prot -parse_seqids -title uniprot_trembl

cd ..
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment