Created
March 15, 2024 02:13
-
-
Save Ge0rges/594f88f09d17b5d24cc0051f65c491e5 to your computer and use it in GitHub Desktop.
Make the MDMParis defense finder models compatible with Anvi'o.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Set the GitHub repository and destination folder | |
github_repo="mdmparis/defense-finder-models" | |
destination_folder="DefenseFinder/" | |
source_folder="defense-finder-models" | |
# Function to check if a file exists, and exit with a warning if it does | |
check_file_exists() { | |
if [ -e "$1" ]; then | |
echo "Warning: File $1 already exists. Exiting." | |
exit 1 | |
fi | |
} | |
# Function to download the latest release from GitHub | |
download_latest_release() { | |
repo_url="https://api.github.com/repos/$1/releases/latest" | |
download_url=$(curl -s "$repo_url" | grep "browser_download_url" | cut -d '"' -f 4) | |
wget -O latest_release.tar.gz "$download_url" | |
tar -xzf latest_release.tar.gz | |
rm latest_release.tar.gz | |
} | |
# Check if any of the required files already exist | |
check_file_exists "$destination_folder/genes.hmm" | |
check_file_exists "$destination_folder/kind.txt" | |
check_file_exists "$destination_folder/reference.txt" | |
check_file_exists "$destination_folder/target.txt" | |
check_file_exists "$destination_folder/noise_cutoff_terms.txt" | |
check_file_exists "$destination_folder/genes.txt" | |
# Download the latest release from GitHub | |
download_latest_release "$github_repo" | |
# Create the destination folder if it doesn't exist | |
mkdir -p "$destination_folder" | |
# Create kind.txt with "DefenseFinder" | |
echo "DefenseFinder" > "$destination_folder/kind.txt" | |
# Extract version number from metadata.yml | |
version_number=$(grep "vers" "$source_folder/metadata.yml" | cut -d ' ' -f2) | |
# Create reference.txt with "DefenseFinder" and version number | |
echo "DefenseFinder $version_number" > "$destination_folder/reference.txt" | |
# Create target.txt with "AA:GENE" | |
echo "AA:GENE" > "$destination_folder/target.txt" | |
# Create noise_cutoff_terms.txt with "-E 1e-25" | |
echo "-E 1e-25" > "$destination_folder/noise_cutoff_terms.txt" | |
# Create genes.txt with "gene accession hmmsource" | |
echo -e "gene\taccession\thmmsource" > "$destination_folder/genes.txt" | |
for file in "$source_folder/profiles"/*.hmm; do | |
base_name=$(basename "$file" .hmm) | |
# Extract gene names and count occurrences within the file | |
gene_counts=$(awk '/^NAME/ {if (++count[$2] == 1) print $2; else print $2"_"count[$2]}' "$file") | |
# Loop through each gene name and create a separate row in genes.txt | |
while read -r final_gene_name; do | |
echo -e "$final_gene_name\t$base_name\thttps://github.com/mdmparis/defense-finder-models" >> "$destination_folder/genes.txt" | |
done <<< "$gene_counts" | |
# Modify the NAME field in the .hmm file | |
awk '/^NAME/ {if (++count[$2] == 1) print $1, $2; else print $1, $2"_"count[$2]} !/^NAME/ {print $0}' "$file" > "$file.tmp" && mv "$file.tmp" "$file" | |
done | |
# Concatenate .hmm files into genes.hmm | |
cat "$source_folder/profiles"/*.hmm > "$destination_folder/genes.hmm" | |
# Compress genes.hmm using gzip | |
gzip "$destination_folder/genes.hmm" | |
# Delete | |
rm -r $source_folder | |
# Display completion message | |
echo "Script completed successfully. Files are in $destination_folder." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment