Last active
March 29, 2019 06:51
-
-
Save genomewalker/1666d6565e6c2c848fbca6230ef37f00 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript | |
# Check if basic packages are installed ----------------------------------- | |
is.installed <- function(pkg){ | |
is.element(pkg, installed.packages()[,1]) | |
} | |
if (!is.installed("crayon") || !is.installed("optparse")){ | |
cat("We will try to install the packages crayon and optparse... (this will be only be done once)\n") | |
Sys.sleep(5) | |
if (!is.installed("crayon")){ | |
suppressMessages(install.packages("crayon", repos = "http://cran.us.r-project.org")) | |
} | |
suppressMessages(library(crayon)) | |
if (!is.installed("optparse")){ | |
suppressMessages(install.packages("optparse", repos = "http://cran.us.r-project.org")) | |
} | |
} | |
suppressMessages(library(crayon)) | |
suppressMessages(library(optparse)) | |
# Check if packaged are installed ----------------------------------------- | |
cat("\nChecking if all packages are installed...\n\n") | |
needed = c("ape", "tidyverse", "pbmcapply", "phangorn", "maditr", "data.table") | |
missing_package <- FALSE | |
# For loop to run through each of the packages | |
for (p in 1:length(needed)){ | |
if(is.installed(needed[p])){ | |
cat(sprintf("%-10s: %s", needed[p], green("Installed\n"))) | |
}else{ | |
cat(sprintf("%-10s: %s", needed[p], red("Not installed\n"))) | |
missing_package <- TRUE | |
} | |
} | |
quit_not_installed <- function(){ | |
cat("\nMissing packages, please install them.\n") | |
quit(save = "no", status = 1) | |
} | |
if (missing_package) { | |
quit_not_installed() | |
}else{ | |
cat("\nAll packages installed.\n") | |
} | |
Sys.sleep(2) | |
system("clear") | |
# Script command line options --------------------------------------------- | |
option_list = list( | |
make_option(c("-t", "--tree"), type="character", default=NULL, | |
help="Tree file name", metavar="character"), | |
make_option(c("-d", "--data"), type="character", default=NULL, | |
help="Contextual data filename", metavar="character"), | |
make_option(c("-p", "--threads"), type="integer", default=1, | |
help="Number of threads [default= %default]", metavar="integer"), | |
make_option(c("-o", "--out"), type="character", default="annotation.tsv", | |
help="Iutput file name [default= %default]", metavar="character") | |
) | |
opt_parser = OptionParser(option_list=option_list); | |
opt = parse_args(opt_parser); | |
if (is.null(opt$tree) || is.null(opt$data)){ | |
print_help(opt_parser) | |
stop("At least two arguments must be supplied (tree file and contextual data files).n", call.=FALSE) | |
} | |
suppressMessages(library(ape)) | |
suppressMessages(library(tidyverse)) | |
suppressMessages(library(pbmcapply)) | |
suppressMessages(library(maditr)) | |
suppressMessages(library(data.table)) | |
# Do stuff ---------------------------------------------------------------- | |
ncores <- opt$threads | |
cat(paste0("\nReading tree ", opt$tree, "...")) | |
pr_tree <- read.tree(file = opt$tree) | |
cat(green(" done\n")) | |
cat(paste(" Tree in file", opt$tree, "has", yellow(scales::comma(Ntip(pr_tree))), "tips and", yellow(scales::comma(Nedge(pr_tree))), "edges\n\n")) | |
cat(paste0("\nReading contextual data ", opt$data, "...")) | |
suppressMessages(pr_cdata <- read_tsv(opt$data, col_names = FALSE) %>% | |
rename(tip.label = X1, supercluster = X2)) | |
cat(paste("done\n File", opt$data, "has", yellow(scales::comma(nrow(pr_cdata))), "rows and", yellow(scales::comma(ncol(pr_cdata))), "columns\n")) | |
cat("\nExtracting inserted sequences from tree...") | |
pr_inserted <- tibble(tip.label = pr_tree$tip.label) %>% | |
filter(grepl("seq", tip.label)) | |
cat(paste0("done\n ", yellow(scales::comma(nrow(pr_inserted)))," sequencs extracted\n\n")) | |
pairs <- (Ntip(pr_tree) * (Ntip(pr_tree) - 1))/2 | |
cat(paste0("Calculating cophenetic distance (", yellow(scales::comma(pairs))," pairs)... ")) | |
coph <- cophenetic.phylo(pr_tree) %>% | |
as.dist() %>% | |
broom::tidy() | |
cat(green("done\n")) | |
majority_vote <- function (x, seed = 12345) { | |
set.seed(seed) | |
whichMax <- function(x) { | |
m <- seq_along(x)[x == max(x, na.rm = TRUE)] | |
if (length(m) > 1) | |
sample(m, size = 1) | |
else m | |
} | |
x <- as.vector(x) | |
tab <- table(x) | |
m <- whichMax(tab) | |
out <- list(table = tab, ind = m, majority = names(tab)[m]) | |
return(out) | |
} | |
assign_orfs <- function(X, phy = phy, data = tips, coph = coph, n_mrca = n_mrca){ | |
top <- coph %>% | |
dt_filter(item1 == X | item2 == X) %>% | |
dt_mutate(item1 = as.character(item1), item2 = as.character(item2)) %>% | |
as_tibble() %>% | |
rowwise() %>% | |
mutate(item2 = ifelse(item1 != X, item1, item2), item1 = X) %>% | |
inner_join(data, by = c( "item2" = "tip.label")) %>% | |
arrange((distance)) %>% | |
head(n_mrca) | |
# top <- coph %>% | |
# dt_filter(item1 == X | item2 == X) | |
# f_q <- quantile(top$distance)[[2]] | |
# top <- top %>% | |
# dt_filter(distance <= f_q) %>% | |
# dt_mutate(item1 = as.character(item1), item2 = as.character(item2)) | |
# top <- top[, c("item1", "item2") := list(X, ifelse(item1 != X, item1, item2)), by = .I] %>% | |
# dt_inner_join(data, by = c( "item2" = "tip.label")) %>% | |
# arrange((distance)) %>% | |
# as_tibble() %>% | |
# head(n_mrca) | |
bestmrca <- getMRCA(phy, unique(c(top$item1, top$item2))) | |
mrcatips <- phy$tip.label[unlist(phangorn::Descendants(phy, bestmrca, type = "tips"))] | |
if(length(data %>% filter(tip.label %in% mrcatips) %>% .$supercluster %>% unique()) != 1){ | |
annot <- majority_vote(data %>% filter(tip.label %in% mrcatips) %>% .$supercluster)$majority | |
majority <- TRUE | |
}else{ | |
annot <- data %>% filter(tip.label %in% mrcatips) %>% .$supercluster %>% unique() | |
majority <- FALSE | |
} | |
tibble(tip.label = X, supercluster = annot, majority = majority) | |
} | |
cat(paste0("Annotating ORFs with ", yellow(ncores) ," cores...\n")) | |
orf_annotation <- pbmcapply::pbmclapply(pr_inserted$tip.label, | |
assign_orfs, | |
phy = pr_tree, | |
data = pr_cdata, | |
coph = coph %>% as.data.table(), | |
n_mrca = 1, | |
mc.cores = ncores, | |
ignore.interactive = T, | |
max.vector.size = 1e7) %>% | |
bind_rows() | |
cat(paste0("Exporting annotations to file ", silver(opt$out), "... ")) | |
write_tsv(orf_annotation, path = opt$out, col_names = FALSE) | |
cat(green("done\n\n")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript | |
# Check if basic packages are installed ----------------------------------- | |
is.installed <- function(pkg){ | |
is.element(pkg, installed.packages()[,1]) | |
} | |
if (!is.installed("crayon") || !is.installed("optparse")){ | |
cat("We will try to install the packages crayon and optparse... (this will be only be done once)\n") | |
Sys.sleep(5) | |
if (!is.installed("crayon")){ | |
suppressMessages(install.packages("crayon", repos = "http://cran.us.r-project.org")) | |
} | |
suppressMessages(library(crayon)) | |
if (!is.installed("optparse")){ | |
suppressMessages(install.packages("optparse", repos = "http://cran.us.r-project.org")) | |
} | |
} | |
suppressMessages(library(crayon)) | |
suppressMessages(library(optparse)) | |
# Check if packaged are installed ----------------------------------------- | |
cat("\nChecking if all packages are installed...\n\n") | |
needed = c("magrittr", "tidyverse", "pbmcapply", "maditr", "data.table") | |
missing_package <- FALSE | |
# For loop to run through each of the packages | |
for (p in 1:length(needed)){ | |
if(is.installed(needed[p])){ | |
cat(sprintf("%-10s: %s", needed[p], green("Installed\n"))) | |
}else{ | |
cat(sprintf("%-10s: %s", needed[p], red("Not installed\n"))) | |
missing_package <- TRUE | |
} | |
} | |
quit_not_installed <- function(){ | |
cat("\nMissing packages, please install them.\n") | |
quit(save = "no", status = 1) | |
} | |
if (missing_package) { | |
quit_not_installed() | |
}else{ | |
cat("\nAll packages installed.\n") | |
} | |
Sys.sleep(2) | |
system("clear") | |
# Script command line options --------------------------------------------- | |
option_list = list( | |
make_option(c("-d", "--data"), type="character", default=NULL, | |
help="ORF data filename", metavar="character"), | |
make_option(c("-p", "--threads"), type="integer", default=1, | |
help="Number of threads [default= %default]", metavar="integer"), | |
make_option(c("-c", "--components"), type="character", default=NULL, | |
help="Iutput file name [default= %default]", metavar="character"), | |
make_option(c("-o", "--out"), type="character", default="alluvial.tsv", | |
help="Output file name [default= %default]", metavar="character") | |
) | |
opt_parser = OptionParser(option_list=option_list); | |
opt = parse_args(opt_parser); | |
if (is.null(opt$data)){ | |
print_help(opt_parser) | |
stop("At least one arguments must be supplied (tree file and contextual data files).n", call.=FALSE) | |
} | |
suppressMessages(library(magrittr)) | |
suppressMessages(library(tidyverse)) | |
suppressMessages(library(pbmcapply)) | |
suppressMessages(library(maditr)) | |
suppressMessages(library(data.table)) | |
ncores <- opt$threads | |
cat(paste0("\nReading file ", opt$data, "...")) | |
suppressMessages(cl_tax_orfs <- read_tsv(opt$data, col_names = TRUE) %>% | |
mutate(cl_name = as.character(cl_name))) | |
cat(green(" done\n")) | |
cat(paste(" File", opt$data, "has", nrow(cl_tax_orfs), "rows and", ncol(cl_tax_orfs), "columns\n\n")) | |
cat(paste0("\nReading file ", opt$components, "...")) | |
suppressMessages(cl_components <- read_tsv(opt$components, col_names = TRUE) %>% | |
mutate(cl_name = as.character(cl_name))) | |
cat(green(" done\n")) | |
cat(paste(" File", opt$components, "has", nrow(cl_components), "rows and", ncol(cl_components), "columns\n\n")) | |
majority_vote <- function (x, seed = 12345) { | |
set.seed(seed) | |
whichMax <- function(x) { | |
m <- seq_along(x)[x == max(x, na.rm = TRUE)] | |
if (length(m) > 1) | |
sample(m, size = 1) | |
else m | |
} | |
x <- as.vector(x) | |
tab <- table(x) | |
m <- whichMax(tab) | |
out <- list(table = tab, ind = m, majority = names(tab)[m]) | |
return(out) | |
} | |
# Analyse annotations ----------------------------------------------------- | |
# cl_tax_orfs %>% | |
# group_by(cl_name, category) %>% | |
# count() %>% | |
# arrange(desc(n)) %>% | |
# group_by(category) %>% | |
# skimr::skim() | |
propagate_annotation <-function(X, data = data){ | |
cls <- data %>% | |
dplyr::filter(cl_name == X) | |
consensus_superkingdom <- cls %>% | |
dplyr::filter(!is.na(superkingdom)) %>% | |
summarise(consensus_superkingdom = ifelse(n() < 1, NA, majority_vote(superkingdom)$majority)) %>% .$consensus_superkingdom | |
consensus_phylum <- cls %>% | |
dplyr::filter(superkingdom == consensus_superkingdom, | |
!is.na(phylum)) %>% | |
summarise(consensus_phylum = ifelse(n() < 1, paste(consensus_superkingdom, "NA", sep = "_"), majority_vote(phylum)$majority)) %>% .$consensus_phylum | |
consensus_class <- cls %>% | |
dplyr::filter(superkingdom == consensus_superkingdom, | |
phylum == consensus_phylum, | |
!is.na(class)) %>% | |
summarise(consensus_class = ifelse(n() < 1, paste(consensus_phylum, "NA", sep = "_"), majority_vote(class)$majority)) %>% .$consensus_class | |
consensus_order <- cls %>% | |
dplyr::filter(superkingdom == consensus_superkingdom, | |
phylum == consensus_phylum, | |
class == consensus_class, | |
!is.na(order)) %>% | |
summarise(consensus_order = ifelse(n() < 1, paste(consensus_class, "NA", sep = "_"), majority_vote(order)$majority)) %>% .$consensus_order | |
consensus_family <- cls %>% | |
dplyr::filter(superkingdom == consensus_superkingdom, | |
phylum == consensus_phylum, | |
class == consensus_class, | |
order == consensus_order, | |
!is.na(family)) %>% | |
summarise(consensus_family = ifelse(n() < 1, paste(consensus_order, "NA", sep = "_"), majority_vote(family)$majority)) %>% .$consensus_family | |
consensus_genus <- cls %>% | |
dplyr::filter(superkingdom == consensus_superkingdom, | |
phylum == consensus_phylum, | |
class == consensus_class, | |
order == consensus_order, | |
family == consensus_family, | |
!is.na(genus)) %>% | |
summarise(consensus_genus = ifelse(n() < 1, paste(consensus_family, "NA", sep = "_"), majority_vote(genus)$majority)) %>% .$consensus_genus | |
tibble(cl_name = X, consensus_superkingdom, consensus_phylum, consensus_class, | |
consensus_order, consensus_family, consensus_genus) | |
} | |
cat(paste("Propagating taxonomic annotations at cluster level using", cyan(ncores), "cores...\n")) | |
cl_tax_consensus <- pbmcapply::pbmclapply(cl_tax_orfs$cl_name %>% unique(), | |
propagate_annotation, data = cl_tax_orfs, | |
mc.cores = ncores, | |
ignore.interactive = T, | |
max.vector.size = 1e7) %>% | |
bind_rows() | |
tax_ranks <- c("consensus_superkingdom", "consensus_phylum", "consensus_class", "consensus_order", "consensus_family", "consensus_genus") | |
cat(paste0("Propagation at taxonomic annotations at cluster level... ", green("done\n\n"))) | |
# Quick look to the consensus annotations --------------------------------- | |
#map(tax_ranks, function(X){cl_tax_consensus %>% group_by_(X) %>% count(sort = TRUE) %>% ungroup()}) | |
#cl_tax_consensus %>% filter(is.na(consensus_phylum)) | |
# Write results ----------------------------------------------------------- | |
# cl_tax_orfs %>% | |
# select(supercluster, cl_name) %>% | |
# inner_join(cl_tax_consensus %>% select(cl_name, consensus_class, consensus_phylum, consensus_superkingdom)) %>% | |
# write_tsv("~/Downloads/pr2alluvial.tsv") | |
# Annotate at the ORF level ----------------------------------------------- | |
# Uses the data generated above | |
propagate_annotation_na <-function(X, data = data){ | |
cls <- data[X,] %>% | |
select(genus, family, order, class, phylum, superkingdom, orf, cl_name, supercluster) | |
consensus_superkingdom <- cls %>% | |
summarise(consensus_superkingdom = ifelse(is.na(superkingdom), NA, superkingdom))%>% .$consensus_superkingdom | |
consensus_phylum <- cls %>% | |
summarise(consensus_phylum = ifelse(is.na(phylum), paste(consensus_superkingdom, "NA", sep = "_"), phylum)) %>% .$consensus_phylum | |
consensus_class <- cls %>% | |
summarise(consensus_class = ifelse(is.na(class), paste(consensus_phylum, "NA", sep = "_"), class)) %>% .$consensus_class | |
consensus_order <- cls %>% | |
summarise(consensus_order = ifelse(is.na(order), paste(consensus_class, "NA", sep = "_"), order)) %>% .$consensus_order | |
consensus_family <- cls %>% | |
summarise(consensus_family = ifelse(is.na(family), paste(consensus_order, "NA", sep = "_"), family)) %>% .$consensus_family | |
consensus_genus <- cls %>% | |
dplyr::filter(superkingdom == consensus_superkingdom, | |
phylum == consensus_phylum, | |
class == consensus_class, | |
order == consensus_order, | |
family == consensus_family, | |
!is.na(genus)) %>% | |
summarise(consensus_genus = ifelse(n() < 1, paste(consensus_family, "NA", sep = "_"), majority_vote(genus)$majority)) %>% .$consensus_genus | |
tibble(supercluster = cls$supercluster, orf = cls$orf, cl_name = cls$cl_name, consensus_superkingdom, consensus_phylum, consensus_class, | |
consensus_order, consensus_family, consensus_genus) | |
} | |
cat("Collecting consensus annotations... ") | |
pr_clusters_consensus <- cl_tax_orfs %>% | |
select(supercluster, cl_name) %>% | |
inner_join(cl_tax_consensus %>% select(cl_name, consensus_superkingdom, consensus_phylum, consensus_class, consensus_order, consensus_family, consensus_genus), by = "cl_name") | |
cat(green("done"),"\nCollecting ORFs with taxonomic annotations... ") | |
pr_clusters_no_na <- cl_tax_orfs %>% | |
filter(!(is.na(superkingdom) | is.na(phylum))) | |
cat(green("done"), "\nCollecting ORFs without taxonomic annotations... ") | |
cl_tax_consensus_na <- cl_tax_orfs %>% | |
filter(is.na(superkingdom) | is.na(phylum)) %>% | |
select(supercluster, cl_name, orf) %>% | |
unique() %>% | |
as.data.table() %>% | |
dt_inner_join(pr_clusters_consensus %>% select(-supercluster), by = "cl_name") %>% unique() %>% as_tibble() | |
cat(green("done"),"\nPropagating taxonomic annotations at the ORF level using", cyan(ncores), "cores... ") | |
cl_tax_consensus_no_na <- pbmclapply(1:nrow(pr_clusters_no_na), | |
propagate_annotation_na, | |
data = pr_clusters_no_na, | |
mc.cores = ncores, | |
ignore.interactive = T, | |
max.vector.size = 1e7) %>% | |
bind_rows() | |
cat(paste0("Propagation of taxonomic annotations at the ORF level... ", green("done"), "\n\nExporting data for alluvial plot drawing to file ", silver(opt$out), "... ")) | |
bind_rows(cl_tax_consensus_no_na, | |
cl_tax_consensus_na) %>% | |
left_join(cl_components, by = "cl_name") %>% | |
write_tsv(opt$out) | |
cat(green("done\n")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import argparse | |
from Bio import AlignIO | |
import sys | |
version = "0.1.1 (28.11.17)" | |
name = os.path.basename(sys.argv[0]) #get scriptname from actual script filename that was called | |
parser=argparse.ArgumentParser(description="Converts alignments in FastaFormat to (strict & interleaved; relaxed & interleaved if '-r' is set) phylip format. Will raise error if alignments contain dots (\".\"), so replace those with dashes (\"-\") beforehand (e.g. using sed)") | |
parser.add_argument('-i','--input', action = "store", dest = "input", required = True, help = "(aligned) input fasta") | |
parser.add_argument('-o','--output', action = "store", dest = "output", help = "Output filename (default = <Input-file>.phylip") | |
parser.add_argument('-r','--relaxed', action = "store_true", dest = "relaxed", default = False, help = "output in \"relaxed\" phylip format. (default = False --> Output is strict phylip") | |
args=parser.parse_args() | |
if not args.output: | |
args.output = args.input + ".phylip" #if no outputname was specified set outputname based on inputname | |
def main(): | |
infile = open(args.input, "r") | |
outfile = open(args.output, "w") | |
alignments = AlignIO.parse(infile, "fasta") | |
if args.relaxed: | |
AlignIO.write(alignments, outfile, "phylip-relaxed") | |
else: | |
AlignIO.write(alignments, outfile, "phylip") | |
infile.close() | |
outfile.close() | |
sys.stderr.write("\nfinished\n") | |
main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -e | |
function error_handler() { | |
printf "${RED}ERROR:${NC} Error occurred in script at line: ${RED}${1}${NC}\n" | |
printf "${RED}ERROR:${NC} Line exited with status: ${RED}${2}${NC}\n" | |
exit | |
} | |
trap 'error_handler ${LINENO} $?' ERR | |
declare -r GREEN='\033[1;32m' | |
declare -r RED='\033[1;31m' | |
declare -r YELLOW='\033[1;33m' | |
declare -r CYAN='\033[1;36m' | |
declare -r PURPLE='\033[1;35m' | |
# declare -r BLUE='\033[1;34m' | |
declare -r GREY='\033[1;30m' | |
declare -r NC='\033[0m' | |
declare -r SEQKITBIN=seqkit | |
declare -r TAXACONBIN=/scratch/antonio/PRs/WF/EPA-NG/taxnameconvert-2.4/taxnameconvert.pl | |
declare -r MODELTBIN=/scratch/antonio/PRs/WF/EPA-NG/modeltest-ng/bin/modeltest-ng | |
declare -r RAXMLBIN=~/opt/standard-RAxML/raxmlHPC-PTHREADS-AVX2 | |
declare -r F2PBIN=~/opt/scripts/fasta2phylip.py | |
declare -r PAPABIN=~/opt/papara_nt/papara | |
declare -r EPABIN=/scratch/antonio/PRs/WF/epa-ng/bin/epa-ng | |
declare -r ODSEQBIN=~/opt/OD-Seq_mg/OD-seq | |
declare -r TAXITBIN=taxit | |
declare -r PPLACERBIN=pplacer | |
declare -r GAPPABIN=/scratch/antonio/PRs/WF/gappa/bin/gappa | |
declare -r CDHITBIN=cdhit | |
declare -r ORFANNOT=~/opt/scripts/annotate_query_sequences.R | |
declare -r THREADS=64 | |
declare -r GUPPYBIN=guppy | |
declare -r ALLUVIALBIN=~/opt/scripts/create_alluvial.R | |
#declare -r COMPFILE=k_components.tsv | |
# Files used during the analysis | |
# | |
# Basic files | |
model_files=(MT.ckp MT.log MT.out) | |
raxml_files=(RAxML_binaryModelParameters.OPT RAxML_info.OPT RAxML_log.OPT RAxML_result.OPT) | |
papara_files=(MICrhoDE_prot_aligned.fixed.phy query.0.fasta query.0.core.fasta query.0.outlier.fasta query.fasta papara_alignment.ALN papara_alignment.ALN0 papara_log.ALN papara_log.ALN0 papara_quality.ALN papara_quality.ALN0) | |
epang_files=(EPA_PR/epa_info.log EPA_PR/epa_result.jplace) | |
pplacer_files=(query.PR.jplace) | |
#gappa_files_e=(epa-ng_labelled_tree epa-ng_per_pquery_assign epa-ng_profile.csv MICrhoDE_prot_aligned.query-SC.epa-ng.tsv) | |
#gappa_files_p=(pplacer_labelled_tree pplacer_per_pquery_assign pplacer_profile.csv MICrhoDE_prot_aligned.query-SC.pplacer.tsv) | |
basic_files_download=(k_tax_stat.tsv.gz kwp_tax_stat.tsv.gz gu_tax_stat.tsv.gz k_cl_orfs.tsv.gz kwp_cl_orfs.tsv.gz gu_cl_orfs.tsv.gz) | |
basic_files=(k_tax_stat.tsv.gz kwp_tax_stat.tsv.gz gu_tax_stat.tsv.gz k_cl_orfs.tsv.gz kwp_cl_orfs.tsv.gz gu_cl_orfs.tsv.gz k_pr_tax.tsv kwp_pr_tax.tsv gu_pr_tax.tsv k_pr_orfs.tsv kwp_pr_orfs.tsv gu_pr_orfs.tsv) | |
graft_files_e=(grafted_tree.epa-ng.tre) | |
graft_files_p=(grafted_tree.pplacer.tre) | |
annot_files_e=(epa-ng_per_pquery_assign.tsv) | |
annot_files_p=(pplacer_per_pquery_assign.tsv) | |
dedup_files=(all_sequences_names_to_extract.dedup all_sequences_names_to_extract.dedup.clstr) | |
itol_files=(MICrhoDE_itol_strip.txt MICrhoDE_SC_colors.txt MICrhoDE_query-SC_colors.txt MICrhoDE_itol_strip_query-SC.epa-ng.txt MICrhoDE_itol_strip_query-SC.pplacer.txt) | |
alluvial_files=(PR_tax_orf.pplacer.tsv PR_tax_orf.pplacer.tsv PR_tax_orf.epa-ng.tsv PR_tax_orf.epa-ng.tsv alluvial_data.pplacer.tsv alluvial_data.epa-ng.tsv) | |
function create_alluvial (){ | |
cat <(printf "short_name\torf\tcl_name\ttaxid\tfunc\tsize\tspecies\tgenus\tfamily\torder\tclass\tphylum\tsuperkingdom\tcategory\tsupercluster\n") > PR_tax_orf."${1}".tsv | |
join -t $'\t' -1 13 -2 1 <(sort -t $'\t' -k13,13 <(cat <(awk '{print $0"\tk"}' k_pr_tax.tsv) <(awk '{print $0"\tkwp"}' kwp_pr_tax.tsv) <(awk '{print $0"\tgu"}' gu_pr_tax.tsv))) <(sort -t $'\t' -k1,1 <(cat "${1}"_per_pquery_assign.tsv | tr ';' $'\t' | cut -f1,2)) >> PR_tax_orf."${1}".tsv | |
cat <(printf "orf\tcl_name\tshort_name\tcategory\n") <(awk '{print $0"\tk"}' k_pr_orfs.tsv) <(awk '{print $0"\tkwp"}' kwp_pr_orfs.tsv) <(awk '{print $0"\tgu"}' gu_pr_orfs.tsv) > PR_cl_orf."${1}".tsv | |
"${ALLUVIALBIN}" -d PR_tax_orf."${1}".tsv -p 64 -c "${COMPFILE}" -o alluvial_data."${1}".tsv | |
} | |
function get_time() { | |
TIME=$(date +%T) | |
printf "[${GREY}${TIME}${NC}]" | |
} | |
function gappa_assign () { | |
"${GAPPABIN}" analyze assign --jplace-path "${1}" --taxon-file MICrhoDE_prot_aligned.SC.tsv | |
sed ':a;N;/\nseq/!s/\n/\t/;ta;P;D' per_pquery_assign | awk -vFS="\t" '{if(NF > 2){print $1"\t"$6}else{print $1"\tNO_ASSIGN"}}' > MICrhoDE_prot_aligned.query-SC."${2}".tsv | |
mv labelled_tree "${2}"_labelled_tree | |
mv profile.csv "${2}"_profile.csv | |
mv per_pquery_assign "${2}"_per_pquery_assign | |
} | |
function graft_tree () { | |
"${GUPPYBIN}" tog -o grafted_tree."${2}".tre "${1}" | |
} | |
function orf_assign (){ | |
"${ORFANNOT}" -t "${1}" -d "${2}" -p "${THREADS}" -o "${3}"_per_pquery_assign.tsv | |
} | |
function check_files_missing(){ | |
files=("$@") | |
ALL=0 | |
for file in "${files[@]}"; do | |
if [ ! -f "${file}" ]; then | |
ALL=1 | |
fi | |
done | |
echo "${ALL}" | |
} | |
function create_basic_data (){ | |
# Generate files for alluvial plots | |
# Prepare results from HMMER | |
# Get consensus taxonomy for each orf | |
files=("$@") | |
printf "$(get_time) Downloading cluster data from ${YELLOW}metagenomics.eu${NC}..." | |
for file in "${files[@]}"; do | |
URL=http://files.metagenomics.eu/unknowns/ | |
wget -qN "${URL}"/"${file}" | |
done | |
printf " ${GREEN}done${NC}\n" | |
printf "$(get_time) Filtering taxonomic information ${RED}[get a coffee]${NC}..." | |
join -t $'\t' -1 2 -2 1 <(zcat k_tax_stat.tsv.gz | sort --parallel="${THREADS}" -S20% -k2,2) <(sort -k1,1 "${NAM}".name_mapping.tsv) > k_pr_tax.tsv | |
join -t $'\t' -1 2 -2 1 <(zcat kwp_tax_stat.tsv.gz | sort --parallel="${THREADS}" -S20% -k2,2) <(sort -k1,1 "${NAM}".name_mapping.tsv) > kwp_pr_tax.tsv | |
join -t $'\t' -1 2 -2 1 <(zcat gu_tax_stat.tsv.gz | sort --parallel="${THREADS}" -S20% -k2,2) <(sort -k1,1 "${NAM}".name_mapping.tsv) > gu_pr_tax.tsv | |
printf " ${GREEN}done${NC}\n" | |
printf "$(get_time) Adding cluster information to query sequences ${RED}[get a coffee]${NC}..." | |
join -t $'\t' -1 2 -2 1 <(zcat k_cl_orfs.tsv.gz | tr ' ' $'\t' | sort --parallel="${THREADS}" -S20% -k2,2) <(sort -k1,1 "${NAM}".name_mapping.tsv) > k_pr_orfs.tsv | |
join -t $'\t' -1 2 -2 1 <(zcat kwp_cl_orfs.tsv.gz | tr ' ' $'\t' | sort --parallel="${THREADS}" -S20% -k2,2) <(sort -k1,1 "${NAM}".name_mapping.tsv) > kwp_pr_orfs.tsv | |
join -t $'\t' -1 2 -2 1 <(zcat gu_cl_orfs.tsv.gz | tr ' ' $'\t' | sort --parallel="${THREADS}" -S20% -k2,2) <(sort -k1,1 "${NAM}".name_mapping.tsv) > gu_pr_orfs.tsv | |
printf " ${GREEN}done${NC}\n" | |
} | |
function create_itol_data () { | |
paste <(cut -f2 MICrhoDE_prot_aligned.SC.0.tsv | sort -u) <(printf "#72d9d3\n#e7abd3\n#bfd89b\n#9fbeed\n#e6bb8f\n#b2d6c8\n#dac4c8") > MICrhoDE_SC_colors.txt | |
cat <(printf "DATASET_COLORSTRIP\nSEPARATOR TAB\nDATASET_LABEL\tSuperCluster\nCOLOR\t#ff0000\nCOLOR_BRANCHES\t0\nDATA\n\n") <(join -t $'\t' -1 2 -2 1 <(sort -t $'\t' -k2,2 MICrhoDE_prot_aligned.SC.0.tsv) <(sort -k1,1 MICrhoDE_SC_colors.txt) | awk -vFS="\t" '{print $2"\t"$5"\t"$1}') > MICrhoDE_itol_strip.txt | |
# Create data file for iTOL using the grappa assignments | |
paste <(cut -f2 MICrhoDE_prot_aligned.SC.0.tsv | sort -u) <(printf "#72d9d3\n#e7abd3\n#bfd89b\n#9fbeed\n#e6bb8f\n#b2d6c8\n#dac4c8") > MICrhoDE_qurey-SC_colors.txt | |
cat MICrhoDE_SC_colors.txt <(printf "NO_ASSIGN\t#000000") >> MICrhoDE_query-SC_colors.txt | |
cat MICrhoDE_itol_strip.txt <(join -t $'\t' -1 2 -2 1 <(sort -t $'\t' -k2,2 <(cat pplacer_per_pquery_assign.tsv | tr ';' $'\t' | cut -f1,2)) <(sort -k1,1 MICrhoDE_query-SC_colors.txt) | awk -vFS="\t" '{print $2"\t"$3"\t"$1}') > MICrhoDE_itol_strip_query-SC.pplacer.txt | |
cat MICrhoDE_itol_strip.txt <(join -t $'\t' -1 2 -2 1 <(sort -t $'\t' -k2,2 <(cat epa-ng_per_pquery_assign.tsv | tr ';' $'\t' | cut -f1,2)) <(sort -k1,1 MICrhoDE_query-SC_colors.txt) | awk -vFS="\t" '{print $2"\t"$3"\t"$1}') > MICrhoDE_itol_strip_query-SC.epa-ng.txt | |
} | |
if [ $# -eq 0 ]; then | |
printf "$(get_time) ${RED}ERROR:${NC} No arguments supplied. Provide a ${CYAN}FASTA${NC} file with sequences to be placed\n" | |
exit 1 | |
fi | |
QUERY=${1} | |
COMPFILE=${2} | |
if [ ${QUERY: -6} != ".fasta" ]; then | |
printf "$(get_time) ${RED}ERROR:${NC} Provide a ${CYAN}FASTA${NC} file with sequences to be placed with ${CYAN}.fasta${NC} extension\n" | |
exit 1 | |
fi | |
NAM=$(basename "${QUERY}" .fasta) | |
# Remove everything after STOP (*) codon on MICrhoDE_prot_aligned.fasta | |
printf "$(get_time) Getting data from ${YELLOW}MICrhoDE${NC}..." | |
wget -N -q http://application.sb-roscoff.fr/micrhode/MICrhoDE_prot_aligned.fasta | |
wget -N -q http://application.sb-roscoff.fr/micrhode/MicRhoDE_051214.txt | |
wget -N -q http://application.sb-roscoff.fr/micrhode/tree_MicRhoDE_complete.nwk | |
printf " ${GREEN}done${NC}\n" | |
printf "$(get_time) Fixing ${YELLOW}MICrhoDE${NC} alignments..." | |
sed -i -e 's/*p/--/' MICrhoDE_prot_aligned.fasta | |
# Convert lowercase sequences to uppercase | |
"${SEQKITBIN}" replace -p .+ -r "ref-{nr}" MICrhoDE_prot_aligned.fasta | seqkit seq -u > MICrhoDE_prot_aligned.fixed.fasta | |
paste <(grep '>' MICrhoDE_prot_aligned.fasta | tr -d '>') <(grep '>' MICrhoDE_prot_aligned.fixed.fasta | tr -d '>') > MICrhoDE_prot_aligned.name_mapping.tsv | |
printf " ${GREEN}done${NC}\n" | |
printf "$(get_time) Fixing ${YELLOW}MICrhoDE${NC} tree..." | |
# Rename tree (http://www.cibiv.at/software/taxnameconvert/) | |
"${TAXACONBIN}" -f 1 -t 2 MICrhoDE_prot_aligned.name_mapping.tsv tree_MicRhoDE_complete.nwk tree_MicRhoDE_complete.renamed.nwk > /dev/null 2>&1 | |
printf " ${GREEN}done${NC}\n" | |
printf "$(get_time) Dereplicating query sequences...\n" | |
PRES=$(check_files_missing "${dedup_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Dereplicating query sequences... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${dedup_files[@]}" | |
"${CDHITBIN}" -i "${QUERY}" -c 1 -d 0 -o "${NAM}".dedup | |
printf "$(get_time) Dereplicating query sequences... ${GREEN}done${NC}\n" | |
fi | |
printf "$(get_time) Fixing query sequences..." | |
# Remove sequences with an X from our PRs | |
# Convert lowercase sequences to uppercase | |
"${SEQKITBIN}" replace -p .+ -r "seq-{nr}" "${NAM}".dedup | seqkit seq -u > "${NAM}".fixed.fasta | |
paste <(grep '>' "${NAM}".dedup | tr -d '>') <(grep '>' "${NAM}".fixed.fasta | tr -d '>') > "${NAM}".name_mapping.tsv | |
"${SEQKITBIN}" grep -v -s -p X -r "${NAM}".fixed.fasta > "${NAM}".noX.fasta | |
printf " ${GREEN}done${NC}\n" | |
printf "$(get_time) Filtering sequences smaller than 100aa..." | |
# Keep sequences longer than 100aa | |
"${SEQKITBIN}" seq --quiet -m 100 "${NAM}".noX.fasta > "${NAM}".noX.100.fasta 2> /dev/null | |
printf " ${GREEN}done${NC}\n" | |
printf "$(get_time) Identifying best substitution model...\n" | |
PRES=$(check_files_missing "${model_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Identifying best substitution model... ${RED}skipped${NC}: File exists.\n" | |
BESTM=$(grep Model: MT.out | awk '{print $2}' | sort | uniq -c | sort -k1,1nr | awk NR==1'{print $2}') | |
BESTMR=$(grep raxmlHPC MT.out | awk '{print $6}' | uniq -c | sort -k1,1nr | awk NR==1'{print $2}') | |
printf "$(get_time) Best model: ${GREEN}${BESTM}${NC}\n" | |
else | |
rm -f "${model_files[@]}" | |
# Use modeltest to find the best model | |
"${MODELTBIN}" -i MICrhoDE_prot_aligned.fixed.fasta -o MT -p "${THREADS}" -d aa -T raxml --force -t user -u tree_MicRhoDE_complete.renamed.nwk | |
BESTM=$(grep Model: MT.out | awk '{print $2}' | sort | uniq -c | sort -k1,1nr | awk NR==1'{print $2}') | |
BESTMR=$(grep raxmlHPC MT.out | awk '{print $6}' | uniq -c | sort -k1,1nr | awk NR==1'{print $2}') | |
printf "$(get_time) Identifying best substitution model... ${GREEN}done${NC}\n" | |
printf "$(get_time) Best model: ${GREEN}${BESTM}${NC}\n" | |
fi | |
printf "$(get_time) Optimizing initial tree parameters and branch lengths...\n" | |
PRES=$(check_files_missing "${raxml_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Optimizing initial tree parameters and branch lengths... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${raxml_files[@]}" | |
# Optimize tree parameters and branch lengths | |
[ -f RAxML_log.OPT ] && rm -rf *.OPT | |
"${RAXMLBIN}" -s MICrhoDE_prot_aligned.fixed.fasta -T "${THREADS}" -f e -n OPT -t tree_MicRhoDE_complete.renamed.nwk -m "${BESTMR}" | |
printf "$(get_time) Optimizing initial tree parameters and branch lengths... ${GREEN}done${NC}\n" | |
fi | |
# Add our PRs seqs to the REF alignment with PaPaRA (https://github.com/sim82/papara_nt) | |
# First we need to patch the PaPaRA pvec.h. Get patch from: https://gist.github.com/genomewalker/a3f03846cf7e5ad63be63a9c8c153301 | |
# Patch the file | |
# git clone --recursive https://github.com/sim82/papara_nt | |
# cd papara_nt || exit "Cannot create folder" | |
# wget https://gist.githubusercontent.com/genomewalker/a3f03846cf7e5ad63be63a9c8c153301/raw/4cee4913ea576806f504d47f6e5702e228f9307c/pvec.h.patch | |
# patch < pvec.h.patch | |
printf "$(get_time) ${PURPLE}[Iter 1]:${NC} Aligning query sequences to reference alignment with PaPaRA...\n" | |
PRES=$(check_files_missing "${papara_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Aligning query sequences to reference alignment with PaPaRA... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${papara_files[@]}" | |
# Convert FASTA ref alignment to relaxed phylip | |
"${F2PBIN}" -i MICrhoDE_prot_aligned.fixed.fasta -o MICrhoDE_prot_aligned.fixed.phy -r > /dev/null 2>&1 | |
# Align query sequences to reference alignment | |
[ -f papara_log.ALN0 ] && rm -rf *ALN0 | |
"${PAPABIN}" -t RAxML_result.OPT -s MICrhoDE_prot_aligned.fixed.phy -q "${NAM}".noX.100.fasta -a -j "${THREADS}" -n ALN0 -r | |
# We will use epa-ng to place the query sequences into the reference tree | |
# First we split the alignment in reference and query | |
"${EPABIN}" --split MICrhoDE_prot_aligned.fixed.phy papara_alignment.ALN0 | |
printf "$(get_time) ${PURPLE}[Iter 1]:${NC} Aligning query sequences to reference alignment with PaPaRA... ${GREEN} done${NC}\n" | |
printf "$(get_time) Identifying outliers (OD-Seq) in query alignment..." | |
[ -f query.fasta ] && mv query.fasta query.0.fasta | |
"${ODSEQBIN}" -t 24 -i query.0.fasta -c query.0.core.fasta -o query.0.outlier.fasta | |
printf "$(get_time) Identifying outliers (OD-Seq) in query alignment... ${GREEN}done${NC}\n" | |
printf "$(get_time) ${PURPLE}[Iter 2]:${NC} Aligning core query sequences to reference alignment with PaPaRA...\n" | |
# Align query sequences to reference alignment | |
[ -f papara_alignment.ALN ] && rm -rf *ALN | |
"${PAPABIN}" -t RAxML_result.OPT -s MICrhoDE_prot_aligned.fixed.phy -q query.0.core.fasta -a -j "${THREADS}" -n ALN -r | |
# We will use epa-ng to place the query sequences into the reference tree | |
# First we split the alignment in reference and query | |
[ -f query.fasta ] && rm -rf query.fasta | |
"${EPABIN}" --split MICrhoDE_prot_aligned.fixed.phy papara_alignment.ALN | |
printf "$(get_time) ${PURPLE}[Iter 2]:${NC} Aligning core query sequences to reference alignment with PaPaRA... ${GREEN}done${NC}\n" | |
fi | |
printf "$(get_time) Placing query sequences to reference tree using ${CYAN}epa-ng${NC}...\n" | |
PRES=$(check_files_missing "${epang_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Placing query sequences to reference tree using ${CYAN}epa-ng${NC}... ${RED}skipped${NC}: File exists.\n" | |
else | |
# And place the sequences | |
rm -f "${epang_files[@]}" | |
mkdir -p EPA_PR | |
"${EPABIN}" --tree RAxML_result.OPT --ref-msa MICrhoDE_prot_aligned.fixed.fasta --query query.fasta --outdir EPA_PR --model "${BESTM}" -T "${THREADS}" | |
printf "$(get_time) Placing query sequences to reference tree using ${CYAN}epa-ng${NC}... ${GREEN}done${NC}\n" | |
fi | |
printf "$(get_time) Placing query sequences to reference tree using ${CYAN}pplacer${NC}...\n" | |
PRES=$(check_files_missing "${pplacer_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Placing query sequences to reference tree using ${CYAN}epa-ng${NC}... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${pplacer_files[@]}" | |
# Place sequences with pplacer | |
"${TAXITBIN}" create -l PR -P PR.refpkg --aln-fasta MICrhoDE_prot_aligned.fixed.fasta --tree-stats RAxML_info.OPT --tree-file RAxML_result.OPT | |
"${PPLACERBIN}" -o query.PR.jplace -p --keep-at-most 20 -c PR.refpkg query.fasta -j "${THREADS}" | |
printf "$(get_time) Placing query sequences to reference tree using ${CYAN}pplacer${NC}... ${GREEN}done${NC}\n" | |
fi | |
# 2. Assign taxonomy | |
# PRES=$(check_files_missing "${gappa_files_e[@]}") | |
# if [ "${PRES}" -eq 0 ]; then | |
# printf "$(get_time) Assigning SuperCluster affiliation with ${CYAN}gappa${NC} using the ${CYAN}epa-ng${NC} results... ${RED}skipped${NC}: File exists.\n" | |
# else | |
# rm -f "${gappa_files_e[@]}" | |
# gappa_assign EPA_PR/epa_result.jplace epa-ng | |
# printf "$(get_time) Assigning SuperCluster affiliation with ${CYAN}gappa${NC} using the ${CYAN}epa-ng${NC} results... ${GREEN}done${NC}\n" | |
# fi | |
# | |
# printf "$(get_time) Assigning SuperCluster affiliation with ${CYAN}gappa${NC} using the ${CYAN}pplacer${NC} results...\n" | |
# PRES=$(check_files_missing "${gappa_files_p[@]}") | |
# if [ "${PRES}" -eq 0 ]; then | |
# printf "$(get_time) Assigning SuperCluster affiliation with ${CYAN}gappa${NC} using the ${CYAN}pplacer${NC} results... ${RED}skipped${NC}: File exists.\n" | |
# else | |
# rm -f "${gappa_files_p[@]}" | |
# gappa_assign query.PR.jplace pplacer | |
# printf "$(get_time) Assigning SuperCluster affiliation with ${CYAN}gappa${NC} using the ${CYAN}pplacer${NC} results... ${GREEN}done${NC}\n" | |
# fi | |
printf "$(get_time) Grafting ${CYAN}epa-ng${NC} tree with ${CYAN}guppy${NC}...\n" | |
PRES=$(check_files_missing "${graft_files_e[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Grafting ${CYAN}epa-ng${NC} tree with ${CYAN}guppy${NC}... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${graft_files_e[@]}" | |
graft_tree EPA_PR/epa_result.jplace epa-ng | |
printf "$(get_time) Grafting ${CYAN}epa-ng${NC} tree with ${CYAN}guppy${NC}... ${GREEN}done${NC}\n" | |
fi | |
printf "$(get_time) Grafting ${CYAN}pplacer${NC} tree with ${CYAN}guppy${NC}...\n" | |
PRES=$(check_files_missing "${graft_files_p[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Grafting ${CYAN}pplacer${NC} tree with ${CYAN}guppy${NC}... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${graft_files_p[@]}" | |
graft_tree query.PR.jplace pplacer | |
printf "$(get_time) Grafting ${CYAN}pplacer${NC} tree with ${CYAN}guppy${NC}... ${GREEN}done${NC}\n" | |
fi | |
join -t $'\t' -1 1 -2 1 <(tail -n+2 MicRhoDE_051214.txt | cut -f3,8,9,10 | sort -k1,1) <(sort -k1,1 MICrhoDE_prot_aligned.name_mapping.tsv) | awk -vFS="\t" '{print $5"\t"$2"\t"$3"\t"$4}' > MICrhoDE_prot_aligned.SC.0.tsv | |
paste <(cut -f1 MICrhoDE_prot_aligned.SC.0.tsv) <(cut -f2- MICrhoDE_prot_aligned.SC.0.tsv | tr $'\t' ';') > MICrhoDE_prot_aligned.SC.tsv | |
printf "$(get_time) Assigning SuperCluster affiliation with using the ${CYAN}epa-ng${NC} results...\n" | |
PRES=$(check_files_missing "${annot_files_e[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Assigning SuperCluster affiliation with using the ${CYAN}epa-ng${NC} results... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${annot_files_e[@]}" | |
orf_assign grafted_tree.epa-ng.tre MICrhoDE_prot_aligned.SC.tsv epa-ng | |
printf "$(get_time) Assigning SuperCluster affiliation with using the ${CYAN}epa-ng${NC} results... ${GREEN}done${NC}\n" | |
fi | |
printf "$(get_time) Assigning SuperCluster affiliation with using the ${CYAN}pplacer${NC} results...\n" | |
PRES=$(check_files_missing "${annot_files_p[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Assigning SuperCluster affiliation with using the ${CYAN}pplacer${NC} results... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${annot_files_p[@]}" | |
orf_assign grafted_tree.pplacer.tre MICrhoDE_prot_aligned.SC.tsv pplacer | |
printf "$(get_time) Assigning SuperCluster affiliation with using the ${CYAN}pplacer${NC} results... ${GREEN}done${NC}\n" | |
fi | |
# Create data file for iTOL/alluvial using the grafted tree assignments | |
printf "$(get_time) Getting basic data for ${CYAN}iTOL${NC} and ${CYAN}alluvial${NC} plots...\n" | |
PRES=$(check_files_missing "${basic_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Getting basic data for ${CYAN}iTOL${NC} and ${CYAN}alluvial${NC} plots... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${basic_files[@]}" | |
create_basic_data "${basic_files_download[@]}" | |
printf "$(get_time) Getting basic data for ${CYAN}iTOL${NC} and ${CYAN}alluvial${NC} plots... ${GREEN}done${NC}\n" | |
fi | |
# Generate files for iTOL | |
# https://itol.embl.de/help/dataset_color_strip_template.txt | |
# 1. Assign colors to clusters and create strip file | |
printf "$(get_time) Creating data for ${CYAN}iTOL${NC}...\n" | |
PRES=$(check_files_missing "${itol_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Creating data for ${CYAN}iTOL${NC}... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${itol_files[@]}" | |
create_itol_data | |
printf "$(get_time) Creating data for ${CYAN}iTOL${NC}... ${GREEN}done${NC}\n" | |
fi | |
printf "$(get_time) Creating data for ${CYAN}alluvial${NC} plots...\n" | |
PRES=$(check_files_missing "${alluvial_files[@]}") | |
if [ "${PRES}" -eq 0 ]; then | |
printf "$(get_time) Creating data for ${CYAN}alluvial${NC} plots... ${RED}skipped${NC}: File exists.\n" | |
else | |
rm -f "${itol_files[@]}" | |
create_alluvial pplacer | |
create_alluvial epa-ng | |
printf "$(get_time) Creating data for ${CYAN}alluvial${NC} plots... ${GREEN}done${NC}\n" | |
fi | |
printf "\n\n$(get_time) ${RED}ANALYSIS FINISHED${NC}\n" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment