For the C program:
# Igraph C-library
# Fetch, unpack, configure, build and check the igraph 0.7.1 source release.
# Download the 0.7.1 source tarball into the current directory.
wget https://igraph.org/nightly/get/c/igraph-0.7.1.tar.gz
# Extract the gzip-compressed archive, listing files as they are unpacked.
tar xvfz igraph-0.7.1.tar.gz
cd igraph-0.7.1
# The install prefix is versioned under ${PREFIX}; PREFIX is not defined in
# this snippet and is expected to be set in the environment beforehand.
./configure --prefix="${PREFIX}"/igraph/0.7.1
# Build with up to 8 parallel jobs.
make -j 8
# Run the "check" target (presumably the library's test suite).
# NOTE(review): no `make install` is shown, so these steps alone do not copy
# anything into the configured prefix -- confirm whether that step follows.
make check
For the C program:
# Igraph C-library
# Fetch, unpack, configure, build and check the igraph 0.7.1 source release.
# Download the 0.7.1 source tarball into the current directory.
wget https://igraph.org/nightly/get/c/igraph-0.7.1.tar.gz
# Extract the gzip-compressed archive, listing files as they are unpacked.
tar xvfz igraph-0.7.1.tar.gz
cd igraph-0.7.1
# The install prefix is versioned under ${PREFIX}; PREFIX is not defined in
# this snippet and is expected to be set in the environment beforehand.
./configure --prefix="${PREFIX}"/igraph/0.7.1
# Build with up to 8 parallel jobs.
make -j 8
# Run the "check" target (presumably the library's test suite).
# NOTE(review): no `make install` is shown, so these steps alone do not copy
# anything into the configured prefix -- confirm whether that step follows.
make check
easy-taxonomy e77760e07c3167ef94f937365175d68b.assm.combined.fasta DB/uniref50-tax/UniRef50 e77760e07c3167ef94f937365175d68b.UniRef50 /dev/shm/tmp/contig-taxonomy/e77760e07c3167ef94f937365175d68b.tax.UniRef50 --tax-lineage 2 --majority 0.5 --vote-mode 1 --lca-mode 3 --orf-filter 1 --lca-ranks superkingdom,phylum,class,order,family,genus --threads 32
MMseqs Version: 5daca424b162cc5fdf0b9cd151aebed86975cbf6
ORF filter 1
ORF filter e-value 100
ORF filter sensitivity 2
LCA mode 3
Majority threshold 0.5
Vote mode 1
LCA ranks superkingdom,phylum,class,order,family,genus
# Conda environment definition for anvi'o.
# Typical use: `conda env create -f <this-file>` (file name not shown here).
name: anvio-7.1
# Channels are listed in the order they should be searched.
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - python=3.6
  # NOTE(review): the environment is named anvio-7.1 but the spec below is
  # `anvio=7`; confirm whether an exact 7.1 pin is intended.
  - anvio=7
  - diamond=0.9.14
  - pip
# Module imports (original order preserved; trailing table residue removed so
# each line is a valid import statement again).
from tqdm import tqdm
from collections import defaultdict
import argparse
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import Pool
import gzip
from itertools import zip_longest
from mimetypes import guess_type
from functools import partial
from cdifflib import CSequenceMatcher
library(tidyverse)
# Read in the data
# NOTE(review): setwd() changes the working directory for the rest of the
# session. It is kept because list.files(".") below (and possibly later code)
# resolves paths against it; consider passing the directory explicitly.
setwd("/maps/projects/lundbeck/scratch/taxDB/v6/metadata/src_files")
# Collect the paths of all files in the working directory whose names match
# "genome_metadata.txt". The pattern is a regular expression, so "." matches
# any single character.
ncbi_assm_stats <- list.files(".", pattern = "genome_metadata.txt", full.names = TRUE)
ncbi_assm_stats <- map_dfr(ncbi_assm_stats, function(X) { | |
read_tsv(X, col_names = TRUE) | |
}) %>% | |
select(-filename) %>% |