#!/usr/bin/env bash
#
# Draw a 20M-sequence sample from the combined MG + GTDB MMseqs2 database,
# cluster the whole sample, then split off the GTDB sequences and cluster the
# remaining (MG-only) sequences on their own.
#
# Requirements:
#   - mg_gtdb_20190502.tar.gz must be reachable from inside agnostos-comp/
#     (the archive is referenced by a relative path after the `cd`).
#   - seqkit on PATH; mmseqs at $MMSEQS (override via the environment).
#
# NOTE(review): one_step/ and two_step/ are created but nothing below writes
# into them -- presumably later steps use them; confirm.

# Every step consumes the previous step's output, so stop at the first failure
# instead of running hours of clustering on missing or partial input.
set -euo pipefail

MMSEQS=${MMSEQS:-/vol/cloud/geogenetics/opt/bin/mmseqs}

readonly DB=mg_gtdb_db_20190502
readonly SAMPLE="${DB}.20M"

# Identical clustering settings for both runs so the results stay comparable.
readonly -a CLUSTER_OPTS=(--threads 32 -c 0.8 --cov-mode 0 --min-seq-id 0.3 -s 5)

# --- Workspace ---------------------------------------------------------------
mkdir -p agnostos-comp
cd agnostos-comp
mkdir -p one_step two_step

# --- Unpack the database and draw the sample ----------------------------------
tar xvfz mg_gtdb_20190502.tar.gz

# Give the header DB a .dbtype file by linking the sequence DB's one.
# NOTE(review): presumably the archive ships without ${DB}_h.dbtype -- confirm.
ln -sf "${DB}.dbtype" "${DB}_h.dbtype"

"$MMSEQS" convert2fasta "$DB" "${DB}.fasta"

# -n with -2 (two-pass mode) samples by number without loading every sequence
# into memory.
seqkit sample -j 16 -n 20000000 "${DB}.fasta" -o "${SAMPLE}.fasta" -2

# Count the GTDB entries (GB_ / RS_ prefixed accessions) in the sample.
# -E is required: in grep's default basic-regex mode `|` is a literal character.
LC_ALL=C grep -c -E "GB_|RS_" "${SAMPLE}.fasta"

# --- Cluster the full sample --------------------------------------------------
"$MMSEQS" createdb "${SAMPLE}.fasta" "$SAMPLE"
"$MMSEQS" cluster "$SAMPLE" "${SAMPLE}_cluDB" tmp "${CLUSTER_OPTS[@]}"

# --- Split the sample into GTDB and MG sequences ------------------------------
# Header lines of GTDB entries, with the leading '>' stripped.
LC_ALL=C grep -E "GB_|RS_" "${SAMPLE}.fasta" | tr -d '>' > gtdb.ids

seqkit grep -f gtdb.ids "${SAMPLE}.fasta" > gtdb_20190502.20M.fasta
seqkit grep -v -f gtdb.ids "${SAMPLE}.fasta" > mg_20190502.20M.fasta

# --- Cluster the MG-only sequences --------------------------------------------
"$MMSEQS" createdb mg_20190502.20M.fasta step1-db
"$MMSEQS" cluster step1-db step1_cluDB tmp "${CLUSTER_OPTS[@]}"