# Create the working directory and move into it.
mkdir agnostos-comp
cd agnostos-comp
# Create the one_step and two_step sub-directories.
mkdir one_step two_step
# Unpack the archive, then link the .dbtype file under the "_h" name.
tar xvfz mg_gtdb_20190502.tar.gz
ln -s mg_gtdb_db_20190502.dbtype mg_gtdb_db_20190502_h.dbtype
| name: anvio-7.1 | |
| channels: | |
| - conda-forge | |
| - bioconda | |
| - defaults | |
| dependencies: | |
| - python=3.6 | |
| - anvio=7 | |
| - diamond=0.9.14 | |
| - pip |
| easy-taxonomy e77760e07c3167ef94f937365175d68b.assm.combined.fasta DB/uniref50-tax/UniRef50 e77760e07c3167ef94f937365175d68b.UniRef50 /dev/shm/tmp/contig-taxonomy/e77760e07c3167ef94f937365175d68b.tax.UniRef50 --tax-lineage 2 --majority 0.5 --vote-mode 1 --lca-mode 3 --orf-filter 1 --lca-ranks superkingdom,phylum,class,order,family,genus --threads 32 | |
| MMseqs Version: 5daca424b162cc5fdf0b9cd151aebed86975cbf6 | |
| ORF filter 1 | |
| ORF filter e-value 100 | |
| ORF filter sensitivity 2 | |
| LCA mode 3 | |
| Majority threshold 0.5 | |
| Vote mode 1 | |
| LCA ranks superkingdom,phylum,class,order,family,genus |
For the C program:
# Igraph C-library
# Download the igraph 0.7.1 source tarball.
wget https://igraph.org/nightly/get/c/igraph-0.7.1.tar.gz
# Unpack it and enter the source directory.
tar xvfz igraph-0.7.1.tar.gz
cd igraph-0.7.1
# Configure the install prefix as ${PREFIX}/igraph/0.7.1.
# NOTE(review): PREFIX is not set in this snippet - assumes it is defined beforehand.
./configure --prefix="${PREFIX}"/igraph/0.7.1
# Compile with 8 parallel jobs.
make -j 8
make check
| 1.2G ./agnostosDB_dbf02445-20200519_environmental.tar.gz | |
| 104G ./agnostosDB_dbf02445-20200519_hh-suite-db.tar.gz | |
| 21G ./agnostosDB_dbf02445-20200519_mmseqs-cluseqdb.tar.gz | |
| 11G ./agnostosDB_dbf02445-20200519_mmseqs-profiles.tar.gz | |
| 3.9G ./agnostosDB_dbf02445-20200519_original-data.tar.gz | |
| 279M ./agnostosDB_dbf02445-20200519_phylogenetic.tar.gz | |
| 111M ./cluDB_name_origin_size.tsv.gz | |
| 312M ./cluster_category_summary_stats.tsv.gz | |
| 30M ./cluster_communities.tsv.gz | |
| 141M ./cluster_db_size_categ_origin.tsv.gz |
| # From | |
| # Run `make <KO>.kegg.faa` for every value in column 3 of the ortholog table, skipping lines that contain "ortho" | |
| cut -f3 permafrost_pathway_orthologs.txt | grep -v ortho | while read ko; do make ${ko}.kegg.faa; done | |
| # Align each *.kegg.faa against the matching profile in ../profiles; line 2 of every hmmalign output is replaced by "#=GF ID <name>" and "#=GF AC <name>" lines, and all alignments are concatenated into alns.stck | |
| for i in *kegg.faa; do NAM=$(basename $i .kegg.faa); hmmalign ../profiles/$NAM.hmm $NAM.kegg.faa | awk -vN=$NAM '{if (NR == 2){print "#=GF ID "N"\n#=GF AC "N}else{print $0}}' ; done > alns.stck | |
| # Convert the concatenated alignments into the ko_msa_db database, then into the permafrost_ko_profiles profiles | |
| mmseqs convertmsa faa/alns.stck ko_msa_db | |
| mmseqs msa2profile ko_msa_db permafrost_ko_profiles --match-mode 1 | |
| # KO | |
| # Strip everything after the first whitespace in the headers of `test`, then replace each header via the key-value table built from ko_genes.list (its columns 2 and 1, swapped) | |
| # NOTE(review): the input file name `test` looks like a placeholder - confirm | |
| seqkit replace --ignore-case --kv-file <(awk '{print $2"\t"$1}' ko_genes.list) --pattern "(.+)" --replacement "{kv}" <(seqkit replace -p "\s.+" test) > ko_seq.faa | |
| # Append every line to <id>.fa, where <id> is the second ":"-separated field of the first word of the most recent header line; the file name is parenthesised because unparenthesised concatenation after >> is ambiguous across awk implementations | |
| awk '/^>/{split($1,a,":")}{print >> (a[2] ".fa")}' ../ko_seq.faa | |
| # Run uproc-makedb on ko_seq.faa to produce koDB (first argument `model` is presumably the model directory - confirm) | |
| ~/opt/uproc-1.2.0/uproc-makedb model ko_seq.faa koDB |
| library(tidyverse) | |
| library(DESeq2) | |
| library(phyloseq) | |
| library(taxonomizr) | |
| # Read metadata | |
| # Load the metadata table, then overwrite its column names positionally with the | |
| # twelve names below (assumes the CSV has exactly these twelve columns, in this order). | |
| metadata <- read_csv("data/metadata_v10.csv") | |
| names(metadata) <- c("label", "age_ka_bp", "age_type", "site_abrev", "region", "dating_lab_id", | |
| "C14_age", "OSL_age", "age_errors", "group1", "lat", "lon") |
| library(tidyverse) | |
| library(ggtree) | |
| library(ape) | |
| library(readxl) | |
| library(viridis) | |
| # Get tree data | |
| tree_data <- read_xlsx(path = "clade_table_with_region.xlsx") %>% | |
| rename(label = ID) %>% | |
| mutate(group = case_when(age >= 50000 ~ "50+", |