# Create the comparison workspace and move into it.
mkdir agnostos-comp
cd agnostos-comp
# Create the one_step and two_step subdirectories.
mkdir one_step two_step
# Unpack the GTDB archive, then link the database's dbtype file under the
# "_h" name (presumably the MMseqs2 header database -- confirm).
tar xvfz mg_gtdb_20190502.tar.gz
ln -s mg_gtdb_db_20190502.dbtype mg_gtdb_db_20190502_h.dbtype
name: anvio-7.1
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- python=3.6
- anvio=7
- diamond=0.9.14
- pip
easy-taxonomy e77760e07c3167ef94f937365175d68b.assm.combined.fasta DB/uniref50-tax/UniRef50 e77760e07c3167ef94f937365175d68b.UniRef50 /dev/shm/tmp/contig-taxonomy/e77760e07c3167ef94f937365175d68b.tax.UniRef50 --tax-lineage 2 --majority 0.5 --vote-mode 1 --lca-mode 3 --orf-filter 1 --lca-ranks superkingdom,phylum,class,order,family,genus --threads 32 | |
MMseqs Version: 5daca424b162cc5fdf0b9cd151aebed86975cbf6 | |
ORF filter 1 | |
ORF filter e-value 100 | |
ORF filter sensitivity 2 | |
LCA mode 3 | |
Majority threshold 0.5 | |
Vote mode 1 | |
LCA ranks superkingdom,phylum,class,order,family,genus |
For the C program:
# Igraph C-library: download the 0.7.1 source tarball, unpack it, then
# configure, build with 8 parallel jobs and run the bundled checks.
# PREFIX is expanded from the environment and forms the root of the
# install prefix passed to configure.
wget "https://igraph.org/nightly/get/c/igraph-0.7.1.tar.gz"
tar -xvzf igraph-0.7.1.tar.gz
cd igraph-0.7.1
./configure --prefix="${PREFIX}/igraph/0.7.1"
make -j8
make check
1.2G ./agnostosDB_dbf02445-20200519_environmental.tar.gz | |
104G ./agnostosDB_dbf02445-20200519_hh-suite-db.tar.gz | |
21G ./agnostosDB_dbf02445-20200519_mmseqs-cluseqdb.tar.gz | |
11G ./agnostosDB_dbf02445-20200519_mmseqs-profiles.tar.gz | |
3.9G ./agnostosDB_dbf02445-20200519_original-data.tar.gz | |
279M ./agnostosDB_dbf02445-20200519_phylogenetic.tar.gz | |
111M ./cluDB_name_origin_size.tsv.gz | |
312M ./cluster_category_summary_stats.tsv.gz | |
30M ./cluster_communities.tsv.gz | |
141M ./cluster_db_size_categ_origin.tsv.gz |
# From the pathway ortholog table: take column 3, drop lines containing
# "ortho" (presumably the header -- confirm), and run `make` for one
# <KO>.kegg.faa target per remaining entry.
cut -f3 permafrost_pathway_orthologs.txt | grep -v ortho | while read -r ko; do make "${ko}.kegg.faa"; done
# Align each KO FASTA against its HMM profile and concatenate the results
# into one Stockholm file. Line 2 of every hmmalign output is replaced by
# "#=GF ID <KO>" and "#=GF AC <KO>" annotations so each alignment is named.
# Stockholm requires whitespace between the tag and its value.
for i in *kegg.faa; do NAM=$(basename "$i" .kegg.faa); hmmalign "../profiles/$NAM.hmm" "$NAM.kegg.faa" | awk -v N="$NAM" '{if (NR == 2){print "#=GF ID "N"\n#=GF AC "N}else{print $0}}' ; done > alns.stck
# Convert the concatenated alignments to an MMseqs2 MSA database, then to profiles.
mmseqs convertmsa faa/alns.stck ko_msa_db
mmseqs msa2profile ko_msa_db permafrost_ko_profiles --match-mode 1
# KO
# Inner seqkit call: apply the pattern "\s.+" to the headers of `test` with the
# default (empty) replacement. Outer call: replace each whole header with the
# value looked up in a key/value file built from ko_genes.list with its
# first two columns swapped.
seqkit replace --ignore-case --kv-file <(awk '{print $2"\t"$1}' ko_genes.list) --pattern "(.+)" --replacement "{kv}" <(seqkit replace -p "\s.+" test) > ko_seq.faa
# Split the FASTA into one file per group: on each header line, take the
# second ":"-separated piece of the first field and append records to
# <piece>.fa. The redirection target is parenthesised because an
# unparenthesised concatenation after ">>" is ambiguous across awk variants.
awk '/^>/{split($1,a,":")}{print >> (a[2] ".fa")}' ../ko_seq.faa
~/opt/uproc-1.2.0/uproc-makedb model ko_seq.faa koDB
library(tidyverse)
library(DESeq2)
library(phyloseq)
library(taxonomizr)

# Read metadata
metadata <- read_csv("data/metadata_v10.csv")

# Overwrite the column names, by position, with the 12 names below.
names(metadata) <- c(
  "label", "age_ka_bp", "age_type", "site_abrev", "region", "dating_lab_id",
  "C14_age", "OSL_age", "age_errors", "group1", "lat", "lon"
)
library(tidyverse) | |
library(ggtree) | |
library(ape) | |
library(readxl) | |
library(viridis) | |
# Get tree data | |
tree_data <- read_xlsx(path = "clade_table_with_region.xlsx") %>% | |
rename(label = ID) %>% | |
mutate(group = case_when(age >= 50000 ~ "50+", |