Skip to content

Instantly share code, notes, and snippets.

View genomewalker's full-sized avatar

Antonio Fernandez-Guerra genomewalker

View GitHub Profile
+ set -e
+ MMSEQS=/vol/attached/opt/MPI/MMseqs2/bin/mmseqs
+ DIR=/vol/attached/gtdb
+ SDIR=/vol/scratch/gtdb
+ export 'OMPI_MCA_btl=^openib'
+ OMPI_MCA_btl='^openib'
+ export OMP_NUM_THREADS=28
+ OMP_NUM_THREADS=28
+ RUNNER='mpirun --mca btl_tcp_if_include ens3 -n 10 --map-by ppr:1:node --bind-to none '
+ /vol/attached/opt/MPI/MMseqs2/bin/mmseqs clusterupdate /vol/scratch/gtdb/marine_hmp_db_03112017 /vol/scratch/gtdb/mg_gtdb_orfs_db /vol/scratch/gtdb/marine_hmp_db_03112017_clu /vol/attached/gtdb/mg_gtdb_update_20190430/mg_gtdb_db_052019 /vol/attached/gtdb/mg_gtdb_update_20190430/mg_gtdb_db_052019_clu /vol/attached/gtdb/mg_gtdb_update_20190430/tmp --min-seq-id 0.3 -s 5 --cov-mode 0 -c 0.8 --split 10
node_list = Sys.getenv("SLURM_NODELIST")
cat("SLURM nodes:", node_list, "\n")
# Look up IPs of the allocated nodes.
if (node_list != "") {
nodes = strsplit(node_list, ",")[[1]]
ips = rep(NA, length(nodes))
for (i in 1:length(nodes)) {
args = c(nodes[i], " | awk '/has address/ { print $4 ; exit }'")
result = system2("host", args = args, stdout = T)
# packages in environment at /Users/ufo/.pyenv/versions/miniconda3-latest/envs/anvio-6:
#
# Name Version Build Channel
anvio 6 0 bioconda
anvio-minimal 6 py_0 bioconda
appdirs 1.4.3 py_1 conda-forge
asn1crypto 1.2.0 py36_0 conda-forge
attrs 19.3.0 py_0 conda-forge
bcftools 1.9 h16e57c4_7 bioconda
biopython 1.74 py36h01d97ff_0 conda-forge
../opt/bbmap/repair.sh in=../DBs/fastq_clean/samples/tm3_1.2.gz out=r1.fq out2=r2.fq outs=sr.fq overwrite
Set INTERLEAVED to false
Started output stream.

Input:                          60113046 reads          4484553415 bases.
Result:                         60113046 reads (100.00%)        4484553415 bases (100.00%)
Pairs:                          1062720 reads (1.77%)   145407477 bases (3.24%)
library(tidyverse)
# Functions for pretty histograms
nclass.all <- function(x, fun = median)
{
fun(c(
nclass.Sturges(x),
nclass.scott(x),
nclass.FD(x)
))

Let's install linuxbrew and pyenv

sh -c "$(curl -fsSL https://raw.githubusercontent.com/Linuxbrew/install/master/install.sh)"
brew install pyenv pyenv-virtualenv pyenv-which-ext
pyenv install miniconda3-latest
pyenv global miniconda3-latest

Then we need to add the following to .bash_profile:

@genomewalker
genomewalker / tree.r
Last active April 2, 2020 20:46
Tree for Yucheng
library(tidyverse)
library(ggtree)
library(ape)
library(readxl)
library(viridis)
# Get tree data
tree_data <- read_xlsx(path = "clade_table_with_region.xlsx") %>%
rename(label = ID) %>%
mutate(group = case_when(age >= 50000 ~ "50+",
@genomewalker
genomewalker / norm.R
Last active April 8, 2020 06:35
arctic_plants
library(tidyverse)
library(DESeq2)
library(phyloseq)
library(taxonomizr)
# Read metadata
metadata <- read_csv("data/metadata_v10.csv", )
names(metadata) <- c("label", "age_ka_bp", "age_type", "site_abrev", "region", "dating_lab_id",
"C14_age", "OSL_age", "age_errors", "group1", "lat", "lon")
# From
cut -f3 permafrost_pathway_orthologs.txt | grep -v ortho | while read ko; do make ${ko}.kegg.faa; done
for i in *kegg.faa; do NAM=$(basename $i .kegg.faa); hmmalign ../profiles/$NAM.hmm $NAM.kegg.faa | awk -vN=$NAM '{if (NR == 2){print "#=GF ID"N"\n#=GF AC "N}else{print $0}}' ; done > alns.stck
mmseqs convertmsa faa/alns.stck ko_msa_db
mmseqs msa2profile ko_msa_db permafrost_ko_profiles --match-mode 1
# KO
seqkit replace --ignore-case --kv-file <(awk '{print $2"\t"$1}' ko_genes.list) --pattern "(.+)" --replacement "{kv}" <(seqkit replace -p "\s.+" test) > ko_seq.faa
awk '/^>/{split($1,a,":")}{print >> a[2]".fa"}' ../ko_seq.faa
~/opt/uproc-1.2.0/uproc-makedb model ko_seq.faa koDB
1.2G ./agnostosDB_dbf02445-20200519_environmental.tar.gz
104G ./agnostosDB_dbf02445-20200519_hh-suite-db.tar.gz
21G ./agnostosDB_dbf02445-20200519_mmseqs-cluseqdb.tar.gz
11G ./agnostosDB_dbf02445-20200519_mmseqs-profiles.tar.gz
3.9G ./agnostosDB_dbf02445-20200519_original-data.tar.gz
279M ./agnostosDB_dbf02445-20200519_phylogenetic.tar.gz
111M ./cluDB_name_origin_size.tsv.gz
312M ./cluster_category_summary_stats.tsv.gz
30M ./cluster_communities.tsv.gz
141M ./cluster_db_size_categ_origin.tsv.gz