Antonio Fernandez-Guerra genomewalker

For the C program:

# Igraph C-library
wget https://igraph.org/nightly/get/c/igraph-0.7.1.tar.gz
tar xvfz igraph-0.7.1.tar.gz
cd igraph-0.7.1
./configure --prefix="${PREFIX}"/igraph/0.7.1
make -j 8
make check

Get the data from figshare

convert the DB into fasta

mkdir agnostos-comp && cd agnostos-comp

mkdir one_step two_step

tar xvfz mg_gtdb_20190502.tar.gz && ln -s mg_gtdb_db_20190502.dbtype mg_gtdb_db_20190502_h.dbtype

REMOVE JAMF RESTRICTIONS ON MAC

REMOVE ONLY RESTRICTIONS

sudo jamf removeMDMProfile  # removes all restrictions

sudo jamf manage  # brings back all restrictions and profiles

REMOVE ALL RESTRICTIONS AND DISABLE JAMF BINARIES WHILE KEEPING YOUR ACCESS TO VPN AND OTHER SERVICES

sudo jamf removeMDMProfile  # removes all restrictions

Differences in bowtie2 alignment time between concatenated and non-concatenated genomes

Concat number of references: 52,517
No concat number of references: 8,086,857
Number of reads: 164,369,171

# CONCAT
$ bowtie2-build --seed 42 --threads 24 genomes-concat.fa genomes-concat

...

	easy-taxonomy e77760e07c3167ef94f937365175d68b.assm.combined.fasta DB/uniref50-tax/UniRef50 e77760e07c3167ef94f937365175d68b.UniRef50 /dev/shm/tmp/contig-taxonomy/e77760e07c3167ef94f937365175d68b.tax.UniRef50 --tax-lineage 2 --majority 0.5 --vote-mode 1 --lca-mode 3 --orf-filter 1 --lca-ranks superkingdom,phylum,class,order,family,genus --threads 32

	MMseqs Version: 5daca424b162cc5fdf0b9cd151aebed86975cbf6
	ORF filter 1
	ORF filter e-value 100
	ORF filter sensitivity 2
	LCA mode 3
	Majority threshold 0.5
	Vote mode 1
	LCA ranks superkingdom,phylum,class,order,family,genus

	name: anvio-7.1
	channels:
	- conda-forge
	- bioconda
	- defaults
	dependencies:
	- python=3.6
	- anvio=7
	- diamond=0.9.14
	- pip

	Sub AuthorTec_ReplaceAuthorName()

	Const MacroName = "AuthorTec™ Replace Author Name"

	'In XML formatted Word documents (docx, dotx, docm, dotm), this

	'macro changes a specified author name on comments and tracked

	'revisions. It runs on both Windows and Mac versions of Microsoft Word

	from tqdm import tqdm
	from collections import defaultdict
	import argparse
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from multiprocessing import Pool
	import gzip
	from itertools import zip_longest
	from mimetypes import guess_type
	from functools import partial
	from cdifflib import CSequenceMatcher

	library(tidyverse)

	# Read in the data
	setwd("/maps/projects/lundbeck/scratch/taxDB/v6/metadata/src_files")

	ncbi_assm_stats <- list.files(".", pattern = "genome_metadata.txt", full.names = TRUE)
	ncbi_assm_stats <- map_dfr(ncbi_assm_stats, function(X) {
	read_tsv(X, col_names = TRUE)
	}) %>%
	select(-filename) %>%