This is my recommended pipeline for assembly and annotation of small eukaryotic genomes (50 - 500 Mb).
All small scripts are available at CGP-scripts; links are provided for the external programs.
Please cite it if you find the pipeline useful!
# Remove Xcode together with its per-user caches, settings and device data.
# WARNING: destructive. Every `rm -rf` below deletes its target recursively
# and without prompting.

# Stop any running Xcode process first.
killall Xcode
# Drop xcrun's cache of tool lookups (-k is xcrun's kill-cache flag).
xcrun -k
# Run the `clean` build action for all targets; xcodebuild acts on the
# project in the current working directory.
xcodebuild -alltargets clean
# clang module caches under the per-user Darwin cache directory.
rm -rf "$(getconf DARWIN_USER_CACHE_DIR)/org.llvm.clang/ModuleCache"
rm -rf "$(getconf DARWIN_USER_CACHE_DIR)/org.llvm.clang.$(whoami)/ModuleCache"
# The application bundle itself.
rm -rf /Applications/Xcode.app
# Per-user caches, developer data, mobile-device data and preferences.
rm -rf ~/Library/Caches/com.apple.dt.Xcode
rm -rf ~/Library/Developer
rm -rf ~/Library/MobileDevice
rm -rf ~/Library/Preferences/com.apple.dt.Xcode.plist
metaphlanToPhyloseq <- function( | |
metaphlandir, | |
metadat=NULL, | |
simplify=TRUE){ | |
## tax is a matrix or data.frame with the table of taxonomic abundances, rows are taxa, columns are samples | |
## metadat is an optional data.frame of specimen metadata, rows are samples, columns are variables | |
## if simplify=TRUE, use only the most detailed level of taxa names in the final object | |
## metaphlanToPhyloseq("~/Downloads/metaphlan_bugs_list") | |
.getMetaphlanTree <- function(removeGCF=TRUE, simplify=TRUE){ | |
if (!requireNamespace("ape")) { |
# Adapted from https://stackoverflow.com/a/7267364/1036500 by Andrie de Vries | |
# This is it: theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) | |
library(ggplot2) | |
td <- expand.grid( | |
hjust=c(0, 0.5, 1), | |
vjust=c(0, 0.5, 1), | |
angle=c(0, 45, 90), |
# Path of the CUE sheet to read.
cue_file = 'file.cue'

# Load the sheet as a list of lines. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open(cue_file) as cue_handle:
    d = cue_handle.read().splitlines()

# State that starts out empty here.
# NOTE(review): presumably filled in while iterating over `d`; the code
# that does so is not visible in this excerpt.
general = {}
tracks = []
current_file = None
# Bootstrap pacman, then let it load the CRAN and GitHub packages.
# require() is kept (rather than requireNamespace()) because it also
# attaches pacman, which code outside this excerpt may rely on.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(dplyr, venneuler)
pacman::p_load_current_gh('trinker/textshape', 'thomasp85/ggforce')

# 20 x 4 matrix of random integers drawn from 0:4, where 0 has probability
# 0.6 and each of 1..4 has probability 0.1. Columns are named A-D.
x <- matrix(
  sample(0:4, size = 80, replace = TRUE, prob = c(0.6, 0.1, 0.1, 0.1, 0.1)),
  ncol = 4
)
colnames(x) <- LETTERS[1:4]
This is my recommended pipeline for assembly and annotation of small eukaryotic genomes (50 - 500 Mb).
All small scripts are available at CGP-scripts; links are provided for the external programs.
Please cite it if you find the pipeline useful!
library(data.table)

# Read the species table from the working directory.
species <- fread('species.csv')

## identify and remove records that were present when we imported the table
## (the filter below keeps only the rows whose NativeStatus is not NA)
usdaplants <- species[!is.na(NativeStatus)]

## data table method of renaming columns
## (setnames() renames in place, so its result is not assigned)
setnames(
  usdaplants,
  old = c('id', 'genus', 'species', 'scientificname', 'commonname'),
  new = c('betydb.species.id', 'Genus', 'Species', 'ScientificName', 'CommonName')
)
# Twelve uniform random values `d`, labelled with group ids 1..4 (three
# consecutive rows per group).
x <- data.frame(d = runif(12), g = rep(1:4, each = 3))

# Four colour names.
# NOTE(review): presumably one per group in `g`; confirm where it is used.
my.col <- c("deepskyblue3", "darkorange2", "darkgray", "gold")

# Repeating pattern 1, 0.1, 0.1: one value per row of `x`, with the larger
# value at the first row of each group.
# NOTE(review): looks like per-bar spacing for a bar plot; confirm at the
# call site.
spacer <- rep(c(1, 0.1, 0.1), times = 4)
bw <- 0.8

# Total extent: every spacer and every row scaled by `bw`.
xmax <- (sum(spacer) * bw) + (nrow(x) * bw)
# Archive "$dir" once and use the same byte stream twice: tee copies the tar
# stream into ncftpput (whose -c flag uploads stdin to the named remote file)
# while md5sum hashes the copy on stdout into "${dir}.md5".
# Requires bash for the >( ) process substitution. Expansions of $dir are
# quoted so directory names containing spaces or glob characters stay intact.
tar -cf - "$dir" | tee >(ncftpput -v -F -u USER -p PASS -c HOST "${dir}.tar") | md5sum > "${dir}.md5"
# Streamed paired-end read preprocessing. Each stage reads the previous
# stage's output on stdin ("-") and writes to stdout:
#   1. interleave-reads.py      merges the two mate files into one stream
#   2. skewer                   processes the stream using the TruSeq3-PE adapter file (-x)
#   3. normalize-by-median.py   runs with -C 30 and a 2e9 memory cap
#   4. trim-low-abund.py        runs with --cutoff 2 and -M 2e9
#   5. split-paired-reads.py    writes stream.1.fq / stream.2.fq, orphans to orph.fq
# A trailing "|" continues the pipeline on the next line, so no backslash
# continuations are needed.
interleave-reads.py file.1.fq.gz file.2.fq.gz |
  skewer -Q 2 -t 2 -x "$HOME/Trimmomatic-0.33/adapters/TruSeq3-PE.fa" - -1 |
  normalize-by-median.py --max-memory-usage 2e9 -C 30 -o - - |
  trim-low-abund.py -V -M 2e9 -o - --cutoff 2 - |
  split-paired-reads.py --output-orphaned orph.fq -1 stream.1.fq -2 stream.2.fq -