- To create UniRef90 protein databases for NCBI BLAST and DIAMOND
- To create a tab-delimited taxid mapping file with two columns: sequenceID\tNCBITaxonID
Download the UniRef90 XML file first (warning: it is ~15 GB and will take a while); a hedged sketch of the download and database-build commands follows.
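A minimal sketch of the workflow, assuming the current UniProt FTP layout; uniref90.fasta and taxid_map.txt are placeholder names for the FASTA and the two-column taxid mapping file produced from the XML, and the DIAMOND taxonomy inputs are taken from the NCBI taxdump:

# Download the UniRef90 XML (~15 GB compressed)
wget https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.xml.gz

# NCBI BLAST protein database, with taxids attached via the mapping file
makeblastdb -in uniref90.fasta -dbtype prot -parse_seqids \
    -taxid_map taxid_map.txt -out uniref90_blast

# DIAMOND database; --taxonmap expects a prot.accession2taxid-style file,
# --taxonnodes the nodes.dmp from the NCBI taxdump
diamond makedb --in uniref90.fasta --db uniref90_diamond \
    --taxonmap prot.accession2taxid.gz --taxonnodes nodes.dmp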
#' Call Primer3 for a given DNAStringSet object
#' Forked from https://gist.github.com/al2na/8540391
#' TODO: Add support for a target amplicon region (maybe as [] in the FASTA input)
#' @param seq: DNA template as a character string (required)
#' @param fw_primer: optional forward (left) primer; if provided, Primer3 will assess it and try to find a suitable reverse primer. Default: NULL
#' @param rv_primer: optional reverse (right) primer (must be reverse-complemented relative to the template); if provided, Primer3 will assess it and try to find a suitable forward primer. Default: NULL
#' @param size_range: a string with a space-separated list of desired amplicon size ranges. Default: '151-500'
#' @param Tm: range of melting temperature parameters as a numeric vector containing (min, optimal, max). Default: c(55,57,58)
#' @param name: name of the amplicon in 'chr_start_end' format
#' @param sequence_target: a string containing a space-separated list of target pairs: 'starting_position,target_length starting_position,target_length ...'
#!/bin/bash
# Installs plenv, perl, carton and cpanminus, and sets up the environment in .bash_profile or .profile
# From https://github.com/tokuhirom/plenv#readme
MY_PROFILE_FILE="$HOME/.profile"
if [[ -n "$PERL_MB_OPT" ]]; then
    echo "You must unset your local::lib environment variables first"
    echo "Edit your ~/.bash_profile or ~/.bashrc and remove any references"
    echo "to local::lib or export PERL*..."
    exit 1
fi
# Run this script in a directory containing zip files from FastQC. It aggregates
# images of each type into individual folders, so scanning across samples is quick.
zips=$(ls *.zip)
for i in $zips; do
    unzip -o "$i" &>/dev/null
done
# Strip the .zip suffix from every name (// replaces all occurrences, not just the first)
fastq_folders=${zips//.zip/}
stat_smooth_func <- function(mapping = NULL, data = NULL,
                             geom = "smooth", position = "identity",
                             ...,
                             method = "auto",
                             formula = y ~ x,
                             # show_formula = TRUE,
                             se = TRUE,
                             n = 80,
                             span = 0.75,
                             fullrange = FALSE,
Just simple methods to keep the code clean.
Inspired by progrium/bashstyle and a post by Kfir Lavi; a brief sketch of the conventions follows.
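A minimal sketch of what those conventions look like in practice (the function and file names here are made up for illustration):

#!/usr/bin/env bash
set -euo pipefail

# Keep all logic in small functions with local, quoted variables.
count_fastq_reads() {
    local fastq="$1"
    # A FASTQ record is four lines, so divide the line count by four.
    echo "$(( $(wc -l < "$fastq") / 4 ))"
}

main() {
    count_fastq_reads "$1"
}

main "$@"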
#!/usr/bin/env Rscript
#PBS -V
# This script will call copy number variants using cn.mops
# usage: Rscript cn.mops.R --help
library(optparse)
option_list = list(
  make_option(c("-i", "--input_dir"), type="character", default=NULL,
The set line

set -euxo pipefail

is short for:

set -e
set -u
set -x
set -o pipefail

Here -e exits as soon as any command fails, -u treats the use of an unset variable as an error, -x prints each command before it runs, and -o pipefail makes a pipeline return failure if any command in it fails.
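A small, made-up example of what each flag buys you in practice:

#!/bin/bash
set -euxo pipefail

# -u: aborts here with an "unbound variable" error if no argument was given,
#     instead of silently continuing with an empty string
sample="$1"

# -e and -o pipefail: if zcat fails (e.g. the file is missing), the whole
# pipeline counts as failed and the script stops, even though wc succeeds
zcat "${sample}.fastq.gz" | wc -l

# -x has been echoing every command above to stderr, which makes cluster
# log files much easier to read
echo "finished ${sample}"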
#!/bin/bash
if [ $# -ne 1 ]; then
    echo "Usage: $0 <input_gtf_file>"
    exit 1
fi
input_gtf="$1"
output_prefix="output"
sorted_gtf="${output_prefix}.sorted.gtf"
#!/bin/bash
## See also https://github.com/nextflow-io/nextflow/discussions/4308
## cd to a parent directory of a Nextflow pipeline execution, i.e. one that contains the .nextflow and work directories
WORKDIR="$1"
## Find the work directories essential to the last pipeline run, as absolute paths
nextflow log last > "$WORKDIR/preserve_dirs.txt"
## Find all work directories, as absolute paths