- To create UniRef90 protein databases for NCBI blast and Diamond Blast
- To create a tab-delimited taxid mapping file with two columns: sequenceID\tNCBITaxonID
Download the UniRef90 XML file first (warning: this is ~15 GB and will take a while)
| #' Call Primer3 for a given set of DNAStringSet objects | |
| #' Forked from https://gist.github.com/al2na/8540391 | |
| #' TODO: Add support for target amplicon region (Maybe as [] in the fasta input) | |
| #' @param seq: DNA template as a character string (required) | |
| #' @param fw_primer: optional forward (left) primer, if provided Primer3 will assess it and will try to find a suitable reverse primer. Default: NULL | |
| #' @param rv_primer: optional reverse (right) primer (must be reverse-complemented to the template), if provided Primer3 will assess it and will try to find a suitable forward primer. Default: NULL | |
| #' @param size_range: a string with space separated list of desired amplicon size ranges. Default: '151-500' | |
| #' @param Tm: range of melting temperature parameters as a numerical vector containing (min,optimal,max). Default: c(55,57,58) | |
| #' @param name: name of the amplicon in 'chr_start_end' format | |
| #' @param sequence_target: a string containing a space separated list of target pairs: 'starting_position,target_length starting_position,target_length ...' |
| #!/bin/bash | |
| # installs plenv, perl, carton, cpanminus, sets up environment in .bash_profile or .profile | |
| # from https://github.com/tokuhirom/plenv#readme | |
| MY_PROFILE_FILE="$HOME/.profile" | |
| if [[ -n "$PERL_MB_OPT" ]]; then | |
| echo "You must unset your local::lib environment variables first" | |
| echo "Edit your ~/.bash_profile or ~/.bashrc and remove any references" | |
| echo "to local::lib or export PERL*..." |
Download the UniRef90 XML file first (warning: this is ~15 GB and will take a while)
| # Run this script in a directory containing zip files from fastqc. It aggregates images of each type in individual folders | |
| # So looking across data is quick. | |
| zips=`ls *.zip` | |
| for i in $zips; do | |
| unzip -o $i &>/dev/null; | |
| done | |
| fastq_folders=${zips/.zip/} |
| stat_smooth_func <- function(mapping = NULL, data = NULL, | |
| geom = "smooth", position = "identity", | |
| ..., | |
| method = "auto", | |
| formula = y ~ x, | |
| # show_formula = TRUE, | |
| se = TRUE, | |
| n = 80, | |
| span = 0.75, | |
| fullrange = FALSE, |
Just simple methods to keep the code clean.
Inspired by progrium/bashstyle and Kfir Lavi's post.
| #!/usr/bin/env Rscript | |
| #PBS -V | |
| # This script will call copy number variants using cn.mops | |
| # usage: Rscript cn.mops.R --help | |
| library(optparse) | |
| option_list = list( | |
| make_option(c("-i", "--input_dir"), type="character", default=NULL, |
The set lines
set -euxo pipefail is short for: set -e
set -u
| #!/bin/bash | |
| if [ $# -ne 1 ]; then | |
| echo "Usage: $0 <input_gtf_file>" | |
| exit 1 | |
| fi | |
| input_gtf="$1" | |
| output_prefix="output" | |
| sorted_gtf="${output_prefix}.sorted.gtf" |
| #! /bin/bash | |
| ## See also https://github.com/nextflow-io/nextflow/discussions/4308 | |
| ## cd to a parent directory for a Nextflow pipeline execution, i.e. one that contains the .nextflow and work directories | |
| WORKDIR=$1 | |
| ## Find work directories essential to the last pipeline run, as absolute paths | |
| nextflow log last > $WORKDIR/preserve_dirs.txt | |
| ## Find all work directories, as absolute paths |