#!/bin/bash
## See also https://github.com/nextflow-io/nextflow/discussions/4308
## cd to a parent directory for a Nextflow pipeline execution, i.e. one that contains the .nextflow and work directories
WORKDIR="$1"
## Find work directories essential to the last pipeline run, as absolute paths
nextflow log last > "$WORKDIR/preserve_dirs.txt"
## Find all work directories, as absolute paths
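The snippet is cut off before the commands that list all work directories and remove the stale ones. A minimal sketch of one way to finish it, assuming the standard `work/<2-char>/<hash>` layout; the file name `all_dirs.txt` and the deletion step are illustrative, not from the original:

```bash
# List every task work directory (assumed layout: work/<2-char prefix>/<hash>)
find "$WORKDIR/work" -mindepth 2 -maxdepth 2 -type d > "$WORKDIR/all_dirs.txt"
# Keep only directories NOT referenced by the last run, then delete them
comm -23 <(sort "$WORKDIR/all_dirs.txt") <(sort "$WORKDIR/preserve_dirs.txt") | xargs -r rm -rf
```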
#!/bin/bash
if [ $# -ne 1 ]; then
    echo "Usage: $0 <input_gtf_file>"
    exit 1
fi
input_gtf="$1"
output_prefix="output"
sorted_gtf="${output_prefix}.sorted.gtf"
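The snippet stops before the sort itself. A minimal sketch of the usual coordinate sort for a GTF (by chromosome, then numeric start position); the exact command in the original script may differ:

```bash
# Sort by chromosome (column 1), then by numeric start coordinate (column 4)
sort -k1,1 -k4,4n "$input_gtf" > "$sorted_gtf"
```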
The `set` lines

- These lines deliberately cause your script to fail. Wait, what? Believe me, this is a good thing.
- With these settings, certain common errors will cause the script to fail immediately, explicitly and loudly. Otherwise, you can get hidden bugs that are discovered only when they blow up in production.

`set -euxo pipefail` is short for:

set -e
set -u
set -x
set -o pipefail
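As an illustration (my own minimal example, not from the original post), these options make a failure in the middle of a pipeline stop the script instead of letting it carry on with empty input:

```bash
#!/bin/bash
set -euxo pipefail
# -e: exit immediately when a command fails
# -u: treat references to unset variables as errors
# -x: print each command before executing it
# -o pipefail: a pipeline fails if any command in it fails, not just the last one
grep pattern missing_file.txt | sort   # grep fails, so the whole script stops here
```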
#!/usr/bin/env Rscript
#PBS -V
# This script calls copy number variants using cn.mops
# usage: Rscript cn.mops.R --help
library(optparse)
option_list = list(
  make_option(c("-i", "--input_dir"), type="character", default=NULL,
              help="Input directory containing BAM files", metavar="DIR")  # help text and metavar are assumed completions
)
# Parse command-line options
opt <- parse_args(OptionParser(option_list=option_list))
Just simple methods to keep the code clean, inspired by progrium/bashstyle and a post by Kfir Lavi (see the sketch after this list):

- All code goes in a function
- Always double quote variables
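A tiny sketch of my own illustrating those two rules together:

```bash
#!/bin/bash
# All code lives in functions; execution starts from a single main() call.
main() {
    local target_dir="$1"    # always double quote variable expansions
    find "$target_dir" -type f
}
main "$@"                    # "$@" preserves arguments containing spaces
```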
# Custom ggplot2 stat; its arguments mirror ggplot2::stat_smooth()
stat_smooth_func <- function(mapping = NULL, data = NULL,
                             geom = "smooth", position = "identity",
                             ...,
                             method = "auto",
                             formula = y ~ x,
                             # show_formula = TRUE,
                             se = TRUE,
                             n = 80,
                             span = 0.75,
                             fullrange = FALSE,
#!/bin/bash
# Put this file in $HOME/bin and make it executable.
# Make sure this file is called by default; I did this by adding `alias "qstat=~/bin/qstat"` to my ~/.bashrc
# Servers
#awoonga="@awongmgmr1.storage:16001 @awonmgr2.storage:16001"
#flashlite="@flm1.ipoib:16001 @flashmgr2.ipoib:16001"
#tinaroo="@tinmgmr1.ib0:16001 @tinmgr2.ib0:16001"
awoonga="@awonmgr2"
flashlite="@flashmgr2"
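The wrapper is cut off here. A hypothetical continuation (my assumption, not the author's code) would forward the user's arguments to the real qstat once per configured PBS server:

```bash
# Hypothetical continuation (assumption, not from the original):
# query each configured server with the system qstat, passing arguments through.
for server in $awoonga $flashlite; do
    command qstat "$@" "$server"
done
```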
The following code snippets demonstrate an approach to substantially speed up BLAST searches of large query files (whole transcriptomes/genomes) against the NCBI nr/nt/refseq databases by running many small jobs, each annotating a subset of the input sequences against a subset of the database.
The method is based on "divide and conquer" approaches [1–2] that split the search query and the database into multiple small jobs requiring modest resources and run time; such jobs can utilise high-priority queues and are therefore ideal for an HPC setting. The exact number of jobs to submit depends on the specifics of the HPC cluster, primarily the number of available nodes, queue limits and system load, and therefore needs to be determined experimentally for optimal results.
The suggested script is tailored to run on the QRIS Awoonga HPC cluster, using a [Conda environment](https://docs.conda.io/en/latest/).
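A minimal sketch of the query-splitting step, assuming seqkit and a PBS scheduler are available; the input name `transcriptome.fasta`, the chunk count of 50, and the `run_blast_chunk.pbs` job script are illustrative placeholders, not part of the original:

```bash
# Split the query FASTA into 50 roughly equal parts (50 is illustrative;
# tune it to the cluster's node count and queue limits)
seqkit split2 --by-part 50 transcriptome.fasta --out-dir query_chunks
# Submit one small, short-walltime BLAST job per chunk (job script is hypothetical)
for chunk in query_chunks/*.fasta; do
    qsub -v "QUERY=$chunk,DB=nt" run_blast_chunk.pbs
done
```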
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 28 16:34:57 2020

@author: Ido Bar
"""