Joe Brown brwnj

# Job Array

#!/usr/bin/env bash
# LSF submission header: the #BSUB lines below are options read by bsub.
#   -J align[1-63]  job array named "align" with element indices 1 through 63
#   -e / -o         stderr / stdout file names; %J expands to the job ID and
#                   %I to the array index, so each element gets its own files
#   -q normal       submit to the queue named "normal"
#   -R "..."        resource requirement string: select[] filters hosts on
#                   mem > 16, rusage[] reserves mem=16, span[hosts=1] keeps
#                   every slot on a single host
#                   NOTE(review): the memory unit is site-configured in LSF
#                   (LSF_UNIT_FOR_LIMITS) - confirm what 16 means here
#   -n 12           request 12 slots (tasks)
#BSUB -J align[1-63]
#BSUB -e align.%J.%I.err
#BSUB -o align.%J.%I.out
#BSUB -q normal
#BSUB -R "select[mem>16] rusage[mem=16] span[hosts=1]"
#BSUB -n 12

Setting up a small-ish example

Grab 5 BAM files and their indexes from the 1000 Genomes Project (1000G) to represent our alignments.

# Create the working directory and enter it. -p makes mkdir succeed when data/
# already exists, so a rerun still changes into data/ instead of skipping the
# cd and downloading into the current directory.
mkdir -p data && cd data
# Fetch each chromosome 20 BAM followed by its .bai index.
wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00096/alignment/HG00096.chrom20.ILLUMINA.bwa.GBR.low_coverage.20101123.bam
wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00096/alignment/HG00096.chrom20.ILLUMINA.bwa.GBR.low_coverage.20101123.bam.bai
wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00097/alignment/HG00097.chrom20.SOLID.bfast.GBR.low_coverage.20101123.bam
wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00097/alignment/HG00097.chrom20.SOLID.bfast.GBR.low_coverage.20101123.bam.bai

	#!/usr/bin/env python
	# encoding: utf-8
	"""
	Add a piece or all of the index read back onto R1 (or R2).
	"""
	from itertools import izip
	from toolshed import nopen

	def readfx(fh):
	# https://github.com/lh3/readfq/blob/master/readfq.py

	#!/usr/bin/env python
	# encoding: utf-8
	"""
	Join reads based on local alignment, taking higher quality base where mismatches
	are present.
	"""
	import sys, string, multiprocessing
	from Bio import pairwise2
	from toolshed import nopen
	from itertools import islice, izip, izip_longest

	import editdist

	def distance(a, b):
	"""
	Find best edit distance between two strings of potentially uneven length.

	>>> import editdist
	>>> distance("abc", "abc")
	0
	>>> distance("abc", "abcdef")

	#!/usr/bin/env python
	# coding=utf-8
	"""
	Download the best resolution of the top <limit> video from subreddit 'videos'
	to <out> directory.
	"""

	import multiprocessing
	import os
	import pafy

	#!/usr/bin/env python
	# coding=utf-8
	"""
	Runs bcl2fastq creating fastqs and concatenates fastqs across lanes. Intended
	to be used with NextSeq data and it does not do any cleanup! Original dumped
	fastqs will remain along with all of the bcl files.
	"""

	from __future__ import print_function

	# setting up AMI
	# Stop the Docker service, then delete its data directory.
	# Destructive: everything under /var/lib/docker is removed.
	sudo service docker stop
	sudo rm -rf /var/lib/docker

	# update the image
	sudo yum update -y
	sudo yum install -y mdadm
	sudo yum install -y wget
	# Download the Miniconda3 installer and run it unattended:
	#   -b  batch mode (no prompts)
	#   -f  do not fail if the install prefix already exists
	#   -p  install prefix; quoted so a $HOME containing spaces or glob
	#       characters is passed as a single argument
	wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
	bash Miniconda3-latest-Linux-x86_64.sh -b -f -p "$HOME/miniconda"

	params.bams

	// mosdepth is going to need a prefix, a bam, and its index
	bams_ch = Channel
	// grab the bams and/or crams
	.fromPath(params.bams, checkIfExists: true)
	// set the first element to the basename of the file without its extension
	// the second element to the alignments (bam or cram)
	// and the third element to the index
	.map { file -> tuple(file.baseName, file, file + ("${file}".endsWith('.cram') ? '.crai' : '.bai')) }