nuada’s gists

nuada / ab2bam

Created August 7, 2015 10:37

Convert AB Sanger traces to FASTQ and align

	#!/bin/bash

	# Unzip files
	for file in *.zip; do
	unzip "$file"
	done

	# Extract fastqs
	for file in *.ab1; do
	seqret -sformat abi -osformat fastq -auto -stdout -sequence "$file" > "$(basename "$file" .ab1).fastq"

nuada / miseq-qc.sh

Created August 7, 2015 10:28

Merge MiSeq diagnostic images for easy eyeballing

	#!/bin/bash
	# usage: $0 <MiSeq output dir> <output dir>

	dir=$1/Thumbnail_Images/L001
	output=$2
	number_of_tiles=$(grep FlowcellLayout ${1}/RunInfo.xml \| cut -d = -f 5 \| sed -e 's/[^0-9]*//g')

	if [[ ! -d ${output} ]]; then
	mkdir ${output}
	fi

nuada / gemini_summarize.py

Created May 8, 2015 10:01

Script to summarize variants using GEMINI API

	#!/usr/share/gemini/anaconda/bin/python -E
	# -- coding: utf-8 --
	# usage: gemini_summarize.py <query> <gemini.db>

	import sys
	import locale
	from gemini import GeminiQuery

	DP_THRESHOLD = 8
	GQ_THRESHOLD = 20

nuada / drop_cols.py

Last active August 29, 2015 14:20

Drop certain columns from TSV file using Pandas

	#!/usr/bin/python

	import pandas as pd
	import sys

	df = pd.read_csv(sys.argv[1], sep='\t', index_col=0)

	for col in df.columns:
	if col[3] == 'B':
	df = df.drop(col, 1)

nuada / calculate-kinship.sh

Created March 6, 2015 12:37

Calculate kinship coefficient for set of samples in VCF

	#!/bin/bash

	bcftools norm -Ou -m -any $1 \| # Split multi-allelic alleles
	bcftools norm -Ou -f /resources/hg19/ucsc.hg19.fasta \| # Normalize
	bcftools annotate -Ob -x ID -I +'%CHROM:%POS:%REF:%ALT' \| # Replace IDs with unique ID
	plink --bcf /dev/stdin --keep-allele-order --double-id --allow-extra-chr 0 --make-bed --out variants

	king -b variants.bed --kinship --prefix kinship
	king -b variants.bed --individual

nuada / multiprocessing.sh

Created January 13, 2015 10:24

	#!/bin/bash

	max_jobs=4

	for i in $(seq 10); do
	while [[ $(jobs \| wc --lines) -ge ${max_jobs} ]]; do
	sleep 1
	done

	# Do the job

nuada / bed2picard_interval_list.sh

Last active March 14, 2020 02:43

Create picard interval list from BED file

nuada / phix_screen.sh

Last active August 29, 2015 14:03

Quick and dirty way to assess number of PhiX reads in samples

	#!/bin/bash

	DATA='/data/project'
	for sample in $(cd ${DATA}; ls -1 *.r1.fastq.gz \| cut -d . -f 1); do
	bowtie2 -x /resources/phix/bowtie2/phix -1 ${DATA}/${sample}.r1.fastq.gz -2 ${DATA}/${sample}.r2.fastq.gz -S /dev/null --al-conc-gz phix-${sample}.r%.fastq.gz -p 4 2>&1 \| tee -a phix.log
	done

nuada / bed_cleanup.sh

Last active August 29, 2015 14:02

Cleanup BED: sort and merge intervals, extract gene symbols

bedtools sort -i foo.bed | bedtools merge -i - -nms | sed -e 's/\r//g' | awk -F '\t' '{ split($4, name, "."); print $1 FS $2 FS $3 FS name[1] }'

nuada / bed2reg

Last active August 29, 2015 14:02

Convert BED file to PLINK/SEQ REG format

	#!/bin/bash

	echo -e "#CHR\tBP1\tBP2\tID"
	awk -F '\t' '{ print $1 FS $2+1 FS $3 FS NR }' \| sed -e 's/^$[0-9]\+$/chr\1/g'

Piotr Radkowski nuada