Skip to content

Instantly share code, notes, and snippets.

@nuada
nuada / ab2bam
Created August 7, 2015 10:37
Convert AB Sanger traces to FASTQ and align
#!/bin/bash
# Unzip files
for file in *.zip; do
unzip "$file"
done
# Extract fastqs
for file in *.ab1; do
seqret -sformat abi -osformat fastq -auto -stdout -sequence "$file" > "$(basename "$file" .ab1).fastq"
@nuada
nuada / miseq-qc.sh
Created August 7, 2015 10:28
Merge MiSeq diagnostic images for easy eyeballing
#!/bin/bash
# usage: $0 <MiSeq output dir> <output dir>
dir=$1/Thumbnail_Images/L001
output=$2
number_of_tiles=$(grep FlowcellLayout ${1}/RunInfo.xml | cut -d = -f 5 | sed -e 's/[^0-9]*//g')
if [[ ! -d ${output} ]]; then
mkdir ${output}
fi
@nuada
nuada / gemini_summarize.py
Created May 8, 2015 10:01
Script to summarize variants using GEMINI API
#!/usr/share/gemini/anaconda/bin/python -E
# -*- coding: utf-8 -*-
# usage: gemini_summarize.py <query> <gemini.db>
import sys
import locale
from gemini import GeminiQuery
DP_THRESHOLD = 8
GQ_THRESHOLD = 20
@nuada
nuada / drop_cols.py
Last active August 29, 2015 14:20
Drop certain columns from TSV file using Pandas
#!/usr/bin/python
import pandas as pd
import sys
df = pd.read_csv(sys.argv[1], sep='\t', index_col=0)
for col in df.columns:
if col[3] == 'B':
df = df.drop(col, 1)
@nuada
nuada / calculate-kinship.sh
Created March 6, 2015 12:37
Calculate kinship coefficient for set of samples in VCF
#!/bin/bash
bcftools norm -Ou -m -any $1 | # Split multi-allelic alleles
bcftools norm -Ou -f /resources/hg19/ucsc.hg19.fasta | # Normalize
bcftools annotate -Ob -x ID -I +'%CHROM:%POS:%REF:%ALT' | # Replace IDs with unique ID
plink --bcf /dev/stdin --keep-allele-order --double-id --allow-extra-chr 0 --make-bed --out variants
king -b variants.bed --kinship --prefix kinship
king -b variants.bed --individual
#!/bin/bash
max_jobs=4
for i in $(seq 10); do
while [[ $(jobs | wc --lines) -ge ${max_jobs} ]]; do
sleep 1
done
# Do the job
@nuada
nuada / bed2picard_interval_list.sh
Last active March 14, 2020 02:43
Create picard interval list from BED file
#!/bin/bash
if [[ -z $* ]]; then
echo "Usage: $0 <bam file> <bed file>"
exit 1
fi
# Header start
echo '@HD VN:1.0 SO:coordinate' | tr " " "\t"
@nuada
nuada / phix_screen.sh
Last active August 29, 2015 14:03
Quick and dirty way to assess number of PhiX reads in samples
#!/bin/bash
DATA='/data/project'
for sample in $(cd ${DATA}; ls -1 *.r1.fastq.gz | cut -d . -f 1); do
bowtie2 -x /resources/phix/bowtie2/phix -1 ${DATA}/${sample}.r1.fastq.gz -2 ${DATA}/${sample}.r2.fastq.gz -S /dev/null --al-conc-gz phix-${sample}.r%.fastq.gz -p 4 2>&1 | tee -a phix.log
done
@nuada
nuada / bed_cleanup.sh
Last active August 29, 2015 14:02
Cleanup BED: sort and merge intervals, extract gene symbols
bedtools sort -i foo.bed | bedtools merge -i - -nms | sed -e 's/\r//g' | awk -F '\t' '{ split($4, name, "."); print $1 FS $2 FS $3 FS name[1] }'
@nuada
nuada / bed2reg
Last active August 29, 2015 14:02
Convert BED file to PLINK/SEQ REG format
#!/bin/bash
echo -e "#CHR\tBP1\tBP2\tID"
awk -F '\t' '{ print $1 FS $2+1 FS $3 FS NR }' | sed -e 's/^\([0-9]\+\)/chr\1/g'