Skip to content

Instantly share code, notes, and snippets.

@tkrahn
tkrahn / wbtg2_pipeline.sh
Last active September 14, 2019 20:36
Nanopore De Novo Assembly Pipeline (Experimental)
#!/bin/bash
START=$(date +%s.%N)
clear
# setup parameters
YSEQID=${PWD##*/}
# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)
NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
echo "We can use ${NUM_THREADS} threads."
#!/bin/bash
clear
YSEQ_ID=${PWD##*/}
# Exract umapped reads from the BAM file
samtools view -b ${YSEQ_ID}_bwa-mem_hg19_sorted.bam > ${YSEQ_ID}_unmapped.bam '*'
bedtools bamtofastq -i ${YSEQ_ID}_unmapped.bam -fq ${READS_1} -fq2 ${READS_2}
# Check if there are any bacteria?
#!/bin/bash
START=$(date +%s.%N)
clear
# setup parameters
YSEQID=${PWD##*/}
# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)
NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
echo "We can use ${NUM_THREADS} threads."
@tkrahn
tkrahn / gist:484cb64430d5c4cea8a2b86c105318b3
Created April 15, 2019 16:32
Extracting mtDNA FASTA file from WGS BAM
# mtDNA allele calling $ FASTA file generation
samtools mpileup -r chrM -u -C 50 -v -f ${REF} ${BAMFILE_SORTED} | bcftools call -O z -v -m -P 0 > chrM_${VCF_FILE}.gz
tabix chrM_${VCF_FILE}.gz
samtools faidx $REF chrM | bcftools consensus chrM_${VCF_FILE}.gz -o ${YSEQID}_mtDNA.fasta
@tkrahn
tkrahn / bigY_hg38_pipeline.sh
Created April 8, 2018 19:27
Script to annotate a BigY VCF file and identify the derived and novel SNPs
#!/bin/bash
START=$(date +%s.%N)
clear
# setup parameters
YSEQID=${PWD##*/}
# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)
NUM_THREADS=80
REF="/genomes/0/refseq/hg38/hg38.fa"
@tkrahn
tkrahn / gist:41201fe5b1e4e3d6f8a923c48320ab88
Created February 14, 2018 14:35
unzip_and_merge_veritas_bam.sh
#!/bin/bash
tar xf 999999.bam.tgz
samtools merge 999999_merged.bam *.bam
# For extracting fastq files, the following steps are not required
# But if you want to use the BAM file directly you'll need to sort and index it to the correct reference sequence
samtools sort -o 999999.sorted.bam 99999_merged.bam
samtools index 999999.sorted.bam
#!/bin/bash
# Extracting paired fastq files directly from a BAM file
# https://gist.github.com/darencard/72ddd9e6c08aaff5ff64ca512a04a6dd
# This script requires a lot of disk space! (about 7 x BAM file size)
YSEQID="99999"
original_bam="${YSEQID}.bam"
threads=80
echo "Splitting BAM file..."