tkrahn’s gists

tkrahn / extract_fastq_from_bam.sh

Created February 14, 2018 14:15

	#!/bin/bash

	# Extracting paired fastq files directly from a BAM file
	# https://gist.github.com/darencard/72ddd9e6c08aaff5ff64ca512a04a6dd
	# This script requires a lot of disk space! (about 7 x BAM file size)
	YSEQID="99999"
	original_bam="${YSEQID}.bam"
	threads=80

	echo "Splitting BAM file..."

tkrahn / gist:41201fe5b1e4e3d6f8a923c48320ab88

Created February 14, 2018 14:35

unzip_and_merge_veritas_bam.sh

	#!/bin/bash
	tar xf 999999.bam.tgz
	samtools merge 999999_merged.bam *.bam

	# For extracting fastq files, the following steps are not required
	# But if you want to use the BAM file directly you'll need to sort and index it to the correct reference sequence

	samtools sort -o 999999.sorted.bam 99999_merged.bam
	samtools index 999999.sorted.bam

tkrahn / bigY_hg38_pipeline.sh

Created April 8, 2018 19:27

Script to annotate a BigY VCF file and identify the derived and novel SNPs

	#!/bin/bash
	START=$(date +%s.%N)
	clear
	# setup parameters

	YSEQID=${PWD##*/}
	# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)
	NUM_THREADS=80

	REF="/genomes/0/refseq/hg38/hg38.fa"

tkrahn / gist:484cb64430d5c4cea8a2b86c105318b3

Created April 15, 2019 16:32

Extracting mtDNA FASTA file from WGS BAM

	# mtDNA allele calling $ FASTA file generation
	samtools mpileup -r chrM -u -C 50 -v -f ${REF} ${BAMFILE_SORTED} \| bcftools call -O z -v -m -P 0 > chrM_${VCF_FILE}.gz
	tabix chrM_${VCF_FILE}.gz
	samtools faidx $REF chrM \| bcftools consensus chrM_${VCF_FILE}.gz -o ${YSEQID}_mtDNA.fasta

tkrahn / bwa_hg19_23andMe_pipeline.sh

Created May 13, 2019 16:14

	#!/bin/bash
	START=$(date +%s.%N)
	clear
	# setup parameters

	YSEQID=${PWD##*/}
	# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)

	NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
	echo "We can use ${NUM_THREADS} threads."

tkrahn / microbiome_pipeline.sh

Created July 26, 2019 10:34

	#!/bin/bash
	clear
	YSEQ_ID=${PWD##*/}
	# Exract umapped reads from the BAM file
	samtools view -b ${YSEQ_ID}_bwa-mem_hg19_sorted.bam > ${YSEQ_ID}_unmapped.bam '*'

	bedtools bamtofastq -i ${YSEQ_ID}_unmapped.bam -fq ${READS_1} -fq2 ${READS_2}


	# Check if there are any bacteria?

tkrahn / wbtg2_pipeline.sh

Last active September 14, 2019 20:36

Nanopore De Novo Assembly Pipeline (Experimental)

	#!/bin/bash
	START=$(date +%s.%N)
	clear
	# setup parameters

	YSEQID=${PWD##*/}
	# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)

	NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
	echo "We can use ${NUM_THREADS} threads."

tkrahn / minimap2_hg38_pipeline.sh

Last active September 22, 2019 22:04

This is my (simplified) pipeline to map Nanopore FastQ reads to a reference genome (here hg38). Note that this is certainly not the best way to use Nanopore results. It's just a quick check how the results look.

	#!/bin/bash
	START=$(date +%s.%N)
	clear
	# setup parameters

	YSEQID=${PWD##*/}
	# YSEQID="1234" # (the above command simply gets the name of the last segment of the current working directory)

	NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
	echo "We can use ${NUM_THREADS} threads."

tkrahn / parallelMpileup.sh

Created April 7, 2020 01:19

Parallel SNP calling by chromosome

	#!/bin/bash
	NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
	REF="MyReference.fa"
	BAMFILE_SORTED="MySortedAndIndexed.bam"
	VCF_FILE="My.vcf"

	# Parallel SNP calling by chromosome
	bcftools mpileup -r chr1 -Ou -C 50 -f $REF $BAMFILE_SORTED \| bcftools call -O z --threads $NUM_THREADS -v -V indels -m -P 0 > chr1_${VCF_FILE}.gz &
	bcftools mpileup -r chr2 -Ou -C 50 -f $REF $BAMFILE_SORTED \| bcftools call -O z --threads $NUM_THREADS -v -V indels -m -P 0 > chr2_${VCF_FILE}.gz &
	bcftools mpileup -r chr3 -Ou -C 50 -f $REF $BAMFILE_SORTED \| bcftools call -O z --threads $NUM_THREADS -v -V indels -m -P 0 > chr3_${VCF_FILE}.gz &

tkrahn / link2cladefinder.html

Created April 22, 2020 08:57

Example how to link to YSEQ Cladefinder

	<html>
	<body>
	Here is a Link to <a href="https://cladefinder.yseq.net/interactive_tree.php?snps=L21%2B">L21</a><br>
	The "Percent 2B" represents the encoding for the plus sign.
	</body>
	</html>