Last active
May 8, 2022 02:27
-
-
Save nuada/7f97ff4a92c39a607ddd to your computer and use it in GitHub Desktop.
Convert AB1 Sanger sequencing traces to FASTQ and align them to a reference genome
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Convert Sanger .ab1 traces in the current directory to FASTQ, align each
# read to a reference with `bwa mem`, and merge the alignments per sample.
#
# Pipeline (all paths are relative to the current working directory):
#   1. unzip every *.zip archive
#   2. convert every *.ab1 trace to <name>.fastq with EMBOSS `seqret`
#   3. align every *.fastq to ${bwa_index}, writing <name>.single.bam
#   4. merge the *.single.bam files of each sample into <sample>.bam and index it
#   5. delete *.seq, *.ab1, *.fastq and *.single.bam
#
# Naming convention: a file is <sample>_<anything>.<ext>; the sample name is
# everything before the first underscore of the extension-less file name.
#
# Requires: unzip, seqret, bwa, samtools on PATH.

# Abort on the first failing command, unset variable, or failing pipeline
# stage, so the cleanup step never deletes the inputs of a run that failed.
set -euo pipefail
# Unmatched globs expand to nothing instead of the literal pattern, so each
# loop is simply skipped when there are no files of that kind.
shopt -s nullglob

# Reference passed to `bwa mem`.
#bwa_index=/resources/hg19/bwa/0.7.5/ucsc.hg19.fasta
bwa_index=/resources/b37/bwa/0.7.5/human_g1k_v37.fasta

# Unzip every archive in the current directory.
unzip_archives() {
  local file
  for file in *.zip; do
    unzip "$file"
  done
}

# Convert each .ab1 trace into a FASTQ file with the same base name.
extract_fastqs() {
  local file
  for file in *.ab1; do
    seqret -sformat abi -osformat fastq -auto -stdout -sequence "$file" \
      > "${file%.ab1}.fastq"
  done
}

# Align each FASTQ to the reference and write <id>.single.bam.
align_reads() {
  local file id sample
  for file in *.fastq; do
    # Strip only the extension (not everything after the first dot) so that
    # file names containing dots keep distinct ids and output files.
    id=${file%.fastq}
    sample=${id%%_*}
    # NOTE(review): the read group is tagged PL:ILLUMINA although the input is
    # Sanger data; the SAM specification lists CAPILLARY for that. Left
    # unchanged because downstream tools may rely on it -- confirm.
    bwa mem -t 4 \
      -R "@RG\tID:${id}\tSM:${sample}\tLB:${sample}\tPL:ILLUMINA\tCN:OMICRON" \
      "$bwa_index" "$file" \
      | samtools view -Sbh -o "${id}.single.bam" -
  done
}

# Print the unique sample names found among the *.single.bam files, one per line.
list_samples() {
  local bam id
  for bam in *.single.bam; do
    id=${bam%.single.bam}
    printf '%s\n' "${id%%_*}"
  done | LC_ALL=C sort -u
}

# Merge the per-read BAMs of each sample into <sample>.bam and index it.
merge_samples() {
  local sample
  local -a inputs
  # The sample list is read on fd 3 so commands in the loop keep their stdin.
  while IFS= read -r -u 3 sample; do
    # Match "<sample>.single.bam" and "<sample>_*.single.bam" only; a bare
    # "<sample>*" glob would also pick up other samples that share the prefix
    # (e.g. A1 would swallow A10_*).
    inputs=()
    if [[ -e "${sample}.single.bam" ]]; then
      inputs+=("${sample}.single.bam")
    fi
    inputs+=("${sample}_"*.single.bam)
    samtools merge "${sample}.bam" "${inputs[@]}"
    samtools index "${sample}.bam"
  done 3< <(list_samples)
}

# Delete the extracted inputs and intermediates, keeping archives and BAMs.
remove_intermediates() {
  rm -rf -- *.seq *.ab1 *.fastq *.single.bam
}

# Run the whole pipeline in order.
main() {
  unzip_archives
  extract_fastqs
  align_reads
  merge_samples
  remove_intermediates
}

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment