Created
July 25, 2023 07:10
-
-
Save pansapiens/e547018294fd380c7f0fe50283e64622 to your computer and use it in GitHub Desktop.
Deinterleave MGI FASTQ lanes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Deinterleave a gzipped FASTQ file generated by an MGI sequencer with two
# flowcell lanes into one gzipped FASTQ file per lane.
#
# Read headers are expected in the format @v300009551L1C002R003000000/1, with
# L1 or L2 indicating the lane.
# This can be useful for some analyses since each lane can behave like a
# technical replicate (eg DADA2 error correction?)
#
# Usage:
#   <this script> <sample>.fastq.gz
#
# Outputs (written to the current directory):
#   <sample>_lane1.fastq.gz  reads whose header matches L1C
#   <sample>_lane2.fastq.gz  reads whose header matches L2C
#
# Exit status: 0 on success, 2 on usage error, non-zero if the input is
# unreadable, is not valid gzip, or ends in the middle of a FASTQ record.

set -euo pipefail

if [[ $# -lt 1 ]]; then
  printf 'Usage: %s <sample>.fastq.gz\n' "${0##*/}" >&2
  exit 2
fi

input_fastq_gz="$1" # input FASTQ file

# Fail before any output file is created or truncated.
if [[ ! -r "$input_fastq_gz" ]]; then
  printf 'error: cannot read input file: %s\n' "$input_fastq_gz" >&2
  exit 1
fi

samplename="$(basename -- "$input_fastq_gz" .fastq.gz)"
lane1_file="${samplename}_lane1.fastq"
lane2_file="${samplename}_lane2.fastq"

# Create (or truncate) both outputs up front: awk only creates a file on its
# first write to it, so a lane without reads would otherwise leave no file
# behind and make the gzip step at the end fail.
: > "$lane1_file"
: > "$lane2_file"

# Decompress with gzip -dc (portable equivalent of zcat) and let awk route each
# four-line FASTQ record to the file for its lane.
gzip -dc -- "$input_fastq_gz" | awk -v l1="$lane1_file" -v l2="$lane2_file" '
  BEGIN { OFS = "\n" }

  # Tolerate blank lines where a record header is expected (e.g. at EOF).
  $0 == "" { next }

  {
    header = $0
    # getline returns 0 at end of input and -1 on error; either one means the
    # record is incomplete, so stop instead of emitting stale fields.
    if ((getline seq) <= 0 || (getline plus) <= 0 || (getline qual) <= 0) {
      printf "error: truncated FASTQ record starting with: %s\n", header > "/dev/stderr"
      failed = 1
      exit 1
    }

    if (header ~ /L1C/)
      print header, seq, plus, qual > l1
    else if (header ~ /L2C/)
      print header, seq, plus, qual > l2
    else
      unassigned++ # matches neither lane: not written anywhere
  }

  END {
    if (failed)
      exit 1
    if (unassigned)
      printf "warning: %d read(s) matched neither L1C nor L2C and were skipped\n", unassigned > "/dev/stderr"
  }
'

# Compress the de-interleaved FASTQ files
gzip -- "$lane1_file"
gzip -- "$lane2_file"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@v300009551L1C002R003000000/1
AAGGCTCTCTACGACAGGGTTTACTACGTTAGCTGAT
+
III9IG9ICBGIH>FBGGIIGII;>=FBB9D:9?=BC;
@v300009551L2C002R003000001/1
AGCTACTGAGTCCGATGCATGCTAGCTAGCTGATGAC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@v300009551L1C002R003000002/1
GCGTAAGCTACGGTACGTTACGTTACGTGCGTGGTGGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@v300009551L2C002R003000003/1
TGCTAGCTGACGATCGTAGCTGATGCTAGCTAGCTGAT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment