Created
December 14, 2018 08:54
-
-
Save nilesh-tawari/08278f4d6edb56b0ca1d8ead597069b2 to your computer and use it in GitHub Desktop.
Merge fastq files from multiple lanes using shell script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Command-line arguments:
#   $1 - input directory (the PATH searched for the per-lane FASTQ files)
#   $2 - output directory
# The full argument list is kept in `args`; the two directories are copied
# into the globals that main() reads.
args=("$@")
input_dir="${args[0]}"
output_dir="${args[1]}"
#######################################
# Merge per-lane paired-end FASTQ files into one R1 and one R2 file.
# Globals:
#   input_dir   (read)  directory searched for *R1*.fastq.gz and
#                       *R2*.fastq.gz files
#   output_dir  (read)  directory the merged files are written to
#   sample_name (read/written) stem of the output file names; when empty it
#                       is derived from the first R1 file by stripping the
#                       extension and lane/read suffixes
# Arguments:
#   None. Positional arguments are ignored; the globals above are used.
# Outputs:
#   Prints both output paths and a completion message to stdout.
#   Diagnostics (and the `set -x` trace) go to stderr.
# Returns:
#   Exits 2 when input_dir or output_dir is empty, 1 when the R1/R2 file
#   counts differ or no input files are found.
#######################################
main() {
  set -e -x -o pipefail

  # An empty input_dir would make the glob below search the filesystem root.
  if [[ -z "$input_dir" || -z "$output_dir" ]]; then
    echo "USAGE: ./merge_fastq.sh input_dir output_dir" >&2
    exit 2
  fi

  # Collect R1/R2 files with a glob instead of parsing `ls`, so directory and
  # file names containing whitespace survive intact.
  local -a fastq1=("$input_dir"/*R1*.fastq.gz)
  local -a fastq2=("$input_dir"/*R2*.fastq.gz)
  # Without nullglob an unmatched pattern stays as a literal word; drop it.
  [[ -e "${fastq1[0]}" ]] || fastq1=()
  [[ -e "${fastq2[0]}" ]] || fastq2=()

  # sanity-check inputs
  local pairs="${#fastq1[@]}"
  if [ "$pairs" -ne "${#fastq2[@]}" ]; then
    echo "fastq1 and fastq2 input arrays have different lengths. The fastq1 input array should contain the 'left' read files, and the fastq2 input array should contain the 'right' read files." >&2
    exit 1
  fi
  # With nothing to merge, `cat` would be called without operands and block
  # on stdin, so fail explicitly instead.
  if (( pairs == 0 )); then
    echo "No *R1*.fastq.gz / *R2*.fastq.gz files found in '$input_dir'." >&2
    exit 1
  fi

  # Get name of 1st file within fastq1 array
  local name="${fastq1[0]}"

  # Strip suffixes from filename to obtain sample_name. sample_name is left
  # global on purpose so a caller-provided value is honoured.
  if [ "$sample_name" == "" ]; then
    sample_name=${name%.gz}
    sample_name=${sample_name%.fastq}
    sample_name=${sample_name%.1}
    sample_name=${sample_name%_1}
    sample_name=${sample_name%.001}
    sample_name=${sample_name%_001}
    sample_name=${sample_name%.R1}
    sample_name=${sample_name%_R1}
    sample_name=${sample_name%.R2}
    sample_name=${sample_name%_R2}
  fi

  # Assign output name
  local fastq1_outfile="${sample_name}_merged_R1_001.fastq.gz"
  local fastq2_outfile="${sample_name}_merged_R2_001.fastq.gz"

  # Remove the input-directory prefix (quoted so it is matched literally, not
  # as a glob pattern) and the separator left behind, then re-root the name
  # under output_dir.
  fastq1_outfile="${fastq1_outfile#"$input_dir"}"
  fastq2_outfile="${fastq2_outfile#"$input_dir"}"
  fastq1_outfile="$output_dir/${fastq1_outfile#/}"
  fastq2_outfile="$output_dir/${fastq2_outfile#/}"
  printf '%s\n' "$fastq1_outfile"
  printf '%s\n' "$fastq2_outfile"

  if [[ $name =~ \.gz$ ]]; then
    # gzipped files, can be concat-ed without decompression
    cat -- "${fastq1[@]}" > "$fastq1_outfile"
    cat -- "${fastq2[@]}" > "$fastq2_outfile"
  else
    # non-gzipped, assume no compression currently
    # (not reached while the globs above only match *.fastq.gz)
    cat -- "${fastq1[@]}" | bgzip -c > "$fastq1_outfile"
    cat -- "${fastq2[@]}" | bgzip -c > "$fastq2_outfile"
  fi
  echo 'Done merging files!!!'
}
# Run the merge, forwarding the script's own arguments rather than the
# literal words "input_dir" and "output_dir".
main "$@"
time bash merge_fastq.sh path_to_fastq . > m.log 2>&1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
USAGE: ./merge_fastq.sh input_dir output_dir