|
#!/usr/bin/bash |
|
ml purge |
|
set -e -o pipefail |
|
cd /path/to/project_downsample/ |
|
mkdir -p tmp/ |
|
|
|
ml picard/3.3.0-Java-17 |
|
|
|
for i in [^d]*_R1_*.fastq.gz; do |
|
|
|
READ1="$i" |
|
READ2=$(echo $i | perl -wpe 's/_R1_0/_R2_0/g') |
|
TMPREAD1=./tmp/$(echo $READ1| perl -wpe 's/.*(S\d\d\d_L\d\d\d_R[12]_\d\d\d.fastq.gz)/$1/g') |
|
TMPREAD2=./tmp/$(echo $READ2| perl -wpe 's/.*(S\d\d\d_L\d\d\d_R[12]_\d\d\d.fastq.gz)/$1/g') |
|
TMPREADDOWNSAMPLE05_1=./tmp/ds05_$(echo $READ1| perl -wpe 's/.*(S\d\d\d_L\d\d\d_R[12]_\d\d\d.fastq.gz)/$1/g') |
|
TMPREADDOWNSAMPLE05_2=./tmp/ds05_$(echo $READ2| perl -wpe 's/.*(S\d\d\d_L\d\d\d_R[12]_\d\d\d.fastq.gz)/$1/g') |
|
TMPREADDOWNSAMPLE01_1=./tmp/ds01_$(echo $READ1| perl -wpe 's/.*(S\d\d\d_L\d\d\d_R[12]_\d\d\d.fastq.gz)/$1/g') |
|
TMPREADDOWNSAMPLE01_2=./tmp/ds01_$(echo $READ2| perl -wpe 's/.*(S\d\d\d_L\d\d\d_R[12]_\d\d\d.fastq.gz)/$1/g') |
|
READDOWNSAMPLE05_1=./ds05_$READ1 |
|
READDOWNSAMPLE05_2=./ds05_$READ2 |
|
READDOWNSAMPLE01_1=./ds01_$READ1 |
|
READDOWNSAMPLE01_2=./ds01_$READ2 |
|
|
|
if [ ! -e $TMPREAD1 ] ; then |
|
ln --symbolic -T $(realpath $READ1) $TMPREAD1 |
|
ln --symbolic -T $(realpath $READ2) $TMPREAD2 |
|
fi |
|
# O=./tmp/$(basename $TMPREAD1 .fastq.gz ).bam \ |
|
|
|
java -Xmx2g -jar $EBROOTPICARD/picard.jar FastqToSam \ |
|
-F1 $TMPREAD1 \ |
|
-F2 $TMPREAD1 \ |
|
--COMPRESSION_LEVEL 0 \ |
|
-O /dev/stdout \ |
|
-SM "$(echo $READ1| perl -wpe 's/.*(S\d\d\d)_L\d\d\d_R[12]_\d\d\d.fastq.gz/$1/g' )" \ |
|
-RG rg001 | \ |
|
java -Xmx2g -jar $EBROOTPICARD/picard.jar DownsampleSam \ |
|
-I /dev/stdin \ |
|
-O /dev/stdout \ |
|
--STRATEGY Chained \ |
|
-P 0.5 \ |
|
--ACCURACY 0.0001 \ |
|
--METRICS_FILE ./tmp/$(basename $TMPREAD1 .fastq.gz ).downsample_metrics \ |
|
--COMPRESSION_LEVEL 0 | \ |
|
java -Xmx2g -jar $EBROOTPICARD/picard.jar SamToFastq \ |
|
-I /dev/stdin \ |
|
-FASTQ $TMPREADDOWNSAMPLE05_1 \ |
|
-SECOND_END_FASTQ $TMPREADDOWNSAMPLE05_2 |
|
|
|
mv $TMPREADDOWNSAMPLE05_1 $READDOWNSAMPLE05_1 |
|
mv $TMPREADDOWNSAMPLE05_2 $READDOWNSAMPLE05_2 |
|
|
|
java -Xmx2g -jar $EBROOTPICARD/picard.jar FastqToSam \ |
|
-F1 $TMPREAD1 \ |
|
-F2 $TMPREAD1 \ |
|
--COMPRESSION_LEVEL 0 \ |
|
-O /dev/stdout \ |
|
-SM "$(echo $READ1| perl -wpe 's/.*(S\d\d\d)_L\d\d\d_R[12]_\d\d\d.fastq.gz/$1/g' )" \ |
|
-RG rg001 | \ |
|
java -Xmx2g -jar $EBROOTPICARD/picard.jar DownsampleSam \ |
|
-I /dev/stdin \ |
|
-O /dev/stdout \ |
|
--STRATEGY Chained \ |
|
-P 0.1 \ |
|
--ACCURACY 0.0001 \ |
|
--METRICS_FILE ./tmp/$(basename $TMPREAD1 .fastq.gz ).downsample_metrics \ |
|
--COMPRESSION_LEVEL 0 | \ |
|
java -Xmx2g -jar $EBROOTPICARD/picard.jar SamToFastq \ |
|
-I /dev/stdin \ |
|
-FASTQ $TMPREADDOWNSAMPLE01_1 \ |
|
-SECOND_END_FASTQ $TMPREADDOWNSAMPLE01_2 |
|
|
|
mv $TMPREADDOWNSAMPLE01_1 $READDOWNSAMPLE01_1 |
|
mv $TMPREADDOWNSAMPLE01_2 $READDOWNSAMPLE01_2 |
|
|
|
FIXHEADER1=" $({ gzip -dc $TMPREAD1 ||:; } |head -n 1| cut -f 2 -d \ )" |
|
FIXHEADER2=" $({ gzip -dc $TMPREAD2 ||:; } |head -n 1| cut -f 2 -d \ )" |
|
|
|
if [ ! -e $TMPREADDOWNSAMPLE05_1 ]; then |
|
mv $READDOWNSAMPLE05_1 $TMPREADDOWNSAMPLE05_1 |
|
fi |
|
if [ ! -e $TMPREADDOWNSAMPLE05_2 ]; then |
|
mv $READDOWNSAMPLE05_2 $TMPREADDOWNSAMPLE05_2 |
|
fi |
|
|
|
if [ ! -e $TMPREADDOWNSAMPLE01_1 ]; then |
|
mv $READDOWNSAMPLE01_1 $TMPREADDOWNSAMPLE01_1 |
|
fi |
|
|
|
if [ ! -e $TMPREADDOWNSAMPLE01_2 ]; then |
|
mv $READDOWNSAMPLE01_2 $TMPREADDOWNSAMPLE01_2 |
|
fi |
|
|
|
gzip -dc "$TMPREADDOWNSAMPLE05_1" | perl -wpe 's/\/[12]$/'"$FIXHEADER1"'/g if $.%4==1;' | gzip -c > "$TMPREADDOWNSAMPLE05_1.fixed.fastq.gz" |
|
gzip -dc "$TMPREADDOWNSAMPLE05_2" | perl -wpe 's/\/[12]$/'"$FIXHEADER2"'/g if $.%4==1;' | gzip -c > "$TMPREADDOWNSAMPLE05_2.fixed.fastq.gz" |
|
|
|
gzip -dc "$TMPREADDOWNSAMPLE01_1" | perl -wpe 's/\/[12]$/'"$FIXHEADER1"'/g if $.%4==1;' | gzip -c > "$TMPREADDOWNSAMPLE01_1.fixed.fastq.gz" |
|
gzip -dc "$TMPREADDOWNSAMPLE01_2" | perl -wpe 's/\/[12]$/'"$FIXHEADER2"'/g if $.%4==1;' | gzip -c > "$TMPREADDOWNSAMPLE01_2.fixed.fastq.gz" |
|
|
|
#fix nameing retrostpectively |
|
READDOWNSAMPLE05_1=./ds05$READ1 |
|
READDOWNSAMPLE05_2=./ds05$READ2 |
|
READDOWNSAMPLE01_1=./ds01$READ1 |
|
READDOWNSAMPLE01_2=./ds01$READ2 |
|
|
|
mv $TMPREADDOWNSAMPLE01_1.fixed.fastq.gz $READDOWNSAMPLE01_1 |
|
mv $TMPREADDOWNSAMPLE01_2.fixed.fastq.gz $READDOWNSAMPLE01_2 |
|
|
|
mv $TMPREADDOWNSAMPLE05_1.fixed.fastq.gz $READDOWNSAMPLE05_1 |
|
mv $TMPREADDOWNSAMPLE05_2.fixed.fastq.gz $READDOWNSAMPLE05_2 |
|
done |