# Generate a range of standard quality metrics with FastQC.
# Run FastQC on every gzipped FASTQ file in the current directory and write
# the reports into ./fastqc.
module load FastQC
# -p: do not fail when the output directory is left over from a previous run.
mkdir -p fastqc
fastqc *.fastq.gz -o fastqc
# This produces full HTML reports for all sequences, but it's challenging to
# inspect these manually. Use the following to quickly check for poor-quality
# sequences.
#######################################
# Print the "Sequences flagged as poor quality" count from one FastQC report.
# Arguments: $1 - path to a FastQC HTML report
# Outputs:   the count (digits only) on stdout
# Returns:   0 if the statistic was found, 1 otherwise
#######################################
poor_quality_count() {
  local report=$1
  local count
  # [^0-9]* instead of a greedy .* between the label and the number: a greedy
  # .* swallows every digit but the last, so 123 would be reported as 3.
  count=$(sed -nE \
    's/.*Sequences flagged as poor quality[^0-9]*([0-9]+).*/\1/p' \
    <"$report" | head -n 1)
  [[ -n "$count" ]] || return 1
  printf '%s\n' "$count"
}

# FastQC was run with "-o fastqc", so the HTML reports live in that directory.
for f in fastqc/*.html; do
  [[ -e "$f" ]] || continue # the glob matched nothing
  if pc=$(poor_quality_count "$f"); then
    printf '%s %s\n' "$f" "$pc"
  else
    # Report a missing statistic instead of silently printing 0.
    printf 'warning: no poor-quality count found in %s\n' "$f" >&2
  fi
done
# Check reads for species composition using Kraken.
#######################################
# Classify one gzipped FASTQ file with Kraken and save the kraken-report
# summary next to it.
# NOTE(review): no --db is passed to kraken or kraken-report; presumably the
# database comes from the environment's default Kraken configuration - confirm.
# Arguments: $1 - input file, expected to end in .fastq.gz
# Outputs:   a progress line on stdout; the report is written to
#            <input without .fastq.gz>_krakenrep.txt
# Returns:   2 if no input file is given, otherwise the status of the
#            kraken | kraken-report pipeline
#######################################
make_kraken_report() {
  local f=${1-}
  if [[ -z "$f" ]]; then
    printf 'usage: make_kraken_report FILE.fastq.gz\n' >&2
    return 2
  fi
  local outfile=${f%.fastq.gz}_krakenrep.txt
  printf 'Processing %s into %s\n' "$f" "$outfile"
  # Quoted so that file names containing spaces or glob characters work; an
  # unquoted redirect target with spaces is an "ambiguous redirect" in bash.
  kraken --fastq-input --gzip-compressed "$f" | kraken-report >"$outfile"
}
# Make the function visible to the bash processes that GNU parallel spawns.
export -f make_kraken_report
# Glob directly instead of parsing `ls`, so names with whitespace stay intact.
fastq_files=(*.fastq.gz)
if [[ -e "${fastq_files[0]}" ]]; then
  # Run up to 24 reports concurrently, one input file per job.
  parallel -j 24 make_kraken_report {} ::: "${fastq_files[@]}"
else
  # An unmatched glob stays literal; do not hand it to kraken as a file name.
  printf 'No *.fastq.gz files found in %s\n' "$PWD" >&2
fi