Last active
June 13, 2020 02:50
-
-
Save kcho/9edcaf21f2669b72a767a500824f3849 to your computer and use it in GitHub Desktop.
SRR_fasta_download_trim_alignment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# 12th June 2020
# workshop by Usan Ong
#
# Downloads SRA runs as FASTQ, trims adapters with Trimmomatic and aligns
# the trimmed reads with HISAT2.

# Environment settings
# Every tool location defaults to the path used in the original workshop and
# can be overridden from the environment (TRIMMOMATIC_JAR, HISAT2_BIN,
# GRCH38_DIR) so the script runs on machines with a different layout.
analysis_dir=${PWD}
trimmomatic=${TRIMMOMATIC_JAR:-/Users/kevin/Downloads/Trimmomatic-0.39/trimmomatic-0.39.jar}
hisat2=${HISAT2_BIN:-/Users/kevin/Downloads/hisat2-2.2.0/hisat2}
# downloaded from hisat2 website
grch38_dir=${GRCH38_DIR:-/Users/kevin/Downloads/grch38}

# Move to the directory; stop here if that fails so that later steps never
# read or write files in an unexpected location.
cd "${analysis_dir}" || {
  printf 'cannot cd to %s\n' "${analysis_dir}" >&2
  exit 1
}
# Download FASTQ files using parallel-fastq-dump
# https://github.com/rvalieris/parallel-fastq-dump
sra_ids=(SRR7868778 SRR7868777 SRR7868776 SRR7868775 SRR7868774 SRR7868773)

# Give every accession its own --sra-id flag so that none of them is left
# behind as a stray positional argument.
sra_id_args=()
for sra_id in "${sra_ids[@]}"; do
  sra_id_args+=(--sra-id "${sra_id}")
done

# --split-files and --gzip are extra arguments that parallel-fastq-dump hands
# on to fastq-dump. The second one must start with two ASCII hyphens; the
# original had a typographic em dash there, so the option was not recognised.
parallel-fastq-dump \
  "${sra_id_args[@]}" \
  --threads 4 \
  --outdir ./ \
  --split-files --gzip \
  || printf 'parallel-fastq-dump failed with status %s\n' "$?" >&2
# Unzip the downloaded FASTQ files
# Only *.fastq.gz is matched so unrelated archives that happen to sit in the
# analysis directory are left alone.
for gzip_file in *.fastq.gz; do
  # With no match the glob stays a literal pattern; skip it instead of
  # handing a non-existent file name to gunzip.
  [[ -e "${gzip_file}" ]] || continue
  printf '%s\n' "unziping ${gzip_file}"
  gunzip -- "${gzip_file}"
done
# QC here using FastQC

# Trim out adaptor sequences
# Use TruSeq3-SE.fa from the current directory when it is there (the original
# behaviour); otherwise fall back to the copy in the adapters/ directory that
# sits next to the Trimmomatic jar.
adapter_fasta=TruSeq3-SE.fa
if [[ ! -e "${adapter_fasta}" && -e "${trimmomatic%/*}/adapters/TruSeq3-SE.fa" ]]; then
  adapter_fasta="${trimmomatic%/*}/adapters/TruSeq3-SE.fa"
fi

for unzipped_file in *.fastq; do
  # Skip the literal pattern left behind when nothing matches.
  [[ -e "${unzipped_file}" ]] || continue
  printf '%s\n' "trimming ${unzipped_file}"
  # The ".fastaq" suffix (sic) is kept on purpose: the alignment step globs
  # for "*_trimmed.fastaq", and because it does not end in ".fastq" a re-run
  # of this loop never picks up already-trimmed output.
  trimmed_file_name="${unzipped_file%.*}_trimmed.fastaq"
  # Single-end mode with phred+33 qualities: clip adapters, drop low-quality
  # leading/trailing bases, apply a 4-base sliding window at quality 15 and
  # discard reads shorter than 36 bases.
  java -jar "${trimmomatic}" \
    SE -phred33 \
    "${unzipped_file}" \
    "${trimmed_file_name}" \
    "ILLUMINACLIP:${adapter_fasta}:2:30:10" \
    LEADING:3 TRAILING:3 \
    SLIDINGWINDOW:4:15 \
    MINLEN:36
done
# Alignment
# set python2 as the environment
# Done once, before the loop: the original prepended /usr/bin on every
# iteration, which made PATH grow by one entry per aligned file.
export PATH="/usr/bin:${PATH}"

for trimmed_file in *_trimmed.fastaq; do
  # Skip the literal pattern left behind when nothing matches.
  [[ -e "${trimmed_file}" ]] || continue
  printf '%s\n' "aligning ${trimmed_file}"
  sam_file="${trimmed_file%.*}.sam"
  # alignment: -x is the index basename, -U the unpaired reads, -S the SAM
  # output. A failure is reported but does not stop the remaining files.
  "${hisat2}" -x "${grch38_dir}/genome" -U "${trimmed_file}" -S "${sam_file}" \
    || printf 'hisat2 failed for %s\n' "${trimmed_file}" >&2
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment