Last active
November 14, 2017 16:58
-
-
Save mbk0asis/86a30e09077b1ad6bcd303aed820694f to your computer and use it in GitHub Desktop.
Batch bisulfite primer design tool version 2 (NEW : Multiple results per template, allowed to set number of C's within a primer)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
printf "\n *** BIS BATCH PRIMER version 2.0 ***" | |
printf "\n\n !!! 'Primer3 & fastx-toolkit' must be installed on the system.\n\n !!! Edit parameters (e.g. sizes, Tm, and etc) before start\n\n " | |
printf "\n\n Usage : \n ./mp_primer.sh FASTA PARAMETER \n\n" | |
printf " >>> input FASTA = "$1 | |
printf " \n >>> parameters = "$2 | |
printf "\n\n\n ()()() Running... \n\n" | |
if [ -f $1 -a -f $2 ]; then | |
mkdir tmp | |
cd tmp | |
cp ../$2 . | |
cat ../$1 | while read L; do echo $L"_lower";read L ;echo "$L" | rev | tr "ATCG" "TAGC" ; done > $1.rev # reverse_complement | |
cat ../$1 $1.rev > $1.comb | |
sed 's/ /_/g' $1.comb | fasta_formatter -t | awk '{print $1"\t"toupper($2)}' |sed 's/CG/NG/g' | sed 's/C/T/g' > $1.comb.tab | |
# mask 'CG' to 'NG' & covert rest 'C' to 'T' | |
cut -f 1 $1.comb.tab | sort | while read line; do grep -w $line $1.comb.tab > $line.split; done | |
# split each header into a single file | |
#################################### | |
# sequence file generator (Add titles to tab. "SEQUENCE_ID=", "SEQUENCE_TEMPLATE=") | |
for s in ./*.split | |
do | |
awk '{print "SEQUENCE_ID="$1,"\n","SEQUENCE_TEMPLATE="$2}' $s | sed 's/ //g' > $s.seq | |
done | |
#################################### | |
# parameter file generator | |
for s in ./*.split # to generate a list of target sequence (CpG) positions | |
do | |
cut -f 2 $s | grep -bo 'NG' | sed 's/:/\t/g' | cut -f 1 | awk '{print $1+1",2"}' | tr '\n' ' ' | \ | |
awk '{print "SEQUENCE_TARGET="$0}' | cat - $2 > $s.param | |
done | |
#################################### | |
# combine sequence & parameter files | |
cut -f 1 $1.comb.tab | uniq | while read line; do cat $line.split.seq $line.split.param > $line.p3in ; done | |
#################################### | |
# PRIMER3 | |
for i in ./*.p3in # run PRIMER3 | |
do | |
primer3_core $i > $i.p3out | |
done | |
#################################### | |
# print ouf PRIMER3 results | |
for r in ./*.p3out | |
do | |
grep 'SEQUENCE=\|LEFT_.=\|RIGHT_.=\|LEFT_._TM\|RIGHT_._TM' $r | sed 's/=/\t/g' | cut -f 2 | paste -d "\t" - - - - - - | sed 's/,/\t/g' | \ | |
perl -ne 'print if !/N/ || /\b[ACTG]{0,4}N/' | \ | |
awk 'BEGIN{FS=OFS="\t"}{gsub("N","Y",$1);gsub("N","R",$2);print $1,$2,$5-$3+1,$3,$5,$4,$6,$7,$8}' > $r.int | |
done | |
for i in ./*.int | |
do | |
awk '{print FILENAME"_"NR,$0}' $i | sed 's/ /\t/g;s/.p3in.p3out.int//g' | awk -F "/" '{print $2}' | |
done | sort -k1 > $1.p3outs | |
echo 'ID Forward Reverse Product_Size Product_Start Product_End Len_forward Len_reverse Tm_forward Tm_reverse' | \ | |
cat - $1.p3outs > ../$1.p3.table | |
# p3out for isPCR | |
for r in ./*.p3out | |
do | |
grep 'SEQUENCE=' $r | paste -d "\t" - - | sed 's/=/\t/g' | cut -f 2,4| perl -ne 'print if !/N/ || /\b\S{0,4}N/' | \ | |
awk '{gsub("N","T",$1);gsub("N","A",$2); print}' > $r.ispcr | |
done | |
for i in ./*.ispcr | |
do | |
awk -F "/" '{print FILENAME"_"NR,$0}' $i | sed 's/ /\t/g;s/.p3in.p3out.ispcr//g' | awk -F "/" '{print $2}' | |
done | sort -k1 > ../$1.isPcrInput | |
################################# | |
# primer design report | |
printf "\n\n *** PRIMER REPORT\n\n * INPUT HEADERS = " | |
grep -c ">" ../$1 | |
printf "\n * DESIGNED PRIMER PAIRS = " | |
cat ../$1.isPcrInput | wc -l | |
printf "\n * PASSED HEADERS = " | |
sed 's/_/\t/g' ../$1.isPcrInput | cut -f1 | uniq | wc -l | |
printf "\n * FAILED HEADERS = " | |
sed 's/_/\t/g' ../$1.isPcrInput | cut -f1 | sort | uniq > header.isPcrInput | |
grep ">" ../$1 | sed 's/>//g' | sort | uniq > header.Input | |
join --check-order -t: -a 1 -o 1.1 2.1 -1 1 -2 1 header.Input header.isPcrInput |\ | |
awk -F':' '{if($2 == "") {print $1}}' | tr '\n' ' ' | |
printf "\n\n >>> '$1.p3.table' has been generated! (Use this file to order primers)\n" | |
printf " >>> '$1.isPcrInput' has been generated! (Use this file to obtain amplicon sequences)" | |
printf "\n\n !!! Done !!!\n\n" | |
cd .. | |
# rm -r tmp/ | |
else | |
printf "\n\n !!! ERROR:: FASTA or parameter file is missing!\n\n" | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment