Last active
February 22, 2017 19:41
-
-
Save photocyte/e59fa3d6f2333fb8b2bfec4d28433737 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##No warranty expressed or implied. Hackish code to use Seqkit to "rotate" circular genomes. | |
##Intended for use with Quiver, or other polishing tools like Pilon, to ensure that | |
##there isn't a coverage drop at the end of the circular sequence which prevents calling of the true consensus bases | |
##Graphmap is the only aligner that I'm aware of that properly deals with circular references! | |
FASTA=$1 | |
NAME=$(basename $FASTA) | |
LENGTH=$(seqkit stat $FASTA | grep "FASTA" | tr -s " "| cut -f 5 -d " " | tr -d ",") | |
HALF=$(expr $LENGTH / 2 ) | |
echo $LENGTH $HALF | |
##If length == even, will get 2 contigs and want to pick the 2nd one | |
if [(expr $LENGTH % 2) -eq 0] | |
then | |
FILTERME=$(seqkit sliding -C $FASTA -s $HALF -W $LENGTH | grep ">" | tail -n 1) | |
echo $FILTERME | |
seqkit sliding -C $FASTA -s $HALF -W $LENGTH | seqkit grep -v -p "$FILTERME" > rotated_$NAME | |
##If length == odd, will get three contigs and want to pick the middle one | |
elif | |
FILTERME1=$(seqkit sliding -C $FASTA -s $HALF -W $LENGTH | grep ">" | tail -n 1) | |
FILTERME1=${FILTERME#">"} | |
FILTERME2=$(seqkit sliding -C $FASTA -s $HALF -W $LENGTH | grep ">" | head -n 1) | |
FILTERME2=${FILTERME#">"} | |
FILTERME="$FILTERME1 $FILTERME2" | |
seqkit sliding -C $FASTA -s $HALF -W $LENGTH | seqkit grep -v -p "$FILTERME" > rotated_$NAME | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment