Created
July 29, 2024 07:27
-
-
Save colindaven/79a6d1fa40143060a8e93331ad3ff34c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Goals: | |
# - Add bed annotation using odgi to try to display gene annotations in seqtubemap | |
# Contact: Colin Davenport | |
echo "Requires: odgi and vg toolsets" | |
echo "Usage: bash vg_annotate.sh input.og input_gff3 corrected_path" | |
echo "Usage: bash vg_annotate.sh input.og input_gff3 MA_genotype=chr9_133258121-134258121" | |
infile=$1 | |
features=$2 | |
corrected_path=$3 | |
if [ -z $infile ]; | |
then | |
echo "Specify input odgi as argument 1" && exit 1 | |
else | |
echo "Input file: $infile" | |
fi | |
if [ -z $features ]; | |
then | |
echo "Specify input features as argument 2" && exit 1 | |
else | |
echo "Input file: $features" | |
fi | |
if [ -z $corrected_path ]; | |
then | |
echo "Specify corrected_path as argument 3" && exit 1 | |
else | |
echo "Input path: $corrected_path" | |
fi | |
#paths to containers | |
#vg='singularity exec image.sif vg ' | |
odgi='singularity exec image.sif odgi ' | |
############ | |
## odgi add annotation | |
########### | |
# Convert gene lines in the gff3 to bed, correcting coordinates. Can be more legible as an awk script. No gene annotation. | |
awk 'BEGIN {OFS = "\t";} $3 == "exon" {print $1, $4 - 1, $5, $9}' $features > genes.bed | |
# Correct path coords in the bed | |
awk -v path="$corrected_path" 'BEGIN {OFS = "\t";} {print path, $2, $3, $4}' genes.bed > genes_corrected.bed | |
echo "INFO: Reporting paths in odgi " | |
$odgi paths -L -i $infile | |
echo "INFO: Reporting 2 paths from each bed before and after modification - must match those in odgi to be displayed " | |
head -n 2 genes.bed | |
head -n 2 genes_corrected.bed | |
# inject the bed into the odgi | |
$odgi inject -b genes_corrected.bed -i $infile -o $infile.annot.og | |
## vg part - unused at present | |
############ | |
## Convert, simplify, reconvert to odgi - uses vg old protobuf format, which vg simplify probably needs, 1 warning, no errors | |
########### | |
# Convert odgi to gfa | |
#odgi convert or view, is this needed? Is gfa or odgi standard interchange format | |
# Convert gfa to vg | |
#$vg convert -v -g $infile > $infile.vg | |
# Simplify - remove SNPs, indels below 10bp | |
#$vg simplify -t 4 -m 10 $infile.vg > $infile.nosnarls.vg | |
# now convert back to gfa -> then to odgi | |
#$vg convert -f $infile.nosnarls.vg > $infile.nosnarls.gfa | |
#$odgi build -g $infile.nosnarls.gfa -o $infile.nosnarls.og | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
These are just ideas which did work, I'm not very satisfied with it. I think vg and odgi annotation techniques need to improve a lot.