Last active
August 29, 2015 14:01
-
-
Save dbolser/dc76ac3de2f56e7472f5 to your computer and use it in GitHub Desktop.
Convert intron-style GFF to exon-style GFF with Perl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! perl

use strict;
use warnings;

## Convert an intron-style GFF (gene / mRNA / intron / UTR / CDS rows, but
## no exon rows) into an exon-style GFF.
##
## Input files are named on the command line and read via <>. Every input
## line is echoed to STDOUT, except '###' lines, which are dropped. A
## synthesised 'exon' row is printed just ahead of the gene / mRNA / intron
## row that closes each run of UTR / CDS rows, and once more at end of input
## if a run is still pending.

die "pass an intron-style GFF for me to calculate exons over\n"
    unless @ARGV;

## Tracking variables. $prev_type is 'dumped' whenever no exonic region is
## pending; otherwise it holds the type of the last UTR / CDS row seen.
my $prev_type = 'dumped';
my $prev_beg  = -1;
my $prev_end  = -1;

## Variables used for formatting only (all taken from the latest mRNA row)
my $prev_seq;
my $prev_ori;
my $prev_parent;

## Build one tab-separated GFF 'exon' row, newline included.
## Arguments: sequence name, begin, end, orientation, parent ID.
## The source column is always written as "AUGUSTUS".
sub exon_line {
    my ($seq, $beg, $end, $ori, $parent) = @_;
    return join("\t", $seq, "AUGUSTUS", "exon",
                $beg, $end, '.', $ori, '.',
                "Parent=$parent") . "\n";
}

while (<>) {
    ## Comment lines are passed through, '###' lines are swallowed
    if (/^#/) {
        print unless /^###$/;
        next;
    }

    ## Only columns 1, 3, 4, 5 and 7 are needed here
    my ($seq, $type, $beg, $end, $ori) = (split /\t/)[0, 2, 3, 4, 6];

    ## Lines without a type column (e.g. blank lines) are echoed untouched
    ## rather than being compared against while undefined.
    unless (defined $type) {
        print;
        next;
    }

    ## Time to dump any pending exonic region and reset our counters...
    if ($type eq 'gene' || $type eq 'mRNA' || $type eq 'intron') {
        if ($prev_type ne 'dumped') {
            print exon_line($prev_seq, $prev_beg, $prev_end,
                            $prev_ori, $prev_parent);
            $prev_type = 'dumped';
            $prev_beg  = -1;
            $prev_end  = -1;
        }

        ## Grab some book keeping stuff...
        if ($type eq 'mRNA') {
            $prev_seq = $seq;
            $prev_ori = $ori;

            ## The ID value may be closed by ';' or, when it is the last
            ## attribute on the line, by the end of the line.
            die "no ID attribute found on mRNA line $.: $_"
                unless /\tID=([^;\s]+)(?:;|\s*$)/;
            $prev_parent = $1;
        }
    }

    ## One exception is when a UTR follows a UTR of the same type. The
    ## input puts no intron row between them, so the pending region is
    ## dumped here and a new one is started from the current row.
    elsif (($type eq 'five_prime_UTR' || $type eq 'three_prime_UTR')
           && $type eq $prev_type) {
        print exon_line($prev_seq, $prev_beg, $prev_end,
                        $prev_ori, $prev_parent);
        $prev_type = $type;
        $prev_beg  = $beg;
        $prev_end  = $end;
    }

    ## Ordinary UTR / CDS row: extend (or open) the pending exonic region
    elsif ($type eq 'five_prime_UTR' ||
           $type eq 'three_prime_UTR' ||
           $type eq 'CDS') {
        ## We don't dump an exon here!
        ## We just set the start position if no region is open yet
        $prev_beg = $beg if $prev_type eq 'dumped';
        ## and move the end forward
        $prev_end = $end;
        ## and record the type to handle the UTR-after-UTR exception above
        $prev_type = $type;
    }

    ## We don't care about any other types; every row is echoed as-is
    print;
}

## Finishing touches: flush the region still pending at end of input
if ($prev_type ne 'dumped') {
    print exon_line($prev_seq, $prev_beg, $prev_end,
                    $prev_ori, $prev_parent);
    $prev_type = 'dumped';
}

warn "OK\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment