Skip to content

Instantly share code, notes, and snippets.

@standage
Created July 8, 2013 17:43
Show Gist options
  • Select an option

  • Save standage/5950872 to your computer and use it in GitHub Desktop.

Select an option

Save standage/5950872 to your computer and use it in GitHub Desktop.
Convert alignments in GeneSeqer (or GenomeThreader) output to GFF3 format.
#!/usr/bin/env perl
use strict;
my %alignments;
my @exon_scores;
my $align_score;
while(my $line = <STDIN>)
{
if($line =~ m/ Exon +\d+.+; score: (.+)/)
{
push(@exon_scores, $1);
}
elsif($line =~ m/MATCH\s+(\S+)\s+(\S+)\s+(\S+)/)
{
$align_score = $3;
}
elsif($line =~ m/(^PGS_(.+)([+-])_(.+)[+-])\s+\((.+)\)/)
{
my $alignid = $1;
my $gstrand = $3;
my $alignment =
{
"gseqid" => $2,
"gstrand" => $3,
"aseqid" => $4,
"score" => $align_score,
};
my @structure = split(/,/, $5);
@structure = reverse @structure if($gstrand eq "-");
@structure = map {
my @coords = split(/\s+/);
@coords = reverse @coords if($gstrand eq "-");
\@coords
} @structure;
if(scalar(@structure) != scalar(@exon_scores))
{
printf(STDERR "warning: %d exons, %d exon scores: %s\n",
scalar(@structure), scalar(@exon_scores), $alignid);
}
$alignment->{"structure"} = \@structure;
my $nexons = scalar(@structure);
$alignment->{"start"} = $structure[0]->[0];
$alignment->{"end"} = $structure[$nexons-1]->[1];
my @escores = @exon_scores;
@escores = reverse @escores if($gstrand eq "-");
$alignment->{"exon_scores"} = \@escores;
$alignments{ $alignid } = [] unless($alignments{ $alignid });
push(@{ $alignments{$alignid} }, $alignment);
@exon_scores = ();
$align_score = "";
}
elsif($line =~ m/^\$ date finished/ or $line =~ m/^\.{3} finished at/)
{
@exon_scores = ();
$align_score = "";
}
}
foreach my $alignid( sort(keys(%alignments)) )
{
my $align_list = $alignments{ $alignid };
my $nalignments = scalar(@$align_list);
my $count = 0;
foreach my $alignment(@$align_list)
{
$count++;
my @gff3 = ($alignment->{"gseqid"}, "GeneSeqer", "expressed_sequence_match",
$alignment->{"start"}, $alignment->{"end"},
$alignment->{"score"}, $alignment->{"gstrand"}, ".",
"ID=$alignid");
$gff3[8] .= ".$count" if($nalignments > 1);
print join("\t", @gff3), "\n";
foreach my $exon(@{$alignment->{"structure"}})
{
my $exon_score = shift(@{$alignment->{"exon_scores"}});
my @egff3 = ($alignment->{"gseqid"}, "GeneSeqer", "match_part",
$exon->[0], $exon->[1], $exon_score, $alignment->{"gstrand"},
".", "Parent=$alignid");
$egff3[8] .= ".$count" if($nalignments > 1);
print join("\t", @egff3), "\n";
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment