Created
December 10, 2011 17:56
-
-
Save ship561/1455761 to your computer and use it in GitHub Desktop.
Find the longest ORF (open reading frame) in an amino-acid sequence.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl

# Demo driver: tabulate every ORF in a sample sequence, then report the
# longest one together with its length.
my $protein   = 'AAMAAT-ATAMAAAT-AT';    # sample protein sequence
my @orf_table = find_orfs($protein);     # per-start-position ORF length rows

my ($longest, $longest_aa) = longest_orf($protein, @orf_table);

print "protein = $longest_aa longest = $longest\n";
# find_orfs($protein)
#
# Scans an amino-acid string for open reading frames: each 'M' (start) is
# paired with the first '-' (stop) that follows it.
#
# Parameters:
#   $protein - the amino-acid sequence to scan (undef is treated as '').
#
# Returns:
#   A list of rows indexed by start position. For a start at index $i,
#   $rows[$i][$j] is 0 for every position $j scanned after the start that
#   is not a stop, and ($j - $i) at the first stop. Rows for positions
#   that are not 'M' are left undefined; a start with no later stop yields
#   a row containing only zeros.
#
# Side effects:
#   Prints "protein <sequence>" to the currently selected output handle.
sub find_orfs {
    my ($protein) = @_;
    $protein = '' unless defined $protein;

    print "protein $protein\n";

    my $len = length $protein;

    # Lexical on purpose: a package-global table would keep rows from
    # earlier calls and hand them back to later callers.
    my @locations;

    for my $start (0 .. $len - 1) {
        next unless substr($protein, $start, 1) eq 'M';    # start aa

        # Walk forward from the residue after the start until a stop.
        for my $pos ($start + 1 .. $len - 1) {
            if (substr($protein, $pos, 1) eq '-') {
                $locations[$start][$pos] = $pos - $start;  # length of orf found
                last;
            }
            $locations[$start][$pos] = 0;
        }
    }

    return @locations;
}
# longest_orf($protein, @locations)
#
# Finds the longest ORF recorded in a table of ORF lengths.
#
# Parameters:
#   $protein   - the amino-acid sequence the table was built from.
#   @locations - rows indexed by start position, as returned by find_orfs:
#                $locations[$i][$j] is the length of the ORF that starts
#                at $i and stops at $j (0 or undefined where none).
#
# Returns:
#   ($longest, $longest_aa) - the greatest length found and the substring
#   of $protein of that length beginning at its start position. Returns
#   (0, '') when no positive length is recorded. On ties the first ORF in
#   (start, stop) order wins.
sub longest_orf {
    my ($protein, @locations) = @_;
    $protein = '' unless defined $protein;

    my $longest    = 0;
    my $longest_aa = '';
    my $len        = length $protein;

    # Lexical loop variables so the caller's own counters are never
    # clobbered; only indices inside the sequence are considered.
    for my $start (0 .. $len - 1) {
        my $row = $locations[$start];
        next unless ref($row) eq 'ARRAY';    # no ORF starts here

        my $last_stop = $#{$row} < $len - 1 ? $#{$row} : $len - 1;
        for my $stop (0 .. $last_stop) {
            my $orf_len = $row->[$stop];
            next unless defined $orf_len;

            # Strict comparison keeps the earliest ORF among equals.
            if ($longest < $orf_len) {
                $longest    = $orf_len;                             # records length
                $longest_aa = substr($protein, $start, $longest);   # records aa
            }
        }
    }

    return ($longest, $longest_aa);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Updated to use functions instead of one giant script.