Created
September 4, 2012 01:50
-
-
Save swuecho/3615677 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/perl | |
| use strict; | |
| use warnings; | |
| # the specification is https://dl.dropbox.com/u/40857893/cv/spTag_database_creation.docx | |
| # the input file is https://dl.dropbox.com/u/40857893/cv/Input.fasta | |
| #use Bio::SeqIO; fail to install the bioperl, so analysis the seq manually | |
| my %mass_table=( | |
| A => 71.03711, | |
| C => 156.10111, | |
| D => 114.04293, | |
| E => 115.02694, | |
| F => 103.00919, | |
| G => 129.04259, | |
| H => 128.05858, | |
| I => 57.02146, | |
| K => 137.05891, | |
| L => 113.08406, | |
| M => 113.08406, | |
| N => 128.09496, | |
| P => 131.04049, | |
| Q => 147.06841, | |
| R => 97.05276, | |
| S => 87.03203, | |
| T => 101.04768, | |
| V => 186.07931, | |
| W => 163.06333, | |
| Y => 99.06841, | |
| ); | |
| #digest seq->peps | |
| sub digest { | |
| my $seq = shift; | |
| $seq =~ s/R/R;/g; | |
| $seq =~ s/K/K;/g; | |
| split(/;/,$seq); | |
| } | |
| #mass pep->mass | |
| sub mass { | |
| my $pep = shift; | |
| my $mass_pep = 0; | |
| foreach my $amino (split //, $pep) { | |
| $mass_pep = $mass_pep + $mass_table{$amino}; | |
| } | |
| return $mass_pep; | |
| } | |
| ################################################## | |
| # seqs process begins here. | |
| # "input.fasta" should be the file to be processed | |
| ################################################## | |
| open INPUT, "<", "input.fasta" or die $!; | |
| open OUT1, ">", "outfile1.txt" or die $!; | |
| my @mass_freq = (); | |
| local $/ = '>'; | |
| while( <INPUT> ) { | |
| chomp; | |
| my ($desc,$seq) = split("\n",$_,2); | |
| if ($seq){ | |
| $seq =~ s[\n][]g; | |
| my @peps = digest($seq); | |
| foreach my $pep (sort {length($a) <=> length($b)} @peps) | |
| { | |
| my $mass_temp = mass($pep); | |
| if ($mass_temp >= 400 && $mass_temp <= 6000){ | |
| push(@mass_freq, $mass_temp); | |
| printf OUT1 ">Mass= %.2f | %s\n\n %s \n\n", $mass_temp,$desc,$pep; | |
| } | |
| } | |
| } | |
| } | |
| close INPUT; | |
| close OUT1; | |
| open OUT2, ">", "outfile2.txt" or die $!; | |
| # print freq table; | |
| my %freq; | |
| foreach my $mas (@mass_freq) { | |
| $freq{$mas}++; | |
| } | |
| print OUT2 "Mass Frequency\n"; | |
| foreach my $mas (sort {$a <=> $b} keys %freq) { | |
| printf OUT2 "%.2f %d\n",$mas,$freq{$mas}; | |
| } | |
| close OUT2; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment