Last active
November 19, 2015 16:10
-
-
Save nickpettican/a3d67481f09449cde173 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use diagnostics; | |
use strict; | |
use warnings; | |
use Getopt::Long; | |
# Bin size and m/z range parameters.
# Command-line options are parsed first; any parameter that was not supplied
# that way is then asked for interactively.
my $data;        # path of the input table
my $massmin;     # lower m/z limit of the binned range
my $massmax;     # upper m/z limit of the binned range
my $binwidth;    # width of one bin, in m/z units
#my $AAfilterContains = <>;
my @sorted;      # bin index of every m/z value that falls inside the range
my $mzcolmatch;

handleopts();

$data     = _prompt_for("Enter file name:")               unless defined $data;
$massmin  = _prompt_for("Enter m/z minimum range limit:") unless defined $massmin;
$massmax  = _prompt_for("Enter m/z maximum range limit:") unless defined $massmax;
$binwidth = _prompt_for("Enter binwidth:")                unless defined $binwidth;

# Print a prompt on its own line and return the user's answer.
#
# Reads from STDIN explicitly: the bare <> operator would try to open every
# remaining command-line argument as an input file.  Trailing whitespace
# (including the newline) is removed so the answer can be used directly as a
# file name or a number.  Dies if STDIN is exhausted.
sub _prompt_for {
    my ($message) = @_;
    print "$message\n";
    my $answer = <STDIN>;
    die "Error, no input received for prompt '$message'\n"
        unless defined $answer;
    $answer =~ s/\s+\z//;
    return $answer;
}
# Parse the command-line options into the file-level parameter variables.
#
# Recognised options: --bin-width=INT, --file=PATH, --mass-min=INT and
# --mass-max=INT.  Takes no arguments; the return value is not meaningful.
# If Getopt::Long rejects the command line, an error is reported and the
# program terminates with a non-zero exit status.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-width=i" => \$binwidth,
        "file=s"      => \$data,
        "mass-min=i"  => \$massmin,
        "mass-max=i"  => \$massmax,
        # "aa-filter-contains=s" => \$AAfilterContains
    );
    return if $parsed_ok;

    print STDERR "Error, failed to obtain information from user\n";
    print "Help_page\n";
    exit(1);    # a failed parse must not look like success to the caller
}
# Read the input table line by line and let processMZValue() collect the
# m/z value of every row.
#
# The 2-arg form of open() silently strips surrounding whitespace from the
# file name; the 3-arg form used here does not, so the name is trimmed
# explicitly (it may carry a trailing newline when it was typed at a prompt).
(my $input_path = $data) =~ s/\A\s+|\s+\z//g;
open(my $input_fh, '<', $input_path)
    or die "Error, could not open input file '$input_path': $!\n";
my @MZvalues;    # filled by processMZValue()
while (my $line = <$input_fh>) {
    # if ($AAfilterContains =~ /[a-zA-z]/) {
    #     processAA($line);
    # } else {
    processMZValue($line);
    # }
}
close($input_fh);
# Extract the m/z value from one line of the input table and record it.
#
# Parameters:
#   $line - one raw line of the table; fields are separated by whitespace.
# Returns:
#   the captured value, after appending it to @MZvalues, or an empty list
#   when the line is undefined or blank, has no third field, or the third
#   field contains no digits.
#
# NOTE(review): the pattern captures only the first run of digits, so any
# fractional part of the field is dropped -- confirm this is intended.
sub processMZValue {
    my ($line) = @_;

    # Leave with return rather than next: next inside a sub only works by
    # reaching into whatever loop the caller happens to be running.
    return unless defined $line;
    return if $line =~ /^$/;

    my @column = split( /\s+/, $line );
    my $mzcol  = $column[2];
    return unless defined $mzcol;    # row has fewer than three fields

    if ( $mzcol =~ /(\d+)/ ) {
        my $mzcolmatch = $1;
        push @MZvalues, $mzcolmatch;
        return $mzcolmatch;
    }
    return;
}
#Failed attempt at screening for peptides with certain AAs; | |
#sub processAA { | |
# my $line = shift; | |
# chomp($AAfilterContains); | |
# next if ($line =~ /^$/); | |
# my @AAcolumn = split( /\t|\n/, $line); | |
# my $AAfilcolumn = $AAcolumn[2]; | |
# if ( $AAfilcolumn =~ /$AAfilterContains/) { | |
# next if ($line =~ /^$/); | |
# my @column = split( /\t|\n/, $line ); | |
# my $mzcol = $column[3]; | |
# my $mzcolmatch = $1; | |
# print "$mzcolmatch\n" or die "Nope"; | |
# push @MZvalues, $mzcolmatch; | |
# } | |
#} | |
print "$data was the file used.\n The binwidth is $binwidth.\n";
generateBins();

# Count how many collected m/z values fall into each bin and print the
# histogram, one "BIN <low>-<high>: <count>" line per bin.
#
# Reads the file-level $massmin, $massmax, $binwidth and @MZvalues.  The bin
# index of every value inside [$massmin, $massmax] is also appended to the
# file-level @sorted.  Bin i covers $massmin + i*$binwidth up to (but not
# including) $massmin + (i+1)*$binwidth.  A value exactly equal to $massmax
# passes the range filter, but its bin index is only printed when the range
# is not an exact multiple of the bin width.
#
# Dies if $binwidth is not a positive number, which would otherwise cause a
# division by zero or a loop that never produces a bin.
sub generateBins {
    die "Error, binwidth must be a positive number\n"
        unless defined $binwidth and $binwidth > 0;

    my $num_bins = ($massmax - $massmin) / $binwidth;

    # Tally each value directly into its bin instead of rescanning the whole
    # list once per bin.
    my @finalCount;
    for my $MZvalue (@MZvalues) {
        next unless $MZvalue >= $massmin and $MZvalue <= $massmax;
        my $bin = int( ($MZvalue - $massmin) / $binwidth );
        push @sorted, $bin;
        $finalCount[$bin]++;
    }

    for (my $i = 0; $i < $num_bins; $i++) {
        my $count  = $finalCount[$i] || 0;    # bins without hits print 0
        my $printA = $massmin + $binwidth * $i;
        my $printB = $massmin + $binwidth * ($i + 1);
        print "BIN ${printA}-${printB}: $count\n";
    }
}
# Failed attempt at sliding window approach | |
#sub addToCurrentBin { | |
# push @bin_id ($MZvalue); | |
# push @{sorted{}{}{}}, $MZvalue; | |
#} | |
#sub incrementBinBoundaries { | |
# if ($binmax > $massmax) { exit; } | |
# else { | |
# $binmin = $binmin + $binwidth; | |
# $binmax = $binmin + $binwidth; | |
# $bin_id++; | |
# $sorted{}->{"bin_id}++; | |
# } | |
#} | |
#foreach my $sortedBins (sort keys %sorted) { | |
# print scalar($sorted{}{}); | |
exit; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
package ProNTo::IonStats; | |
use diagnostics; | |
use strict; | |
use warnings; | |
use Getopt::Long; | |
use Exporter; | |
our @ISA = qw(Exporter); | |
# ################################# | |
# TBC : which variables to export ? | |
# ################################# | |
our @EXPORT_OK = qw(); | |
# Default input file, bin width and m/z range.  handleopts() overrides these
# from the command line.
#
# NOTE(review): an earlier BEGIN block also set $data to
# "../../data/mztable.tsv", but the run-time initialiser below always
# overwrote it, so "table.tsv" was the value actually in effect.  Confirm
# which default is wanted.
my $data     = "table.tsv";
my $binwidth = 10;
my $massmin  = 1000;
my $massmax  = 1500;
my (%sorted,
    $AAfilterContains,
    @mzdata,
    @usage
    );
my $usage;    # error text; a scalar $usage is assigned in handleopts()
# Run main() only when this file is executed directly; when it is loaded by
# other code, caller() is true and nothing runs.
unless (caller) {
    __PACKAGE__->main(@ARGV);
}

# Entry point: parse the options, report the binning parameters, read the
# input table into @mzdata and announce completion.  Arguments are ignored.
sub main {
    handleopts();

    my $summary = "Bin width is $binwidth m/z; range is $massmin to $massmax m/z units.\n";
    print $summary;

    @mzdata = reader();
    print "\nAll done\n";
}
# Parse the command-line options into the file-level parameter variables.
#
# Recognised options: --bin-max=INT, --file=PATH, --bin-min=INT and
# --bin-width=INT.  Takes no arguments; the return value is not meaningful.
# Dies with an explanatory message if Getopt::Long rejects the command line.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-max=i"   => \$massmax,
        "file=s"      => \$data,
        "bin-min=i"   => \$massmin,
        "bin-width=i" => \$binwidth,
    );
    return if $parsed_ok;

    print "Help_page\n";    # to be replaced by generic help subroutine
    # Declared here: under "use strict" an undeclared $usage is a
    # compile-time error.
    my $usage = "Error, failed to obtain sufficient arguments to calculate ion statistics.";
    die $usage;
}
# Read the table named by the file-level $data and return the list of m/z
# values collected from it.
#
# Every line is handed to processMZValue(), which appends to @MZvalues.  That
# array is not declared at file scope, so it is treated here as the package
# variable of that name; it is emptied first so repeated calls do not
# accumulate.  Dies if the file cannot be opened.
sub reader {
    our @MZvalues;
    @MZvalues = ();

    open(my $input_fh, '<', $data)
        or die "Error, could not open input file '$data': $!\n";
    while (my $line = <$input_fh>) {
        # if ($AAfilterContains =~ /[a-zA-z]/) {
        #     processAA($line);
        # } else {
        processMZValue($line);
        # }
    }
    close($input_fh);

    return @MZvalues;
}
# Extract the m/z value from one line of the input table, print it and
# record it.
#
# Parameters:
#   $line - one raw line of the table; fields are separated by tabs.
# Returns:
#   the captured value, after printing it and appending it to the package
#   variable @MZvalues, or an empty list when the line is undefined or
#   blank, has no fourth field, or the fourth field contains no digits.
#
# NOTE(review): the pattern captures only the first run of digits, so any
# fractional part of the field is dropped -- confirm this is intended.
sub processMZValue {
    my ($line) = @_;
    our @MZvalues;    # not declared at file scope, so named explicitly here

    # Leave with return rather than next: next inside a sub only works by
    # reaching into whatever loop the caller happens to be running.
    return unless defined $line;
    return if $line =~ /^$/;

    my @column = split( /\t|\n/, $line );
    my $mzcol  = $column[3];
    return unless defined $mzcol;    # row has fewer than four fields

    if ( $mzcol =~ /(\d+)/ ) {
        my $mzcolmatch = $1;
        print "$mzcolmatch\n" or die "Nope";
        push @MZvalues, $mzcolmatch;
        return $mzcolmatch;
    }
    return;
}
#sub processAA { | |
# my $line = shift; | |
# chomp($AAfilterContains); | |
# next if ($line =~ /^$/); | |
# my @AAcolumn = split( /\t|\n/, $line ); | |
# my $AAfilcolumn = $AAcolumn[2]; | |
# if ( $AAfilcolumn =~ /$AAfilterContains/) { | |
# next if ($line =~ /^$/); | |
# my @column = split( /\t|\n/, $line ); | |
# my $mzcol = $column[3]; | |
# my $mzcolmatch = $1; | |
# push @MZvalues, $mzcolmatch; | |
# } | |
#} | |
# Group the collected m/z values into fixed-width bins and print how many
# values each non-empty bin holds.
#
# Reads the file-level $massmin, $massmax and $binwidth and the package
# variable @MZvalues; rebuilds the file-level %sorted as
# bin index => array ref of the values in that bin.  Bin i covers
# $massmin + i*$binwidth up to (but not including) $massmin + (i+1)*$binwidth,
# so each value belongs to exactly one bin.  Values outside
# [$massmin, $massmax) are ignored.
#
# The bin index is computed directly from the value.  This replaces the
# earlier sliding-window draft (addToCurrentBin / incrementBinBoundaries with
# a goto), which did not compile and needed the input to be sorted.
#
# NOTE(review): the draft did not show the intended output format; one
# "BIN <low>-<high>: <count>" line per non-empty bin is printed here.
#
# Dies if $binwidth is not a positive number.
sub generateBins {
    our @MZvalues;

    die "Error, bin width must be a positive number\n"
        unless defined $binwidth and $binwidth > 0;

    %sorted = ();
    for my $MZvalue (@MZvalues) {
        next if $MZvalue < $massmin or $MZvalue >= $massmax;
        my $bin_id = int( ($MZvalue - $massmin) / $binwidth );
        push @{ $sorted{$bin_id} }, $MZvalue;
    }

    # Numeric sort: the default sort would order bin 10 before bin 2.
    foreach my $bin_id (sort { $a <=> $b } keys %sorted) {
        my $binmin = $massmin + $bin_id * $binwidth;
        my $binmax = $binmin + $binwidth;
        my $count  = scalar @{ $sorted{$bin_id} };
        print "BIN ${binmin}-${binmax}: $count\n";
    }
    return;
}
# End of module evaluates to true | |
1; | |
__END__ | |
# End of file evaluates to false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use diagnostics; | |
use strict; | |
use warnings; | |
use Getopt::Long; | |
# Defining bin size and parameters.
# Command-line options are parsed first; each parameter that was not given
# that way is then read from STDIN, one line per value, in the order
# AA filter, bin max, bin min, bin size.
my $data = "table.tsv";
my $AAfilterContains;
my $binmax;
my $binmin;
my $binsize;
my @sorted;
my $mzcolmatch;

handleopts();

# STDIN is read explicitly: the bare <> operator would try to open every
# remaining command-line argument as an input file.
for my $pending ( \$AAfilterContains, \$binmax, \$binmin, \$binsize ) {
    next if defined ${$pending};
    my $answer = <STDIN>;
    last unless defined $answer;    # input exhausted; the rest stay undefined
    $answer =~ s/\s+\z//;           # drop the newline so the value is usable as is
    ${$pending} = $answer;
}
# Parse the command-line options into the file-level parameter variables.
#
# Recognised options: --bin-width=INT, --file=PATH, --bin-value=INT,
# --bin-size=INT and --aa-filter-contains=STRING.  Takes no arguments; the
# return value is not meaningful.  If Getopt::Long rejects the command line,
# an error is reported and the program terminates with a non-zero status.
#
# NOTE(review): --bin-width is stored in $binmax and --bin-value in $binmin;
# the option names do not obviously match the variables -- confirm the
# intended mapping.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-width=i"          => \$binmax,
        "file=s"               => \$data,
        "bin-value=i"          => \$binmin,
        "bin-size=i"           => \$binsize,
        "aa-filter-contains=s" => \$AAfilterContains,
    );
    return if $parsed_ok;

    print STDERR "Error, failed to obtain information from user\n";
    print "Help_page\n";
    exit(1);    # a failed parse must not look like success to the caller
}
# Read the input table line by line and let processMZValue() collect the
# m/z value of every row.  A lexical handle and the 3-arg form of open() are
# used so the file name can never be interpreted as an open mode, and the
# handle is closed once the table has been read.
open(my $input_fh, '<', $data)
    or die "Error, could not open input file '$data': $!\n";
my @MZvalues;    # filled by processMZValue()
while (my $line = <$input_fh>) {
    # if ($AAfilterContains =~ /[a-zA-z]/) {
    #     processAA($line);
    # } else {
    processMZValue($line);
    # }
}
close($input_fh);
# Extract the m/z value from one line of the input table, print it and
# record it.
#
# Parameters:
#   $line - one raw line of the table; fields are separated by tabs.
# Returns:
#   the captured value, after printing it and appending it to @MZvalues, or
#   an empty list when the line is undefined or blank, has no fourth field,
#   or the fourth field contains no digits.
#
# NOTE(review): the pattern captures only the first run of digits, so any
# fractional part of the field is dropped -- confirm this is intended.
sub processMZValue {
    my ($line) = @_;

    # Leave with return rather than next: next inside a sub only works by
    # reaching into whatever loop the caller happens to be running.
    return unless defined $line;
    return if $line =~ /^$/;

    my @column = split( /\t|\n/, $line );
    my $mzcol  = $column[3];
    return unless defined $mzcol;    # row has fewer than four fields

    if ( $mzcol =~ /(\d+)/ ) {
        my $mzcolmatch = $1;
        print "$mzcolmatch\n" or die "Nope";
        push @MZvalues, $mzcolmatch;
        return $mzcolmatch;
    }
    return;
}
#sub processAA { | |
# my $line = shift; | |
# chomp($AAfilterContains); | |
# next if ($line =~ /^$/); | |
# my @AAcolumn = split( /\t|\n/, $line); | |
# my $AAfilcolumn = $AAcolumn[2]; | |
# if ( $AAfilcolumn =~ /$AAfilterContains/) { | |
# next if ($line =~ /^$/); | |
# my @column = split( /\t|\n/, $line ); | |
# my $mzcol = $column[3]; | |
# my $mzcolmatch = $1; | |
# print "$mzcolmatch\n" or die "Nope"; | |
# push @MZvalues, $mzcolmatch; | |
# } | |
#} | |
generateBins();

# Convert every collected m/z value inside the configured range into its bin
# index, append the indices to the file-level @sorted and print them
# separated by spaces.
#
# NOTE(review): the draft referred to $massmin, $massmax and $binwidth, which
# this file never declares.  The file-level $binmin, $binmax and $binsize are
# assumed to be the intended range limits and bin width -- confirm.
#
# Dies if the bin size is not a positive number, which would otherwise cause
# a division by zero.
sub generateBins {
    my ($range_min, $range_max, $width) = ($binmin, $binmax, $binsize);

    die "Error, bin size must be a positive number\n"
        unless defined $width and $width > 0;

    for my $MZvalue (@MZvalues) {
        next unless $MZvalue >= $range_min and $MZvalue <= $range_max;
        push @sorted, int( ($MZvalue - $range_min) / $width );
    }
    print "@sorted";
}
#sub addToCurrentBin {
# push @bin_id ($MZvalue);
# push @{sorted{}{}{}}, $MZvalue;
#}
#sub incrementBinBoundaries { | |
# if ($binmax > $massmax) { exit; } | |
# else { | |
# $binmin = $binmin + $binwidth; | |
# $binmax = $binmin + $binwidth; | |
# $bin_id++; | |
# $sorted{}->{"bin_id}++; | |
# } | |
#} | |
#foreach my $sortedBins (sort keys %sorted) { | |
# print scalar($sorted{}{}); | |
exit; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment