Skip to content

Instantly share code, notes, and snippets.

@nickpettican
Last active November 19, 2015 16:10
Show Gist options
  • Save nickpettican/a3d67481f09449cde173 to your computer and use it in GitHub Desktop.
Save nickpettican/a3d67481f09449cde173 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use diagnostics;
use strict;
use warnings;
use Getopt::Long;
# Defining bin size and parameters
# The four run parameters are read interactively, one line each, from
# standard input. <STDIN> is used rather than the bare <> operator because
# <> treats every command-line argument (for example a --file=... option
# meant for handleopts) as the name of a file to read from, which made the
# prompts and the options impossible to combine.
print "Enter file name:\n";
my $data = <STDIN>;
#my $AAfilterContains = <>;
print "Enter m/z minimum range limit:\n";
my $massmin = <STDIN>;
print "Enter m/z maximum range limit:\n";
my $massmax = <STDIN>;
print "Enter binwidth:\n";
my $binwidth = <STDIN>;
# Drop the line terminator from each answer so the values print cleanly and
# the file name can be used as typed. An answer left undefined by
# end-of-input stays undefined so that a command-line option can still
# supply it.
for my $answer ( $data, $massmin, $massmax, $binwidth ) {
    chomp $answer if defined $answer;
}
# Bin index of every accepted m/z value; filled by generateBins.
my @sorted;
my $mzcolmatch;
handleopts();
# handleopts()
# Parses @ARGV with Getopt::Long and stores every recognised option in the
# matching file-level parameter, replacing the value entered at the prompt:
#   --file=PATH       -> $data
#   --mass-min=NUM    -> $massmin
#   --mass-max=NUM    -> $massmax
#   --bin-width=NUM   -> $binwidth
# The numeric options are declared "=f" so that fractional limits and widths
# are accepted; integers remain valid.
# Takes no arguments and returns nothing useful. When the options cannot be
# parsed it reports the problem and ends the program with a non-zero status.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-width=f" => \$binwidth,
        "file=s"      => \$data,
        "mass-min=f"  => \$massmin,
        "mass-max=f"  => \$massmax,
        # "aa-filter-contains=s" => \$AAfilterContains
    );
    return if $parsed_ok;

    print STDERR "Error, failed to obtain information from user\n";
    print "Help_page\n";
    # A failed parse is an error, so do not report success to the shell.
    exit(1);
}
# Read the whole input table and collect the m/z value of every row.
# The file is opened with the three-argument form of open and a lexical
# handle, so characters such as '>' or '|' in the supplied name are never
# interpreted as an open mode. Two-argument open silently discarded
# whitespace around the name (including the newline left by a prompt), so
# that trimming is done explicitly here.
( my $input_path = defined $data ? $data : '' ) =~ s/^\s+|\s+$//g;
open( my $input_fh, '<', $input_path )
    or die "Error, could not open input file '$input_path': $!\n";
my @lines = <$input_fh>;
close($input_fh);
# m/z values captured from the table; filled by processMZValue.
my @MZvalues;
my $AA;
my $line;
foreach my $row (@lines) {
    # if ($AAfilterContains =~ /[a-zA-z]/) {
    # my $MZvalue = processAA($line);
    # } else {
    processMZValue($row);
    # }
}
# processMZValue($line)
# Extracts the m/z value from one row of the input table and appends it to
# the file-level @MZvalues list.
#   $line - one raw line of the table. Fields are separated by whitespace
#           and the value is taken from the third field (index 2).
# Returns the captured value, or nothing when the row is blank, has fewer
# than three fields, or its third field contains no number.
sub processMZValue {
    my ($line) = @_;
    # Blank rows carry no data. "return" is used instead of "next" so the
    # sub no longer depends on being called from inside a loop.
    return if !defined $line or $line =~ /^$/;
    # splits the column
    my @column = split( /\s+/, $line );
    my $mzcol = $column[2];
    # Short rows have no third field; skip them rather than matching undef.
    return unless defined $mzcol;
    # Capture the first number in the field. The fractional part is kept so
    # that a value such as 1500.5 is not mistaken for 1500 when it is later
    # compared with the range limits.
    if ( $mzcol =~ /(\d+(?:\.\d+)?)/ ) {
        my $mzcolmatch = $1;
        # capture m/z value for the current row in table
        push @MZvalues, $mzcolmatch;
        return $mzcolmatch;
    }
    return;
}
#Failed attempt at screening for peptides with certain AAs;
#sub processAA {
# my $line = shift;
# chomp($AAfilterContains);
# next if ($line =~ /^$/);
# my @AAcolumn = split( /\t|\n/, $line);
# my $AAfilcolumn = $AAcolumn[2];
# if ( $AAfilcolumn =~ /$AAfilterContains/) {
# next if ($line =~ /^$/);
# my @column = split( /\t|\n/, $line );
# my $mzcol = $column[3];
# my $mzcolmatch = $1;
# print "$mzcolmatch\n" or die "Nope";
# push @MZvalues, $mzcolmatch;
# }
#}
# Echo the input file and bin width used for this run, then build and print
# the histogram of the collected m/z values.
printf "%s was the file used.\n The binwidth is %s.\n", $data, $binwidth;
generateBins();
# generateBins()
# Builds a histogram of the collected m/z values and prints one
# "BIN low-high: count" line per bin, in ascending order.
#
# Reads the file-level $massmin, $massmax, $binwidth and @MZvalues. Values
# outside the range $massmin..$massmax are ignored. Bin i starts at
# $massmin + i * $binwidth; a value equal to $massmax that would otherwise
# open a bin beyond the last one is counted in the last bin, so every
# accepted value appears in the report. The bin index of each accepted value
# is also appended to the file-level @sorted list.
#
# Takes no arguments and returns nothing useful. Dies when $binwidth is not
# a positive number.
sub generateBins {
    die "Error, bin width must be a positive number\n"
        unless defined $binwidth and $binwidth > 0;

    # Number of bins needed to cover the range; a partial bin at the upper
    # end counts as a whole bin.
    my $span      = $massmax - $massmin;
    my $bin_total = int( $span / $binwidth );
    $bin_total++ if $bin_total * $binwidth < $span;
    # An empty or inverted range has no bins to report.
    return if $bin_total < 1;

    # Single pass: compute each value's bin directly instead of rescanning
    # the whole list once per bin.
    my @finalCount = (0) x $bin_total;
    for my $MZvalue (@MZvalues) {
        next if $MZvalue < $massmin or $MZvalue > $massmax;
        my $bin = int( ( $MZvalue - $massmin ) / $binwidth );
        # Only a value sitting exactly on the upper limit can land here.
        $bin = $bin_total - 1 if $bin >= $bin_total;
        push @sorted, $bin;
        $finalCount[$bin]++;
    }

    for my $i ( 0 .. $bin_total - 1 ) {
        my $printA = $massmin + $binwidth * $i;
        my $printB = $massmin + $binwidth * ( $i + 1 );
        print "BIN $printA\-$printB: $finalCount[$i]\n";
    }
    return;
}
# Failed attempt at sliding window approach
#sub addToCurrentBin {
# push @bin_id ($MZvalue);
# push @{sorted{}{}{}}, $MZvalue;
#}
#sub incrementBinBoundaries {
# if ($binmax > $massmax) { exit; }
# else {
# $binmin = $binmin + $binwidth;
# $binmax = $binmin + $binwidth;
# $bin_id++;
# $sorted{}->{"bin_id}++;
# }
#}
#foreach my $sortedBins (sort keys %sorted) {
# print scalar($sorted{}{});
exit;
#!/usr/bin/perl
package ProNTo::IonStats;
use diagnostics;
use strict;
use warnings;
use Getopt::Long;
use Exporter;
our @ISA = qw(Exporter);
# #################################
# TBC : which variables to export ?
# #################################
our @EXPORT_OK = qw();

# ProNTo::IonStats reads the m/z column of a tab-separated table, groups the
# values into fixed-width bins and prints the number of values per bin.
# The file can be executed directly (main is invoked below when there is no
# caller) or loaded as a module.

# Defining bin size and parameters
# NOTE(review): the original also assigned "../../data/mztable.tsv" to $data
# inside a BEGIN block, but the run-time initialiser always replaced it, so
# "table.tsv" was the effective default - confirm which path is wanted.
my $data     = "table.tsv";
my $binwidth = 10;
my $massmin  = 1000;
my $massmax  = 1500;
my $AAfilterContains;    # only referenced by the disabled amino-acid filter
my @mzdata;              # values returned by reader()
my @MZvalues;            # m/z values collected by processMZValue()
my %sorted;              # bin id => array ref of the m/z values in that bin
# Boundaries and id of the bin currently being filled by generateBins().
my ( $binmin, $binmax, $bin_id );

__PACKAGE__->main(@ARGV) unless caller;

# main()
# Script entry point: applies the command-line options, reports the
# settings, reads the table and prints the per-bin counts.
# NOTE(review): the original main stopped after reading the table and never
# invoked generateBins; the call is added here so statistics are produced.
sub main {
    handleopts();
    print "Bin width is $binwidth m/z; range is $massmin to $massmax m/z units.\n";
    @mzdata = reader();
    generateBins();
    print "\nAll done\n";
}

# handleopts()
# Parses @ARGV with Getopt::Long:
#   --file=PATH       -> $data
#   --bin-min=NUM     -> $massmin
#   --bin-max=NUM     -> $massmax
#   --bin-width=NUM   -> $binwidth
# Numeric options are declared "=f" so fractional values are accepted;
# integers remain valid. Dies with a usage message when parsing fails.
sub handleopts {
    if (!GetOptions (
        "bin-max=f" => \$massmax,
        "file=s" => \$data,
        "bin-min=f" => \$massmin,
        "bin-width=f" => \$binwidth)){
        print "Help_page\n"; # to be replaced by generic help subroutine
        # The message is only needed here, so it is a local scalar (the
        # original used $usage while declaring only @usage).
        my $usage = "Error, failed to obtain sufficient arguments to calculate ion statistics.";
        die $usage;
    }
}

# reader()
# Reads the table named by $data line by line, hands every line to
# processMZValue and returns the list of m/z values collected from it.
# Dies when the file cannot be opened.
sub reader {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode.
    open( my $input_fh, '<', $data )
        or die "Error, could not open input file '$data': $!\n";
    @MZvalues = ();
    while ( my $line = <$input_fh> ) {
        # if ($AAfilterContains =~ /[a-zA-z]/) {
        # my $MZvalue = processAA($line);
        # } else {
        processMZValue($line);
        # }
    }
    close($input_fh);
    return @MZvalues;
}

# processMZValue($line)
# Extracts the m/z value from one tab-separated row (fourth field, index 3),
# prints it and appends it to @MZvalues.
# Returns the captured value, or nothing when the row is blank, has fewer
# than four fields, or the field contains no number.
sub processMZValue {
    my ($line) = @_;
    # Blank rows carry no data; "return" replaces the original "next" so the
    # sub does not depend on being called from inside a loop.
    return if !defined $line or $line =~ /^$/;
    # splits the column
    my @column = split( /\t|\n/, $line );
    my $mzcol = $column[3];
    return unless defined $mzcol;
    # Keep the fractional part so values are compared exactly with the
    # range limits.
    if ( $mzcol =~ /(\d+(?:\.\d+)?)/ ) {
        my $mzcolmatch = $1;
        # capture m/z value for the current row in table
        print "$mzcolmatch\n" or die "Nope";
        push @MZvalues, $mzcolmatch;
        return $mzcolmatch;
    }
    return;
}

#sub processAA {
# my $line = shift;
# chomp($AAfilterContains);
# next if ($line =~ /^$/);
# my @AAcolumn = split( /\t|\n/, $line );
# my $AAfilcolumn = $AAcolumn[2];
# if ( $AAfilcolumn =~ /$AAfilterContains/) {
# next if ($line =~ /^$/);
# my @column = split( /\t|\n/, $line );
# my $mzcol = $column[3];
# my $mzcolmatch = $1;
# push @MZvalues, $mzcolmatch;
# }
#}

# generateBins()
# Distributes the collected m/z values over consecutive bins of width
# $binwidth starting at $massmin, stores them in %sorted and prints one
# "BIN low-high: count" line for every bin that received a value.
# Values outside $massmin..$massmax are ignored; a value lying exactly on a
# bin's upper boundary stays in that bin, as in the original comparison.
# Dies when $binwidth is not a positive number.
sub generateBins {
    die "Error, bin width must be a positive number\n"
        unless defined $binwidth and $binwidth > 0;

    %sorted = ();
    # start loop with $binmin = $massmin and $binmax = $massmin + $binwidth
    ( $binmin, $binmax, $bin_id ) = ( $massmin, $massmin + $binwidth, 0 ); # start at bin 0

    # The window only ever moves upwards, so the values are visited in
    # ascending numeric order.
    VALUE:
    for my $MZvalue ( sort { $a <=> $b } @MZvalues ) {
        next VALUE if $MZvalue < $massmin;
        last VALUE if $MZvalue > $massmax;
        while ( $MZvalue > $binmax ) {
            incrementBinBoundaries() or last VALUE;
        }
        addToCurrentBin($MZvalue);
    }

    foreach my $sortedBins ( sort { $a <=> $b } keys %sorted ) {
        my $lower = $massmin + $binwidth * $sortedBins;
        my $upper = $lower + $binwidth;
        printf "BIN %s-%s: %d\n", $lower, $upper, scalar @{ $sorted{$sortedBins} };
    }
    return;
}

# addToCurrentBin($MZvalue)
# Appends the value to the list kept for the bin currently being filled.
sub addToCurrentBin {
    my ($MZvalue) = @_;
    push @{ $sorted{$bin_id} }, $MZvalue;
    return;
}

# incrementBinBoundaries()
# Moves the current bin window up by one bin width.
# Returns 1 after moving, or 0 when the window already reaches $massmax
# (the original ended the whole program at that point instead).
sub incrementBinBoundaries {
    return 0 if $binmax >= $massmax;
    $bin_id++;
    # Derived from the bin id rather than accumulated, so repeated additions
    # of a fractional width cannot drift.
    $binmin = $massmin + $binwidth * $bin_id;
    $binmax = $binmin + $binwidth;
    return 1;
}

# End of module evaluates to true
1;
__END__
# End of file evaluates to false
#!/usr/bin/perl
use diagnostics;
use strict;
use warnings;
use Getopt::Long;
# Defining bin size and parameters
my $data = "table.tsv";
# The remaining parameters are read, one per line and without a prompt,
# from standard input. <STDIN> is used rather than the bare <> operator
# because <> treats every command-line argument (for example a --file=...
# option meant for handleopts) as the name of a file to read from.
my $AAfilterContains = <STDIN>;
my $binmax = <STDIN>;
my $binmin = <STDIN>;
my $binsize = <STDIN>;
# Drop the line terminators; an answer left undefined by end-of-input stays
# undefined so that a command-line option can still supply it.
for my $answer ( $AAfilterContains, $binmax, $binmin, $binsize ) {
    chomp $answer if defined $answer;
}
# Bin index of every accepted m/z value; filled by generateBins.
my @sorted;
my $mzcolmatch;
handleopts();
# handleopts()
# Parses @ARGV with Getopt::Long and stores every recognised option in the
# matching file-level parameter, replacing the value read from input:
#   --file=PATH                -> $data
#   --bin-width=NUM            -> $binmax
#   --bin-value=NUM            -> $binmin
#   --bin-size=NUM             -> $binsize
#   --aa-filter-contains=TEXT  -> $AAfilterContains
# NOTE(review): --bin-width feeding $binmax looks unintended; the option
# names are kept as they were so existing invocations still work - confirm.
# The numeric options are declared "=f" so fractional values are accepted;
# integers remain valid.
# Takes no arguments and returns nothing useful. When the options cannot be
# parsed it reports the problem and ends the program with a non-zero status.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-width=f"          => \$binmax,
        "file=s"               => \$data,
        "bin-value=f"          => \$binmin,
        "bin-size=f"           => \$binsize,
        "aa-filter-contains=s" => \$AAfilterContains,
    );
    return if $parsed_ok;

    print STDERR "Error, failed to obtain information from user\n";
    print "Help_page\n";
    # A failed parse is an error, so do not report success to the shell.
    exit(1);
}
# Read the whole input table and collect the m/z value of every row.
# The file is opened with the three-argument form of open and a lexical
# handle, so characters such as '>' or '|' in the supplied name are never
# interpreted as an open mode. Two-argument open silently discarded
# whitespace around the name, so that trimming is done explicitly here.
( my $input_path = defined $data ? $data : '' ) =~ s/^\s+|\s+$//g;
open( my $input_fh, '<', $input_path )
    or die "Error, could not open input file '$input_path': $!\n";
my @lines = <$input_fh>;
close($input_fh);
# m/z values captured from the table; filled by processMZValue.
my @MZvalues;
my $AA;
my $line;
foreach my $row (@lines) {
    # if ($AAfilterContains =~ /[a-zA-z]/) {
    # my $MZvalue = processAA($line);
    # } else {
    processMZValue($row);
    # }
}
# processMZValue($line)
# Extracts the m/z value from one tab-separated row (fourth field, index 3),
# prints it and appends it to the file-level @MZvalues list.
#   $line - one raw line of the table.
# Returns the captured value, or nothing when the row is blank, has fewer
# than four fields, or the field contains no number.
sub processMZValue {
    my ($line) = @_;
    # Blank rows carry no data. "return" is used instead of "next" so the
    # sub no longer depends on being called from inside a loop.
    return if !defined $line or $line =~ /^$/;
    # splits the column
    my @column = split( /\t|\n/, $line );
    my $mzcol = $column[3];
    # Short rows have no fourth field; skip them rather than matching undef.
    return unless defined $mzcol;
    # Capture the first number in the field, keeping the fractional part so
    # values are compared exactly with the range limits.
    if ( $mzcol =~ /(\d+(?:\.\d+)?)/ ) {
        my $mzcolmatch = $1;
        # capture m/z value for the current row in table
        print "$mzcolmatch\n" or die "Nope";
        push @MZvalues, $mzcolmatch;
        return $mzcolmatch;
    }
    return;
}
#sub processAA {
# my $line = shift;
# chomp($AAfilterContains);
# next if ($line =~ /^$/);
# my @AAcolumn = split( /\t|\n/, $line);
# my $AAfilcolumn = $AAcolumn[2];
# if ( $AAfilcolumn =~ /$AAfilterContains/) {
# next if ($line =~ /^$/);
# my @column = split( /\t|\n/, $line );
# my $mzcol = $column[3];
# my $mzcolmatch = $1;
# print "$mzcolmatch\n" or die "Nope";
# push @MZvalues, $mzcolmatch;
# }
#}
generateBins();
# generateBins()
# Converts every collected m/z value that lies inside the requested range
# into a zero-based bin index, appends the indices to the file-level @sorted
# list and prints that list on one line, separated by spaces.
#
# NOTE(review): the original body used $massmin, $massmax and $binwidth,
# which this script never declares, so it could not compile. The file-level
# $binmin, $binmax and $binsize are used here as the range limits and bin
# width - this looks like the intent but should be confirmed.
#
# Takes no arguments and returns the result of the final print. Dies when
# the bin size is not a positive number.
sub generateBins {
    my ( $range_min, $range_max, $width ) = ( $binmin, $binmax, $binsize );
    die "Error, bin size must be a positive number\n"
        unless defined $width and $width > 0;

    for my $MZvalue (@MZvalues) {
        next if $MZvalue < $range_min or $MZvalue > $range_max;
        push @sorted, int( ( $MZvalue - $range_min ) / $width );
    }
    print "@sorted";
}
#sub addToCurrentBin {
# push @bin_id ($MZvalue);
# push @{sorted{}{}{}}, $MZvalue;
#}
#sub incrementBinBoundaries {
# if ($binmax > $massmax) { exit; }
# else {
# $binmin = $binmin + $binwidth;
# $binmax = $binmin + $binwidth;
# $bin_id++;
# $sorted{}->{"bin_id}++;
# }
#}
#foreach my $sortedBins (sort keys %sorted) {
# print scalar($sorted{}{});
exit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment