Skip to content

Instantly share code, notes, and snippets.

@nickpettican
Last active November 17, 2015 17:38
Show Gist options
  • Save nickpettican/e63b6acefc7df2e0f257 to your computer and use it in GitHub Desktop.
Save nickpettican/e63b6acefc7df2e0f257 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use diagnostics;
use strict;
use warnings;
use Getopt::Long;
# Defining bin size and parameters
my $data = "table.tsv";
my $AAfilterContains;
my $AAfilterTerm = "None";
my $binmax;
my $binmin;
my $binsize;
my @sorted;
# Parse the switches before touching <>: the diamond operator treats every
# entry still in @ARGV as an input file name and shifts it off, so reading
# first would swallow the switches before GetOptions() could see them.
handleopts();
# Any setting not supplied as a switch is read, one line each and in this
# fixed order, from the remaining file arguments or from standard input.
$AAfilterContains = <> unless defined $AAfilterContains;
$binmax           = <> unless defined $binmax;
$binmin           = <> unless defined $binmin;
$binsize          = <> unless defined $binsize;
# At end of input <> yields undef; fall back to an empty filter so pattern
# tests against it do not raise "uninitialized value" warnings.
$AAfilterContains = "" unless defined $AAfilterContains;
# Parse the command-line switches into the file-level option variables.
#
# Recognised switches: --file (string), --bin-max, --bin-min and --bin-size
# (integers), --aa-filter-contains and --aa-filter-term (strings).
# Takes no arguments; the return value carries no meaning.
# When GetOptions() reports a parse failure the messages are printed and the
# script exits with a non-zero status so that the calling shell can detect
# the failure.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-max=i"            => \$binmax,
        "file=s"               => \$data,
        "bin-min=i"            => \$binmin,
        "bin-size=i"           => \$binsize,
        "aa-filter-contains=s" => \$AAfilterContains,
        "aa-filter-term=s"     => \$AAfilterTerm,
    );
    if (!$parsed_ok) {
        print STDERR "Error, failed to obtain information from user\n";
        print "Help_page\n";
        exit(1);
    }
    return;
}
# Slurp the whole input table, then hand every row to the handler that
# matches the selected mode.
# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle is closed as soon as the rows are
# in memory.
open(my $ifile, '<', $data) or die "Error, could not open input file '$data': $!\n";
my @lines = <$ifile>;
close($ifile);
my @MZvalues;
my $AA;
my $line;
my %hash;
# The amino-acid filter counts as set when it holds at least one ASCII letter.
# The class must be [a-zA-Z]: an A-z range would also admit the punctuation
# characters that sit between 'Z' and 'a'.
my $filter_active = ( defined $AAfilterContains && $AAfilterContains =~ /[a-zA-Z]/ );
foreach my $line (@lines) {
    if ($filter_active) {
        processAA($line);
    }
    else {
        processMZValue($line);
    }
}
# Record the m/z value and the amino-acid entry found in one row of the table.
#
# Argument: $line - one raw, tab-separated line of the input file.
# Side effects: prints the captured m/z digits, appends them to @MZvalues and
# updates the per-entry tallies in %hash, keyed by the third column:
#   $hash{$AA}{AAnum}        number of rows seen for this entry
#   $hash{$AA}{mass}{mzcol}  list of m/z values seen for this entry
#   $hash{$AA}{mass}{aa}     the entry itself, repeated once per row
# Returns the captured m/z digits, or nothing when the row is blank, has
# fewer than four columns or has no digits in its fourth column.
sub processMZValue {
    my $line = shift;
    # Leave with return rather than next: next inside a sub only works by
    # unwinding into the caller's loop and raises an
    # "Exiting subroutine via next" warning.
    return if !defined $line || $line =~ /^$/;    # ignore blank rows
    my @column = split( /\t|\n/, $line );         # splits the column
    my $mzcol = $column[3];
    return unless defined $mzcol;                 # row too short
    # Only the first run of digits in the column is captured.
    return unless $mzcol =~ /(\d+)/;
    my $mzcolmatch = $1;
    # capture m/z value for the current row in table
    print "$mzcolmatch\n" or die "Nope";
    push @MZvalues, $mzcolmatch;
    my $AA = $column[2];
    $hash{$AA}{'AAnum'}++;
    push @{ $hash{$AA}{'mass'}{'mzcol'} }, $mzcolmatch;
    push @{ $hash{$AA}{'mass'}{'aa'} },    $AA;
    return $mzcolmatch;
}
# Report a table row whose third column matches the user's amino-acid filter.
#
# Argument: $line - one raw, tab-separated line of the input file.
# Reads the file-level $AAfilterContains, chomps it in place and interpolates
# it into the match as a regular expression.
# Prints "Here's one" for every matching row.
# Returns 1 for a match and nothing otherwise.
sub processAA {
    my $line = shift;
    chomp($AAfilterContains);
    # Leave with return rather than next: next inside a sub only works by
    # unwinding into the caller's loop and raises an
    # "Exiting subroutine via next" warning.
    return if !defined $line || $line =~ /^$/;    # ignore blank rows
    my @AAcolumn = split( /\t|\n/, $line );
    my $AAfilcolumn = $AAcolumn[2];
    return unless defined $AAfilcolumn;           # row too short
    return unless $AAfilcolumn =~ /$AAfilterContains/;
    print "Here's one\n";
    return 1;
}
# End of the script: stop here with a success status.
exit(0);
#!/usr/bin/perl
package ProNTo::IonStats;
use diagnostics;
use strict;
use warnings;
use Getopt::Long;
our @ISA = qw(Exporter);
# Defining bin size and parameters
our @EXPORT_OK = qw();
my $data = "table.tsv";
my $AAfilterContains;
my $AAfilterTerm = "None";
my $binmax;
my $binmin;
my $binsize;
my @sortedmzvals;
my $usage;
# Parse the switches before touching <>: the diamond operator treats every
# entry still in @ARGV as an input file name and shifts it off, so reading
# first would swallow the switches before GetOptions() could see them.
handleopts();
# Any setting not supplied as a switch is read, one line each and in this
# fixed order, from the remaining file arguments or from standard input.
$AAfilterContains = <> unless defined $AAfilterContains;
$binmax           = <> unless defined $binmax;
$binmin           = <> unless defined $binmin;
$binsize          = <> unless defined $binsize;
# At end of input <> yields undef; fall back to an empty filter so pattern
# tests against it do not raise "uninitialized value" warnings.
$AAfilterContains = "" unless defined $AAfilterContains;
# Parse the command-line switches into the file-level option variables.
#
# Recognised switches: --file (string), --bin-max, --bin-min and --bin-size
# (integers), --aa-filter-contains and --aa-filter-term (strings).
# Takes no arguments; the return value carries no meaning.
# When GetOptions() reports a parse failure the messages are printed and the
# script exits with a non-zero status so that the calling shell can detect
# the failure.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-max=i"            => \$binmax,
        "file=s"               => \$data,
        "bin-min=i"            => \$binmin,
        "bin-size=i"           => \$binsize,
        "aa-filter-contains=s" => \$AAfilterContains,
        "aa-filter-term=s"     => \$AAfilterTerm,
    );
    if (!$parsed_ok) {
        print STDERR "Error, failed to obtain information from user\n";
        print "Help_page\n";
        exit(1);
    }
    return;
}
# Slurp the whole input table, then hand every row to the handler that
# matches the selected mode.
# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle is closed as soon as the rows are
# in memory.
open(my $ifile, '<', $data) or die "Error, could not open input file '$data': $!\n";
my @lines = <$ifile>;
close($ifile);
my @MZvalues;
my %MZaavalues;
my $AA;
my $line;
my @array;
# The amino-acid filter counts as set when it holds at least one ASCII letter.
# The class must be [a-zA-Z]: an A-z range would also admit the punctuation
# characters that sit between 'Z' and 'a'.
my $filter_active = ( defined $AAfilterContains && $AAfilterContains =~ /[a-zA-Z]/ );
foreach my $line (@lines) {
    if ($filter_active) {
        processAA($line);
    }
    else {
        processMZValue($line);
    }
}
# Fills one row of the file-level @MZvalues table from the file-level @array
# (looks like unfinished work; the earlier single-value version is kept
# commented out at the bottom of the sub).
#
# Parameters are unpacked in the order ($array, $line).
# NOTE(review): the only call visible in this file passes a single argument,
# the row text, so $array receives the row and $line stays undefined --
# confirm the intended argument list before relying on this sub.
sub processMZValue {
my $array = shift;
my $line = shift;
# NOTE(review): `next` inside a sub leaves the sub and advances the caller's
# loop. An undefined $line is treated as an empty string by the match, so with
# the one-argument call above this line presumably always fires -- verify.
next if ($line =~ /^$/); # skips blank lines so that every pass of the loop ignores them
my @column = split( /\t|\n/, $line ); # splits the column
my $mzcol = $column[3];
# Copies elements 0 .. (number of input lines - 1) of @array into row
# $array-1 of @MZvalues, so $array is used as a 1-based row number here.
# NOTE(review): @array is only declared in the visible code, never filled.
for ( my $i = 0; $i < @lines; $i++) {$MZvalues[$array-1][$i] = $array[$i];}
# Earlier implementation, disabled: capture the digits of the m/z column and
# push them onto @MZvalues.
# if ( $mzcol =~ /(\d+)/ ) {
# my $mzcolmatch = $1;
# capture m/z value for the current row in table
# print "$mzcolmatch\n" or die "Nope";
# push @MZvalues, $mzcolmatch;
# }
}
# Collect the m/z values of table rows whose third column matches the user's
# amino-acid filter.
#
# Argument: $line - one raw, tab-separated line of the input file.
# Reads the file-level $AAfilterContains, chomps it in place and interpolates
# it into the match as a regular expression.
# Side effects: prints "Here's one" for every matching row and updates
# %MZaavalues, keyed by the matching third-column text:
#   $MZaavalues{$entry}{AA}  number of matching rows for this entry
#   $MZaavalues{$entry}{mz}  list of fourth-column values for this entry
# Returns 1 for a match and nothing otherwise.
sub processAA {
    my $line = shift;
    chomp($AAfilterContains);
    # Leave with return rather than next: next inside a sub only works by
    # unwinding into the caller's loop and raises an
    # "Exiting subroutine via next" warning.
    return if !defined $line || $line =~ /^$/;    # ignore blank rows
    my @AAcolumn = split( /\t|\n/, $line );
    my $AAfilcolumn = $AAcolumn[2];
    return unless defined $AAfilcolumn;           # row too short
    return unless $AAfilcolumn =~ /$AAfilterContains/;
    my $mz = $AAcolumn[3];
    # The counter and the value list need separate slots. Incrementing
    # ->{"AA"} turns the entry into a hash reference, so pushing onto the
    # entry itself as an array dies with "Not an ARRAY reference".
    $MZaavalues{$AAfilcolumn}{"AA"}++;
    push @{ $MZaavalues{$AAfilcolumn}{"mz"} }, $mz;
    print "Here's one\n";
    return 1;
}
# End of the script: stop here with a success status.
exit(0);
#!/usr/bin/perl
use diagnostics;
use strict;
use warnings;
use Getopt::Long;
# Defining bin size and parameters
my $data = "table.tsv";
my $AAfilterContains;
my $AAfilterTerm = "None";
my $binmax;
my $binmin;
my $binsize;
my @sorted;
# Parse the switches before touching <>: the diamond operator treats every
# entry still in @ARGV as an input file name and shifts it off, so reading
# first would swallow the switches before GetOptions() could see them.
handleopts();
# Any setting not supplied as a switch is read, one line each and in this
# fixed order, from the remaining file arguments or from standard input.
$AAfilterContains = <> unless defined $AAfilterContains;
$binmax           = <> unless defined $binmax;
$binmin           = <> unless defined $binmin;
$binsize          = <> unless defined $binsize;
# At end of input <> yields undef; fall back to an empty filter so pattern
# tests against it do not raise "uninitialized value" warnings.
$AAfilterContains = "" unless defined $AAfilterContains;
# Parse the command-line switches into the file-level option variables.
#
# Recognised switches: --file (string), --bin-width (stored in $binmax),
# --bin-value (stored in $binmin) and --bin-size (integers),
# --aa-filter-contains and --aa-filter-term (strings).
# Takes no arguments; the return value carries no meaning.
# When GetOptions() reports a parse failure the messages are printed and the
# script exits with a non-zero status so that the calling shell can detect
# the failure.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-width=i"          => \$binmax,
        "file=s"               => \$data,
        "bin-value=i"          => \$binmin,
        "bin-size=i"           => \$binsize,
        "aa-filter-contains=s" => \$AAfilterContains,
        "aa-filter-term=s"     => \$AAfilterTerm,
    );
    if (!$parsed_ok) {
        print STDERR "Error, failed to obtain information from user\n";
        print "Help_page\n";
        exit(1);
    }
    return;
}
# Slurp the whole input table, then hand every row to the handler that
# matches the selected mode.
# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle is closed as soon as the rows are
# in memory.
open(my $ifile, '<', $data) or die "Error, could not open input file '$data': $!\n";
my @lines = <$ifile>;
close($ifile);
my @MZvalues;
my $AA;
my $line;
# The amino-acid filter counts as set when it holds at least one ASCII letter.
# The class must be [a-zA-Z]: an A-z range would also admit the punctuation
# characters that sit between 'Z' and 'a'.
my $filter_active = ( defined $AAfilterContains && $AAfilterContains =~ /[a-zA-Z]/ );
foreach my $line (@lines) {
    if ($filter_active) {
        processAA($line);
    }
    else {
        processMZValue($line);
    }
}
# Capture the m/z value found in one row of the table.
#
# Argument: $line - one raw, tab-separated line of the input file.
# Side effects: prints the captured digits and appends them to @MZvalues.
# Returns the captured digits, or nothing when the row is blank, has fewer
# than four columns or has no digits in its fourth column.
sub processMZValue {
    my $line = shift;
    # Leave with return rather than next: next inside a sub only works by
    # unwinding into the caller's loop and raises an
    # "Exiting subroutine via next" warning.
    return if !defined $line || $line =~ /^$/;    # ignore blank rows
    my @column = split( /\t|\n/, $line );         # splits the column
    my $mzcol = $column[3];
    return unless defined $mzcol;                 # row too short
    # Only the first run of digits in the column is captured.
    return unless $mzcol =~ /(\d+)/;
    my $mzcolmatch = $1;
    # capture m/z value for the current row in table
    print "$mzcolmatch\n" or die "Nope";
    push @MZvalues, $mzcolmatch;
    return $mzcolmatch;
}
# Report a table row whose third column matches the user's amino-acid filter.
#
# Argument: $line - one raw, tab-separated line of the input file.
# Reads the file-level $AAfilterContains, chomps it in place and interpolates
# it into the match as a regular expression.
# Prints "Here's one" for every matching row.
# Returns 1 for a match and nothing otherwise.
sub processAA {
    my $line = shift;
    chomp($AAfilterContains);
    # Leave with return rather than next: next inside a sub only works by
    # unwinding into the caller's loop and raises an
    # "Exiting subroutine via next" warning.
    return if !defined $line || $line =~ /^$/;    # ignore blank rows
    my @AAcolumn = split( /\t|\n/, $line );
    my $AAfilcolumn = $AAcolumn[2];
    return unless defined $AAfilcolumn;           # row too short
    return unless $AAfilcolumn =~ /$AAfilterContains/;
    print "Here's one\n";
    return 1;
}
# End of the script: stop here with a success status.
exit(0);
#!/usr/bin/perl
use diagnostics;
use strict;
use warnings;
use Getopt::Long;
# Defining bin size and parameters
my $data = "table.tsv";
my $AAfilterContains;
my $AAfilterTerm = "None";
my $binmax;
my $binmin;
my $binsize;
my @sorted;
# Parse the switches before touching <>: the diamond operator treats every
# entry still in @ARGV as an input file name and shifts it off, so reading
# first would swallow the switches before GetOptions() could see them.
handleopts();
# Any setting not supplied as a switch is read, one line each and in this
# fixed order, from the remaining file arguments or from standard input.
$AAfilterContains = <> unless defined $AAfilterContains;
$binmax           = <> unless defined $binmax;
$binmin           = <> unless defined $binmin;
$binsize          = <> unless defined $binsize;
# At end of input <> yields undef; fall back to an empty filter so pattern
# tests against it do not raise "uninitialized value" warnings.
$AAfilterContains = "" unless defined $AAfilterContains;
# Parse the command-line switches into the file-level option variables.
#
# Recognised switches: --file (string), --bin-width (stored in $binmax),
# --bin-value (stored in $binmin) and --bin-size (integers),
# --aa-filter-contains and --aa-filter-term (strings).
# Takes no arguments; the return value carries no meaning.
# When GetOptions() reports a parse failure the messages are printed and the
# script exits with a non-zero status so that the calling shell can detect
# the failure.
sub handleopts {
    my $parsed_ok = GetOptions(
        "bin-width=i"          => \$binmax,
        "file=s"               => \$data,
        "bin-value=i"          => \$binmin,
        "bin-size=i"           => \$binsize,
        "aa-filter-contains=s" => \$AAfilterContains,
        "aa-filter-term=s"     => \$AAfilterTerm,
    );
    if (!$parsed_ok) {
        print STDERR "Error, failed to obtain information from user\n";
        print "Help_page\n";
        exit(1);
    }
    return;
}
# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle is closed as soon as the rows are
# in memory.
open(my $ifile, '<', $data) or die "Error, could not open input file '$data': $!\n";
# READING THE DATA
my @lines = <$ifile>;
close($ifile);
my @MZvalues;
# The amino-acid filter counts as set when it holds at least one ASCII letter.
# The class must be [a-zA-Z]: an A-z range would also admit the punctuation
# characters that sit between 'Z' and 'a'.
if ( defined $AAfilterContains && $AAfilterContains =~ /[a-zA-Z]/ ) {
    # Filter mode: report every row whose third column matches the filter,
    # which is interpolated as a regular expression.
    chomp($AAfilterContains);
    foreach my $row (@lines) {
        next if ( $row =~ /^$/ );    # ignore blank rows
        my @AAcolumn = split( /\t|\n/, $row );
        my $AAfilcolumn = $AAcolumn[2];
        next unless defined $AAfilcolumn;    # row too short
        if ( $AAfilcolumn =~ /$AAfilterContains/ ) {
            print "Here's one\n";
        }
    }
}
else {
    # m/z mode: let processMzValue() collect the value of every row.
    foreach my $line (@lines) {
        processMzValue( $line, $AAfilterTerm );
    }
}
# Capture the m/z value found in one row of the table and, when a terminal
# residue filter is active, pass that filter on to processAA().
#
# Arguments:
#   $line        - one raw, tab-separated line of the input file
#   $filter_term - optional terminal-residue filter; defaults to the
#                  file-level $AAfilterTerm when omitted
# Side effects: prints the captured digits and appends them to @MZvalues.
# Returns the captured digits, or nothing when the row is blank, has fewer
# than four columns or has no digits in its fourth column.
sub processMzValue {
    # take the first parameter to the subroutine as a scalar variable, called line
    my ( $line, $filter_term ) = @_;
    $filter_term = $AAfilterTerm unless defined $filter_term;
    if ( $filter_term ne "None" ) {
        # Specify the terminus and residues in the format:
        # --aa-filter-term N:RKH
        # to indicate N-terminal R, K, or H residues
        # or
        # --aa-filter-term C:WQD
        # to indicate C-terminal W, Q, or D residues
        #
        # The regular expression below has 2 match groups:
        # $1 is the terminus (C or N)
        # $2 is one or more residues
        # It must be applied to the filter term itself, and processAA() is
        # only called when it matched, so the groups are never stale or
        # undefined. A term in any other format is ignored.
        if ( $filter_term =~ /^(.):(.+)/ ) {
            processAA( $2, $1 );
        }
    }
    # Leave with return rather than next: next inside a sub only works by
    # unwinding into the caller's loop and raises an
    # "Exiting subroutine via next" warning.
    return if !defined $line || $line =~ /^$/;    # ignore blank rows
    my @column = split( /\t|\n/, $line );         # splits the column
    my $mzcol = $column[3];
    return unless defined $mzcol;                 # row too short
    # Only the first run of digits in the column is captured.
    return unless $mzcol =~ /(\d+)/;
    my $mzcolmatch = $1;
    # capture m/z value for the current row in table
    print "$mzcolmatch\n" or die "Nope";
    push @MZvalues, $mzcolmatch;
    return $mzcolmatch;
}
# Placeholder for the amino-acid filter: meant to filter on amino-acid
# composition / terminal residue in order to generate stats.
# Not implemented yet -- any arguments are ignored and nothing is returned.
sub processAA {
    return;
}
# Take the values in @MZvalues and order them ready for binning.
# The comparison has to be numeric: the default sort compares strings, which
# would place 1000 before 200.
@sorted = sort { $a <=> $b } @MZvalues;
print "\n@sorted\n";
print "\nAll done\n";
exit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment