Last active
November 14, 2015 19:20
-
-
Save nickpettican/55cc6b05fa9ea186a0b3 to your computer and use it in GitHub Desktop.
Programming Project
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use warnings; | |
my $filename = 'dummy.pepmasses'; | |
my $from = 0; | |
my $to = 0; | |
print "What range of information do you want to know ?\n"; | |
{ | |
my $str = <STDIN>; | |
chomp $str; | |
($from, $to) = split /\.\./, $str; | |
} | |
# open file | |
my $ifile_fh; | |
open $ifile_fh, '<', $filename or | |
die "Cannot open file $filename: $!\n"; | |
# skip header | |
readline $ifile_fh; | |
my $prepnumber = 0; | |
my $summass = 0; | |
while (defined (my $line = <$ifile_fh>) ) { | |
# read each line in file | |
chomp $line; | |
my $mz_value = (split /\s+/, $line)[2]; | |
if ($mz_value >= $from and $mz_value <= $to) { | |
# print matched line | |
print "$line\n"; | |
++$prepnumber; | |
$summass = $mz_value+$summass; | |
} | |
} | |
print "prepnumber=$prepnumber\n" if $prepnumber; | |
print "average m/z number = $summass/$prepnumber\n"; | |
close $ifile_fh; | |
exit; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# | |
BEGIN{ | |
use File::Basename; | |
push @INC, | |
use Getopt::Long | |
# | |
my $binwidth; | |
my $bintarget; | |
GetOptions ("binwidth" => \$binwidth, | |
"bintarget" => \$bintarget); | |
if (GetOptions("binwidth" => \$binwidth, | |
"bintarget" => \$bintarget) { | |
print "OK";} | |
exit; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# | |
use strict; | |
use warnings; | |
use Statistics::Descriptive::LogScale; | |
my $stat = Statistics::Descriptive::LogScale->new ( | |
base => 1.01, | |
linear_width => 0.001, | |
); | |
while (<>) { | |
$stat->add_data($_) for /(-?\d+(?:\.\d*)?)/g; | |
}; | |
# add more data.... | |
printf " minimal value: %f\n", $stat->min; | |
printf "%3uth percentile: %f\n", $_*10, $stat->percentile($_*10) | |
for 1..10; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# SUMMARY | |
# | |
# This is a script that demonstrates the very basic abilities | |
# of Statistics::Secriptive::LogScale. | |
# | |
# It reads numbers from STDIN or input files | |
# and adds them to statistical data pool | |
# then prints a summary with mean, median, percentiles etc. | |
use strict; | |
use warnings; | |
# We would to have some extra modules, but that's optional. | |
my $can_size = eval { require Devel::Size; 1; }; | |
my $can_getopt = eval { require Getopt::Long; 1; }; | |
# Always prefer local version of our module | |
use FindBin qw($Bin); | |
use lib "$Bin/../lib"; | |
use Statistics::Descriptive::LogScale; | |
# These are our storage parameters | |
my $base; | |
my $linear; | |
# Parse options, if absolutely needed | |
my $want_options = grep { qr/^-/ } @ARGV; | |
if ( $want_options ) { | |
if (grep { $_ eq '--help' } @ARGV) { | |
print "Usage: $0 [--base <1+small o> --precision <nnn>]\n"; | |
print "Read numbers from STDIN, output stat summary\n"; | |
print "NOTE that specifying options requires Getopt::Long"; | |
exit 1; | |
}; | |
$can_getopt or die "Options given, but Getopt::Long not detected!"; | |
Getopt::Long->import; | |
GetOptions ( | |
'base=s' => \$base, | |
'precision=s' => \$linear, | |
); | |
}; | |
# HERE WE GO | |
# Initialize statistical storage | |
my $stat = Statistics::Descriptive::LogScale->new( | |
base => $base, linear_width => $linear); | |
# Read input | |
# We want to catch scientific notation as well. | |
my $re_num = qr/(?:[-+]?(?:\d+\.?\d*|\.\d+)(?:[Ee][-+]?\d+)?)/; | |
while (<>) { | |
$stat->add_data(/($re_num)/g); | |
}; | |
# Print the most basic statistics. These can be done precisely in O(1) memory. | |
# See Statistics::Descriptive. | |
printf "Count: %u\nAverage/std. deviation: %f +- %f\nRange: %f .. %f\n", | |
$stat->count, $stat->mean, $stat->standard_deviation, | |
$stat->min, $stat->max; | |
# These two can be done in O(1) as well... But nobody does. | |
printf "Skewness: %f; kurtosis: %f\n", | |
$stat->skewness, $stat->kurtosis; | |
# The following requires storing data in memory | |
# Trimmed mean is the average of data w/o outliers | |
# in this case, 25% lowest and 25% highest numbers were discarded | |
printf "Trimmed mean(0.25): %f\n", | |
$stat->trimmed_mean(0.25); | |
# Print percentiles. | |
# Xth percentile is the point below which X% or data lies. | |
foreach (0.5, 1, 5, 10, 25, 50, 75, 90, 95, 99, 99.5) { | |
my $x = $stat->percentile($_); | |
$x = "-inf" unless defined $x; | |
printf "%4.1f%%: %f\n", $_, $x; | |
}; | |
# Print how much memory we used (if possible) | |
if ($can_size) { | |
print "Memory usage: ".Devel::Size::total_size($stat)."\n"; | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# downloads Boston dataset from STATLIB | |
use LWP::Simple; | |
getstore( | |
"http://lib.stat.cmu.edu/datasets/boston", | |
"boston.raw" ); | |
# corrects for record spanning two lines | |
open( IN, "boston.raw" ); | |
open( OUT, ">boston.asc" ); | |
do { $line = <IN> } until $. == 22 | |
or eof; # Skips the first 22 lines of header | |
while ( $line = <IN> ) { | |
chomp $line; | |
$line .= <IN>; # joins two lines | |
print OUT $line; | |
} | |
close(OUT); | |
# sends data to R for regression analysis | |
open( RPROG, | |
"| c:/rw1081/bin/Rterm.exe --no-restore --no-save" | |
); | |
select(RPROG); | |
print <<’CODE’; | |
bost<-read.table("boston.asc",header=F) | |
names(bost)<- c( "CRIM", "ZN", "INDUS", | |
"CHAS", "NOX", "RM", | |
"AGE", "DIS", "RAD", | |
"TAX", "PTRAT", "B", | |
"LSTAT", "MEDV") | |
attach(bost) | |
boston.fit <- lm(log(MEDV) ~ CRIM + ZN + INDUS + | |
CHAS + I(NOX^2) + I(RM^2) + AGE + log(DIS) + | |
log(RAD) + TAX + PTRAT + B + log(LSTAT)) | |
sum <- summary(boston.fit)$coe[,1:2] | |
write.table(sum,"boston.out",quote = FALSE) | |
q() | |
CODE | |
close(RPROG); | |
# creates LaTeX table with regression results | |
open( TABLE, "boston.out" ); | |
open( TEX, ">table.tex" ); | |
$prec = 3; # sets number of decimals | |
$width = 9; # sets the width of the field | |
do { <TABLE> } until $. == 1 or eof; # Skips the first line of header | |
while (<TABLE>) { | |
chomp; | |
@line = split; | |
printf TEX "%11s & %${width}.${prec}g & %${width}.${prec}g\\\\ \n", $line[0], | |
$line[1], $line[2]; | |
} | |
close(TABLE); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use Getopt::Long; | |
print "Please enter criteria for bin width\n"; | |
my $data = "data_file"; | |
my $binwidth = <>; | |
print "Please enter criteria for bin target\n"; | |
my $bintarget = <>; | |
GetOptions ("length=i" => \$binwidth, | |
"file=s" => \$data, | |
"binlength" => \$bintarget) | |
or die("Error, could not run. Try again.\n"); | |
exit; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# Program to extract ion intensities from a .peptSpectra.xml file | |
# Copyright (C) 2005 Jacques Colinge | |
# This program is free software; you can redistribute it and/or | |
# modify it under the terms of the GNU General Public License | |
# as published by the Free Software Foundation; either version 2 | |
# of the License, or (at your option) any later version. | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, write to the Free Software | |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
# Contact: | |
# Prof. Jacques Colinge | |
# Upper Austria University of Applied Sciences at Hagenberg | |
# Hauptstrasse 117 | |
# A-4232 Hagenberg, Austria | |
# http://www.fhs-hagenberg.ac.at | |
Show 31 lines of Pod | |
BEGIN{ | |
use File::Basename; | |
push @INC, (dirname $0).'/../../lib'; | |
} | |
use strict; | |
use XML::Parser; | |
use Getopt::Long; | |
use InSilicoSpectro; | |
use InSilicoSpectro::InSilico::MassCalculator; | |
use InSilicoSpectro::InSilico::Peptide; | |
use InSilicoSpectro::Spectra::PeakDescriptor; | |
use InSilicoSpectro::Spectra::ExpSpectrum; | |
use InSilicoSpectro::InSilico::MSMSTheoSpectrum; | |
use InSilicoSpectro::InSilico::MSMSOutput; | |
my ($help, $peptFile, $imposedCharge, $relint, $rFormat); | |
my $fragTypes = 'a,b,b-NH3,b-H2O,b++,y,y-NH3,y-H2O,y++'; | |
my $tol = 600; | |
my $minTol = 0.2; | |
my $intSel = 'order'; | |
my $matchSel = 'closest'; | |
my $withPlots; | |
my $configFile; | |
if (!GetOptions('help' => \$help, | |
'h' => \$help, | |
'configfile=s' => \$configFile, | |
'pept=s' => \$peptFile, | |
'charge=i' => \$imposedCharge, | |
'tol=f' => \$tol, | |
'mintol=f' => \$minTol, | |
'intsel=s' => \$intSel, | |
'matchsel=s' => \$matchSel, | |
'withplots' => \$withPlots, | |
'rformat' => \$rFormat, | |
'frag=s' => \$fragTypes) || defined($help) || !defined($peptFile)){ | |
print STDERR "Usage: getIonIntensities.pl --pept=peptfile [options] | |
--configFile=string xml configuration file for Masscalculator.pm | |
--frag=string list of fragment types separated by comas, default is [$fragTypes] | |
--charge=int only look at peptides at this charge state, default is no imposed charge | |
--tol=float relative mass error tolerance in ppm, default [$tol] | |
--mintol=float absolute mass error tolerance in Da, default [$minTol] | |
--intsel=string select the intensity normalization (original|log|order|relative) | |
--matchsel=string select the match algorithm (closest|greedy|mostintense), the order is given by frag in case of greedy | |
-withplots generates individual plots for all the matches (png format) | |
-rformat formats the output for R | |
-help | |
-h\n"; | |
print "'$peptFile'\n"; | |
exit(0); | |
} | |
if($configFile){ | |
InSilicoSpectro::init($configFile); | |
} | |
else{ | |
InSilicoSpectro::init(); | |
} | |
my @fragTypes = split(/,/, $fragTypes); | |
my %intensStat; | |
my $nPlot = 0; | |
open(F, $peptFile); | |
my $parser = new XML::Parser(Style => 'Stream'); | |
$parser->parse(\*F); | |
close(F); | |
my $maxLen = 0; | |
foreach my $frag (keys(%intensStat)){ | |
if ((@{$intensStat{$frag}} > $maxLen)){ | |
$maxLen = scalar(@{$intensStat{$frag}}); | |
} | |
} | |
if (defined($rFormat)){ | |
foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){ | |
print "\"$frag\"\t"; | |
} | |
print "\n"; | |
for (my $i = 1; $i <= $maxLen; $i++){ | |
print "\"$i\""; | |
foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){ | |
if (defined($intensStat{$frag}[$i-1])){ | |
printf "\t%.3f", $intensStat{$frag}[$i-1]; | |
} | |
else{ | |
print "\tNA"; | |
} | |
} | |
print "\n"; | |
} | |
} | |
else{ | |
foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){ | |
print $frag; | |
foreach (@{$intensStat{$frag}}){ | |
print "\t$_"; | |
} | |
print "\n"; | |
} | |
} | |
# ------------------------------------------------------------------------ | |
# XML parsing | |
# ------------------------------------------------------------------------ | |
my ($curChar, $massIndex, $intensityIndex, $charge, $peptide, $modif, $peaks, $itemIndex); | |
sub Text | |
{ | |
$curChar .= $_; | |
} # Text | |
sub StartTag | |
{ | |
my ($p, $el) = @_; | |
if ($el eq 'ple:ItemOrder'){ | |
$itemIndex = 0; | |
} | |
elsif ($el eq 'ple:item'){ | |
if ($_{type} eq 'mass'){ | |
$massIndex = $itemIndex; | |
} | |
elsif (($_{type} eq 'intensity') || ($_{type} eq 'height')){ | |
$intensityIndex = $itemIndex; | |
} | |
$itemIndex++; | |
} | |
undef($curChar); | |
} # StartTag | |
sub EndTag | |
{ | |
my($p, $el)= @_; | |
if ($el eq 'idi:sequence'){ | |
$peptide = $curChar; | |
} | |
elsif ($el eq 'idi:modif'){ | |
$modif = $curChar; | |
} | |
elsif ($el eq 'idi:charge'){ | |
$charge = $curChar; | |
} | |
elsif ($el eq 'ple:peaks'){ | |
$peaks = $curChar; | |
} | |
elsif (($el eq 'idi:OneIdentification') && (!defined($imposedCharge) || ($charge == $imposedCharge))){ | |
# Builds the peak list | |
my @lines = split(/\n/, $peaks); | |
my (@peaks, $maxIntensity); | |
foreach my $line (@lines){ | |
$line =~ s/\r//g; | |
my @part = split(/\s+/, $line); | |
if ($part[$massIndex] > 0.0){ | |
push(@peaks, [$part[$massIndex], $part[$intensityIndex]]); | |
$maxIntensity = $part[$intensityIndex] if ($part[$intensityIndex] > $maxIntensity); | |
} | |
} | |
if (defined($maxIntensity)){ | |
# Matches with theoretical masses | |
my %spectrum; | |
if ($matchSel eq 'closest'){ | |
matchSpectrumClosest(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes); | |
} | |
elsif ($matchSel eq 'greedy'){ | |
matchSpectrumGreedy(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes, order=>\@fragTypes, tol=>$tol); | |
} | |
elsif ($matchSel eq 'mostintense'){ | |
matchSpectrumGreedy(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes, tol=>$tol); | |
} | |
else{ | |
CORE::die("Unknown match type [$matchSel]"); | |
} | |
if (defined($withPlots)){ | |
my $msms = new InSilicoSpectro::InSilico::MSMSOutput(spectrum=>\%spectrum, prec=>2, modifLvl=>1, expSpectrum=>\@peaks, intSel=>'order', tol=>$tol, minTol=>$minTol); | |
$msms->plotSpectrumMatch(fname=>"$peptide-$$-$nPlot", format=>'png', fontChoice=>'default:Large', changeColModifAA=>1, legend=>'right', plotIntern=>1); | |
$nPlot++; | |
} | |
# Normalizes intensities | |
my %normInt; | |
normalizeIntensities($intSel, \@peaks, \%normInt); | |
# Extracts statistics | |
my $len = length($peptide); | |
foreach my $frag (keys(%{$spectrum{mass}{term}})){ | |
for (my $i = 0; $i < @{$spectrum{ionType}{$frag}}; $i++){ | |
for (my $j = $i*$len; $j < ($i+1)*$len; $j++){ | |
if (defined($spectrum{mass}{term}{$frag}[$j]) && defined($spectrum{match}{term}{$frag}[$j])){ | |
my $theoMass = $spectrum{mass}{term}{$frag}[$j]; | |
my $expMass = $spectrum{match}{term}{$frag}[$j][0]; | |
if ((abs($theoMass-$expMass)/($theoMass+$expMass)*2.0e6 <= $tol) || (abs($theoMass-$expMass) <= $minTol)){ | |
push(@{$intensStat{$spectrum{ionType}{$frag}[$i]}}, $normInt{$spectrum{match}{term}{$frag}[$j][0]}); | |
} | |
} | |
} | |
} | |
} | |
foreach my $frag (keys(%{$spectrum{match}{intern}})){ | |
foreach my $aa (keys(%{$spectrum{match}{intern}{$frag}})){ | |
if (defined($spectrum{match}{intern}{$frag}{$aa})){ | |
my $theoMass = $spectrum{mass}{intern}{$frag}{$aa}; | |
my $expMass = $spectrum{match}{intern}{$frag}{$aa}[0]; | |
if ((abs($theoMass-$expMass)/($theoMass+$expMass)*2.0e6 <= $tol) || (abs($theoMass-$expMass) <= $minTol)){ | |
push(@{$intensStat{"$frag($aa)"}}, $normInt{$spectrum{match}{intern}{$frag}{$aa}[0]}); | |
} | |
} | |
} | |
} | |
} | |
} | |
} # EndTag |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# | |
use strict; | |
use Getopt::Long; | |
# Defining bin size and parameters | |
my $data = "table.tsv"; | |
my $AAfilter = "None"; | |
print "Before handling options, the AA filter is: $AAfilter\n"; | |
handleopts(); | |
print "After handling options, the AA filter is: $AAfilter\n"; | |
print "Please enter criteria for bin width\n"; | |
my $binwidth = <>; | |
print "Please enter criteria for bin length\n"; | |
my $binlength = <>; | |
sub handleopts { | |
if (!GetOptions ( | |
"binwidth=i" => \$binwidth, | |
"file=s" => \$data, | |
"binlength=i" => \$binlength, | |
"aa-filter=s" => \$AAfilter)){ | |
print STDERR "Error, failed to obtain information from user\n"; | |
print "Help_page\n"; | |
exit(0); | |
} | |
} | |
print "\n"; | |
# Opening the data file | |
open(IFILE, $data) or die "Error, could not open input file\n"; | |
my @lines = <IFILE>; | |
foreach my $line (@lines) { | |
processMzValue($line, $AAfilter); | |
} | |
# Reading the data | |
sub processMzValue { | |
# take the first parameter to the subroutine as a scalar variable, called line | |
my $line = shift; | |
my $AAfilter = shift; | |
if ($AAfilter ne "None") { | |
# For now this is not implemented | |
# Will filter for sequence column to generate stats | |
print "$AAfilter"; | |
} | |
next if ($line =~ /^$/); | |
# this error-proofs the read so that at every loop it will ignore the spaces | |
my @column = split( /\t|\n/, $line ); | |
# splits the column | |
my $mzcol = $column[2]; | |
my ($mzcolmatch) = $mzcol =~ m#(\d+)#; | |
if ( $mzcol =~ /(\d+)/ ) { | |
my $mzcolmatch = $1; | |
# capture m/z value for the current row in table | |
print "$mzcolmatch\n" or die "Nope"; | |
} | |
} | |
print "\n\nAll done\n"; | |
exit; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment