nickpettican · November 14, 2015 19:20
diff --git a/dejado.pl b/dejado.pl
 #!/usr/bin/perl -w
 use strict;
 use warnings;

 my $filename = 'dummy.pepmasses';
 my $from     = 0;
 my $to       = 0;
 print "What range of information do you want to know ?\n";
 {
    my $str = <STDIN>;
    chomp $str;
    ($from, $to) =  split /\.\./, $str;
 }

 # open file
 my $ifile_fh;
 open $ifile_fh, '<', $filename or
    die "Cannot open file $filename: $!\n";

 # skip header
 readline $ifile_fh;

 my $prepnumber = 0;
 my $summass = 0;
 while (defined (my $line = <$ifile_fh>) ) {
    # read each line in file
    chomp $line;
    my $mz_value = (split /\s+/, $line)[2];
    if ($mz_value >= $from and $mz_value <= $to) {
        # print matched line
        print "$line\n";
        ++$prepnumber;
                $summass = $mz_value+$summass;
    }
 }
 print "prepnumber=$prepnumber\n" if $prepnumber;
 print "average m/z number = $summass/$prepnumber\n";

 close $ifile_fh;
 exit;
diff --git a/Ion_Stats.pl b/Ion_Stats.pl
 #!/usr/bin/perl -w
 #
 BEGIN{
        use File::Basename;
        push @INC,
 use Getopt::Long
 #
 my $binwidth;
 my $bintarget;
 GetOptions ("binwidth" => \$binwidth,
        "bintarget" => \$bintarget);
 if (GetOptions("binwidth" => \$binwidth,
        "bintarget" => \$bintarget) {
                print "OK";}
 exit;
diff --git a/Ion_Stats02.pl b/Ion_Stats02.pl
 #!/usr/bin/perl -w
 #
 use strict;
 	use warnings;
    use Statistics::Descriptive::LogScale;

    my $stat = Statistics::Descriptive::LogScale->new (
        base => 1.01,
        linear_width => 0.001,
    );

 	while (<>) {
 		$stat->add_data($_) for /(-?\d+(?:\.\d*)?)/g;
 	};
    # add more data....

 	printf "   minimal value: %f\n", $stat->min;
    printf "%3uth percentile: %f\n", $_*10, $stat->percentile($_*10)
 		for 1..10;
diff --git a/Ion_Stats03.pl b/Ion_Stats03.pl
 #!/usr/bin/env perl

 # SUMMARY
 #
 # This is a script that demonstrates the very basic abilities
 # of Statistics::Secriptive::LogScale.
 #
 # It reads numbers from STDIN or input files
 # and adds them to statistical data pool
 # then prints a summary with mean, median, percentiles etc.

 use strict;
 use warnings;

 # We would to have some extra modules, but that's optional.
 my $can_size   = eval { require Devel::Size; 1; };
 my $can_getopt = eval { require Getopt::Long; 1; };

 # Always prefer local version of our module
 use FindBin qw($Bin);
 use lib "$Bin/../lib";
 use Statistics::Descriptive::LogScale;

 # These are our storage parameters
 my $base;
 my $linear;

 # Parse options, if absolutely needed
 my $want_options =  grep { qr/^-/ } @ARGV;

 if ( $want_options ) {
 	if (grep { $_ eq '--help' } @ARGV) {
 		print "Usage: $0 [--base <1+small o> --precision <nnn>]\n";
 		print "Read numbers from STDIN, output stat summary\n";
 		print "NOTE that specifying options requires Getopt::Long";
 		exit 1;
 	};
 	$can_getopt or die "Options given, but Getopt::Long not detected!";
 	Getopt::Long->import;
 	GetOptions (
 		'base=s' => \$base,
 		'precision=s' => \$linear,
 	);
 };

 # HERE WE GO
 # Initialize statistical storage
 my $stat = Statistics::Descriptive::LogScale->new(
 	base => $base, linear_width => $linear);

 # Read input
 # We want to catch scientific notation as well.
 my $re_num = qr/(?:[-+]?(?:\d+\.?\d*|\.\d+)(?:[Ee][-+]?\d+)?)/;
 while (<>) {
 	$stat->add_data(/($re_num)/g);
 };

 # Print the most basic statistics. These can be done precisely in O(1) memory.
 # See Statistics::Descriptive.
 printf "Count: %u\nAverage/std. deviation: %f +- %f\nRange: %f .. %f\n",
 	$stat->count, $stat->mean, $stat->standard_deviation,
 	$stat->min, $stat->max;

 # These two can be done in O(1) as well... But nobody does.
 printf "Skewness: %f; kurtosis: %f\n",
 	$stat->skewness, $stat->kurtosis;

 # The following requires storing data in memory

 # Trimmed mean is the average of data w/o outliers
 # in this case, 25% lowest and 25% highest numbers were discarded
 printf "Trimmed mean(0.25): %f\n",
 	$stat->trimmed_mean(0.25);

 # Print percentiles.
 # Xth percentile is the point below which X% or data lies.
 foreach (0.5, 1, 5, 10, 25, 50, 75, 90, 95, 99, 99.5) {
 	my $x = $stat->percentile($_);
 	$x = "-inf" unless defined $x;
 	printf "%4.1f%%: %f\n", $_, $x;
 };

 # Print how much memory we used (if possible)
 if ($can_size) {
 	print "Memory usage: ".Devel::Size::total_size($stat)."\n";
 };
diff --git a/Other_example.pl b/Other_example.pl
 # downloads Boston dataset from STATLIB
 use LWP::Simple;
 getstore(
 "http://lib.stat.cmu.edu/datasets/boston",
 "boston.raw" );
 # corrects for record spanning two lines
 open( IN, "boston.raw" );
 open( OUT, ">boston.asc" );
 do { $line = <IN> } until $. == 22
 or eof; # Skips the first 22 lines of header
 while ( $line = <IN> ) {
 chomp $line;
 $line .= <IN>; # joins two lines
 print OUT $line;
 }
 close(OUT);
 # sends data to R for regression analysis
 open( RPROG,
 "| c:/rw1081/bin/Rterm.exe --no-restore --no-save"
 );
 select(RPROG);
 print <<’CODE’;
 bost<-read.table("boston.asc",header=F)
 names(bost)<- c( "CRIM", "ZN", "INDUS",
 "CHAS", "NOX", "RM",
 "AGE", "DIS", "RAD",
 "TAX", "PTRAT", "B",
 "LSTAT", "MEDV")
 attach(bost)
 boston.fit <- lm(log(MEDV) ~ CRIM + ZN + INDUS +
 CHAS + I(NOX^2) + I(RM^2) + AGE + log(DIS) +
 log(RAD) + TAX + PTRAT + B + log(LSTAT))
 sum <- summary(boston.fit)$coe[,1:2]
 write.table(sum,"boston.out",quote = FALSE)
 q()
 CODE
 close(RPROG);
 # creates LaTeX table with regression results
 open( TABLE, "boston.out" );
 open( TEX, ">table.tex" );
 $prec = 3; # sets number of decimals
 $width = 9; # sets the width of the field
 do { <TABLE> } until $. == 1 or eof; # Skips the first line of header
 while (<TABLE>) {
 chomp;
 @line = split;
 printf TEX "%11s & %${width}.${prec}g & %${width}.${prec}g\\\\ \n", $line[0],
 $line[1], $line[2];
 }
 close(TABLE);
diff --git a/Project.pl b/Project.pl
 #!/usr/bin/perl -w
 use strict;
 use Getopt::Long;
 print "Please enter criteria for bin width\n";
 my $data = "data_file";
 my $binwidth = <>;
 print "Please enter criteria for bin target\n";
 my $bintarget = <>;
 GetOptions ("length=i" => \$binwidth,
        "file=s" => \$data,
        "binlength" => \$bintarget)
 or die("Error, could not run. Try again.\n");
 exit;
diff --git a/Template.pl b/Template.pl
 #!/usr/bin/perl
 
 # Program to extract ion intensities from a .peptSpectra.xml file
 # Copyright (C) 2005 Jacques Colinge
 
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 
 # Contact:
 #  Prof. Jacques Colinge
 #  Upper Austria University of Applied Sciences at Hagenberg
 #  Hauptstrasse 117
 #  A-4232 Hagenberg, Austria
 #  http://www.fhs-hagenberg.ac.at
 
 Show 31 lines of Pod
 
 BEGIN{
  use File::Basename;
  push @INC, (dirname $0).'/../../lib';
 }
 
 use strict;
 use XML::Parser;
 use Getopt::Long;
 use InSilicoSpectro;
 use InSilicoSpectro::InSilico::MassCalculator;
 use InSilicoSpectro::InSilico::Peptide;
 use InSilicoSpectro::Spectra::PeakDescriptor;
 use InSilicoSpectro::Spectra::ExpSpectrum;
 use InSilicoSpectro::InSilico::MSMSTheoSpectrum;
 use InSilicoSpectro::InSilico::MSMSOutput;
 
 my ($help, $peptFile, $imposedCharge, $relint, $rFormat);
 my $fragTypes = 'a,b,b-NH3,b-H2O,b++,y,y-NH3,y-H2O,y++';
 my $tol = 600;
 my $minTol = 0.2;
 my $intSel = 'order';
 my $matchSel = 'closest';
 my $withPlots;
 my $configFile;
 
 if (!GetOptions('help' => \$help,
                'h' => \$help,
                'configfile=s' => \$configFile,
                'pept=s' => \$peptFile,
                'charge=i' => \$imposedCharge,
                'tol=f' => \$tol,
                'mintol=f' => \$minTol,
                'intsel=s' => \$intSel,
                'matchsel=s' => \$matchSel,
                'withplots' => \$withPlots,
                'rformat' => \$rFormat,
                'frag=s' => \$fragTypes) || defined($help) || !defined($peptFile)){
  print STDERR "Usage: getIonIntensities.pl --pept=peptfile [options]
 --configFile=string xml configuration file for Masscalculator.pm
 --frag=string       list of fragment types separated by comas, default is [$fragTypes]
 --charge=int        only look at peptides at this charge state, default is no imposed charge
 --tol=float         relative mass error tolerance in ppm, default [$tol]
 --mintol=float      absolute mass error tolerance in Da, default [$minTol]
 --intsel=string     select the intensity normalization (original|log|order|relative)
 --matchsel=string   select the match algorithm (closest|greedy|mostintense), the order is given by frag in case of greedy
 -withplots          generates individual plots for all the matches (png format)
 -rformat            formats the output for R
 -help
 -h\n";
 print "'$peptFile'\n";
  exit(0);
 }
 
 if($configFile){
  InSilicoSpectro::init($configFile);
 }
 else{
  InSilicoSpectro::init();
 }
 
 
 my @fragTypes = split(/,/, $fragTypes);
 my %intensStat;
 my $nPlot = 0;
 open(F, $peptFile);
 my $parser = new XML::Parser(Style => 'Stream');
 $parser->parse(\*F);
 close(F);
 
 my $maxLen = 0;
 foreach my $frag (keys(%intensStat)){
  if ((@{$intensStat{$frag}} > $maxLen)){
    $maxLen = scalar(@{$intensStat{$frag}});
  }
 }
 if (defined($rFormat)){
  foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){
    print "\"$frag\"\t";
  }
  print "\n";
  for (my $i = 1; $i <= $maxLen; $i++){
    print "\"$i\"";
    foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){
      if (defined($intensStat{$frag}[$i-1])){
        printf "\t%.3f", $intensStat{$frag}[$i-1];
      }
      else{
        print "\tNA";
      }
    }
    print "\n";
  }
 }
 else{
  foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){
    print $frag;
    foreach (@{$intensStat{$frag}}){
      print "\t$_";
    }
    print "\n";
  }
 }
 
 # ------------------------------------------------------------------------
 # XML parsing
 # ------------------------------------------------------------------------
 
 my ($curChar, $massIndex, $intensityIndex, $charge, $peptide, $modif, $peaks, $itemIndex);
 
 sub Text
 {
  $curChar .= $_;
 
 } # Text
 
 
 sub StartTag
 {
  my ($p, $el) = @_;
 
  if ($el eq 'ple:ItemOrder'){
    $itemIndex = 0;
  }
  elsif ($el eq 'ple:item'){
    if ($_{type} eq 'mass'){
      $massIndex = $itemIndex;
    }
    elsif (($_{type} eq 'intensity') || ($_{type} eq 'height')){
      $intensityIndex = $itemIndex;
    }
    $itemIndex++;
  }
 
  undef($curChar);
 
 } # StartTag
 
 
 sub EndTag
 {
  my($p, $el)= @_;
  if ($el eq 'idi:sequence'){
    $peptide = $curChar;
  }
  elsif ($el eq 'idi:modif'){
    $modif = $curChar;
  }
  elsif ($el eq 'idi:charge'){
    $charge = $curChar;
  }
  elsif ($el eq 'ple:peaks'){
    $peaks = $curChar;
  }
  elsif (($el eq 'idi:OneIdentification') && (!defined($imposedCharge) || ($charge == $imposedCharge))){
 
    # Builds the peak list
    my @lines = split(/\n/, $peaks);
    my (@peaks, $maxIntensity);
    foreach my $line (@lines){
      $line =~ s/\r//g;
      my @part = split(/\s+/, $line);
      if ($part[$massIndex] > 0.0){
        push(@peaks, [$part[$massIndex], $part[$intensityIndex]]);
        $maxIntensity = $part[$intensityIndex] if ($part[$intensityIndex] > $maxIntensity);
      }
    }
 
    if (defined($maxIntensity)){
      # Matches with theoretical masses
      my %spectrum;
      if ($matchSel eq 'closest'){
        matchSpectrumClosest(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes);
      }
      elsif ($matchSel eq 'greedy'){
        matchSpectrumGreedy(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes, order=>\@fragTypes, tol=>$tol);
      }
      elsif ($matchSel eq 'mostintense'){
        matchSpectrumGreedy(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes, tol=>$tol);
      }
      else{
        CORE::die("Unknown match type [$matchSel]");
      }
 
      if (defined($withPlots)){
        my $msms = new InSilicoSpectro::InSilico::MSMSOutput(spectrum=>\%spectrum, prec=>2, modifLvl=>1, expSpectrum=>\@peaks, intSel=>'order', tol=>$tol, minTol=>$minTol);
        $msms->plotSpectrumMatch(fname=>"$peptide-$$-$nPlot", format=>'png', fontChoice=>'default:Large', changeColModifAA=>1, legend=>'right', plotIntern=>1);
        $nPlot++;
      }
 
      # Normalizes intensities
      my %normInt;
      normalizeIntensities($intSel, \@peaks, \%normInt);
 
      # Extracts statistics
      my $len = length($peptide);
      foreach my $frag (keys(%{$spectrum{mass}{term}})){
        for (my $i = 0; $i < @{$spectrum{ionType}{$frag}}; $i++){
          for (my $j = $i*$len; $j < ($i+1)*$len; $j++){
            if (defined($spectrum{mass}{term}{$frag}[$j]) && defined($spectrum{match}{term}{$frag}[$j])){
              my $theoMass = $spectrum{mass}{term}{$frag}[$j];
              my $expMass = $spectrum{match}{term}{$frag}[$j][0];
              if ((abs($theoMass-$expMass)/($theoMass+$expMass)*2.0e6 <= $tol) || (abs($theoMass-$expMass) <= $minTol)){
                push(@{$intensStat{$spectrum{ionType}{$frag}[$i]}}, $normInt{$spectrum{match}{term}{$frag}[$j][0]});
              }
            }
          }
        }
      }
      foreach my $frag (keys(%{$spectrum{match}{intern}})){
        foreach my $aa (keys(%{$spectrum{match}{intern}{$frag}})){
          if (defined($spectrum{match}{intern}{$frag}{$aa})){
            my $theoMass = $spectrum{mass}{intern}{$frag}{$aa};
            my $expMass = $spectrum{match}{intern}{$frag}{$aa}[0];
            if ((abs($theoMass-$expMass)/($theoMass+$expMass)*2.0e6 <= $tol) || (abs($theoMass-$expMass) <= $minTol)){
              push(@{$intensStat{"$frag($aa)"}}, $normInt{$spectrum{match}{intern}{$frag}{$aa}[0]});
            }
          }
        }
      }
    }
  }
 
 } # EndTag
diff --git a/version02.pl b/version02.pl
 #!/usr/bin/perl -w
 #

 use strict;
 use Getopt::Long;

 # Defining bin size and parameters

 my $data =  "table.tsv";
 my $AAfilter = "None";
 print "Before handling options, the AA filter is: $AAfilter\n";
 handleopts();
 print "After handling options, the AA filter is: $AAfilter\n";

 print "Please enter criteria for bin width\n";
 my $binwidth = <>;
 print "Please enter criteria for bin length\n";
 my $binlength = <>;

 sub handleopts {
        if (!GetOptions (
                "binwidth=i" => \$binwidth,
                "file=s" => \$data,
                "binlength=i" => \$binlength,
                "aa-filter=s" => \$AAfilter)){
                print STDERR "Error, failed to obtain information from user\n";
                print "Help_page\n";
                exit(0);
        }
 }

 print "\n";

 # Opening the data file

 open(IFILE, $data) or die "Error, could not open input file\n";
 my @lines = <IFILE>;
 foreach my $line (@lines) {
        processMzValue($line, $AAfilter);
 }

 # Reading the data

 sub processMzValue {
        # take the first parameter to the subroutine as a scalar variable, called line
        my $line = shift;
        my $AAfilter = shift;
        if ($AAfilter ne "None") {
                # For now this is not implemented
                # Will filter for sequence column to generate stats

                print "$AAfilter";
        }
        next if ($line =~ /^$/);
        # this error-proofs the read so that at every loop it will ignore the spaces
      
        my @column = split( /\t|\n/, $line );
        # splits the column

        my $mzcol = $column[2];
        my ($mzcolmatch) = $mzcol =~ m#(\d+)#;
        if ( $mzcol =~ /(\d+)/ ) {
                my $mzcolmatch = $1;
                # capture m/z value for the current row in table

                print "$mzcolmatch\n" or die "Nope";
        }
 }

 print "\n\nAll done\n";
 exit;
	#!/usr/bin/perl -w
	use strict;
	use warnings;

	my $filename = 'dummy.pepmasses';
	my $from = 0;
	my $to = 0;
	print "What range of information do you want to know ?\n";
	{
	my $str = <STDIN>;
	chomp $str;
	($from, $to) = split /\.\./, $str;
	}

	# open file
	my $ifile_fh;
	open $ifile_fh, '<', $filename or
	die "Cannot open file $filename: $!\n";

	# skip header
	readline $ifile_fh;

	my $prepnumber = 0;
	my $summass = 0;
	while (defined (my $line = <$ifile_fh>) ) {
	# read each line in file
	chomp $line;
	my $mz_value = (split /\s+/, $line)[2];
	if ($mz_value >= $from and $mz_value <= $to) {
	# print matched line
	print "$line\n";
	++$prepnumber;
	$summass = $mz_value+$summass;
	}
	}
	print "prepnumber=$prepnumber\n" if $prepnumber;
	print "average m/z number = $summass/$prepnumber\n";

	close $ifile_fh;
	exit;
	#!/usr/bin/perl -w
	#
	BEGIN{
	use File::Basename;
	push @INC,
	use Getopt::Long
	#
	my $binwidth;
	my $bintarget;
	GetOptions ("binwidth" => \$binwidth,
	"bintarget" => \$bintarget);
	if (GetOptions("binwidth" => \$binwidth,
	"bintarget" => \$bintarget) {
	print "OK";}
	exit;
	#!/usr/bin/env perl

	# SUMMARY
	#
	# This is a script that demonstrates the very basic abilities
	# of Statistics::Secriptive::LogScale.
	#
	# It reads numbers from STDIN or input files
	# and adds them to statistical data pool
	# then prints a summary with mean, median, percentiles etc.

	use strict;
	use warnings;

	# We would to have some extra modules, but that's optional.
	my $can_size = eval { require Devel::Size; 1; };
	my $can_getopt = eval { require Getopt::Long; 1; };

	# Always prefer local version of our module
	use FindBin qw($Bin);
	use lib "$Bin/../lib";
	use Statistics::Descriptive::LogScale;

	# These are our storage parameters
	my $base;
	my $linear;

	# Parse options, if absolutely needed
	my $want_options = grep { qr/^-/ } @ARGV;

	if ( $want_options ) {
	if (grep { $_ eq '--help' } @ARGV) {
	print "Usage: $0 [--base <1+small o> --precision <nnn>]\n";
	print "Read numbers from STDIN, output stat summary\n";
	print "NOTE that specifying options requires Getopt::Long";
	exit 1;
	};
	$can_getopt or die "Options given, but Getopt::Long not detected!";
	Getopt::Long->import;
	GetOptions (
	'base=s' => \$base,
	'precision=s' => \$linear,
	);
	};

	# HERE WE GO
	# Initialize statistical storage
	my $stat = Statistics::Descriptive::LogScale->new(
	base => $base, linear_width => $linear);

	# Read input
	# We want to catch scientific notation as well.
	my $re_num = qr/(?:[-+]?(?:\d+\.?\d*\|\.\d+)(?:[Ee][-+]?\d+)?)/;
	while (<>) {
	$stat->add_data(/($re_num)/g);
	};

	# Print the most basic statistics. These can be done precisely in O(1) memory.
	# See Statistics::Descriptive.
	printf "Count: %u\nAverage/std. deviation: %f +- %f\nRange: %f .. %f\n",
	$stat->count, $stat->mean, $stat->standard_deviation,
	$stat->min, $stat->max;

	# These two can be done in O(1) as well... But nobody does.
	printf "Skewness: %f; kurtosis: %f\n",
	$stat->skewness, $stat->kurtosis;

	# The following requires storing data in memory

	# Trimmed mean is the average of data w/o outliers
	# in this case, 25% lowest and 25% highest numbers were discarded
	printf "Trimmed mean(0.25): %f\n",
	$stat->trimmed_mean(0.25);

	# Print percentiles.
	# Xth percentile is the point below which X% or data lies.
	foreach (0.5, 1, 5, 10, 25, 50, 75, 90, 95, 99, 99.5) {
	my $x = $stat->percentile($_);
	$x = "-inf" unless defined $x;
	printf "%4.1f%%: %f\n", $_, $x;
	};

	# Print how much memory we used (if possible)
	if ($can_size) {
	print "Memory usage: ".Devel::Size::total_size($stat)."\n";
	};
	# downloads Boston dataset from STATLIB
	use LWP::Simple;
	getstore(
	"http://lib.stat.cmu.edu/datasets/boston",
	"boston.raw" );
	# corrects for record spanning two lines
	open( IN, "boston.raw" );
	open( OUT, ">boston.asc" );
	do { $line = <IN> } until $. == 22
	or eof; # Skips the first 22 lines of header
	while ( $line = <IN> ) {
	chomp $line;
	$line .= <IN>; # joins two lines
	print OUT $line;
	}
	close(OUT);
	# sends data to R for regression analysis
	open( RPROG,
	"\| c:/rw1081/bin/Rterm.exe --no-restore --no-save"
	);
	select(RPROG);
	print <<’CODE’;
	bost<-read.table("boston.asc",header=F)
	names(bost)<- c( "CRIM", "ZN", "INDUS",
	"CHAS", "NOX", "RM",
	"AGE", "DIS", "RAD",
	"TAX", "PTRAT", "B",
	"LSTAT", "MEDV")
	attach(bost)
	boston.fit <- lm(log(MEDV) ~ CRIM + ZN + INDUS +
	CHAS + I(NOX^2) + I(RM^2) + AGE + log(DIS) +
	log(RAD) + TAX + PTRAT + B + log(LSTAT))
	sum <- summary(boston.fit)$coe[,1:2]
	write.table(sum,"boston.out",quote = FALSE)
	q()
	CODE
	close(RPROG);
	# creates LaTeX table with regression results
	open( TABLE, "boston.out" );
	open( TEX, ">table.tex" );
	$prec = 3; # sets number of decimals
	$width = 9; # sets the width of the field
	do { <TABLE> } until $. == 1 or eof; # Skips the first line of header
	while (<TABLE>) {
	chomp;
	@line = split;
	printf TEX "%11s & %${width}.${prec}g & %${width}.${prec}g\\\\ \n", $line[0],
	$line[1], $line[2];
	}
	close(TABLE);
	#!/usr/bin/perl -w
	use strict;
	use Getopt::Long;
	print "Please enter criteria for bin width\n";
	my $data = "data_file";
	my $binwidth = <>;
	print "Please enter criteria for bin target\n";
	my $bintarget = <>;
	GetOptions ("length=i" => \$binwidth,
	"file=s" => \$data,
	"binlength" => \$bintarget)
	or die("Error, could not run. Try again.\n");
	exit;
	#!/usr/bin/perl

	# Program to extract ion intensities from a .peptSpectra.xml file
	# Copyright (C) 2005 Jacques Colinge

	# This program is free software; you can redistribute it and/or
	# modify it under the terms of the GNU General Public License
	# as published by the Free Software Foundation; either version 2
	# of the License, or (at your option) any later version.

	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.

	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write to the Free Software
	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

	# Contact:
	# Prof. Jacques Colinge
	# Upper Austria University of Applied Sciences at Hagenberg
	# Hauptstrasse 117
	# A-4232 Hagenberg, Austria
	# http://www.fhs-hagenberg.ac.at

	Show 31 lines of Pod

	BEGIN{
	use File::Basename;
	push @INC, (dirname $0).'/../../lib';
	}

	use strict;
	use XML::Parser;
	use Getopt::Long;
	use InSilicoSpectro;
	use InSilicoSpectro::InSilico::MassCalculator;
	use InSilicoSpectro::InSilico::Peptide;
	use InSilicoSpectro::Spectra::PeakDescriptor;
	use InSilicoSpectro::Spectra::ExpSpectrum;
	use InSilicoSpectro::InSilico::MSMSTheoSpectrum;
	use InSilicoSpectro::InSilico::MSMSOutput;

	my ($help, $peptFile, $imposedCharge, $relint, $rFormat);
	my $fragTypes = 'a,b,b-NH3,b-H2O,b++,y,y-NH3,y-H2O,y++';
	my $tol = 600;
	my $minTol = 0.2;
	my $intSel = 'order';
	my $matchSel = 'closest';
	my $withPlots;
	my $configFile;

	if (!GetOptions('help' => \$help,
	'h' => \$help,
	'configfile=s' => \$configFile,
	'pept=s' => \$peptFile,
	'charge=i' => \$imposedCharge,
	'tol=f' => \$tol,
	'mintol=f' => \$minTol,
	'intsel=s' => \$intSel,
	'matchsel=s' => \$matchSel,
	'withplots' => \$withPlots,
	'rformat' => \$rFormat,
	'frag=s' => \$fragTypes) \|\| defined($help) \|\| !defined($peptFile)){
	print STDERR "Usage: getIonIntensities.pl --pept=peptfile [options]
	--configFile=string xml configuration file for Masscalculator.pm
	--frag=string list of fragment types separated by comas, default is [$fragTypes]
	--charge=int only look at peptides at this charge state, default is no imposed charge
	--tol=float relative mass error tolerance in ppm, default [$tol]
	--mintol=float absolute mass error tolerance in Da, default [$minTol]
	--intsel=string select the intensity normalization (original\|log\|order\|relative)
	--matchsel=string select the match algorithm (closest\|greedy\|mostintense), the order is given by frag in case of greedy
	-withplots generates individual plots for all the matches (png format)
	-rformat formats the output for R
	-help
	-h\n";
	print "'$peptFile'\n";
	exit(0);
	}

	if($configFile){
	InSilicoSpectro::init($configFile);
	}
	else{
	InSilicoSpectro::init();
	}


	my @fragTypes = split(/,/, $fragTypes);
	my %intensStat;
	my $nPlot = 0;
	open(F, $peptFile);
	my $parser = new XML::Parser(Style => 'Stream');
	$parser->parse(\*F);
	close(F);

	my $maxLen = 0;
	foreach my $frag (keys(%intensStat)){
	if ((@{$intensStat{$frag}} > $maxLen)){
	$maxLen = scalar(@{$intensStat{$frag}});
	}
	}
	if (defined($rFormat)){
	foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){
	print "\"$frag\"\t";
	}
	print "\n";
	for (my $i = 1; $i <= $maxLen; $i++){
	print "\"$i\"";
	foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){
	if (defined($intensStat{$frag}[$i-1])){
	printf "\t%.3f", $intensStat{$frag}[$i-1];
	}
	else{
	print "\tNA";
	}
	}
	print "\n";
	}
	}
	else{
	foreach my $frag (sort InSilicoSpectro::InSilico::MSMSOutput::cmpFragTypes (keys(%intensStat))){
	print $frag;
	foreach (@{$intensStat{$frag}}){
	print "\t$_";
	}
	print "\n";
	}
	}

	# ------------------------------------------------------------------------
	# XML parsing
	# ------------------------------------------------------------------------

	my ($curChar, $massIndex, $intensityIndex, $charge, $peptide, $modif, $peaks, $itemIndex);

	sub Text
	{
	$curChar .= $_;

	} # Text


	sub StartTag
	{
	my ($p, $el) = @_;

	if ($el eq 'ple:ItemOrder'){
	$itemIndex = 0;
	}
	elsif ($el eq 'ple:item'){
	if ($_{type} eq 'mass'){
	$massIndex = $itemIndex;
	}
	elsif (($_{type} eq 'intensity') \|\| ($_{type} eq 'height')){
	$intensityIndex = $itemIndex;
	}
	$itemIndex++;
	}

	undef($curChar);

	} # StartTag


	sub EndTag
	{
	my($p, $el)= @_;
	if ($el eq 'idi:sequence'){
	$peptide = $curChar;
	}
	elsif ($el eq 'idi:modif'){
	$modif = $curChar;
	}
	elsif ($el eq 'idi:charge'){
	$charge = $curChar;
	}
	elsif ($el eq 'ple:peaks'){
	$peaks = $curChar;
	}
	elsif (($el eq 'idi:OneIdentification') && (!defined($imposedCharge) \|\| ($charge == $imposedCharge))){

	# Builds the peak list
	my @lines = split(/\n/, $peaks);
	my (@peaks, $maxIntensity);
	foreach my $line (@lines){
	$line =~ s/\r//g;
	my @part = split(/\s+/, $line);
	if ($part[$massIndex] > 0.0){
	push(@peaks, [$part[$massIndex], $part[$intensityIndex]]);
	$maxIntensity = $part[$intensityIndex] if ($part[$intensityIndex] > $maxIntensity);
	}
	}

	if (defined($maxIntensity)){
	# Matches with theoretical masses
	my %spectrum;
	if ($matchSel eq 'closest'){
	matchSpectrumClosest(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes);
	}
	elsif ($matchSel eq 'greedy'){
	matchSpectrumGreedy(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes, order=>\@fragTypes, tol=>$tol);
	}
	elsif ($matchSel eq 'mostintense'){
	matchSpectrumGreedy(pept=>$peptide, modif=>$modif, spectrum=>\%spectrum, expSpectrum=>\@peaks, fragTypes=>\@fragTypes, tol=>$tol);
	}
	else{
	CORE::die("Unknown match type [$matchSel]");
	}

	if (defined($withPlots)){
	my $msms = new InSilicoSpectro::InSilico::MSMSOutput(spectrum=>\%spectrum, prec=>2, modifLvl=>1, expSpectrum=>\@peaks, intSel=>'order', tol=>$tol, minTol=>$minTol);
	$msms->plotSpectrumMatch(fname=>"$peptide-$$-$nPlot", format=>'png', fontChoice=>'default:Large', changeColModifAA=>1, legend=>'right', plotIntern=>1);
	$nPlot++;
	}

	# Normalizes intensities
	my %normInt;
	normalizeIntensities($intSel, \@peaks, \%normInt);

	# Extracts statistics
	my $len = length($peptide);
	foreach my $frag (keys(%{$spectrum{mass}{term}})){
	for (my $i = 0; $i < @{$spectrum{ionType}{$frag}}; $i++){
	for (my $j = $i$len; $j < ($i+1)$len; $j++){
	if (defined($spectrum{mass}{term}{$frag}[$j]) && defined($spectrum{match}{term}{$frag}[$j])){
	my $theoMass = $spectrum{mass}{term}{$frag}[$j];
	my $expMass = $spectrum{match}{term}{$frag}[$j][0];
	if ((abs($theoMass-$expMass)/($theoMass+$expMass)*2.0e6 <= $tol) \|\| (abs($theoMass-$expMass) <= $minTol)){
	push(@{$intensStat{$spectrum{ionType}{$frag}[$i]}}, $normInt{$spectrum{match}{term}{$frag}[$j][0]});
	}
	}
	}
	}
	}
	foreach my $frag (keys(%{$spectrum{match}{intern}})){
	foreach my $aa (keys(%{$spectrum{match}{intern}{$frag}})){
	if (defined($spectrum{match}{intern}{$frag}{$aa})){
	my $theoMass = $spectrum{mass}{intern}{$frag}{$aa};
	my $expMass = $spectrum{match}{intern}{$frag}{$aa}[0];
	if ((abs($theoMass-$expMass)/($theoMass+$expMass)*2.0e6 <= $tol) \|\| (abs($theoMass-$expMass) <= $minTol)){
	push(@{$intensStat{"$frag($aa)"}}, $normInt{$spectrum{match}{intern}{$frag}{$aa}[0]});
	}
	}
	}
	}
	}
	}

	} # EndTag