Created
October 10, 2012 20:21
-
-
Save sashaphanes/3868174 to your computer and use it in GitHub Desktop.
work in progress...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
#
# transposer.pl - transpose a delimited table (rows become columns and
# columns become rows).
#
# Input is read from the file named by -f (decompressed through zcat when the
# name ends in ".gz"), or from STDIN when -f is "stdin".  Each line is split
# on the separator selected by -s (tab by default, "C..." for comma, "S..."
# for a single space).  The transposed table is written to STDOUT and is
# always tab-separated, whatever the input separator was.
#
# Rows of unequal length are padded with empty cells so that every value
# stays in the column it came from.
use strict;
use warnings;
use Getopt::Std;    # replaces the Perl 4 "getopts.pl", removed from core in 5.16

my $executable = "transposer.pl";
$| = 1;             # unbuffered STDOUT

my $usage = qq{
###################################################################
usage: $executable -f fileName
used for transposing a table (rows to columns and columns to rows)
# -f the file name; will use stdin if specified as "stdin".
#
# -s column separator
# [default: tab]
# C: comma
# S: space
#
# -hH print this
example: >$executable -f testInputFile -s S > output
####################################################################
\n};

# Translate the value given to -s into the literal separator character.
# Anything starting with "C" (any case) selects a comma, anything starting
# with "S" a single space; every other value, including none, selects a tab.
sub separator_for {
    my ($choice) = @_;
    $choice = '' unless defined $choice;
    return ','  if $choice =~ /^C/i;
    return ' '  if $choice =~ /^S/i;
    return "\t";
}

# Decide where to read from, given the raw -f value.
# Returns the file name when it names a non-empty file, or the string "stdin"
# when the value is exactly "stdin" (any case).  Dies with the usage text
# otherwise.  The stdin test is anchored so that a mistyped file name that
# merely contains "stdin" is reported instead of silently waiting on STDIN.
sub resolve_input {
    my ($name) = @_;
    if (defined $name) {
        return $name   if -s $name;
        return "stdin" if $name =~ /\Astdin\z/i;
    }
    my $shown = defined $name ? $name : '';
    die("Error: must have a input file for -f $shown !!!\n$usage");
}

# Open the resolved input and return a readable filehandle.
# "stdin" yields STDIN; names ending in ".gz" are read through zcat.  The
# list form of the pipe open and the 3-argument open keep the file name away
# from the shell and from open-mode parsing.
sub open_input {
    my ($input_file) = @_;
    return \*STDIN if $input_file eq "stdin";

    my $fh;
    if ($input_file =~ /\.gz$/i) {
        open($fh, '-|', 'zcat', $input_file)
            or die("Error:cannot open the input file $input_file specified by -f !!!");
    }
    else {
        open($fh, '<', $input_file)
            or die("Error:cannot open the input file $input_file specified by -f !!!");
    }
    return $fh;
}

# Read every line from $fh and split it on the literal $separator.
# Returns a reference to an array of rows, each row an array reference of
# fields.  The -1 limit keeps trailing empty fields, and a trailing "\r" is
# removed together with the newline so CRLF input does not leak into a cell.
sub read_rows {
    my ($fh, $separator) = @_;
    my $split_re = qr/\Q$separator\E/;    # literal match, never a metacharacter
    my @rows;
    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;
        push @rows, [ split $split_re, $line, -1 ];
    }
    return \@rows;
}

# Transpose the rows produced by read_rows().
# Returns one tab-joined string per input column, in column order.  Rows
# shorter than the widest row contribute an empty cell, so values are never
# shifted into a neighbouring column.
sub transpose_rows {
    my ($rows) = @_;

    my $width = 0;
    for my $row (@$rows) {
        $width = @$row if @$row > $width;
    }

    my @lines;
    for my $col (0 .. $width - 1) {
        push @lines,
            join("\t", map { defined $_->[$col] ? $_->[$col] : '' } @$rows);
    }
    return @lines;
}

# Script entry point: parse options, read the table, print its transpose.
sub main {
    my %opt;
    # -h and -H are both plain flags, as the usage text documents.
    getopts('f:s:hH', \%opt);
    die($usage) if $opt{h} || $opt{H};

    my $input_file = resolve_input($opt{f});
    my $fh         = open_input($input_file);
    my $rows       = read_rows($fh, separator_for($opt{s}));

    # For the zcat pipe, close() is where a failed decompression shows up.
    close($fh)
        or die("Error:failed while reading the input file $input_file: "
             . ($! ? $! : "zcat exit status " . ($? >> 8)) . "\n");

    print "$_\n" for transpose_rows($rows);
    return 0;
}

# Run only when executed as a script, so the helpers above can be loaded
# and exercised without touching the command line.
exit(main()) unless caller;
1;
# TODO: the next goal is to merge two tables together, giving users the option
# to merge everything or only the rows/columns that match between the two tables.
# Sketch for opening and concatenating several files:
#@ARGV = ('file2.txt', 'file4.txt', 'file15.txt');
#open SEL, '>', 'selected.txt' or die $!;
#while (<>) {
# print SEL;
#}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment