sashaphanes · October 11, 2012 17:39
diff --git a/merge.basedon.columns.pl b/merge.basedon.columns.pl
 #!/usr/bin/perl

 use strict;
 require "getopts.pl";

 use vars qw ($opt_f $opt_p $opt_q $opt_s $opt_t $opt_l $opt_m $opt_F $opt_Q $opt_L $opt_M $opt_c $opt_o $opt_h $opt_H);

 &Getopts('f:p:q:l:s:t:m:F:Q:L:M:c:o:hH:');

 my $executable = "merge.basedOn.column.pl";

 $| = 1;

 my $usage = qq{
 	
  ###################################################################
  usage:  $executable -f file1 -q file2 -l file1_column -m file2_column [-o joining_option] > output
      
      (used for merge two flat files like in a database) 
      
      # -f first file (tab separator for each column); use stdin if the input file not specified
      # -p a list file with multiple first files (-f). Each file in one line; all must be in the same format 
      #
      # -q second file (tab separator for each column)
      #
      # -l N:the joining column number of the first file
      #    if multiple columns use like 1,2,3
      #
      # -L ignore char in columns specified in -l:
      #    if specified, such as "chr", it will be ignored in the combined string from -l
      #    and not affect the joining
      #
      # -m N:the joining column number of the second file 
      #    if multiple columns use like 1,2,3
      #
      # -M ignore char in columns specified in -M:
      #    if specified, such as "chr", it will be ignored in the combined string from -m
      #    and not affect the joining
      #    
      # -F the row number of file header in -f [default: 1] (0: no header)
      # -Q the row number of file header in -q [default: 1] (0: no header)
      #
      # -c case senstive for the joining data
      #    Y: yes [default]
      #    N: no
      #
      # -o joining option:
      #    L: left joining: all first file and matched second file
      #    default (not specified): equal joining: only matched for both
      #    N: left joining, but exclude those records matched in second file    
      #
      # -s field separator:
      #    default: <tab>
      #    S: space+
      #    C: comma
      #
      # -t field separator for file2 [default as in -s]
      #    T: tab
      #    S: space+
      #    C: comma
      #
      # -hH print this 
      
  ####################################################################
 	  \n};


 my ($file1, $file1ListName, %file1List, $file2, $file1Col, $file2Col, $joinOpt, $separator, $file2Separator, $caseSenstive, $file1HeaderRow,$file2HeaderRow, $ignoreL, $ignoreM, $splitPattern,  $split2Pattern);
 # options:
 if (length($opt_h) > 0 || length($opt_H) > 0 ) {die($usage);}   
 if (-s $opt_f or -s $opt_p) {
    $file1List{$opt_f} = 1 if ($opt_f);
    if (-s $opt_p) {
 	$file1ListName = $opt_p;
 	if ($file1ListName =~ /\.gz$/i) {
 	    open (LIST, "zcat $file1ListName |") ||die ("Cant open $file1ListName!\n");
 	}else {
 	    open (LIST, "$file1ListName") ||die ("Cant open $file1ListName!\n");
 	}
 	while (<LIST>) {
 	    chomp; 
 	    $file1 = $_;
 	    $file1List{$file1} = 1 if (-s $file1);
 	}
 	close (LIST);
    }
 } else {
    #die($usage);
    $file1List{"stdin"} = 1;
 }
 if (length($opt_q) > 0) {$file2 = $opt_q;} else {die($usage);}
 #if (length($opt_l) > 0) {$file1Col = $opt_l - 1;} else {die($usage);}
 #if (length($opt_m) > 0) {$file2Col = $opt_m - 1;} else {die($usage);}
 if (length($opt_l) > 0) {$file1Col = $opt_l ;} else {die($usage);}
 if (length($opt_m) > 0) {$file2Col = $opt_m ;} else {die($usage);}
 if (length($opt_F) > 0 and $opt_F >= 0 ) {$file1HeaderRow = $opt_F; } else {$file1HeaderRow = 1; }
 if (length($opt_Q) > 0 and $opt_Q >= 0 ) {$file2HeaderRow = $opt_Q; } else {$file2HeaderRow = 1; }
 if (length($opt_L) > 0 ) {$ignoreL = $opt_L; }
 if (length($opt_M) > 0 ) {$ignoreM = $opt_M; }


 if (length($opt_o) > 0) {$joinOpt = $opt_o;}
 if (length($opt_s) > 0 and $opt_s =~ /^s/i) {
    $separator ="space";
    $splitPattern ="\\s\+";
 }elsif ($opt_s =~ /^C/i) {
    $separator ="comma";
    $splitPattern ="\\,";
 }else {
    $separator ="tab";
    $splitPattern ="\\t";
 }
 if ($opt_t =~ /^S/i) {
    $file2Separator = "space";
    $split2Pattern ="\\s\+";
 }elsif ($opt_t =~ /^C/i ) {
    $file2Separator = "comma";
    $split2Pattern ="\\,";
 }elsif ($opt_t =~ /^T/i ) {
    $file2Separator = "tab";
    $split2Pattern ="\\t";
 }else {
    $file2Separator = $separator;
    $split2Pattern = $splitPattern;
 }

 if ($opt_c =~ /^N/i) {$caseSenstive = 0; } else {$caseSenstive = 1; }

    
 my $zeroPlus = "0plus";
 my $lineCount2 = 0;
 my $lineCount1 = 0;

 my ($line);
 my (@columns1,  $tempCount);

 if ($file2 =~ /\.gz$/i) {
    open (FILE2, "zcat $file2 |") ||die ("Cant open $file2!\n");
 }else {
    open (FILE2, "$file2") ||die ("Cant open $file2!\n");
 }
 my (%records2, @columns2, $maxCol2, $colNo2);
 my ($key1, $key2, $colNm, $colAI, @file1Cols, @file2Cols,@file1ColAIs, @file2ColAIs, $file1ColsNm, $file2ColsNm);
 @file1Cols = split (/\,/, $file1Col);
 @file2Cols = split (/\,/, $file2Col);
 $file1ColsNm = @file1Cols;
 $file2ColsNm = @file2Cols;
 unless ($file1ColsNm == $file2ColsNm) {
    die("Error: need same total column numbers specified in -l and -m\n $usage");
 }

 foreach $colNm (@file1Cols ) {
    die("Error: column numbers must >0 in -m\n $usage") if ($colNm <1);
    $colAI = $colNm - 1;
    push (@file1ColAIs, $colAI);
 }

 foreach $colNm (@file2Cols ) {
    die("Error: column numbers must >0 in -m\n $usage") if ($colNm <1);
    $colAI = $colNm - 1;
    push (@file2ColAIs, $colAI);
 }	

 my ($file1Header, $file2Header);
 if ($file2HeaderRow > 0 ) {
    for ($lineCount2 = 1; $lineCount2 <= $file2HeaderRow; $lineCount2++) {
 	$file2Header = <FILE2>;	    
    }
    chomp ($file2Header);
 }

 if ($ignoreM) {
    while (<FILE2>) {
 	chomp;
 	$line = $_;
 	$lineCount2++;
 	#$file2Header = $_ if ($lineCount2 == $file2HeaderRow );
 	#if ($file2Separator eq "space") {
 	#	@columns2 = split (/\s+/, $line);	    
 	# }elsif ($file2Separator eq "comma" ) {
 	#	@columns2 = split(/\,/, $line);
 	# }else {
 	#	@columns2 = split (/\t/, $line);
 	# }
 	@columns2 = split (/$split2Pattern/, $line);
 	
 	$colNo2 = @columns2;
 	$maxCol2 = $colNo2 if ($colNo2 > $maxCol2);
 	$key2 = "";
 	foreach $colAI (@file2ColAIs) {
 	    $columns2[$colAI] = $zeroPlus if ($columns2[$colAI] eq "0" ); # so that 0 key will be included
 	    $columns2[$colAI] = "N/A" unless ($columns2[$colAI]);	    
 	    $key2 .= "$columns2[$colAI]".".";
 	}
 	# $columns2[$file2Col] = $zeroPlus if ($columns2[$file2Col] eq "0" ); # so that 0 key will be included
 	# $columns2[$file2Col] = "N/A" unless ($columns2[$file2Col]);
 	$key2 =~ s/\.$//;
 	#$records2{$columns2[$file2Col]}{$lineCount2} = $line;
 	$key2 =~ s/$ignoreM//g;
 	$key2 = uc($key2) if ($caseSenstive == 0 );
 	$records2{$key2}{$lineCount2} = $line;
    }
 }else {
    while (<FILE2>) {
 	chomp;
 	$line = $_;
 	$lineCount2++;
 	#$file2Header = $_ if ($lineCount2 == $file2HeaderRow );
 	# if ($file2Separator eq "space") {
 	#	@columns2 = split (/\s+/, $line);	    
 	# }elsif ($file2Separator eq "comma" ) {
 	#	@columns2 = split(/\,/, $line);
 	# }else {
 	#	@columns2 = split (/\t/, $line);
 	# }
 	@columns2 = split (/$split2Pattern/, $line);
 	
 	$colNo2 = @columns2;
 	$maxCol2 = $colNo2 if ($colNo2 > $maxCol2);
 	$key2 = "";
 	foreach $colAI (@file2ColAIs) {
 	    $columns2[$colAI] = $zeroPlus if ($columns2[$colAI] eq "0" ); # so that 0 key will be included
 	    $columns2[$colAI] = "N/A" unless ($columns2[$colAI]);	    
 	    $key2 .= "$columns2[$colAI]".".";
 	}
 	# $columns2[$file2Col] = $zeroPlus if ($columns2[$file2Col] eq "0" ); # so that 0 key will be included
 	# $columns2[$file2Col] = "N/A" unless ($columns2[$file2Col]);
 	$key2 =~ s/\.$//;
 	#$records2{$columns2[$file2Col]}{$lineCount2} = $line;
 	$key2 = uc($key2) if ($caseSenstive == 0 );
 	$records2{$key2}{$lineCount2} = $line;
    }
 	
 }
 close (FILE2);

 my ($file1Count, $joinLabel);
 if ($joinOpt =~ /^L/i) {
    $joinLabel = "--->";
 }elsif ($joinOpt =~ /^N/i ) {
    $joinLabel = "<---";
 }else {
    $joinLabel = "<-->";
 }

 my $FILE1;

 foreach $file1 (sort keys %file1List) {
    $file1Count++;
    if ($file1 eq "stdin") {
 	$FILE1 = \*STDIN;
    }elsif ($file1 =~ /\.gz$/i) {
 	open ($FILE1, "zcat $file1 |") || die ("Cant open $file1!\n");;
    }else {
 	open ($FILE1, "$file1") ||die ("Cant open $file1!\n");
    }

    if ($file1HeaderRow > 0 ) {
 	for ($lineCount1 = 1; $lineCount1 <= $file1HeaderRow; $lineCount1++) {
 	    $file1Header = <$FILE1>;	    
 	}
 	chomp ($file1Header);
 	if ($file1Count == 1  and $file1Header ) {
 	    #if ($joinOpt =~ /^L/i) {
 	    #	print "$file1Header\t---\>\t$file2Header\n";
 	    #}else {
 	    #	print "$line\t<--\>\t$file2Header\n";
 	    #}
 	    print "$file1Header\t$joinLabel\t$file2Header\n";
 	}
    }

    if ($ignoreL) {
 	while (<$FILE1>) {
 	    chomp;
 	    $line = $_;
 	    $lineCount1++;
 	   # $file1Header = $_ if ($lineCount1 == $file1HeaderRow );
 	   # if ($separator eq "space") {
 	   #	@columns1 = split (/\s+/, $line);	    
 	   # }elsif ($separator eq "comma" ) {
 	   #	@columns1 = split(/\,/, $line);
 	   # }else {
 	   #	@columns1 = split (/\t/, $line);
 	   # }
 	    @columns1 = split (/$splitPattern/, $line);	   

 	    $key1 = "";
 	    foreach $colAI (@file1ColAIs) {
 		$columns1[$colAI] = $zeroPlus if ($columns1[$colAI] eq "0" ); # so that 0 key will be included
 		$columns1[$colAI] = "N/A" unless ($columns1[$colAI]);	    
 		$key1 .= "$columns1[$colAI]".".";
 	    }
 	    $key1 =~ s/\.$//;	
 	    $key1 =~ s/$ignoreL//g;
 	    # $columns1[$file1Col] = $zeroPlus if ($columns1[$file1Col] eq "0" ); # so that 0 key will be included
 	    # $columns1[$file1Col] = "N/A" unless ($columns1[$file1Col]);
 	    #if ($columns1[$file1Col] ne "N/A" and defined %{$records2{$columns1[$file1Col]}} and $joinOpt !~ /^N/i) {
 	    $key1 = uc($key1) if ($caseSenstive == 0 );
 	    #if ($file1Header and $lineCount1 == $file1HeaderRow ) {
 	    #	if ($joinOpt =~ /^L/i) {
 	    #	    print "$file1Header\t---\>\t$file2Header\n";
 	    #	}else {
 	    #	    print "$line\t<--\>\t$file2Header\n";
 	    #	}
 		
 	    #}elsif ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory 
 	    if ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory 
 		foreach $lineCount2 (keys %{$records2{$key1}} ) {
 		    #if ($joinOpt =~ /^L/i) {
 		    #	print "$line\t---\>\t";
 		    # }else {
 		    #	print "$line\t<--\>\t";
 		    # }
 		    print "$line\t$joinLabel\t";
 		    print "$records2{$key1}{$lineCount2}\n";		    
 		}
 	    }else {
 		
 		if ($joinOpt =~ /^L/i) { # left joining: all first file and matched second file
 		    print "$line\t$joinLabel";
 		    for ($tempCount = 0; $tempCount < $maxCol2; $tempCount++) {
 			print "\t";
 		    }
 		    print "\n";
 		}elsif ($joinOpt =~ /^N/i ) { # left joining, but only print those not in second file
 		    print "$line\t$joinLabel\n" unless (exists $records2{$key1} );   # defined %{$records2{$key1}} will take new memory 
 		}
 	    }	
 	}	
    }else {
 	while (<$FILE1>) {
 	    chomp;
 	    $line = $_;
 	    $lineCount1++;
 	    $file1Header = $_ if ($lineCount1 == $file1HeaderRow );
 	    #if ($separator eq "space") {
 	    #	@columns1 = split (/\s+/, $line);	    
 	    # }elsif ($separator eq "comma" ) {
 	    #	@columns1 = split(/\,/, $line);
 	    # }else {
 	    #	@columns1 = split (/\t/, $line);
 	    # }
 	    @columns1 = split (/$splitPattern/, $line);	  

 	    $key1 = "";
 	    foreach $colAI (@file1ColAIs) {
 		$columns1[$colAI] = $zeroPlus if ($columns1[$colAI] eq "0" ); # so that 0 key will be included
 		$columns1[$colAI] = "N/A" unless ($columns1[$colAI]);	    
 		$key1 .= "$columns1[$colAI]".".";
 	    }
 	    $key1 =~ s/\.$//;	
 	    # $columns1[$file1Col] = $zeroPlus if ($columns1[$file1Col] eq "0" ); # so that 0 key will be included
 	    # $columns1[$file1Col] = "N/A" unless ($columns1[$file1Col]);
 	    #if ($columns1[$file1Col] ne "N/A" and defined %{$records2{$columns1[$file1Col]}} and $joinOpt !~ /^N/i) {
 	    $key1 = uc($key1) if ($caseSenstive == 0 );
 	    #if ($file1Header and $lineCount1 == $file1HeaderRow ) {
 	    #	if ($joinOpt =~ /^L/i) {
 	    #	    print "$file1Header\t---\>\t$file2Header\n";
 	    #	}else {
 	    #	    print "$line\t<--\>\t$file2Header\n";
 	    #	}
 		
 	    # }elsif ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory 
 	    if ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory 
 		foreach $lineCount2 (keys %{$records2{$key1}} ) {
 		    #if ($joinOpt =~ /^L/i) {
 		    #	print "$line\t---\>\t";
 		    #}else {
 		    #	print "$line\t<--\>\t";
 		    # }
 		    print "$line\t$joinLabel\t";
 		    print "$records2{$key1}{$lineCount2}\n";		    
 		}
 	    }else {
 		if ($joinOpt =~ /^L/i) { # left joining: all first file and matched second file
 		    print "$line\t$joinLabel";
 		    for ($tempCount = 0; $tempCount < $maxCol2; $tempCount++) {
 			print "\t";
 		    }
 		    print "\n";
 		}elsif ($joinOpt =~ /^N/i ) { # left joining, but only print those not in second file
 		    print "$line\t$joinLabel\n" unless (exists $records2{$key1} );   # defined %{$records2{$key1}} will take new memory 
 		}
 	    }	
 	}	
    }
    close ($FILE1);
 }
 exit;
	#!/usr/bin/perl

	use strict;
	require "getopts.pl";

	use vars qw ($opt_f $opt_p $opt_q $opt_s $opt_t $opt_l $opt_m $opt_F $opt_Q $opt_L $opt_M $opt_c $opt_o $opt_h $opt_H);

	&Getopts('f:p:q:l:s:t:m:F:Q:L:M:c:o:hH:');

	my $executable = "merge.basedOn.column.pl";

	$\| = 1;

	my $usage = qq{

	###################################################################
	usage: $executable -f file1 -q file2 -l file1_column -m file2_column [-o joining_option] > output

	(used for merge two flat files like in a database)

	# -f first file (tab separator for each column); use stdin if the input file not specified
	# -p a list file with multiple first files (-f). Each file in one line; all must be in the same format
	#
	# -q second file (tab separator for each column)
	#
	# -l N:the joining column number of the first file
	# if multiple columns use like 1,2,3
	#
	# -L ignore char in columns specified in -l:
	# if specified, such as "chr", it will be ignored in the combined string from -l
	# and not affect the joining
	#
	# -m N:the joining column number of the second file
	# if multiple columns use like 1,2,3
	#
	# -M ignore char in columns specified in -M:
	# if specified, such as "chr", it will be ignored in the combined string from -m
	# and not affect the joining
	#
	# -F the row number of file header in -f [default: 1] (0: no header)
	# -Q the row number of file header in -q [default: 1] (0: no header)
	#
	# -c case senstive for the joining data
	# Y: yes [default]
	# N: no
	#
	# -o joining option:
	# L: left joining: all first file and matched second file
	# default (not specified): equal joining: only matched for both
	# N: left joining, but exclude those records matched in second file
	#
	# -s field separator:
	# default: <tab>
	# S: space+
	# C: comma
	#
	# -t field separator for file2 [default as in -s]
	# T: tab
	# S: space+
	# C: comma
	#
	# -hH print this

	####################################################################
	\n};


	my ($file1, $file1ListName, %file1List, $file2, $file1Col, $file2Col, $joinOpt, $separator, $file2Separator, $caseSenstive, $file1HeaderRow,$file2HeaderRow, $ignoreL, $ignoreM, $splitPattern, $split2Pattern);
	# options:
	if (length($opt_h) > 0 \|\| length($opt_H) > 0 ) {die($usage);}
	if (-s $opt_f or -s $opt_p) {
	$file1List{$opt_f} = 1 if ($opt_f);
	if (-s $opt_p) {
	$file1ListName = $opt_p;
	if ($file1ListName =~ /\.gz$/i) {
	open (LIST, "zcat $file1ListName \|") \|\|die ("Cant open $file1ListName!\n");
	}else {
	open (LIST, "$file1ListName") \|\|die ("Cant open $file1ListName!\n");
	}
	while (<LIST>) {
	chomp;
	$file1 = $_;
	$file1List{$file1} = 1 if (-s $file1);
	}
	close (LIST);
	}
	} else {
	#die($usage);
	$file1List{"stdin"} = 1;
	}
	if (length($opt_q) > 0) {$file2 = $opt_q;} else {die($usage);}
	#if (length($opt_l) > 0) {$file1Col = $opt_l - 1;} else {die($usage);}
	#if (length($opt_m) > 0) {$file2Col = $opt_m - 1;} else {die($usage);}
	if (length($opt_l) > 0) {$file1Col = $opt_l ;} else {die($usage);}
	if (length($opt_m) > 0) {$file2Col = $opt_m ;} else {die($usage);}
	if (length($opt_F) > 0 and $opt_F >= 0 ) {$file1HeaderRow = $opt_F; } else {$file1HeaderRow = 1; }
	if (length($opt_Q) > 0 and $opt_Q >= 0 ) {$file2HeaderRow = $opt_Q; } else {$file2HeaderRow = 1; }
	if (length($opt_L) > 0 ) {$ignoreL = $opt_L; }
	if (length($opt_M) > 0 ) {$ignoreM = $opt_M; }


	if (length($opt_o) > 0) {$joinOpt = $opt_o;}
	if (length($opt_s) > 0 and $opt_s =~ /^s/i) {
	$separator ="space";
	$splitPattern ="\\s\+";
	}elsif ($opt_s =~ /^C/i) {
	$separator ="comma";
	$splitPattern ="\\,";
	}else {
	$separator ="tab";
	$splitPattern ="\\t";
	}
	if ($opt_t =~ /^S/i) {
	$file2Separator = "space";
	$split2Pattern ="\\s\+";
	}elsif ($opt_t =~ /^C/i ) {
	$file2Separator = "comma";
	$split2Pattern ="\\,";
	}elsif ($opt_t =~ /^T/i ) {
	$file2Separator = "tab";
	$split2Pattern ="\\t";
	}else {
	$file2Separator = $separator;
	$split2Pattern = $splitPattern;
	}

	if ($opt_c =~ /^N/i) {$caseSenstive = 0; } else {$caseSenstive = 1; }


	my $zeroPlus = "0plus";
	my $lineCount2 = 0;
	my $lineCount1 = 0;

	my ($line);
	my (@columns1, $tempCount);

	if ($file2 =~ /\.gz$/i) {
	open (FILE2, "zcat $file2 \|") \|\|die ("Cant open $file2!\n");
	}else {
	open (FILE2, "$file2") \|\|die ("Cant open $file2!\n");
	}
	my (%records2, @columns2, $maxCol2, $colNo2);
	my ($key1, $key2, $colNm, $colAI, @file1Cols, @file2Cols,@file1ColAIs, @file2ColAIs, $file1ColsNm, $file2ColsNm);
	@file1Cols = split (/\,/, $file1Col);
	@file2Cols = split (/\,/, $file2Col);
	$file1ColsNm = @file1Cols;
	$file2ColsNm = @file2Cols;
	unless ($file1ColsNm == $file2ColsNm) {
	die("Error: need same total column numbers specified in -l and -m\n $usage");
	}

	foreach $colNm (@file1Cols ) {
	die("Error: column numbers must >0 in -m\n $usage") if ($colNm <1);
	$colAI = $colNm - 1;
	push (@file1ColAIs, $colAI);
	}

	foreach $colNm (@file2Cols ) {
	die("Error: column numbers must >0 in -m\n $usage") if ($colNm <1);
	$colAI = $colNm - 1;
	push (@file2ColAIs, $colAI);
	}

	my ($file1Header, $file2Header);
	if ($file2HeaderRow > 0 ) {
	for ($lineCount2 = 1; $lineCount2 <= $file2HeaderRow; $lineCount2++) {
	$file2Header = <FILE2>;
	}
	chomp ($file2Header);
	}

	if ($ignoreM) {
	while (<FILE2>) {
	chomp;
	$line = $_;
	$lineCount2++;
	#$file2Header = $_ if ($lineCount2 == $file2HeaderRow );
	#if ($file2Separator eq "space") {
	# @columns2 = split (/\s+/, $line);
	# }elsif ($file2Separator eq "comma" ) {
	# @columns2 = split(/\,/, $line);
	# }else {
	# @columns2 = split (/\t/, $line);
	# }
	@columns2 = split (/$split2Pattern/, $line);

	$colNo2 = @columns2;
	$maxCol2 = $colNo2 if ($colNo2 > $maxCol2);
	$key2 = "";
	foreach $colAI (@file2ColAIs) {
	$columns2[$colAI] = $zeroPlus if ($columns2[$colAI] eq "0" ); # so that 0 key will be included
	$columns2[$colAI] = "N/A" unless ($columns2[$colAI]);
	$key2 .= "$columns2[$colAI]".".";
	}
	# $columns2[$file2Col] = $zeroPlus if ($columns2[$file2Col] eq "0" ); # so that 0 key will be included
	# $columns2[$file2Col] = "N/A" unless ($columns2[$file2Col]);
	$key2 =~ s/\.$//;
	#$records2{$columns2[$file2Col]}{$lineCount2} = $line;
	$key2 =~ s/$ignoreM//g;
	$key2 = uc($key2) if ($caseSenstive == 0 );
	$records2{$key2}{$lineCount2} = $line;
	}
	}else {
	while (<FILE2>) {
	chomp;
	$line = $_;
	$lineCount2++;
	#$file2Header = $_ if ($lineCount2 == $file2HeaderRow );
	# if ($file2Separator eq "space") {
	# @columns2 = split (/\s+/, $line);
	# }elsif ($file2Separator eq "comma" ) {
	# @columns2 = split(/\,/, $line);
	# }else {
	# @columns2 = split (/\t/, $line);
	# }
	@columns2 = split (/$split2Pattern/, $line);

	$colNo2 = @columns2;
	$maxCol2 = $colNo2 if ($colNo2 > $maxCol2);
	$key2 = "";
	foreach $colAI (@file2ColAIs) {
	$columns2[$colAI] = $zeroPlus if ($columns2[$colAI] eq "0" ); # so that 0 key will be included
	$columns2[$colAI] = "N/A" unless ($columns2[$colAI]);
	$key2 .= "$columns2[$colAI]".".";
	}
	# $columns2[$file2Col] = $zeroPlus if ($columns2[$file2Col] eq "0" ); # so that 0 key will be included
	# $columns2[$file2Col] = "N/A" unless ($columns2[$file2Col]);
	$key2 =~ s/\.$//;
	#$records2{$columns2[$file2Col]}{$lineCount2} = $line;
	$key2 = uc($key2) if ($caseSenstive == 0 );
	$records2{$key2}{$lineCount2} = $line;
	}

	}
	close (FILE2);

	my ($file1Count, $joinLabel);
	if ($joinOpt =~ /^L/i) {
	$joinLabel = "--->";
	}elsif ($joinOpt =~ /^N/i ) {
	$joinLabel = "<---";
	}else {
	$joinLabel = "<-->";
	}

	my $FILE1;

	foreach $file1 (sort keys %file1List) {
	$file1Count++;
	if ($file1 eq "stdin") {
	$FILE1 = \*STDIN;
	}elsif ($file1 =~ /\.gz$/i) {
	open ($FILE1, "zcat $file1 \|") \|\| die ("Cant open $file1!\n");;
	}else {
	open ($FILE1, "$file1") \|\|die ("Cant open $file1!\n");
	}

	if ($file1HeaderRow > 0 ) {
	for ($lineCount1 = 1; $lineCount1 <= $file1HeaderRow; $lineCount1++) {
	$file1Header = <$FILE1>;
	}
	chomp ($file1Header);
	if ($file1Count == 1 and $file1Header ) {
	#if ($joinOpt =~ /^L/i) {
	# print "$file1Header\t---\>\t$file2Header\n";
	#}else {
	# print "$line\t<--\>\t$file2Header\n";
	#}
	print "$file1Header\t$joinLabel\t$file2Header\n";
	}
	}

	if ($ignoreL) {
	while (<$FILE1>) {
	chomp;
	$line = $_;
	$lineCount1++;
	# $file1Header = $_ if ($lineCount1 == $file1HeaderRow );
	# if ($separator eq "space") {
	# @columns1 = split (/\s+/, $line);
	# }elsif ($separator eq "comma" ) {
	# @columns1 = split(/\,/, $line);
	# }else {
	# @columns1 = split (/\t/, $line);
	# }
	@columns1 = split (/$splitPattern/, $line);

	$key1 = "";
	foreach $colAI (@file1ColAIs) {
	$columns1[$colAI] = $zeroPlus if ($columns1[$colAI] eq "0" ); # so that 0 key will be included
	$columns1[$colAI] = "N/A" unless ($columns1[$colAI]);
	$key1 .= "$columns1[$colAI]".".";
	}
	$key1 =~ s/\.$//;
	$key1 =~ s/$ignoreL//g;
	# $columns1[$file1Col] = $zeroPlus if ($columns1[$file1Col] eq "0" ); # so that 0 key will be included
	# $columns1[$file1Col] = "N/A" unless ($columns1[$file1Col]);
	#if ($columns1[$file1Col] ne "N/A" and defined %{$records2{$columns1[$file1Col]}} and $joinOpt !~ /^N/i) {
	$key1 = uc($key1) if ($caseSenstive == 0 );
	#if ($file1Header and $lineCount1 == $file1HeaderRow ) {
	# if ($joinOpt =~ /^L/i) {
	# print "$file1Header\t---\>\t$file2Header\n";
	# }else {
	# print "$line\t<--\>\t$file2Header\n";
	# }

	#}elsif ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory
	if ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory
	foreach $lineCount2 (keys %{$records2{$key1}} ) {
	#if ($joinOpt =~ /^L/i) {
	# print "$line\t---\>\t";
	# }else {
	# print "$line\t<--\>\t";
	# }
	print "$line\t$joinLabel\t";
	print "$records2{$key1}{$lineCount2}\n";
	}
	}else {

	if ($joinOpt =~ /^L/i) { # left joining: all first file and matched second file
	print "$line\t$joinLabel";
	for ($tempCount = 0; $tempCount < $maxCol2; $tempCount++) {
	print "\t";
	}
	print "\n";
	}elsif ($joinOpt =~ /^N/i ) { # left joining, but only print those not in second file
	print "$line\t$joinLabel\n" unless (exists $records2{$key1} ); # defined %{$records2{$key1}} will take new memory
	}
	}
	}
	}else {
	while (<$FILE1>) {
	chomp;
	$line = $_;
	$lineCount1++;
	$file1Header = $_ if ($lineCount1 == $file1HeaderRow );
	#if ($separator eq "space") {
	# @columns1 = split (/\s+/, $line);
	# }elsif ($separator eq "comma" ) {
	# @columns1 = split(/\,/, $line);
	# }else {
	# @columns1 = split (/\t/, $line);
	# }
	@columns1 = split (/$splitPattern/, $line);

	$key1 = "";
	foreach $colAI (@file1ColAIs) {
	$columns1[$colAI] = $zeroPlus if ($columns1[$colAI] eq "0" ); # so that 0 key will be included
	$columns1[$colAI] = "N/A" unless ($columns1[$colAI]);
	$key1 .= "$columns1[$colAI]".".";
	}
	$key1 =~ s/\.$//;
	# $columns1[$file1Col] = $zeroPlus if ($columns1[$file1Col] eq "0" ); # so that 0 key will be included
	# $columns1[$file1Col] = "N/A" unless ($columns1[$file1Col]);
	#if ($columns1[$file1Col] ne "N/A" and defined %{$records2{$columns1[$file1Col]}} and $joinOpt !~ /^N/i) {
	$key1 = uc($key1) if ($caseSenstive == 0 );
	#if ($file1Header and $lineCount1 == $file1HeaderRow ) {
	# if ($joinOpt =~ /^L/i) {
	# print "$file1Header\t---\>\t$file2Header\n";
	# }else {
	# print "$line\t<--\>\t$file2Header\n";
	# }

	# }elsif ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory
	if ($key1 ne "N/A" and exists $records2{$key1} and $joinOpt !~ /^N/i) { # defined %{$records2{$key1}} will take new memory
	foreach $lineCount2 (keys %{$records2{$key1}} ) {
	#if ($joinOpt =~ /^L/i) {
	# print "$line\t---\>\t";
	#}else {
	# print "$line\t<--\>\t";
	# }
	print "$line\t$joinLabel\t";
	print "$records2{$key1}{$lineCount2}\n";
	}
	}else {
	if ($joinOpt =~ /^L/i) { # left joining: all first file and matched second file
	print "$line\t$joinLabel";
	for ($tempCount = 0; $tempCount < $maxCol2; $tempCount++) {
	print "\t";
	}
	print "\n";
	}elsif ($joinOpt =~ /^N/i ) { # left joining, but only print those not in second file
	print "$line\t$joinLabel\n" unless (exists $records2{$key1} ); # defined %{$records2{$key1}} will take new memory
	}
	}
	}
	}
	close ($FILE1);
	}
	exit;