Created
September 17, 2010 13:50
-
-
Save velenux/584261 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
# Splits the lines of the files given on the command line into "common"
# (present in every examined file) and "uncommon" (missing from at least one).
#
# Output:
#   common-lines.txt  - each common line, written once
#   <file>.uncommon   - for every input file, its lines that are not common
#
# Lines are compared after chomp and normalization via sanitize().
#
# Bookkeeping shared by both passes:
#   %rows           - normalized line => number of files that contain it
#   %published      - normalized line => true once written to common-lines.txt
#   $files_examined - number of input files actually read in pass 1
my %rows;
my %published;
my $files_examined = 0;

# Pass 1: for every normalized line, count how many files contain it.
foreach my $file (@ARGV) {
    next if not -r $file;    # skip if not readable
    next if not -f $file;    # skip if not a plain file (dir, pipe, etc)

    open my $in, '<', $file or die "Can't open file $file, $!\n";
    $files_examined += 1;

    # A line repeated inside one file must only count once for that file;
    # otherwise the "present in every file" comparison below is wrong.
    my %seen_in_file;
    while ( my $raw = <$in> ) {
        my $line = $raw;
        chomp($line);
        $line = sanitize($line);
        next if $seen_in_file{$line}++;
        $rows{$line}++;
    }
    close $in;
}

# Pass 2: route every line either to common-lines.txt or to <file>.uncommon.
open my $common, '>', 'common-lines.txt'
    or die "Can't open file common-lines.txt for writing, $!\n";

foreach my $file (@ARGV) {
    next if not -r $file;    # skip if not readable
    next if not -f $file;    # skip if not a plain file (dir, pipe, etc)

    open my $in, '<', $file or die "Can't open file $file, $!\n";
    open my $out, '>', "$file.uncommon"
        or die "Can't open file $file.uncommon for writing, $!\n";

    while ( my $raw = <$in> ) {
        my $line = $raw;
        chomp($line);
        $line = sanitize($line);

        # A line unseen in pass 1 (e.g. the file changed in between) counts
        # as present in zero files rather than triggering an undef warning.
        my $count = exists $rows{$line} ? $rows{$line} : 0;

        if ( $count != $files_examined ) {
            print {$out} $raw;    # missing from at least one file
            next;
        }

        # Common line: publish it only once, keyed on the normalized form so
        # indentation or a missing final newline cannot cause duplicates.
        next if $published{$line}++;
        $raw .= "\n" if $raw !~ /\n\z/;    # keep one common line per row
        print {$common} $raw;
    }

    close $in;
    # Buffered write errors only surface at close, so check it.
    close $out or die "Can't close file $file.uncommon, $!\n";
}

close $common or die "Can't close file common-lines.txt, $!\n";
# sanitize($line)
#
# Returns a copy of $line with all leading whitespace removed; the rest of
# the string (including trailing whitespace) is left untouched. The caller's
# variable is not modified.
#
# Whitespace-only input collapses to the empty string, so blank lines compare
# equal no matter how many spaces or tabs they contain.
sub sanitize {
    my $r = shift;
    $r =~ s/^\s+//;    # remove all space at line start
    return $r;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment