Created
June 16, 2014 21:27
-
-
Save maxclark/bef6f0ce07fb99691c8d to your computer and use it in GitHub Desktop.
Removes duplicate lines from the input file (first parameter on the command line) and saves the unique lines and the duplicate lines to separate files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# Removes duplicate lines from the input file (first parameter on the command
# line). The first occurrence of each line is written to clean.txt and every
# later occurrence is written to dupes.txt, both in the current directory.
# Lines are compared case-insensitively; the original text of each line is
# written out unchanged.
#
# Usage: perl <this script> INPUT_FILE
use strict;
use warnings;

my $file = $ARGV[0];
die "Usage: $0 INPUT_FILE\n" unless defined $file && length $file;

# Three-argument open with lexical handles: the file name is always treated
# as a plain path, so a leading '>' or trailing '|' cannot change the mode.
# Every open is checked so a missing input does not silently produce empty
# output files.
open(my $in, '<', $file)
    or die "Cannot open '$file' for reading: $!\n";
open(my $clean, '>', 'clean.txt')
    or die "Cannot open 'clean.txt' for writing: $!\n";
open(my $dupes, '>', 'dupes.txt')
    or die "Cannot open 'dupes.txt' for writing: $!\n";

my %seen;
while (my $line = <$in>) {
    # Build the comparison key from the lowercased line without its
    # terminator, so a final line lacking a trailing newline still matches
    # an earlier identical line.
    my $key = lc $line;
    chomp $key;

    # Post-increment yields the previous count: false (undef) the first
    # time a key is seen, true on every repeat.
    if ($seen{$key}++) {
        print {$dupes} $line;
    }
    else {
        print {$clean} $line;
    }
}

# Buffered write errors (e.g. disk full) only surface at close time.
close($in)    or die "Error closing '$file': $!\n";
close($clean) or die "Error closing 'clean.txt': $!\n";
close($dupes) or die "Error closing 'dupes.txt': $!\n";

print "Saved dupes to dupes.txt\nSaved clean list to clean.txt\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment