Created
August 26, 2009 01:56
-
-
Save sms420/175242 to your computer and use it in GitHub Desktop.
de_dupe.rb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# de_dupe.rb
# Sean (2009-08-25)
#
# Reads lines from parent.txt, which is expected to be a superset of
# child.txt, and writes every parent line that is not found in the child
# file to out.txt.
#
# Lines are compared with their line terminators stripped, so it does not
# matter whether either file ends with a trailing newline (or uses "\r\n").

require "set"

# Writes each line of the parent file that is absent from the child file.
# Parent order is preserved and repeated parent lines are written each time
# they occur.
#
# @param parent_path [String] file whose lines are filtered
# @param child_path [String] file whose lines are excluded from the output
# @param out_path [String] destination; created if missing, truncated otherwise
# @return [Integer] number of lines written to out_path
# @raise [Errno::ENOENT] if the parent or child file does not exist
def de_dupe(parent_path = "parent.txt", child_path = "child.txt", out_path = "out.txt")
  # Load the child file once into a set so each parent line costs a single
  # lookup instead of a full rescan of the child file.
  child_lines = File.foreach(child_path).map(&:chomp).to_set
  written = 0

  # Open the parent before the output so a missing parent raises without
  # creating or truncating the output file.
  File.open(parent_path, "r") do |parent|
    # "w" creates the output if needed and discards any previous contents.
    File.open(out_path, "w") do |out|
      parent.each_line do |raw_line|
        line = raw_line.chomp
        next if child_lines.include?(line)

        # Only reached when the line exists in the parent but nowhere in
        # the child.
        out.puts line
        written += 1
      end
    end
  end

  written
end

de_dupe if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment