Last active
January 27, 2019 03:19
-
-
Save briandfoy/b550c4ec70d2382dbcaaec1e6f53f94d to your computer and use it in GitHub Desktop.
Merge multiple files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
import heapq | |
import os | |
import sys | |
class MergeFiles:
    """Iterate over the integers of several sorted files in merged order.

    Each file has one integer per line and those numbers are sorted in
    ascending order. There are no comments or blank lines. Iterating
    yields ``[filename, number]`` pairs, smallest number first across
    all of the files.

        numbers = MergeFiles(
            'odds.txt',
            'evens.txt',
            'repeats.txt',
        )
        for i in numbers:
            print( "%s: %d" % (i[0], i[1]) )

    The object can also be used as a context manager, which closes any
    files that are still open when the block exits.
    """

    def __init__(self, *filenames):
        """Open every file and queue the first number of each one.

        A file whose first line is not an integer (including an empty
        file) contributes nothing and is closed immediately.

        Raises:
            OSError: if one of the files cannot be opened. Files that
                were already opened are closed before re-raising.
        """
        # Heap entries are (number, tie_breaker, filehandle, filename).
        self.heap = []
        # The count is a tie breaker. Without it, equal numbers would
        # make the heap compare the next tuple items, which are the
        # filehandle and file name.
        self.count = 0
        try:
            for name in filenames:
                self._push_next(open(name, 'r'), name)
        except OSError:
            self.close()
            raise

    def _push_next(self, fh, name):
        """Queue the next integer of *fh*, or close *fh* if it has none."""
        try:
            # int() tolerates the trailing newline; an empty string
            # (end of file) or any non-number raises ValueError.
            number = int(fh.readline())
        except ValueError:
            # Might not be a number, so we stop processing that file.
            fh.close()
            return
        heapq.heappush(self.heap, (number, self.count, fh, name))
        self.count += 1

    def close(self):
        """Close every file that is still queued and empty the heap."""
        while self.heap:
            self.heap.pop()[2].close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __iter__(self):
        return self

    # the Python 3 version of the method (just next in 2)
    def __next__(self):
        """Return ``[filename, number]`` for the smallest queued number.

        Raises:
            StopIteration: once every file has been exhausted.
        """
        # Perhaps we've already read the last thing in the heap.
        try:
            number, _, fh, name = heapq.heappop(self.heap)
        except IndexError:
            raise StopIteration from None
        # Refill the heap from the file we just consumed a number from.
        self._push_next(fh, name)
        return [name, number]
if __name__ == "__main__":
    # Merge the files named on the command line and print one
    # "filename: number" line per value, smallest number first.
    # Guarded so that importing this module does not consume sys.argv.
    numbers = MergeFiles(*sys.argv[1:])
    for i in numbers:
        print("%s: %d" % (i[0], i[1]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment