Last active
December 16, 2015 08:48
-
-
Save santa4nt/5408293 to your computer and use it in GitHub Desktop.
Combine the last three gists: generate random ints, sort them by chunking, and verify sorted final output.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import os | |
| import sys | |
| import array | |
| import struct | |
| NUMRANDS = 10 ** 6 # the number of random integers we want to generate (one million) | |
| BUFSIZE = 1000 # the number of integers we buffer internally before flushing to output | |
| a = array.array('i') | |
| for i in xrange(NUMRANDS): # generate one million 32-bit integers | |
| # generate the integer (signed) from the OS' random source of byte strings | |
| randint = struct.unpack('@i', os.urandom(4))[0] | |
| a.append(randint) # buffer the random integer into an array | |
| if len(a) >= BUFSIZE: # flush to output buffer once we have enough | |
| a.tofile(sys.stdout) | |
| del a[:] | |
| if a: | |
| a.tofile(sys.stdout) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import os | |
| import sys | |
| import array | |
| import heapq | |
| import struct | |
| import tempfile | |
| BUFSIZE = 4096 # read 1000 * 4-byte (32-bit) numbers at a time | |
| TBUFSIZE = 256 # read each sorted tempfile 64 numbers at a time | |
| input = sys.stdin | |
| output = sys.stdout | |
| tempsorts = [] # holds temporary file names whose contents contain ordered numbers | |
| sortiters = [] # holds iterators over the content of tempsorts above | |
| # 1. Read the input into blocks of BUFSIZE bytes, sort them, and dump into a tempfile | |
| # read the first block of the input into the buffer | |
| buf = input.read(BUFSIZE) | |
| while buf: | |
| a = array.array('i', buf) # if buf was full (all (BUFSIZE=4096) bytes), then len(a) == 1024 (we have 1024 random ints) | |
| s = array.array('i', sorted(a)) | |
| # emit the sorted output to a tempfile | |
| with tempfile.NamedTemporaryFile(mode='w+b', bufsize=BUFSIZE, dir='.', delete=False) as f: | |
| s.tofile(f.file) | |
| tempsorts.append(f.name) | |
| # read the next block of the input into the buffer | |
| buf = input.read(BUFSIZE) | |
| # create an iterator (really, a generator) for each temp file output we have | |
| def tempiter(fname): | |
| with open(fname, 'rb') as f: | |
| buf = f.read(TBUFSIZE) # read 64 numbers (256) bytes at a time from each temp file | |
| while buf: | |
| for n in array.array('i', buf): | |
| yield n # then the generator yields one number at a time therein | |
| buf = f.read(TBUFSIZE) # read the next 256-byte block from this tempfile | |
| # create len(tempsorts) number of such an iterator | |
| map(lambda f: sortiters.append(tempiter(f)), tempsorts) | |
| assert len(sortiters) == len(tempsorts) # sanity check | |
| """ To test the iterators we created: """ | |
| """ | |
| penult = sortiters[-2] # second last block should have exactly 1024 sorted numbers | |
| counter = 0 | |
| for n in penult: | |
| counter += 1 | |
| print ('%d,' % n), | |
| print counter | |
| assert counter == BUFSIZE / 4 | |
| """ | |
| # 2. Merge sorted output from each tempfile, using a heap queue that can contain at least len(tempsorts) numbers | |
| out = [] # temporary heap | |
| # a. First, insert the head of each input iterator into the heap | |
| for i in xrange(len(sortiters)): | |
| it = sortiters[i] | |
| item = next(it) | |
| heapq.heappush(out, (item, i)) # also include the index for the merging loop in step (b) | |
| # b. Loop while the heap is not empty | |
| a = array.array('i') # temporary buffer to store merged sorted output | |
| while out: | |
| item, i = heapq.heappop(out) | |
| # buffer the item we just popped from the heap | |
| a.append(item) | |
| # if the buffer is "full" (reached a pre-set capacity), flush to output | |
| if len(a) >= (BUFSIZE / 4): | |
| a.tofile(output) | |
| del a[:] | |
| # refresh the heap with new item from input iterator at index i | |
| item = next(sortiters[i], None) | |
| if item is not None: | |
| heapq.heappush(out, (item, i)) | |
| # finally, clean up tempfiles | |
| map(lambda f: os.unlink(f), tempsorts) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import sys | |
| import array | |
| BUFSIZE = 4096 # read 1000 * 4-byte (32-bit) numbers at a time | |
| input = sys.stdin | |
| largest = None | |
| buf = input.read(BUFSIZE) | |
| while buf: | |
| a = array.array('i', buf) | |
| for n in a: | |
| if a < largest: | |
| # found a new number that is larger than currently known largest number! | |
| print 0 | |
| sys.exit(-1) | |
| else: | |
| largest = a | |
| buf = input.read(BUFSIZE) | |
| # the input is sorted! | |
| print 1 | |
| sys.exit(0) |
Author
santa4nt
commented
Apr 17, 2013
Author
$ time ./genrandints.py | ./sortints.py > sortedints
real 0m3.803s
user 0m2.312s
sys 0m1.768s
And since verification has to be linear:
$ time ./genrandints.py | ./sortints.py | ./vrfsrtints.py
1
real 0m21.829s
user 0m21.117s
sys 0m1.840s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment