Created
May 20, 2014 17:24
-
-
Save jColeChanged/06cecc001d3004a3c45a to your computer and use it in GitHub Desktop.
This is my personal attempt at sorting a million 32-bit integers in 2MB of RAM.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from random import randint | |
from sys import maxint | |
import tempfile | |
import array | |
def write_a_million_ints(): | |
f = file("ints.bin", "wb") | |
ints = array.array('i', [randint(-maxint + 1, maxint - 1) for i in range(1000000)]) | |
ints.tofile(f) | |
def read_ints(f): | |
while True: | |
a = array.array('i') | |
a.fromstring(f.read(4000)) | |
if not a: | |
break | |
for x in a: | |
yield x | |
def merge(file_one, file_two): | |
""" | |
I think the fact that I'm using generators made this really messy. | |
""" | |
merge_file = tempfile.TemporaryFile() | |
one_gen = read_ints(file_one) | |
two_gen = read_ints(file_two) | |
merge_array = array.array('i') | |
one_next = None | |
two_next = None | |
while True: | |
if one_next is None: | |
try: | |
one_next = one_gen.next() | |
except StopIteration: | |
pass | |
if two_next is None: | |
try: | |
two_next = two_gen.next() | |
except StopIteration: | |
pass | |
if one_next is None and two_next is None: | |
merge_array.tofile(merge_file.file) | |
merge_file.seek(0) | |
return merge_file | |
elif one_next is None: | |
merge_array.append(two_next) | |
two_next = None | |
elif two_next is None: | |
merge_array.append(one_next) | |
one_next = None | |
elif one_next >= two_next: | |
merge_array.append(two_next) | |
two_next = None | |
else: | |
merge_array.append(one_next) | |
one_next = None | |
if len(merge_array) >= 1000: | |
merge_array.tofile(merge_file.file) | |
del merge_array | |
merge_array = array.array('i') | |
def combine(files): | |
out_file = file("sorted.txt", "w") | |
# merge_file = reduce(merge, files) | |
merge_file = merge_sort_files(files) | |
for value in read_ints(merge_file): | |
out_file.write(str(value) + "\n") | |
def merge_sort_files(files): | |
if len(files) == 1: | |
return files[0] | |
if len(files) == 2: | |
return merge(files[0], files[1]) | |
mid = len(files) / 2 | |
return merge(merge_sort_files(files[:mid]), merge_sort_files(files[mid:])) | |
def sort_a_million_ints(): | |
f = file("ints.bin", "rb") | |
temp_files = [] | |
while True: | |
a = array.array('i') | |
a.fromstring(f.read(40000)) # read 10,000 ints | |
if not a: # if array is empty | |
break | |
temp_file = tempfile.TemporaryFile() | |
array.array('i', sorted(a)).tofile(temp_file.file) | |
temp_file.seek(0) | |
temp_files.append(temp_file) | |
combine(temp_files) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment