Last active
September 26, 2024 13:16
-
-
Save ksamuel/2352d2c10d89f9bed3da6d45ac4c8ce3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import heapq
import sys
import tempfile
from array import array
from contextlib import ExitStack
def int_array_from_file(int_file, buffer_size=4000):
    """Read the next chunk of C ints from a binary file.

    Args:
        int_file: Binary file-like object positioned at an item boundary.
        buffer_size: Maximum number of bytes to read. A positive value is
            rounded down to a whole number of items (but never below one
            item) so the bytes read always decode cleanly.

    Returns:
        An ``array("i")`` holding the integers read; empty at end of file.

    Raises:
        ValueError: If the data read is not a whole number of items (for
            example, the file ends with a truncated item).
    """
    int_array = array("i")
    item_size = int_array.itemsize

    # frombytes() rejects input whose length is not a multiple of the item
    # size, so never ask for a partial item. Non-positive / None sizes keep
    # the plain read() semantics untouched.
    if buffer_size is not None and buffer_size > 0:
        buffer_size = max(buffer_size - buffer_size % item_size, item_size)

    int_array.frombytes(int_file.read(buffer_size))
    return int_array
def read_buffered_ints(int_file, buffer_size=4000):
    """Yield every integer stored in a binary file, one at a time.

    The file is consumed in chunks of at most ``buffer_size`` bytes via
    ``int_array_from_file``, so only one chunk is held in memory at once.

    Args:
        int_file: Binary file-like object containing C ints.
        buffer_size: Maximum number of bytes to read per chunk.

    Yields:
        The integers in the order they are stored in the file.
    """
    while True:
        int_array = int_array_from_file(int_file, buffer_size)
        if not int_array:
            # An empty chunk means the file is exhausted.
            return
        yield from int_array
# External merge sort: "random_numbers.bin" is split into sorted runs stored
# in temporary files, and the runs are then merged into "sorted_numbers.bin".
with open("random_numbers.bin", "rb") as int_file, open(
    "sorted_numbers.bin", "wb"
) as final_result, ExitStack() as temp_files:
    int_arrays_generators = []

    # Phase 1: load the input chunk by chunk and write one sorted run per chunk.
    while True:
        int_array = int_array_from_file(int_file, 40000)
        if not int_array:
            break

        # Register the temp file with the ExitStack so it is closed when the
        # `with` block ends, instead of leaking until interpreter shutdown.
        temp_file = temp_files.enter_context(tempfile.TemporaryFile())

        # Sort the chunk, convert it back to bytes and store it in the file.
        array("i", sorted(int_array)).tofile(temp_file)

        # Rewind, then keep a generator ready to stream the run back lazily.
        temp_file.seek(0)
        generator = read_buffered_ints(temp_file)
        int_arrays_generators.append(generator)

    # Phase 2: heapq.merge yields all integers from the sorted runs in order.
    int_buffer = array("i")
    for integer in heapq.merge(*int_arrays_generators):
        # Accumulate the output and flush it to the result file in batches.
        int_buffer.append(integer)
        if len(int_buffer) >= 1000:
            int_buffer.tofile(final_result)
            del int_buffer[:]  # empty the array, it reached the size limit

    # Flush the last batch if it did not reach 1000 items.
    if int_buffer:
        int_buffer.tofile(final_result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment