Skip to content

Instantly share code, notes, and snippets.

@ksamuel
Last active September 26, 2024 13:16
Show Gist options
  • Save ksamuel/2352d2c10d89f9bed3da6d45ac4c8ce3 to your computer and use it in GitHub Desktop.
Save ksamuel/2352d2c10d89f9bed3da6d45ac4c8ce3 to your computer and use it in GitHub Desktop.
import heapq
import sys
import tempfile
from array import array
from contextlib import ExitStack
def int_array_from_file(int_file, buffer_size=4000):
    """Read one chunk of a binary file and decode it as an array of ints.

    Args:
        int_file: Binary file-like object; only its ``read`` method is used.
        buffer_size: Maximum number of bytes (not integers) to read.

    Returns:
        An ``array`` with typecode ``"i"`` holding the decoded values. The
        array is empty when ``read`` returns no bytes (e.g. at end of file).
    """
    raw_bytes = int_file.read(buffer_size)
    values = array("i")
    # frombytes interprets the bytes as native machine ints, so the chunk
    # length is expected to be a whole number of items.
    values.frombytes(raw_bytes)
    return values
def read_buffered_ints(int_file):
    """Lazily yield every integer stored in a binary file, one at a time.

    The file is consumed chunk by chunk through ``int_array_from_file``
    (with its default buffer size), so only one chunk is held in memory at
    any moment. Iteration stops as soon as a read produces an empty array.

    Args:
        int_file: Binary file-like object positioned where reading should
            start.

    Yields:
        The integers decoded from the file, in file order.
    """
    chunk = int_array_from_file(int_file)
    while chunk:
        yield from chunk
        chunk = int_array_from_file(int_file)
# External merge sort: sort the ints stored in "random_numbers.bin" into
# "sorted_numbers.bin" without ever loading the whole input in memory.
#
# Phase 1 splits the input into chunks, sorts each chunk in memory and
# spills it to its own temporary file. Phase 2 lazily merges all the sorted
# temporary files into the final result.
#
# The ExitStack owns every temporary file: they must stay open for the whole
# merge, and are all closed (and therefore deleted) when the block exits,
# whether normally or because of an error.
with open("random_numbers.bin", "rb") as int_file, open(
    "sorted_numbers.bin", "wb"
) as final_result, ExitStack() as temp_files:

    int_arrays_generators = []

    # Phase 1: load the numbers chunk by chunk (40000 bytes at a time).
    while True:
        int_array = int_array_from_file(int_file, 40000)

        if not int_array:
            break

        # For each chunk, create a temp file registered on the ExitStack so
        # it is guaranteed to be closed once the sort is over...
        temp_file = temp_files.enter_context(tempfile.TemporaryFile())

        # ...sort the chunk, convert it back into bytes and write it to the
        # temp file...
        array("i", sorted(int_array)).tofile(temp_file)

        # ...then rewind the file and keep a generator ready to stream those
        # sorted ints back to us.
        temp_file.seek(0)
        generator = read_buffered_ints(temp_file)
        int_arrays_generators.append(generator)

    # Phase 2: heapq.merge lazily yields all the integers IN ORDER, pulling
    # from each sorted generator only as needed.
    int_buffer = array("i")
    for integer in heapq.merge(*int_arrays_generators):
        # Accumulate the merged ints in a small array and, once it is big
        # enough, append its content to the final result file.
        int_buffer.append(integer)
        if len(int_buffer) >= 1000:
            int_buffer.tofile(final_result)
            del int_buffer[:]  # empty the array, it reached the size limit

    # Flush the last, partially filled buffer if it didn't reach 1000 items.
    if int_buffer:
        int_buffer.tofile(final_result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment