Skip to content

Instantly share code, notes, and snippets.

@mynameisfiber
Created April 15, 2014 18:21
Show Gist options
  • Select an option

  • Save mynameisfiber/10755533 to your computer and use it in GitHub Desktop.

Select an option

Save mynameisfiber/10755533 to your computer and use it in GitHub Desktop.
The pains of multiprocessing and copy-on-write
import numpy as np
import multiprocessing
import ctypes
import mmap
# Shape used by every demo array: one row per worker task.
SHAPE = (4, 10)
# Number of bytes needed to back one SHAPE-sized uint8 array. ``itemsize`` is
# the per-element byte width; a NumPy scalar's ``.size`` is its element count
# (always 1) and only coincidentally equals the byte width for 1-byte dtypes.
NBYTES = SHAPE[0] * SHAPE[1] * np.dtype(np.uint8).itemsize

# 1) Ordinary NumPy array allocated by NumPy itself.
data_normal = np.zeros(SHAPE, dtype=np.uint8)

# The important piece in this whole mystery is the MAP_SHARED memory flag.
# Normally, memory that is copied when the process os.fork()s is set to
# copy-on-write, which essentially gives each fork its own private memory
# space. For writes made in a child to be visible to the parent, the chunk of
# memory has to be explicitly marked as shared.

# 2) Anonymous mmap (fileno -1) with MAP_PRIVATE: still private to each fork.
data_private_mmap = mmap.mmap(-1, NBYTES, flags=mmap.MAP_PRIVATE)
data_private = np.frombuffer(data_private_mmap, dtype=np.uint8)
# Assign .shape rather than calling .resize() to ensure no copy is made and
# the array keeps pointing at the mmap's memory.
data_private.shape = SHAPE

# 3) Anonymous mmap with MAP_SHARED: the same pages are visible to all forks.
data_shared_mmap = mmap.mmap(-1, NBYTES, flags=mmap.MAP_SHARED)
data_shared = np.frombuffer(data_shared_mmap, dtype=np.uint8)
data_shared.shape = SHAPE

# 4) multiprocessing.RawArray wrapped (without copying) in a NumPy array.
data_shared_array = multiprocessing.RawArray(ctypes.c_uint8, SHAPE[0] * SHAPE[1])
data_shared2 = np.frombuffer(data_shared_array, dtype=np.uint8)
data_shared2.shape = SHAPE
def worker(i):
    """Fill row ``i`` of each module-level demo array with the value ``i + 1``.

    Meant to be mapped over row indices by a ``multiprocessing.Pool``; the
    writes happen in whichever process executes the call, so whether the
    parent sees them depends on how each array's memory is backed.

    Args:
        i: Row index to write; the value stored in that row is ``i + 1``.
    """
    data_normal[i, :] = i + 1
    data_private[i, :] = i + 1
    data_shared[i, :] = i + 1
    data_shared2[i, :] = i + 1
if __name__ == "__main__":
    # The pool is created only after the module-level arrays exist, so
    # forked worker processes start with those same arrays.
    # NOTE(review): the demonstration presumably relies on the "fork" start
    # method; confirm behaviour on platforms that default to "spawn".
    pool = multiprocessing.Pool()
    try:
        # One task per row; map() blocks until every worker call returns.
        pool.map(worker, range(4))
    finally:
        # Shut the workers down explicitly instead of leaving them to
        # interpreter teardown.
        pool.close()
        pool.join()

    # Print each array as seen by the parent process after the workers ran.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Normal:")
    print(data_normal)
    print("mmap MAP_PRIVATE:")
    print(data_private)
    print("mmap MAP_SHARED:")
    print(data_shared)
    print("multiprocessing.RawArray trick:")
    print(data_shared2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment