Created
March 25, 2013 23:19
-
-
Save minrk/5241793 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"gist_id": "5241793", | |
"name": "dill" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Using dill to pickle anything" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"IPython.parallel doesn't do much in the way of serialization.\n", | |
"It has custom zero-copy handling of numpy arrays,\n", | |
"but other than that, it doesn't do anything other than the bare minimum to make basic interactively defined functions and classes sendable.\n", | |
"\n", | |
"There are a few projects that extend pickle to make just about anything sendable, and one of these is [dill](http://www.cacr.caltech.edu/~mmckerns/dill).\n", | |
"\n", | |
"**NOTE:** *This requires IPython 1.0-dev* and latest dill, currently only available from:\n", | |
" \n", | |
" pip install dill-0.2a.dev-20120503.tar.gz" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"First, as always, we create our" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def make_closure(a):\n", | |
" \"\"\"make a function with a closure, and return it\"\"\"\n", | |
" def has_closure(b):\n", | |
" return a * b\n", | |
" return has_closure" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"closer = make_closure(5)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"closer(2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"10" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pickle" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Without help, pickle can't deal with closures" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pickle.dumps(closer)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "PicklingError", | |
"evalue": "Can't pickle <function has_closure at 0x10b6ac140>: it's not found as __main__.has_closure", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mPicklingError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-5-2dad7d7aaa8a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcloser\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;32m/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/pickle.py\u001b[0m in \u001b[0;36mdumps\u001b[1;34m(obj, protocol)\u001b[0m\n\u001b[0;32m 1372\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mprotocol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1373\u001b[0m \u001b[0mfile\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mStringIO\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1374\u001b[1;33m \u001b[0mPickler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mprotocol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1375\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mfile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetvalue\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1376\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32m/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/pickle.py\u001b[0m in \u001b[0;36mdump\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 222\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mproto\u001b[0m \u001b[1;33m>=\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 223\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mPROTO\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mchr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mproto\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 224\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 225\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mSTOP\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 226\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32m/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 284\u001b[0m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdispatch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 285\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 286\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 287\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 288\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32m/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/pickle.py\u001b[0m in \u001b[0;36msave_global\u001b[1;34m(self, obj, name, pack)\u001b[0m\n\u001b[0;32m 746\u001b[0m raise PicklingError(\n\u001b[0;32m 747\u001b[0m \u001b[1;34m\"Can't pickle %r: it's not found as %s.%s\"\u001b[0m \u001b[1;33m%\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 748\u001b[1;33m (obj, module, name))\n\u001b[0m\u001b[0;32m 749\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 750\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;31mPicklingError\u001b[0m: Can't pickle <function has_closure at 0x10b6ac140>: it's not found as __main__.has_closure" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"But after we import dill, magic happens" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import dill" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pickle.dumps(closer)[:64] + '...'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"\"cdill.dill\\n_load_type\\np0\\n(S'FunctionType'\\np1\\ntp2\\nRp3\\n(cdill.dill...\"" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"So from now on, pretty much everything is pickleable." | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Now use this in IPython.parallel" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"As usual, we start by creating our Client and View" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from IPython import parallel\n", | |
"rc = parallel.Client()\n", | |
"view = rc.load_balanced_view()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We can use dill to allow IPython.parallel to send anything.\n", | |
"\n", | |
"Step 1. Disable IPython's special handling of function objects (nothing to work around anymore):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from types import FunctionType\n", | |
"from IPython.utils.pickleutil import can_map\n", | |
"\n", | |
"can_map.pop(FunctionType, None)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"IPython.utils.pickleutil.CannedFunction" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Step 2. Switch the serialize module to use Python pickle instead of cPickle (dill extends `pickle` to be able to dump anything, but this doesn't work with the optimized cPickle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from IPython.kernel.zmq import serialize\n", | |
"serialize.pickle = pickle" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"And that's it! Now we can send closures and other previously non-pickleables to our engines." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"view.apply_sync(closer, 3)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 11, | |
"text": [ | |
"15" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"But wait, there's more!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"view.apply_sync(make_closure, 2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "RemoteError", | |
"evalue": "ValueError(Sorry, cannot pickle code objects with closures)", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)\u001b[1;32m/Users/minrk/dev/ip/mine/IPython/kernel/zmq/serialize.pyc\u001b[0m in \u001b[0;36mserialize_object\u001b[1;34m(obj, buffer_threshold, item_threshold)\u001b[0m", | |
"\u001b[0;32m 107\u001b[0m \u001b[0mbuffers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_extract_buffers\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuffer_threshold\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[0;32m 108\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[1;32m--> 109\u001b[1;33m \u001b[0mbuffers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minsert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcobj\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[0m\u001b[0;32m 110\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mbuffers\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[0;32m 111\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[1;32m/Users/minrk/dev/ip/mine/IPython/utils/codeutil.pyc\u001b[0m in \u001b[0;36mreduce_code\u001b[1;34m(co)\u001b[0m", | |
"\u001b[0;32m 32\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreduce_code\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mco\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[0;32m 33\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mco_freevars\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mco_cellvars\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[1;32m---> 34\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Sorry, cannot pickle code objects with closures\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[0m\u001b[0;32m 35\u001b[0m args = [co.co_argcount, co.co_nlocals, co.co_stacksize,", | |
"\u001b[0;32m 36\u001b[0m \u001b[0mco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mco_flags\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mco_code\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mco_consts\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mco_names\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", | |
"\u001b[1;31mValueError\u001b[0m: Sorry, cannot pickle code objects with closures" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We can also apply very same patch to the engines, then we can send previously unpickleable objects both ways.\n", | |
"Here's a way to do dill patch the pickle everywhere at once:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%%px --local\n", | |
"\n", | |
"# load dill\n", | |
"import dill\n", | |
"\n", | |
"# disable special function handling\n", | |
"from types import FunctionType\n", | |
"from IPython.utils.pickleutil import can_map\n", | |
"\n", | |
"can_map.pop(FunctionType, None)\n", | |
"\n", | |
"# fallback to pickle instead of cPickle, so that dill can take over\n", | |
"import pickle\n", | |
"from IPython.kernel.zmq import serialize\n", | |
"serialize.pickle = pickle" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"remote_closure = view.apply_sync(make_closure, 4)\n", | |
"remote_closure(5)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
"20" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"At this point, we can send/recv all kinds of stuff" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def outer(a):\n", | |
" def inner(b):\n", | |
" def inner_again(c):\n", | |
" return c * b * a\n", | |
" return inner_again\n", | |
" return inner" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"So outer returns a function with a closure, which returns a function with a closure.\n", | |
"\n", | |
"Now, we can resolve the first closure on the engine, the second here, and the third on a different engine,\n", | |
"after passing through a lambda we define here and call there, just for good measure." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"view.apply_sync(lambda f: f(3),view.apply_sync(outer, 1)(2))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 16, | |
"text": [ | |
"6" | |
] | |
} | |
], | |
"prompt_number": 16 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment