Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Last active August 29, 2015 14:14
Show Gist options
  • Save ogrisel/45eb5d8b491185a4e165 to your computer and use it in GitHub Desktop.
Save ogrisel/45eb5d8b491185a4e165 to your computer and use it in GitHub Desktop.
pickle main: working on a modern replacement for cloudpickle based on dill
#!/usr/bin/env python
import dill
import sys
def _load_and_run(argv):
    """Restore the pickled ``__main__`` context (if any) and run the call.

    ``argv[1]`` names a dill file holding a ``(func, args, kwargs)`` tuple.
    ``argv[2]``, when present, names a dill file holding a dict whose items
    are set as attributes on this process's ``__main__`` module before the
    payload is loaded.

    All working variables are function locals on purpose. When this logic
    ran at module level, its own variables (the ``f`` file handle, ``k``,
    ``v``, ``func``, ...) were globals of ``__main__`` and overwrote
    restored definitions of the same name: a restored ``f`` was rebound to
    the payload file object before the call was executed.

    NOTE: loading a dill/pickle file can execute arbitrary code. Only pass
    files produced by a trusted parent process.
    """
    if len(argv) < 2:
        # Exit with a readable message instead of a bare IndexError.
        sys.exit('usage: load_and_run.py PAYLOAD_FILE [MAIN_NAMESPACE_FILE]')
    payload_filename = argv[1]

    if len(argv) > 2:
        print('Restoring __main__ context')
        main_filename = argv[2]
        with open(main_filename, 'rb') as main_file:
            main_ns = dill.load(main_file)
        main_module = sys.modules['__main__']
        for name, value in main_ns.items():
            print("setting __main__.{} to {}".format(name, value))
            setattr(main_module, name, value)

    with open(payload_filename, 'rb') as payload_file:
        print("Loading callable and arguments")
        func, args, kwargs = dill.load(payload_file)

    # Flush so this line is emitted before anything the call itself writes.
    print("Executing call:", flush=True)
    print(func(*args, **kwargs))


if __name__ == '__main__':
    _load_and_run(sys.argv)
import dill
from dill.detect import globalvars
import gc
import sys
import os.path as op
import subprocess
import types
from math import log, exp
from itertools import chain
import tempfile
import shutil
# Module currently registered as '__main__' (``None`` if there is none).
_main_module = sys.modules.get('__main__')

# Module-level constant read by the sample functions in this file.
CONSTANT = 1

# Referents of these types are never queued by ``_named_dependencies``.
SKIPPED_TYPES = (types.ModuleType,
                 types.BuiltinFunctionType,
                 types.BuiltinMethodType)
def some_function(x, y):
    """Return ``log(CONSTANT + abs(x + y))``."""
    magnitude = abs(x + y)
    return log(CONSTANT + magnitude)


# Second module-level name bound to the very same function object.
some_function_alias = some_function
def some_other_function(a, b):
    """Return ``some_function(exp(a - b), a ** 2 - b ** 2)``."""
    first = exp(a - b)
    second = a ** 2 - b ** 2
    return some_function(first, second)
def _is_from_main(obj):
    """Tell whether ``obj`` reports ``'__main__'`` as its ``__module__``.

    Objects that expose no ``__module__`` attribute yield ``False``.
    """
    owner = getattr(obj, '__module__', None)
    return owner == '__main__'
def _named_dependencies(obj, ignore=None, max_depth=100,
                        module_name='__main__'):
    """Collect the global names that ``obj`` needs from ``module_name``.

    Walks breadth-first from ``obj``, following both ``gc.get_referents``
    and ``dill.detect.globalvars``. For every visited object whose
    ``__module__`` equals ``module_name``, the mapping returned by
    ``globalvars`` is merged into the result.

    Parameters
    ----------
    obj : object
        Root of the walk.
    ignore : set of int, optional
        ``id()`` values of objects to skip. The set is updated in place
        with the ids recorded during the walk. A fresh set is used when
        omitted.
    max_depth : int
        Objects found at this depth are still inspected, but their
        referents are not followed.
    module_name : str
        Module whose definitions cannot be imported by reference
        (typically ``'__main__'``).

    Returns
    -------
    dict
        Global variable names mapped to the objects they are bound to.
    """
    # Function-scope import keeps this block independent of the file header.
    from collections import deque

    # A deque pops from the left in O(1); ``list.pop(0)`` shifted the whole
    # queue on every iteration.
    queue = deque([(obj, 0)])
    named_dependencies = {}
    if ignore is None:
        ignore = set()
    gc.collect()
    while queue:
        candidate, depth = queue.popleft()
        if id(candidate) in ignore:
            continue
        ignore.add(id(candidate))  # consider each candidate only once

        # Only objects defined in the module of interest (typically
        # __main__) contribute names: they cannot be imported by reference.
        candidate_module = getattr(candidate, '__module__', None)
        in_module = bool(candidate_module) and candidate_module == module_name
        at_max_depth = depth >= max_depth
        if at_max_depth and not in_module:
            continue

        # Computed once and shared by the collection and traversal steps.
        candidate_globals = globalvars(candidate)
        if in_module:
            named_dependencies.update(candidate_globals)
        if at_max_depth:
            continue

        # Functions and methods reference their whole globals namespace, but
        # their actual dependencies are handled via dill.detect.globalvars.
        # For objects without ``__globals__`` this records ``id(None)``.
        ignore.add(id(getattr(candidate, '__globals__', None)))
        for referent in chain(gc.get_referents(candidate),
                              candidate_globals.values()):
            if (id(referent) not in ignore
                    and not isinstance(referent, SKIPPED_TYPES)):
                queue.append((referent, depth + 1))
    return named_dependencies
# Module-level lambda used as a pickling sample: it passes the same value as
# both arguments of ``some_other_function``.
f = lambda x: some_other_function(x, x)
class A(object):
    """Sample callable class that keeps its constructor arguments."""

    def __init__(self, *args, **kwargs):
        """Store positional arguments as ``args`` and keywords as ``kwargs``."""
        self.args, self.kwargs = args, kwargs

    def some_method(self, x):
        """Print the stored arguments, then return ``some_function(x, x + 1)``."""
        print(self.args)
        print(self.kwargs)
        successor = x + 1
        return some_function(x, successor)

    def __call__(self, x):
        """Delegate a call on the instance to ``some_method``."""
        return self.some_method(x)

    # NOTE(review): nesting inferred from the ``A.B()`` usage in the
    # commented-out examples of this file -- confirm against the original.
    class B(object):
        """Helper class exposing a single ``log`` wrapper."""

        def some_other_method(self, x):
            """Return ``log(x)``."""
            return log(x)
class C(object):
    """Sample class whose method calls the module-level lambda ``f``."""

    def some_method(self, x):
        """Return ``f(x)``; ``f`` is looked up as a global on each call."""
        outcome = f(x)
        return outcome
def make_closure(a):
    """Build a one-argument function returning ``CONSTANT + a + x``.

    ``a`` is captured by the closure; ``CONSTANT`` is read as a global each
    time the returned function is called.
    """
    def closed(x):
        return CONSTANT + a + x

    return closed
def make_generator(x):
    """Yield ``i ** 2 + CONSTANT`` for each ``i`` in ``range(x)``."""
    for i in range(x):
        squared = i ** 2
        yield squared + CONSTANT


# Generator object created here but not advanced.
g = make_generator(10)
# print(_named_dependencies((log, [1], 2)))
# print(_named_dependencies([some_other_function]))
# print(_named_dependencies({'a': some_function_alias}))
# print(_named_dependencies(f))
# print(_named_dependencies([A()]))
# print(_named_dependencies((A(A.B()), 1, None, {'a': [A]})))
# print(_named_dependencies(lambda x: C()))
# print(_named_dependencies(C.some_method))
# print(_named_dependencies((C().some_method, 1)))
# print(_named_dependencies(make_closure(2)))
# print(_named_dependencies((lambda x: lambda y: some_function_alias(1, x))(0)))
# print(_named_dependencies(g))
def _dump(obj, filename):
    """Pickle ``obj`` with dill, plus its named dependencies when it has any.

    ``obj`` is written to ``filename``. When ``_named_dependencies(obj)``
    returns a non-empty mapping, that mapping is written to
    ``filename + '.main.pkl'``.

    Returns the list of files written, with the payload file first.
    """
    written = [filename]
    with open(filename, 'wb') as payload_file:
        dill.dump(obj, payload_file, byref=False)

    context = _named_dependencies(obj)
    if not context:
        return written

    context_filename = filename + '.main.pkl'
    with open(context_filename, 'wb') as context_file:
        dill.dump(context, context_file, byref=False)
    written.append(context_filename)
    return written
def subprocess_callpy(func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` in a fresh Python subprocess.

    The ``(func, args, kwargs)`` tuple is pickled into a temporary
    directory with ``_dump``. ``load_and_run.py``, given as a relative
    path, is then executed on the resulting files. The child's combined
    stdout/stderr is printed. A non-zero exit status is reported on stdout
    but not raised. The temporary directory is removed in every case.
    """
    # Created before the ``try`` so that ``finally`` never sees an unbound
    # ``tempdir``, which would mask mkdtemp's own error with a NameError.
    tempdir = tempfile.mkdtemp()
    try:
        filename = op.join(tempdir, 'pickle.pkl')
        filenames = _dump((func, args, kwargs), filename)
        # Run the child under the interpreter executing this script rather
        # than whatever 'python' resolves to on PATH, so the pickles are
        # read by the same Python that wrote them.
        cmd = [sys.executable or 'python', 'load_and_run.py'] + filenames
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        print(out.decode('utf-8'))
    except subprocess.CalledProcessError as e:
        # Report the failure and return normally; the error is not re-raised.
        print('Subprocess crashed with:')
        print(e.output.decode('utf-8'))
    finally:
        shutil.rmtree(tempdir)
# Demo: run each sample callable through ``subprocess_callpy``.
# Plain module-level functions, including one reached through an alias.
subprocess_callpy(some_other_function, 1, 2)
subprocess_callpy(some_function_alias, 1, 2)
# Module-level lambda.
subprocess_callpy(f, 2)
# Callable instance and bound method.
subprocess_callpy(A(1, b=2), 1)
subprocess_callpy(A(1, b=2).some_method, 1)
# Inline lambdas that reference module-level functions.
subprocess_callpy(lambda x: some_function(1, x), 1)
subprocess_callpy(lambda x: some_function_alias(1, x), 1)
# Closures.
subprocess_callpy(make_closure(1), 0)
subprocess_callpy((lambda x: lambda y: some_function_alias(1, x))(0), 1)
# Not working (both cases call the module-level lambda ``f`` by its
# global name):
# subprocess_callpy(C().some_method, 1)
# subprocess_callpy(lambda x: f(x), 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment