Last active
August 29, 2015 14:14
-
-
Save ogrisel/45eb5d8b491185a4e165 to your computer and use it in GitHub Desktop.
pickle main: working on a modern replacement for cloudpickle based on dill
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Load a dill payload file holding a (func, args, kwargs) tuple and print the
# result of the call.  An optional second file holds a dict of names that are
# set as attributes on the __main__ module before the payload is loaded.
#
# Usage: load_and_run.py PAYLOAD_FILE [MAIN_NAMESPACE_FILE]
#
# SECURITY: dill.load, like pickle.load, can execute arbitrary code while
# unpickling.  Only run this script on files produced by a trusted process.
import dill
import sys

if len(sys.argv) < 2:
    # Fail with an explicit message instead of a bare IndexError.
    sys.exit('usage: load_and_run.py PAYLOAD_FILE [MAIN_NAMESPACE_FILE]')

payload_filename = sys.argv[1]
if len(sys.argv) > 2:
    print('Restoring __main__ context')
    main_filename = sys.argv[2]
    with open(main_filename, 'rb') as f:
        main_ns = dill.load(f)
    main_module = sys.modules['__main__']
    # The namespace must be in place before the payload is unpickled.
    for k, v in main_ns.items():
        print("setting __main__.{} to {}".format(k, v))
        setattr(main_module, k, v)

with open(payload_filename, 'rb') as f:
    print("Loading callable and arguments")
    func, args, kwargs = dill.load(f)

# Flush so this line is emitted before any output produced by the call.
print("Executing call:", flush=True)
print(func(*args, **kwargs))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gc
import os.path as op
import shutil
import subprocess
import sys
import tempfile
import types
from collections import deque
from itertools import chain
from math import log, exp

import dill
from dill.detect import globalvars
# Handle on the module the interpreter registered as '__main__' (None if
# there is no such entry in sys.modules).
_main_module = sys.modules.get('__main__')

# Module-level value read by the sample functions below.
CONSTANT = 1

# Referents of these types are not followed by the dependency traversal.
SKIPPED_TYPES = (
    types.ModuleType,
    types.BuiltinFunctionType,
    types.BuiltinMethodType,
)
def some_function(x, y):
    """Return ``log(CONSTANT + abs(x + y))``.

    Reads the module-level ``CONSTANT``.
    """
    return log(CONSTANT + abs(x + y))
# Second module-level name bound to the very same function object.
some_function_alias = some_function
def some_other_function(a, b):
    """Return ``some_function(exp(a - b), a ** 2 - b ** 2)``."""
    return some_function(exp(a - b), a ** 2 - b ** 2)
| def _is_from_main(obj): | |
| return getattr(obj, '__module__', None) == '__main__' | |
def _named_dependencies(obj, ignore=None, max_depth=100,
                        module_name='__main__'):
    """Collect the global variables needed by objects reachable from `obj`.

    The object graph is walked breadth-first starting at `obj`.  For every
    visited object whose ``__module__`` equals `module_name`, the mapping
    returned by ``dill.detect.globalvars`` is merged into the result.

    Parameters
    ----------
    obj : object
        Root of the traversal.
    ignore : set of int, optional
        ``id()`` values of objects that must not be visited.  The set is
        updated in place with the id of every object visited.
    max_depth : int
        Objects at this depth are still inspected but their referents are
        not followed.
    module_name : str
        Only objects defined in this module contribute names.

    Returns
    -------
    dict
        Maps global variable names to their values.
    """
    queue = deque([(obj, 0)])
    named_dependencies = {}
    if ignore is None:
        ignore = set()
    gc.collect()
    while queue:
        candidate, depth = queue.popleft()
        if id(candidate) in ignore:
            continue
        ignore.add(id(candidate))  # consider each candidate only once

        # Collect dependencies from `obj` that are defined in the module of
        # interest (typically __main__) and therefore cannot be imported
        # by reference.
        candidate_globalvars = None
        candidate_module = getattr(candidate, '__module__', None)
        if candidate_module and candidate_module == module_name:
            candidate_globalvars = globalvars(candidate)
            named_dependencies.update(candidate_globalvars)

        if depth >= max_depth:
            continue

        # Function and method have reference to globals namespace but
        # their actual dependencies are managed via dill.detect.globalvars
        globals_namespace = getattr(candidate, '__globals__', None)
        if globals_namespace is not None:
            ignore.add(id(globals_namespace))

        # Reuse the mapping computed above rather than calling globalvars
        # a second time for the same candidate.
        if candidate_globalvars is None:
            candidate_globalvars = globalvars(candidate)

        for referent in chain(gc.get_referents(candidate),
                              candidate_globalvars.values()):
            if (id(referent) not in ignore
                    and not isinstance(referent, SKIPPED_TYPES)):
                queue.append((referent, depth + 1))
    return named_dependencies
# Module-level lambda that refers to another function of this module.
f = lambda x: some_other_function(x, x)
class A(object):
    """Callable sample class that stores its constructor arguments."""

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs

    def some_method(self, x):
        """Print the stored arguments and return ``some_function(x, x + 1)``."""
        print(self.args)
        print(self.kwargs)
        return some_function(x, x + 1)

    def __call__(self, x):
        """Delegate to :meth:`some_method`."""
        return self.some_method(x)

    # NOTE(review): the flattened listing lost its indentation; nesting B
    # inside A is inferred from the `A.B()` reference in the commented-out
    # examples of this file -- confirm against the original.
    class B(object):
        """Sample nested class."""

        def some_other_method(self, x):
            """Return ``log(x)``."""
            return log(x)
class C(object):
    """Sample class whose method calls the module-level lambda ``f``."""

    def some_method(self, x):
        """Return ``f(x)``."""
        return f(x)
def make_closure(a):
    """Return a function of ``x`` computing ``CONSTANT + a + x``.

    The returned function closes over `a` and reads the module-level
    ``CONSTANT`` when it is called.
    """
    def closed(x):
        return CONSTANT + a + x
    return closed
def make_generator(x):
    """Yield ``i ** 2 + CONSTANT`` for each ``i`` in ``range(x)``."""
    for i in range(x):
        yield i ** 2 + CONSTANT
# Module-level generator object; nothing is consumed from it here.
g = make_generator(10)
| # print(_named_dependencies((log, [1], 2))) | |
| # print(_named_dependencies([some_other_function])) | |
| # print(_named_dependencies({'a': some_function_alias})) | |
| # print(_named_dependencies(f)) | |
| # print(_named_dependencies([A()])) | |
| # print(_named_dependencies((A(A.B()), 1, None, {'a': [A]}))) | |
| # print(_named_dependencies(lambda x: C())) | |
| # print(_named_dependencies(C.some_method)) | |
| # print(_named_dependencies((C().some_method, 1))) | |
| # print(_named_dependencies(make_closure(2))) | |
| # print(_named_dependencies((lambda x: lambda y: some_function_alias(1, x))(0))) | |
| # print(_named_dependencies(g)) | |
def _dump(obj, filename):
    """Pickle `obj` with dill and, if needed, its named dependencies.

    `obj` is written to `filename`.  When ``_named_dependencies(obj)`` is
    non-empty, that mapping is written to ``filename + '.main.pkl'``.

    Returns
    -------
    list of str
        `filename`, followed by the dependency file name when one was
        written.
    """
    filenames = [filename]
    # Handles are not named `f` to avoid shadowing the module-level `f`.
    with open(filename, 'wb') as payload_file:
        dill.dump(obj, payload_file, byref=False)
    main_ns = _named_dependencies(obj)
    if main_ns:
        main_filename = filename + '.main.pkl'
        with open(main_filename, 'wb') as main_file:
            dill.dump(main_ns, main_file, byref=False)
        filenames.append(main_filename)
    return filenames
def subprocess_callpy(func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` through ``load_and_run.py`` in a child.

    The call is pickled into a temporary directory with `_dump`, then
    ``load_and_run.py`` (resolved relative to the current working
    directory) is run on the resulting files.  The combined stdout/stderr
    of the child is printed.  A non-zero exit status is reported but not
    re-raised.  The temporary directory is always removed.
    """
    # Create the directory before entering the try block: if mkdtemp itself
    # fails there is nothing to clean up, and `tempdir` would be unbound in
    # the finally clause.
    tempdir = tempfile.mkdtemp()
    try:
        filename = op.join(tempdir, 'pickle.pkl')
        filenames = _dump((func, args, kwargs), filename)
        try:
            # Use the running interpreter rather than whichever `python`
            # happens to be first on PATH.
            cmd = [sys.executable, 'load_and_run.py'] + filenames
            out = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            print(out.decode('utf-8'))
        except subprocess.CalledProcessError as e:
            print('Subprocess crashed with:')
            print(e.output.decode('utf-8'))
            # sys.exit(1)
    finally:
        shutil.rmtree(tempdir)
# Demo: run a variety of callables defined in this module in a subprocess.
subprocess_callpy(some_other_function, 1, 2)
subprocess_callpy(some_function_alias, 1, 2)
subprocess_callpy(f, 2)
subprocess_callpy(A(1, b=2), 1)
subprocess_callpy(A(1, b=2).some_method, 1)
subprocess_callpy(lambda x: some_function(1, x), 1)
subprocess_callpy(lambda x: some_function_alias(1, x), 1)
subprocess_callpy(make_closure(1), 0)
subprocess_callpy((lambda x: lambda y: some_function_alias(1, x))(0), 1)

# Not working:
# subprocess_callpy(C().some_method, 1)
# subprocess_callpy(lambda x: f(x), 2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment