Created
August 18, 2012 03:39
-
-
Save JoshRosen/3384191 to your computer and use it in GitHub Desktop.
Flattening data structures that contain Pickled objects
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Say that we're storing pickled Python objects in a complicated Java data | |
structure, which might contain nested scala.Tuple2s and java.util.Lists. | |
We may be able to create a pickled representation of the Java data structure | |
based on the pickled objects that it contains, allowing the Python consumer to | |
deserialize the nested object in a single unpickle call. | |
This file contains a prototype of this idea. It has only been tested with | |
Pickle protocol version 2. | |
""" | |
from pickle import EMPTY_TUPLE, TUPLE, TUPLE1, TUPLE2, TUPLE3, STOP, \ | |
EMPTY_LIST, MARK, APPENDS, PROTO | |
try: | |
import cPickle as pickle | |
except ImportError: | |
import pickle | |
# Pickle protocol version that this prototype emits and accepts.
PICKLE_VER = 2
# Two-byte header (PROTO opcode followed by the version byte) that starts
# every pickle of that protocol.  The version byte is built through bytearray
# so the result is a byte string on both Python 2 and Python 3; chr() yields
# text on Python 3, which cannot be concatenated with the bytes opcode.
PROTO2 = PROTO + bytes(bytearray([PICKLE_VER]))
def strip_pickle(pickled):
    """
    Return the opcode body of a pickle, without its header and terminator.

    `pickled` must be a byte string that starts with PROTO2 (the PROTO
    opcode plus the protocol version byte) and ends with the STOP opcode.
    The returned body can be embedded inside a larger pickle stream.

    Raises AssertionError if the header or the trailing STOP is missing.

    >>> pickle.dumps(1, PICKLE_VER) == b'\\x80\\x02K\\x01.'
    True
    >>> strip_pickle(pickle.dumps(1, PICKLE_VER)) == b'K\\x01'
    True
    """
    # The checks are explicit raises rather than assert statements so that
    # they are still performed when Python runs with -O.  AssertionError is
    # kept as the exception type so existing callers see the same error.
    if pickled[:2] != PROTO2:
        raise AssertionError(
            "strip_pickle(): pickle does not start with the PROTO header")
    # Compare one-byte slices: indexing a bytes object on Python 3 yields an
    # int, which never compares equal to the STOP opcode.
    if pickled[-1:] != STOP:
        raise AssertionError(
            "strip_pickle(): pickle does not end with the STOP opcode")
    return pickled[2:-1]
def flatten_pickle(x):
    """
    Build one complete pickle out of a nested structure of pickled objects.

    `x` may be an individually pickled object (as produced by
    pickle.dumps(obj, PICKLE_VER)), or a tuple or list whose elements are
    themselves such pickles, tuples or lists.  The result is wrapped in the
    PROTO2 header and the STOP opcode, so a single pickle.loads() call
    rebuilds the whole structure.

    >>> one = pickle.dumps(1, PICKLE_VER)
    >>> tup = pickle.dumps((2, 3), PICKLE_VER)
    >>> pickle.loads(flatten_pickle((one, tup)))
    (1, (2, 3))
    >>> pickle.loads(flatten_pickle([one, one]))
    [1, 1]
    >>> pickle.loads(flatten_pickle([one]))
    [1]
    >>> pickle.loads(flatten_pickle(()))
    ()
    >>> pickle.loads(flatten_pickle((one,)))
    (1,)
    >>> pickle.loads(flatten_pickle((one, one)))
    (1, 1)
    >>> pickle.loads(flatten_pickle((one, one, one)))
    (1, 1, 1)
    >>> pickle.loads(flatten_pickle((one, one, one, one)))
    (1, 1, 1, 1)
    >>> hello = pickle.dumps("hello", PICKLE_VER)
    >>> world = pickle.dumps("world", PICKLE_VER)
    >>> pickle.loads(flatten_pickle((hello, world)))
    ('hello', 'world')
    >>> pickle.loads(flatten_pickle([one, tup, (tup, tup), one]))
    [1, (2, 3), ((2, 3), (2, 3)), 1]
    >>> obj = pickle.dumps(object(), PICKLE_VER)
    >>> [type(x) for x in pickle.loads(flatten_pickle([obj, obj]))]
    [<type 'object'>, <type 'object'>]
    """
    # Opcode stream for the structure itself, without header or terminator.
    body = _flatten_pickle(x)
    return PROTO2 + body + STOP
def _flatten_pickle(x): | |
if isinstance(x, tuple): | |
l = len(x) | |
if l == 0: | |
return EMPTY_TUPLE | |
elif l == 1: | |
return _flatten_pickle(x[0]) + TUPLE1 | |
elif l == 2: | |
return ''.join(_flatten_pickle(y) for y in x) + TUPLE2 | |
elif l == 3: | |
return ''.join(_flatten_pickle(y) for y in x) + TUPLE3 | |
else: | |
ops = [] | |
ops.append(MARK) | |
ops.extend(_flatten_pickle(y) for y in x) | |
ops.append(TUPLE) | |
return ''.join(ops) | |
elif isinstance(x, list): | |
ops = [] | |
ops.append(EMPTY_LIST) | |
ops.append(MARK) | |
ops.extend(_flatten_pickle(y) for y in x) | |
ops.append(APPENDS) | |
return ''.join(ops) | |
elif isinstance(x, str): | |
return strip_pickle(x) | |
else: | |
raise ValueError( | |
"_flatten_pickle(): unsupported type %s" % str(type(x))) | |
def _test(): | |
import doctest | |
doctest.testmod() | |
if __name__ == "__main__": | |
_test() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment