Created
January 22, 2019 15:47
-
-
Save christopherlovell/b26d522e4ca1d1526340cfb52ca35e37 to your computer and use it in GitHub Desktop.
Convert a pickled dict whose keys and values are byte strings into string format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Taken from https://stackoverflow.com/questions/22840092/unpickling-data-from-python-2-with-unicode-strings-in-python-3
Updated to also handle numpy bytes_ objects
""" | |
import numpy as np | |
def bytes_to_unicode(ob):
    """Recursively decode UTF-8 byte strings inside *ob* into ``str``.

    Handles plain ``bytes`` as well as subclasses of it (such as numpy's
    ``bytes_`` scalar) wherever they appear: as the object itself, as
    elements of a list/tuple, or as keys/values of a dict.

    Parameters
    ----------
    ob : object
        The object to convert. Only exact ``list``, ``tuple`` and ``dict``
        containers are descended into; anything else is left untouched.

    Returns
    -------
    object
        * ``bytes`` -> the decoded ``str``
        * ``list`` / ``tuple`` -> a new container of the same type
        * ``dict`` -> the *same* dict object, updated in place
        * anything else -> ``ob`` unchanged (a diagnostic line is printed)

    Raises
    ------
    UnicodeDecodeError
        If a byte string is not valid UTF-8.
    """

    def convert(item):
        # isinstance (not an exact type check) so that subclasses of bytes,
        # e.g. numpy.bytes_, are decoded as well.
        if isinstance(item, bytes):
            return item.decode('utf-8')
        if type(item) in (list, tuple, dict):
            return bytes_to_unicode(item)
        return item

    if isinstance(ob, bytes):
        return ob.decode('utf-8')

    t = type(ob)
    if t in (list, tuple):
        converted = [convert(i) for i in ob]
        return tuple(converted) if t is tuple else converted

    if t is dict:
        # Build the converted mapping first, then swap the contents in, so
        # the caller's dict keeps its identity and is never modified while
        # it is being iterated. Only byte-string keys are decoded; other
        # key types are kept as they are.
        converted = {
            (k.decode('utf-8') if isinstance(k, bytes) else k): convert(v)
            for k, v in ob.items()
        }
        ob.clear()
        ob.update(converted)
        return ob

    # Unsupported top-level type: report it and hand it back unchanged.
    print("unprocessed object: {0} {1}".format(t, ob))
    return ob
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment