Skip to content

Instantly share code, notes, and snippets.

@Shide
Last active March 9, 2018 13:37
Show Gist options
  • Save Shide/ae5a2925f50076475b7e5223ab1d48b7 to your computer and use it in GitHub Desktop.
Save Shide/ae5a2925f50076475b7e5223ab1d48b7 to your computer and use it in GitHub Desktop.
Generator File Pickled
class IterPickledFile(list):
    """Lazy, list-like view over a file of consecutively pickled objects.

    On construction the file is scanned once to build an index of byte
    offsets (one per pickled record); afterwards records are loaded on
    demand, so only one object lives in memory at a time.  The file itself
    is never modified (unless ``del_file`` is set), so several instances
    may safely index the same file.

    Parameters
    ----------
    file_name : str
        Path to a file produced by repeated ``pickle.dump`` calls.
    seek_list : list or None
        Pre-computed offset index; if falsy, the file is scanned to build it.
    del_file : bool
        If true, the file is removed when this object is garbage-collected.
    """

    def __init__(self, file_name, seek_list=None, del_file=False):
        self.file_name = file_name
        self._del_file = bool(del_file)
        seek_list = seek_list if isinstance(seek_list, list) else []
        if not seek_list:
            # Pickle data is binary: 'rb' is required (text mode corrupts
            # the stream on Windows and fails entirely on Python 3).
            with open(self.file_name, 'rb') as f:
                while True:
                    offset = f.tell()
                    try:
                        pickle.load(f)
                    except (EOFError, pickle.UnpicklingError):
                        # End of stream (or trailing garbage): stop indexing.
                        break
                    # Record the offset only AFTER a successful load, so a
                    # spurious EOF offset never ends up in the index.
                    seek_list.append(offset)
        self._seek_list = seek_list

    def _load_at(self, offsets):
        """Yield the unpickled object stored at each byte offset, in order."""
        with open(self.file_name, 'rb') as f:
            for offset in offsets:
                f.seek(offset)
                try:
                    yield pickle.load(f)
                except (EOFError, pickle.UnpicklingError):
                    break

    def __iter__(self):
        return self._load_at(self._seek_list)

    def __len__(self):
        return len(self._seek_list)

    def __getitem__(self, i):
        # Slices return a lazy generator; Python 3 routes slicing here
        # (it has no __getslice__), Python 2 still uses __getslice__ below.
        if isinstance(i, slice):
            return self._load_at(self._seek_list[i])
        try:
            offset = self._seek_list[i]  # supports negative indices
        except IndexError:
            raise IndexError("IterPickledFile index out of range")
        with open(self.file_name, 'rb') as f:
            f.seek(offset)
            try:
                return pickle.load(f)
            except (EOFError, pickle.UnpicklingError):
                # A stale/invalid offset behaves like a missing element.
                raise IndexError("IterPickledFile index out of range")

    def __getslice__(self, index_start, index_end):
        # Python 2 compatibility only; delegates to the slice-aware path.
        return self._load_at(self._seek_list[index_start:index_end])

    def __delitem__(self, index):
        # Deleting only drops the offset from the index; the file is untouched.
        del self._seek_list[index]

    def __delslice__(self, index_start, index_end):
        # Python 2 compatibility only.
        del self._seek_list[index_start:index_end]

    def __del__(self):
        # getattr guards against a partially-constructed instance
        # (e.g. __init__ raised before _del_file was set).
        if getattr(self, '_del_file', False):
            try:
                os.remove(self.file_name)
            except OSError:
                # Best-effort cleanup; never raise during finalization.
                pass

    def __repr__(self):
        return "<%s(file='%s', del=%s, len=%s)>" % (
            self.__class__.__name__, self.file_name, self._del_file, len(self._seek_list)
        )
@Shide
Copy link
Author

Shide commented Mar 9, 2018

Class that lazily deserializes, via a generator, data from a file previously filled with pickled objects.
You can use this when you don't have memory to let the list of objects live in memory.
This class acts like a list, but the file itself remains untouched, so you can create multiple instances targeting the same file.

E.g.: Serialize X objects/data/etc. onto a file:

import pickle
import tempfile
file_obj = tempfile.NamedTemporaryFile(delete=False)
with file_obj as f:
    for dat in datas:
        pickle.dump(dat, f, pickle.HIGHEST_PROTOCOL)

Now, let's catch the file and process it. Lazy loading without using a lot of RAM.
Data is unpacked with pickle.load.
E.g.:

myIter = IterPickledFile(file_obj.name, del_file=False)
del myIter[2]
for x in myIter[0:3]:
    print(x)

Feel free to copy/modify this code at your own risk.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment