Created
September 30, 2016 22:33
-
-
Save nicktimko/0be938e31a3071cc84df0b9da86a15d9 to your computer and use it in GitHub Desktop.
Rejected Ideas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Maybe they'll be used, one day...
class ArrayAutoAllocator(object):
    """Row-addressable 2-D array whose storage is allocated lazily in chunks.

    Rows are grouped into fixed-size chunks of ``chunk_size`` rows.  A chunk
    is only created the first time one of its rows is read or written.  Every
    chunk carries an extra leading column (column 0) holding the global row
    index; the remaining columns start out filled with ``fill_val``.

    Parameters
    ----------
    n_columns : int
        Number of data columns (the index column is added on top of this).
    fill_val : scalar, optional
        Value that marks a cell as "never written".
    chunk_size : int, optional
        Rows per chunk.  ``None`` (the default) means the module-level
        ``PREALLOC_ROWS``; it is resolved when the instance is created so the
        class can be defined before that constant exists.
    dtype : numpy dtype, optional
        Passed through to the numpy allocation calls.
    """

    def __init__(self, n_columns, fill_val=-1, chunk_size=None, dtype=None):
        if chunk_size is None:
            chunk_size = PREALLOC_ROWS
        self.n_columns = n_columns + 1  # for the index
        self.fill_val = fill_val
        self.chunk_size = chunk_size
        self.dtype = dtype
        self.chunks = {}  # chunk number -> ndarray of shape (chunk_size, n_columns)

    def __getitem__(self, idx):
        """Return the full row (index column included) for global row ``idx``."""
        chunk = self._get_chunk(idx)
        # Translate the global row number into a position inside the chunk.
        return chunk[idx % self.chunk_size]

    def __setitem__(self, key, val):
        """Store ``val`` at ``key == (row, column)``.

        ``column`` addresses the chunk directly, so column 0 is the index
        column and the data columns start at 1.
        """
        idx, col = key
        chunk = self._get_chunk(idx)
        chunk[idx % self.chunk_size, col] = val

    def _get_chunk(self, idx):
        """Return the chunk holding global row ``idx``, creating it if needed."""
        chunk_number = idx // self.chunk_size
        if chunk_number not in self.chunks:
            self.chunks[chunk_number] = self._new_array(chunk_number)
        return self.chunks[chunk_number]

    def _new_array(self, chunk_number):
        """Build a fresh chunk: ``fill_val`` everywhere, row indices in column 0."""
        shape = (self.chunk_size, self.n_columns)
        a = self.fill_val * np.ones(shape, dtype=self.dtype)
        first_row = self.chunk_size * chunk_number
        a[..., 0] = np.arange(first_row, first_row + self.chunk_size)
        return a

    def flatten(self):
        """Return a basic array: all allocated chunks stacked by chunk number.

        Chunks that were never touched are simply absent, so the index column
        may jump.  With no chunks allocated the result has zero rows.
        """
        size = self.chunk_size
        total = np.empty((size * len(self.chunks), self.n_columns), dtype=self.dtype)
        for i, chunk_number in enumerate(sorted(self.chunks)):
            total[size * i:size * (i + 1)] = self.chunks[chunk_number]
        return total

    def crush(self):
        """Return ``flatten()`` without the rows that still hold only ``fill_val``.

        A row is kept as soon as any data column differs from ``fill_val``.
        The comparison is element-wise on purpose: comparing row sums would
        drop written rows whose values merely add up to the fill total.
        """
        total = self.flatten()
        written = np.any(total[..., 1:] != self.fill_val, axis=1)
        return total[written]
# Default number of rows allocated per chunk.
PREALLOC_ROWS = 1000


class DataFramePreallocator(object):
    """Row-addressable DataFrame whose storage is allocated lazily in chunks.

    Rows are grouped into chunks of ``chunk_size`` consecutive integer labels.
    A chunk is only created the first time one of its rows is read or
    written, pre-filled with ``fill``.

    Parameters
    ----------
    fill : scalar or row-like
        Initial content.  A scalar is broadcast to every cell; anything else
        (list, tuple, dict, ...) is treated as one row and repeated for every
        row of the chunk.
    columns : list-like
        Column labels of every chunk.
    chunk_size : int, optional
        Rows per chunk, ``PREALLOC_ROWS`` by default.
    """

    def __init__(self, fill, columns, chunk_size=PREALLOC_ROWS):
        self.fill = fill
        self.columns = columns
        self.chunk_size = chunk_size
        self.chunks = {}  # chunk number -> DataFrame with chunk_size rows

    def __getitem__(self, idx):
        """Return the row labelled ``idx`` as a Series."""
        chunk = self._get_chunk(idx)
        return chunk.loc[idx]

    def __setitem__(self, key, val):
        """Store ``val`` at ``key == (row label, column label)``."""
        idx, col = key
        chunk = self._get_chunk(idx)
        chunk.loc[idx, col] = val

    def _get_chunk(self, idx):
        """Return the chunk holding row ``idx``, creating it if needed."""
        chunk_number = idx // self.chunk_size
        if chunk_number not in self.chunks:
            self.chunks[chunk_number] = self._new_df(chunk_number)
        return self.chunks[chunk_number]

    def _new_df(self, chunk_number):
        """Build a fresh chunk covering the row labels of ``chunk_number``."""
        # The index must follow this instance's chunk size (not the module
        # default), otherwise _get_chunk and the chunk labels disagree.
        first_row = self.chunk_size * chunk_number
        index = range(first_row, first_row + self.chunk_size)
        if pd.api.types.is_scalar(self.fill):
            # A scalar is broadcast over the whole index/columns grid.
            return pd.DataFrame(self.fill, columns=self.columns, index=index)
        # A row-like fill has to be repeated once per row to match the index.
        rows = [self.fill] * self.chunk_size
        return pd.DataFrame(rows, columns=self.columns, index=index)

    def flatten(self):
        """Return a basic dataframe: all allocated chunks in row order.

        Chunks are concatenated by chunk number, not by creation order.  With
        no chunks allocated an empty frame with the configured columns is
        returned.
        """
        if not self.chunks:
            return pd.DataFrame(columns=self.columns)
        return pd.concat([self.chunks[number] for number in sorted(self.chunks)])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment