Last active
August 29, 2015 14:15
-
-
Save vitiral/13278001b1a0b0cde647 to your computer and use it in GitHub Desktop.
Functions that should be standard to pandas dataframe objects
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from . import tools as _t | |
def resolve_header(header):
    '''Normalise a caller-supplied header.

    A dict is treated as a (possibly nested) template and converted by
    ``_t.get_header``; any other value, including ``None``, is returned
    unchanged.
    '''
    if isinstance(header, dict):
        return _t.get_header(header)
    # None and ready-made headers both pass straight through.
    return header
def dataframe_dict(data, index=None, filler='', header=None):
    '''Build a DataFrame from a dict of lists or a list of dicts.

    ``header`` is first passed through ``resolve_header``; when that
    yields ``None`` the header is derived from ``data`` itself (from its
    first element when ``data`` is not a dict).  Non-dict input is
    unpacked into one list per header entry, the keys are padded with
    ``filler`` by ``_t.fill_dict`` and the result is handed to
    ``pd.DataFrame.from_dict``.  Per the original author, nested input
    ends up with multi-level columns.

    When ``index`` is given it is set as the frame's index and the frame
    is sorted by it.
    '''
    header = resolve_header(header)
    from_mapping = isinstance(data, dict)
    if header is None:
        header = _t.get_header(data if from_mapping else data[0])
    if not from_mapping:
        # A sequence of records is pivoted into one list per header entry.
        data = _t.unpack_dicts(data, header)

    padded = _t.fill_dict(data, filler)
    frame = pd.DataFrame.from_dict(padded)
    if index is not None:
        frame.set_index(index, inplace=True)
        frame.sort_index(inplace=True)
    return frame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from unittest import TestCase | |
from pandas.util.testing import assert_frame_equal | |
from . import dataframe | |
from . import tools | |
# Fixture: one record mapping each letter to its position, plus a nested
# copy of those flat entries under 'many'; the list holds ten references
# to that same record.
strings = 'abcdefg'
testdata = {letter: position for position, letter in enumerate(strings)}
testdata['many'] = dict(testdata)
testdata = [testdata] * 10
class TestLoad(TestCase):
    '''Exercises ``dataframe.dataframe_dict`` with the module fixture.'''

    def test_list(self):
        '''Load the list-of-dicts fixture and print the resulting frame.'''
        print()
        frame = dataframe.dataframe_dict(testdata)
        print(frame)

    def test_dict(self):
        '''Loading pre-unpacked columns must equal loading the raw records.'''
        print()
        columns = tools.unpack_dicts(testdata, tools.get_header(testdata[0]))
        print(columns)
        from_columns = dataframe.dataframe_dict(columns)
        print(from_columns)
        from_records = dataframe.dataframe_dict(testdata)
        assert_frame_equal(from_columns, from_records, check_names=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dict_depth(d, depth=0):
    '''Return ``depth`` plus the deepest level of dict nesting in ``d``.

    A non-dict value or an empty dict adds nothing and yields ``depth``
    unchanged; each non-empty dict on the way down adds one.
    '''
    if isinstance(d, dict) and d:
        return max(dict_depth(child, depth + 1) for child in d.values())
    return depth
def get_header(item, extra_levels=None, filler=''):
    '''Return the header of a nested dictionary.

    The header is a list of tuples, one per non-dict value, each holding
    the keys followed from the top level down to that value, in the
    dictionary's iteration order.  A nested dict that is empty
    contributes no entries.

    ``extra_levels`` and ``filler`` are kept only so existing callers keep
    working: they were threaded through the recursion but never affected
    the returned header, so the depth computation they required has been
    dropped.
    '''
    paths = []
    for key, value in item.items():
        if isinstance(value, dict):
            # Prefix every path found below with the key leading to it.
            paths.extend((key,) + tail for tail in get_header(value))
        else:
            paths.append((key,))
    return paths
def get_item(dic, item):
    '''Follow the keys in ``item`` through nested containers.

    Starting from ``dic``, each element of ``item`` is used in turn as a
    subscript on the result of the previous step; the final value is
    returned.  An empty ``item`` returns ``dic`` itself.
    '''
    node = dic
    for step in item:
        node = node[step]
    return node
def fill_dict(data, filler):
    '''Make all keys tuples of the same length.

    Keys that are not tuples are wrapped in a one-element tuple, then
    every key is right-padded with ``filler`` up to the length of the
    longest key.  Values and iteration order are left as they are.

    An empty ``data`` returns an empty dict; previously this raised
    ValueError because there was nothing to unpack or take a maximum of.
    '''
    if not data:
        return {}
    entries = [
        (key if isinstance(key, tuple) else (key,), value)
        for key, value in data.items()
    ]
    width = max(len(key) for key, _ in entries)
    return {
        key + (filler,) * (width - len(key)): value
        for key, value in entries
    }
def unpack_dicts(data, header):
    '''Turn a list of dictionaries into a dictionary of lists.

    The result has one list per entry of ``header``.  For every record in
    ``data``, in order, the value found by ``get_item(record, entry)`` is
    appended to that entry's list.
    '''
    columns = {path: [] for path in header}
    # Records are walked once, outermost, so each list keeps data's order.
    for record in data:
        for path in header:
            columns[path].append(get_item(record, path))
    return columns
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment