Last active
February 13, 2018 11:19
-
-
Save ygurin/27c074e861e4376acdca73d0fba9fd27 to your computer and use it in GitHub Desktop.
Performing a flatmap operation on a pandas DataFrame: each value in a row's list is mapped to its own row. This example uses Python's itertools.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Perform a flatmap operation on a selected DataFrame column
# whose rows contain lists. This example uses
# Python's itertools.
from itertools import chain

import pandas as pd
def flatmap(f, items):
    """Apply ``f`` to each element of ``items`` and concatenate the results.

    Args:
        f: Callable taking one element and returning an iterable.
        items: Iterable of elements to map over.

    Returns:
        A single flat list containing the elements of every iterable
        returned by ``f``, in input order.
    """
    # chain.from_iterable consumes the map object directly, so there is no
    # need to materialise an intermediate list of per-item results.
    return list(chain.from_iterable(map(f, items)))
def create_dict(df, headers_ls, f_var, x):
    """Build a new record from ``df`` with the ``f_var`` entry set to ``x``.

    Args:
        df: Mapping-like object indexed by header name (only ``df[h]``
            lookups are performed on it).
        headers_ls: Iterable of keys to copy from ``df``.
        f_var: Key whose value is set to ``x`` in the result; it is added
            if it is not among ``headers_ls``.
        x: Value stored under ``f_var``.

    Returns:
        A new dict; ``df`` itself is not modified.
    """
    record = {h: df[h] for h in headers_ls}
    # Assigned after the copy so it overrides any value copied for f_var.
    record[f_var] = x
    return record
def flatmap_df(df, flatten_col):
    """Return a DataFrame with one row per element of ``flatten_col``'s values.

    Each row of ``df`` is converted to a record dict; for every element of
    that record's ``flatten_col`` value, a copy of the record is emitted with
    ``flatten_col`` replaced by that single element.

    Args:
        df: Source DataFrame. Each ``flatten_col`` value is iterated over,
            so it must be iterable (e.g. a list).
        flatten_col: Label of the column to flatten.

    Returns:
        A new DataFrame built from the expanded records, with a fresh
        default index and the same column labels as ``df``.
    """
    headers_ls = df.columns
    records = df.to_dict(orient='records')
    expanded = flatmap(
        lambda rec: [
            create_dict(rec, headers_ls, flatten_col, value)
            for value in rec[flatten_col]
        ],
        records,
    )
    # Passing the columns explicitly keeps the column labels (and their
    # order) even when ``expanded`` is empty, e.g. for an empty input frame
    # or when every list in ``flatten_col`` is empty.
    return pd.DataFrame(expanded, columns=headers_ls)
##########
# Create a DataFrame whose "topics" column holds a list in each row
##########
print("\nOriginal DataFrame:")
episodes = [
    {"id": 1, "topics": [5, 7, 8], "date": '2018-01-01'},
    {"id": 2, "topics": [65, 43, 64], "date": '2018-01-02'},
]
df_epi = pd.DataFrame(episodes)
print(df_epi)

print("\nFlattened DataFrame:")
##########
# Flatten the column lists
##########
# Only the 'id' and 'topics' columns are passed in, so 'date' does not
# appear in the flattened result.
flattened_df = flatmap_df(df_epi[['id', 'topics']], 'topics')
print(flattened_df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment