Created
October 29, 2022 20:05
-
-
Save johnarban/59219bf58d364892d2a900e220102553 to your computer and use it in GitHub Desktop.
View the nested structure of a pandas DataFrame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pandas import DataFrame, concat, Series, isna | |
import json | |
def expand_subdict(df_col):
    """
    Expand a column of dictionaries (or lists) into a DataFrame.

    Each value becomes one row; dictionary keys (or list positions) become
    the columns. Strings holding a JSON object or array are decoded first,
    so data that arrived still serialized is expanded the same way as real
    ``dict``/``list`` values. Columns that are entirely NaN are dropped.

    Parameters
    ----------
    df_col : pandas.Series
        Column whose values should be expanded.

    Returns
    -------
    pandas.DataFrame
        One row per input value, indexed like ``df_col``.
    """
    def _to_series(value):
        # Only attempt decoding on strings that can be a JSON container;
        # every other string is expanded unchanged, as before.
        if isinstance(value, str) and value.strip().startswith(('{', '[')):
            try:
                decoded = json.loads(value)
            except ValueError:
                decoded = value
            if isinstance(decoded, (dict, list)):
                value = decoded
        return Series(value)

    # An empty column makes ``apply`` return a Series, on which
    # ``dropna(axis=1)`` fails; return an empty frame instead.
    if len(df_col) == 0:
        return DataFrame(index=df_col.index)

    # drop empty columns that are sometimes created.
    return df_col.apply(_to_series).dropna(axis=1, how='all')
def values_are_dict_like(col):
    """
    Check whether any value in a column is dictionary-like.

    A value counts as dictionary-like when it is a ``dict`` object, or a
    string that contains ``{...}`` and decodes as a JSON object or array
    (coming from JSON, the dict may still be a string).

    Parameters
    ----------
    col : pandas.Series
        Column to inspect. Only ``object`` dtype columns are examined.

    Returns
    -------
    bool
        True if at least one value is dictionary-like, otherwise False.
    """
    if col.dtype != 'object':
        return False

    # is it a string, if so see if it looks like json
    if hasattr(col, 'str'):
        # na=False maps non-string entries to False so the result can be
        # used directly as a boolean mask.
        has_braces = col.str.contains(r'\{.*\}', na=False)
        for text in col[has_braces]:
            # Braces alone are not enough ("see {note} here"); only text
            # that really decodes to a JSON container can be expanded.
            try:
                decoded = json.loads(text)
            except (TypeError, ValueError):
                continue
            if isinstance(decoded, (dict, list)):
                return True

    # otherwise check if it holds dict objects
    return any(isinstance(value, dict) for value in col)
def values_are_list_like(col):
    """
    Check if any element in a column is a list.

    Parameters
    ----------
    col : pandas.Series
        Column to inspect. Only genuine ``list`` instances count; other
        values such as dicts are not treated as lists.

    Returns
    -------
    bool
        True if at least one element is a ``list``.
    """
    is_list = col.apply(lambda value: isinstance(value, list))
    return is_list.any()
def convertable_to_DataFrame(x):
    """
    Check if a column's values can be expanded to a DataFrame.

    A column qualifies when ``values_are_dict_like`` or
    ``values_are_list_like`` reports a match for it.

    Parameters
    ----------
    x : pandas.Series
        Column to inspect.

    Returns
    -------
    bool
        Truthy if the column holds dict-like or list values.
    """
    return values_are_dict_like(x) or values_are_list_like(x)
def infer_schema(df, schema=None):
    """
    Recursively find the structure of a
    pandas DataFrame with nested dictionaries

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Data to inspect. A Series is first expanded into a DataFrame
        with ``expand_subdict``.
    schema : dict, optional
        Dictionary to fill in place. A new one is created when omitted.

    Returns
    -------
    dict
        Nested dictionary mapping each column name to the schema of its
        nested values (an empty dict for columns with no nesting).

    Github: @johnarban
    """
    # Compare against None explicitly: the recursive calls pass an empty
    # dict that must be filled in place, and a ``{}`` default would be
    # shared (and keep growing) across separate top-level calls.
    if schema is None:
        schema = {}

    # if already a series or has a list of subcolumns
    if isinstance(df, Series) or values_are_list_like(df):
        return infer_schema(expand_subdict(df), schema)

    for col in df.columns:
        schema[col] = {}
        if convertable_to_DataFrame(df[col]):
            infer_schema(df[col], schema[col])
    return schema
# Copy the output of json.dumps(schema) into https://codebeautify.org/jsonviewer to view it.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment