Skip to content

Instantly share code, notes, and snippets.

@johnarban
Created October 29, 2022 20:05
Show Gist options
  • Save johnarban/59219bf58d364892d2a900e220102553 to your computer and use it in GitHub Desktop.
Save johnarban/59219bf58d364892d2a900e220102553 to your computer and use it in GitHub Desktop.
View the nested structure of a pandas DataFrame
from pandas import DataFrame, concat, Series, isna
import json
def expand_subdict(df_col):
    """
    Expand a column of dictionaries (or lists) into a DataFrame.

    Each element is turned into a row via ``Series(element)``, so dict
    keys (or list positions) become the columns of the result. Elements
    that are strings holding a JSON object or array are decoded first, so
    they expand the same way as real dicts/lists instead of ending up as a
    single column ``0`` that still contains the raw string.

    Parameters
    ----------
    df_col : Series
        Column whose elements are dicts, lists, or JSON strings of those.

    Returns
    -------
    DataFrame
        One column per key/position, with all-NaN columns dropped.
    """
    def _decode(value):
        # Only strings can hold serialized JSON; pass everything else through.
        if not isinstance(value, str):
            return value
        try:
            parsed = json.loads(value)
        except ValueError:
            # Not valid JSON (JSONDecodeError is a ValueError): keep as is.
            return value
        # Bare JSON scalars ("1", "true", ...) have nothing to expand, so
        # the original string is kept for them.
        return parsed if isinstance(parsed, (dict, list)) else value

    decoded = df_col.apply(_decode)
    expanded = decoded.apply(Series)
    # drop empty columns that are sometimes created.
    return expanded.dropna(axis=1, how='all')
def values_are_dict_like(col):
    """
    Check if any value in a column is a dictionary,
    or a string that looks like a JSON object.
    """
    # only object columns can hold dicts or strings
    if col.dtype != 'object':
        return False
    # coming from json, the dict may still be a string:
    # look for anything wrapped in curly braces
    if hasattr(col, 'str') and col.str.contains(r'\{.*\}').any():
        return True
    # otherwise look for real dict objects
    holds_dict = col.apply(lambda value: isinstance(value, dict))
    return bool(holds_dict.any())
def values_are_list_like(col):
    """
    Report whether at least one element of a column is a list.
    """
    # flag each element, then reduce the flags to a single truth value
    list_flags = col.apply(lambda item: isinstance(item, list))
    return list_flags.any()
def convertable_to_DataFrame(x):
    """
    Tell whether a column holds values that can be expanded
    into a dataframe, i.e. dict-like or list values.
    """
    # dict-like values take precedence; lists are only checked as a fallback
    dict_like = values_are_dict_like(x)
    if dict_like:
        return dict_like
    return values_are_list_like(x)
def infer_schema(df, schema=None):
    """
    Recursively find the structure of a
    pandas DataFrame with nested dictionaries.

    Parameters
    ----------
    df : DataFrame or Series
        Data to inspect. A Series is first expanded into a DataFrame
        with ``expand_subdict``.
    schema : dict, optional
        Dictionary that is filled in place. A fresh dict is created
        when it is omitted.

    Returns
    -------
    dict
        ``schema``, with one key per column. Each key maps to the nested
        schema of that column (an empty dict when the column is not
        expanded further).

    Github: @johnarban
    """
    # A `{}` default is created once and shared by every call, so keys
    # from earlier calls would leak into later results. Build it per call.
    if schema is None:
        schema = {}
    # if already a series or has a list of subcolumns
    if isinstance(df, Series) or values_are_list_like(df):
        return infer_schema(expand_subdict(df), schema)
    for col in df.columns:
        schema[col] = {}
        if convertable_to_DataFrame(df[col]):
            # fill the nested dict for this column in place
            infer_schema(df[col], schema[col])
    return schema
# copy output of json.dumps(schema) into https://codebeautify.org/jsonviewer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment