Skip to content

Instantly share code, notes, and snippets.

@meddulla
Created October 9, 2018 08:19
Show Gist options
  • Save meddulla/3719c0fa33fac8677c739b081c9fcd16 to your computer and use it in GitHub Desktop.
Save meddulla/3719c0fa33fac8677c739b081c9fcd16 to your computer and use it in GitHub Desktop.
flatten json into pandas columns
import os
import json
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
# from https://www.kaggle.com/julian3833/1-quick-start-read-csv-and-flatten-json-fields
def load_df(csv_path='../input/train.csv', nrows=None, json_columns=None):
    """Read a CSV file and expand its JSON-encoded columns into flat columns.

    Every JSON column is parsed with ``json.loads`` while the CSV is being
    read and is then flattened with ``json_normalize``. The flattened
    sub-columns are named ``"<column>.<subcolumn>"`` and replace the original
    column; they are appended after the remaining columns, in the order the
    JSON columns are listed.

    A one-line summary (file name and resulting shape) is printed to stdout.

    Args:
        csv_path: Path of the CSV file to read.
        nrows: Number of rows to read, or ``None`` to read the whole file.
        json_columns: Iterable with the names of the columns that hold JSON
            objects. Defaults to
            ``['device', 'geoNetwork', 'totals', 'trafficSource']``.

    Returns:
        pandas.DataFrame: The loaded data with the JSON columns flattened.
    """
    if json_columns is None:
        json_columns = ['device', 'geoNetwork', 'totals', 'trafficSource']
    else:
        # Materialise once: the names are iterated twice below.
        json_columns = list(json_columns)

    # Resolve json_normalize at call time: pandas >= 1.0 exposes it as
    # ``pandas.json_normalize``; older releases only provide
    # ``pandas.io.json.json_normalize``.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        from pandas.io.json import json_normalize as normalize

    df = pd.read_csv(
        csv_path,
        converters={column: json.loads for column in json_columns},
        # Read the visitor id as text so it is never converted to a number.
        dtype={'fullVisitorId': 'str'},
        nrows=nrows,
    )

    for column in json_columns:
        column_as_df = normalize(df[column])
        # Prefix with the source column so sub-columns from different JSON
        # columns cannot collide.
        column_as_df.columns = [
            f"{column}.{subcolumn}" for subcolumn in column_as_df.columns
        ]
        # Swap the raw JSON column for its flattened form, aligned on the
        # row index.
        df = df.drop(column, axis=1).merge(
            column_as_df, right_index=True, left_index=True
        )

    print(f"Loaded {os.path.basename(csv_path)}. Shape: {df.shape}")
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment