Created
March 16, 2019 16:40
-
-
Save lorey/2b57b4ebfec4d45221e15a49060f80d2 to your computer and use it in GitHub Desktop.
Keeping Pandas DataFrames clean when importing JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pandas.io.json import json_normalize | |
# Flatten the (possibly nested) JSON records in `data` into a flat DataFrame.
df = json_normalize(data=data)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DataFrameFromDict(object):
    """
    Temporarily imports data frame columns and deletes them afterwards.

    Inside the ``with`` block the frame holds the columns produced by
    ``json_normalize(data)``; derive the columns you want to keep from them.
    When the block exits, the imported columns are dropped in place, so the
    very same frame object is left with only the columns added in the block.

    Note: a column assigned inside the block under the same label as an
    imported column replaces it and is therefore dropped on exit as well.
    """

    def __init__(self, data):
        """
        :param data: input passed straight to ``json_normalize``.
        """
        self.df = json_normalize(data)
        # Snapshot of the imported column labels; these are removed on exit.
        self.columns = list(self.df.columns)

    def __enter__(self):
        """Return the normalized frame for use inside the ``with`` block."""
        return self.df

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Drop the imported columns from the frame in place.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        # errors="ignore": an imported column the caller already deleted or
        # renamed inside the block must not raise KeyError during cleanup
        # (which would also replace any exception already propagating).
        self.df.drop(self.columns, axis=1, inplace=True, errors="ignore")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pandas.io.json import json_normalize | |
df = json_normalize(data)

# make temporary columns
df.columns = ['temp_' + c for c in df.columns]

# pre-processing, basic calculations, etc.
df['company_id'] = df['temp_companyId']
df['location'] = df['temp_properties.city.value']
df['name'] = df['temp_properties.name.value']
df['domain'] = df['temp_properties.website.value']
# ... .apply(), .astype(int), whatever...

# remove the temporary columns again, either in place
df.drop([c for c in df.columns if c.startswith('temp_')], axis=1, inplace=True)
# or
df = df[[c for c in df.columns if not c.startswith('temp_')]]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pandas.io.json import json_normalize | |
df = json_normalize(data)

# copy the nested source fields into the columns we actually want;
# the original json_normalize columns stay in the frame alongside them
df['company_id'] = df['companyId']
df['location'] = df['properties.city.value']
df['name'] = df['properties.name.value']
df['domain'] = df['properties.website.value']
# ... .apply(), .astype(int), whatever...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with DataFrameFromDict(companies) as df:
    # imported dict now in df, same result as json_normalize
    df['company_id'] = df['companyId']
    df['location'] = df['properties.city.value']
    df['name'] = df['properties.name.value']
    df['domain'] = df['properties.website.value']

# after context exits, df contains company_id, location, name, and domain
# but no more temporary columns
print(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment