Last active
March 6, 2021 16:44
-
-
Save Jim-Holmstroem/f1bb7c7170846fdfb01b65f1dd30a4e3 to your computer and use it in GitHub Desktop.
fix parquet read format to be pandas-like
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import repeat | |
import pandas as pd | |
# Sample frame: 'accountData' and 'additionalData' hold, per row, a list of
# {'key': ..., 'value': ...} records; 'eventIdThing' holds a one-entry dict
# per row.
df = pd.DataFrame(
    {
        'accountData': [
            [{'key': 'name', 'value': 'jim'}, {'key': 'schlong', 'value': '27'}],
            [{'key': 'name', 'value': 'cnagy'}, {'key': 'schlong', 'value': '26'}],
        ],
        'additionalData': [
            [{'key': 'id', 'value': 'jimbo'}, {'key': 'flash', 'value': 'yes'}],
            [{'key': 'id', 'value': 'cnagius'}, {'key': 'flash', 'value': 'no'}],
        ],
        'meta': [
            1,
            0,
        ],
        'eventIdThing': [
            {'somewhere': 13},
            {'somewhere': 37},
        ],
    },
)

# Use the 'somewhere' value of each 'eventIdThing' dict as the row label and
# name the resulting index 'EventId'.
df.index = df.eventIdThing.map(lambda d: d['somewhere']).rename('EventId')
def dic(d):
    """Collapse an iterable of ``{'key': k, 'value': v}`` records into a dict.

    Args:
        d: Iterable of mappings, each providing a ``'key'`` and a ``'value'``
            entry.

    Returns:
        A dict mapping every record's ``'key'`` to its ``'value'``. When a
        key occurs more than once, the last record wins.

    Raises:
        KeyError: If a record lacks ``'key'`` or ``'value'``.
    """
    return {kv['key']: kv['value'] for kv in d}
# Expand each key/value list column into its own sub-frame (one column per
# key) and concatenate the pieces side by side. Passing a dict to pd.concat
# uses the dict keys as the outer level of the resulting column index, which
# is what makes lookups such as df.accountData.name possible.
# 'eventIdThing' is deliberately not carried over; its value already serves
# as the row index.
df = pd.concat(
    {
        'accountData': df.accountData.map(dic).apply(pd.Series),
        'additionalData': df.additionalData.map(dic).apply(pd.Series),
        'meta': df.meta,
    },
    axis=1,
)

tdf = df.T  # or use the transpose instead if more convenient
def render(expr):
    """Print a banner naming ``expr``, the value it evaluates to, and a blank line.

    Args:
        expr: Source text of a Python expression. It is spliced into a
            ``print(...)`` call, so names it uses must be resolvable from
            this function's scope (in practice, module-level names).
    """
    print(f'-----< {expr} >-----')
    # NOTE(review): exec runs arbitrary code. That is acceptable only because
    # the expressions are hard-coded by the caller; never pass external input.
    exec(f'print({expr})')
    print()
# Show the reshaped frame and a few representative lookups. A plain loop is
# used because render() is called purely for its printed output.
for expression in (
    'df',
    'df.accountData.name',
    'df.accountData.name[13]',
    'tdf',
    'tdf[13]',
):
    render(expression)
Author
Jim-Holmstroem
commented
Mar 6, 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment