Skip to content

Instantly share code, notes, and snippets.

@catawbasam
Created September 4, 2021 17:28
Show Gist options
  • Save catawbasam/c52dbc22edb9e9c624b2cac9099f812c to your computer and use it in GitHub Desktop.
Save catawbasam/c52dbc22edb9e9c624b2cac9099f812c to your computer and use it in GitHub Desktop.
Load NDJSON into a pandas DataFrame, then save it to SQLite.
"""load ndjson into a Pandas dataframe. then save to sqlite. """
import json
import pandas as pd
# Load: parse a newline-delimited JSON file (one JSON document per line)
# into a DataFrame, then keep only the columns that get written to SQLite.
path = 'data.ndjson'
with open(path, 'rt', encoding='utf-8') as f:
    # Stream the file line by line rather than holding every raw line in
    # memory via readlines().  Whitespace-only lines (for example a trailing
    # blank line) are skipped because json.loads() raises on them.
    json_array = [json.loads(line) for line in f if line.strip()]
print('# lines', len(json_array))

df = pd.DataFrame(json_array)
print(df.columns)

# Columns to persist.  'badges' is left out on purpose: per the original
# author's note it holds an array, which would have to be converted to a
# string (or reduced to its first element) before it could be stored.
# Selecting with a list raises KeyError if any of these columns is missing.
df2 = df[[
    'username', 'bio',
    'posts', 'media', 'comments',
    'likes', 'followed', 'following',
    'joined',
    'lastseents',
    'state',
    'userid',
    'profilePhoto',
    'verified', 'rss', 'integration',
]]
#%%
######################
import sqlite3

# Save: write the selected columns to a SQLite table and read them back.
# For a throwaway database, connect to ':memory:' instead of a file.
cnx = sqlite3.connect('data.db')
try:
    # if_exists='fail' is the pandas default, spelled out so the behaviour is
    # visible: re-running against an existing 'user1000' table raises
    # ValueError instead of silently overwriting or duplicating rows.
    # The DataFrame index is written too (pandas default), so the table has
    # an extra 'index' column in addition to the selected ones.
    df2.to_sql(name='user1000', con=cnx, if_exists='fail')

    # Round-trip test: read the table back and make sure no rows were lost.
    df3 = pd.read_sql('select * from user1000', cnx)
    if len(df3) != len(df2):
        raise RuntimeError(
            f'round trip mismatch: wrote {len(df2)} rows, read back {len(df3)}'
        )
finally:
    # Always release the database handle, even when the write or read fails.
    cnx.close()
print('done')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment