Skip to content

Instantly share code, notes, and snippets.

@jaklinger
Created September 26, 2019 12:50
Show Gist options
  • Save jaklinger/58312f66efb7143c712059ca61955f3a to your computer and use it in GitHub Desktop.
Save jaklinger/58312f66efb7143c712059ca61955f3a to your computer and use it in GitHub Desktop.
Write data from MongoDB to JSON files, in chunks.
import pymongo
from pymongo import MongoClient
import pandas as pd
def mongo_to_files(usr_name, pwd, address, db_name, collection,
                   out_path='out{}-{}.json', chunk_size=10000, port=27017):
    """Dump a MongoDB collection to a series of JSON files, one per chunk.

    Documents are read in pages of ``chunk_size`` (ordered by ``_id``), the
    ``_id`` field is dropped, and each page is written with
    ``pandas.DataFrame.to_json(orient='records', force_ascii=False)``.

    Args:
        usr_name: MongoDB user name. It is interpolated into the connection
            URI as-is, so reserved characters must already be
            percent-escaped by the caller.
        pwd: MongoDB password; same escaping requirement as ``usr_name``.
        address: Host part of the connection URI.
        db_name: Name of the database to read from.
        collection: Name of the collection to export.
        out_path: Format string for output file names. It receives two
            positional values: the chunk's start offset and
            ``start + chunk_size``.
        chunk_size: Maximum number of documents written per file.
        port: Port passed to ``MongoClient``.

    Returns:
        The number of documents written.
    """
    client = MongoClient(f'mongodb://{usr_name}:{pwd}@{address}', port)
    try:
        # Use a separate local instead of rebinding the ``collection``
        # parameter (a name) to a Collection object.
        coll = client[db_name][collection]
        # ``Collection.count()`` is deprecated and removed in PyMongo 4.
        total = coll.count_documents({})
        n = 0
        while n < total:
            # Sort on ``_id`` so successive skip/limit pages are consistent;
            # without an explicit order pages may overlap or miss documents.
            # ``_id`` is excluded by projection rather than popped per row.
            cursor = (
                coll.find(projection={'_id': False})
                .sort('_id', 1)
                .skip(n)
                .limit(chunk_size)
            )
            data = list(cursor)
            if not data:
                # The collection shrank after the count was taken; without
                # this guard ``n`` never advances and the loop never ends.
                break
            filename = out_path.format(n, n + chunk_size)
            pd.DataFrame(data).to_json(
                filename, orient='records', force_ascii=False)
            n += len(data)
    finally:
        # Release the connection pool even if a read or write fails.
        client.close()
    return n
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment