Created
September 26, 2019 12:50
-
-
Save jaklinger/58312f66efb7143c712059ca61955f3a to your computer and use it in GitHub Desktop.
Write data from MongoDB to JSON files, in chunks.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
from pymongo import MongoClient | |
import pandas as pd | |
def mongo_to_files(usr_name, pwd, address, db_name, collection,
                   out_path='out{}-{}.json', chunk_size=10000, port=27017):
    """Dump a MongoDB collection to a series of JSON files, one per chunk.

    Documents are read page by page (``skip``/``limit``), the ``_id`` field
    is dropped, and each page is written with pandas as a JSON array of
    records.

    Args:
        usr_name: User name placed in the connection URI.
        pwd: Password placed in the connection URI.
        address: Host part of the connection URI.
        db_name: Name of the database to read from.
        collection: Name of the collection to export.
        out_path: ``str.format`` template for the output file names. It
            receives two positional values: the offset of the first
            document in the chunk and that offset plus ``chunk_size``
            (a nominal end, which may exceed the real document count for
            the last chunk).
        chunk_size: Maximum number of documents per output file.
        port: Port passed to ``MongoClient``.

    Returns:
        The number of documents written across all files.

    Note:
        ``usr_name`` and ``pwd`` are interpolated into the URI verbatim.
        PyMongo expects reserved characters in credentials to be
        percent-escaped, so pass values that are already escaped (e.g. with
        ``urllib.parse.quote_plus``).
    """
    client = MongoClient(f'mongodb://{usr_name}:{pwd}@{address}', port)
    try:
        # Keep the `collection` argument (a name) distinct from the handle.
        coll = client[db_name][collection]
        # Collection.count() was removed in PyMongo 4; this is its replacement.
        total = coll.count_documents({})
        n = 0
        while n < total:
            # Sorting on the always-indexed `_id` keeps page boundaries stable
            # between queries; without a sort, skip/limit pages may overlap
            # or miss documents. `_id` is excluded server-side via projection.
            cursor = (
                coll.find({}, {'_id': False})
                .sort('_id', pymongo.ASCENDING)
                .skip(n)
                .limit(chunk_size)
            )
            data = list(cursor)
            if not data:
                # The collection shrank since it was counted; stop rather
                # than spin forever on an offset that no longer advances.
                break
            filename = out_path.format(n, n + chunk_size)
            pd.DataFrame(data).to_json(filename, orient='records',
                                       force_ascii=False)
            n += len(data)
        return n
    finally:
        # Always release the connection pool, even if a query or write fails.
        client.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment