Skip to content

Instantly share code, notes, and snippets.

@mieitza
Created January 3, 2018 15:19
Show Gist options
  • Save mieitza/b0b8d1670b64d26909f241941d705180 to your computer and use it in GitHub Desktop.
Save mieitza/b0b8d1670b64d26909f241941d705180 to your computer and use it in GitHub Desktop.
parallel insert
import json
import pandas as pd
from pymongo import MongoClient
def mongo_to_pandas(client, database: str, collection: str) -> pd.DataFrame:
    """Read every document of a Mongo collection into a pandas data frame.

    Args:
        client: Client object that is indexed as
            ``client[database][collection]``; the resulting collection
            must provide a ``find()`` method.
        database: Name of the database to read from.
        collection: Name of the collection to read from.

    Returns:
        A data frame built from the list of documents returned by
        ``find()``. When an ``_id`` column is present its values are
        converted to strings. An empty collection yields an empty frame.
    """
    cursor = client[database][collection].find()
    df = pd.DataFrame(list(cursor))
    # An empty result produces a frame with no columns at all, so guard the
    # lookup instead of letting df["_id"] raise KeyError.
    if "_id" in df.columns:
        df["_id"] = df["_id"].apply(str)  # convert ids to strings
    return df
def pandas_to_mongo(df, database, collection, client=None):
    """Insert the rows of a data frame into a Mongo collection.

    The frame is round-tripped through ``df.to_json(orient="records")`` and
    ``json.loads``, so each row becomes one plain JSON-compatible dict. The
    documents are sent with a single unordered ``insert_many`` call.

    Args:
        df: Data frame whose rows are inserted as documents.
        database: Name of the target database.
        collection: Name of the target collection.
        client: Optional client, indexed as ``client[database][collection]``.
            When omitted, a ``MongoClient()`` with default settings is
            created for this call and closed afterwards. A client passed in
            by the caller is left open.

    Returns:
        None.
    """
    docs = json.loads(df.to_json(orient="records"))
    if not docs:
        # insert_many rejects an empty document list; there is nothing to do.
        return None

    owns_client = client is None
    if owns_client:
        client = MongoClient()
    try:
        client[database][collection].insert_many(docs, ordered=False)
    finally:
        # Only close a client this function created itself.
        if owns_client:
            client.close()
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment