Created
January 29, 2019 11:47
-
-
Save elena-roff/1f3e92e7964f413549a39878d94f3296 to your computer and use it in GitHub Desktop.
MongoDB flow for transforming JSON docs into pd.DataFrame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from pymongo import MongoClient
def get_db(host='localhost', port=27017, name='warehouse'):
    """Connect to a MongoDB server and return a handle to one database.

    The defaults reproduce the original behavior: a server running on
    the local machine and the ``warehouse`` database.

    Args:
        host: Hostname of the MongoDB server.
        port: Port the server listens on.
        name: Name of the database to return.

    Returns:
        The database object named ``name`` on the connected client.
    """
    client = MongoClient(host=host, port=port)
    # Print the database names found on the server (kept from the
    # original so the caller can see what is available).
    print(client.list_database_names())
    return client[name]
# database
db = get_db()

# collections
collection1 = db.collection1
# NOTE(review): the attribute is `collections2` (plural) while the variable
# is `collection2` -- possibly a typo; confirm the real collection name.
collection2 = db.collections2

# Aggregation pipeline that selects the JSON documents you need.
# Placeholder from the original example: replace `{}, ...` with real stages
# before running.
query = [
    {}, ...
]

# documents
docs = collection1.aggregate(query)

# Transform the documents into a DataFrame for easy data wrangling.
# Build it in a single step: `DataFrame.append` was removed in pandas 2.0,
# and appending row by row copies the whole frame on every iteration.
df = pd.DataFrame(list(docs))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment