Created
March 17, 2016 16:08
-
-
Save vinovator/80e3586ce70e12c58714 to your computer and use it in GitHub Desktop.
Script to interact with MongoDB using pymongo driver
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# mongo_test_restaurants.py | |
# Python 2.7.6 | |
""" | |
Test script to connect to MongoDB collections using pymongo library | |
Connects to an already imported connection named "restaurants" | |
source - https://docs.mongodb.org/getting-started/python/ | |
""" | |
from pymongo import MongoClient, ASCENDING, DESCENDING | |
from datetime import datetime | |
# create a connection | |
# If no parameters passed assumes localhost and port 27017 | |
client = MongoClient() | |
# connect to MongoDB database | |
db = client.test | |
# db = client["test"] # This is also valid | |
# Connect to a collection | |
restaurants = db.restaurants | |
# coll = db["restaurants"] # This is also valid | |
""" QUERYING DATA """ | |
# Query all the documents in collection | |
cursor1 = restaurants.find() # returns pymongo.cursor.Cursor type | |
# cursor = db.restaurants.find() # This is also valid | |
# Query documents with filter criteria | |
cursor2 = restaurants.find({"borough": "Manhattan"}) | |
# Query documents by a field in embedded document | |
cursor3 = restaurants.find({"address.zipcode": "10075"}) | |
""" Conditional Operators | |
greater than - $gt | |
less than - $lt | |
OR - key as "$or" and value as List of dicts | |
AND - One dict with multiple keys | |
""" | |
# Querying using an operator | |
cursor4 = restaurants.find({"grades.score": {"$gt": 30}}) | |
# Logical AND - one dict with multiple keys | |
cursor5 = restaurants.find({"cuisine": "Italian", "address.zipcode": "10075"}) | |
# Logical OR - key as "$or" and value as List of dicts | |
cursor6 = restaurants.find({"$or": [{"cuisine": "Italian"}, | |
{"address.zipcode": "10075"}]}) | |
""" Sorting queried data | |
use sort() method | |
pass sort parameters as list of tuples | |
ascending - pymongo.ASCENDING | |
descending - pymongo.DESCENDING | |
""" | |
cursor7 = restaurants.find({"cuisine": "Indian"}).sort([ | |
("borough", ASCENDING), # pymongo.ASCENDING | |
("grades.score", DESCENDING)]) # pymongo.DESCENDING | |
# Iterate the cursor and print all the documents | |
for document in cursor7: | |
print (document) # returns a dict variable | |
""" INSERTING DATA """ | |
""" | |
insert one document - insert_one() | |
insert many documents - insert_many() | |
""" | |
record = { | |
"address": { | |
"street": "2 Avenue", | |
"zipcode": "10075", | |
"building": "1480", | |
"coord": [-73.9557413, 40.7720266] | |
}, | |
"borough": "Manhattan", | |
"cuisine": "Italian", | |
"grades": [ | |
{ | |
"date": datetime.strptime("2014-10-01", "%Y-%m-%d"), | |
"grade": "A", | |
"score": 11 | |
}, | |
{ | |
"date": datetime.strptime("2014-01-16", "%Y-%m-%d"), | |
"grade": "B", | |
"score": 17 | |
} | |
], | |
"name": "Vella", | |
"restaurant_id": "41704620" | |
} | |
result = restaurants.insert_one(record) | |
print type(result) # <class 'pymongo.results.InsertOneResult'> | |
print result.inserted_id | |
""" UPDATING DATA """ | |
""" | |
update one document - update_one() | |
update many documents - update_many() | |
Replace an entire document, except for _id - replace_one() | |
Update parameters | |
- filter document (similar to query data) | |
- update document (with update operators) | |
- Optional upsert parameter | |
update operators - https://docs.mongodb.org/manual/reference/operator/update/) | |
""" | |
# To update an embedded field from the first matched document | |
update_result = restaurants.update_one( | |
{"restaurant_id": "41156888"}, # Filter document | |
{"$set": {"address.street": "East 32nd street"}} # update document | |
) | |
print update_result.matched_count | |
print update_result.modified_count | |
print type(update_result) # <class 'pymongo.results.UpdateResult'> | |
# Update multiple documents | |
update_result2 = restaurants.update_many( | |
{"address.zipcode": "10016", "cuisine": "Other"}, # Filter documet | |
{ | |
"$set": {"cuisine": "Category to be determined"}, | |
"$currentDate": {"lastModified": True} | |
} # update document | |
) | |
print update_result2.matched_count | |
print update_result2.modified_count | |
print type(update_result2) # <class 'pymongo.results.UpdateResult'> | |
""" DELETING DATA """ | |
""" | |
delete single document - delete_one() | |
delete mulitple documents - delete_many() | |
to drop a collection - restaurants.drop() | |
""" | |
delete_result = restaurants.delete_many({"borough": "Manhattan"}) | |
print delete_result.deleted_count | |
print type(delete_result) # <class 'pymongo.results.DeleteResult'> | |
""" DATA AGGREGATION """ | |
""" | |
To aggregate data (such as grouping or total or count) - aggregate() | |
db.collection.aggregate([<stage1>,<stage2>,..]) | |
each stage is processed sequentially | |
""" | |
# Group documents and calculate count | |
agg_cursor = restaurants.aggregate( | |
[ | |
{"$group": {"_id": "$borough", # Group by "borough" field | |
"count": {"$sum": 1}}} # count by "$sum" accumulator | |
] | |
) | |
for document in agg_cursor: | |
print (document) | |
# Filter and group documents | |
agg_cursor2 = restaurants.aggregate( | |
[ | |
{"$match": {"borough": "Queens", "cuisine": "Brazilian"}}, | |
{"$group": {"_id": "$address.zipcode", "count": {"$sum": 1}}} | |
] | |
) | |
for document in agg_cursor2: | |
print (document) | |
""" Indexing Data """ | |
""" | |
to create single index or compound index - create_index() | |
index is automatically created for _id | |
only creates index if index does not exist | |
""" | |
# To create a single index | |
idx = restaurants.create_index([("cuisine", ASCENDING)]) | |
print idx # cuisine_1 | |
print type(idx) # <type 'unicode'> | |
# To create a compound index | |
idx2 = restaurants.create_index([ | |
("cuisine", ASCENDING), | |
("address.zipcode", DESCENDING) | |
]) | |
print idx2 # cuisine_1_address.zipcode_-1 | |
print type(idx2) # <type 'unicode'> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment