Last active
December 30, 2015 21:39
-
-
Save pierangeloc/7888705 to your computer and use it in GitHub Desktop.
mongoDB queries compendium. This is an overview of the capabilities of Mongo that I have been using during the M101J course
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//find from people collection | |
// _id can be not only a scalar value but also a document: | |
db.people.insert({_id: {"bsn": "2322323", "phone_nr" : "0612313123123"}}) | |
/** QUERIES **/ | |
// find documents where profession exists | |
db.people.find({profession: {$exists: true}}) | |
// find documents where name attribute has type String (for the correspondence between String and type | |
//number, refer to http://bsonspec.org/#/specification) | |
db.people.find({name: {$type: 2}}) | |
// find where name attribute matches the regex (in this case ends with a) | |
db.people.find({name: {$regex: "a$"}}) | |
// OR clause: the $or is an attribute of the find argument, and it contains the array of queries to form | |
// the or clause | |
db.people.find( { $or: [ {name: {$regex: "e$"} }, {age: {$exists: true} } ] } ) | |
// find scores with score attribute > 90 OR < 50 | |
db.scores.find({$or: [{score: {$gt: 90}}, {score: {$lt: 50}}]}) | |
// AND clause | |
// find people with name lexicographically > A and ending with k | |
db.people.find({$and: [{name: {$gt: "A"} }, {name: {$regex: "k$"}}]}) | |
//N.B.: No need to use AND most of the times, you can specify multiple conditions on an attribute putting | |
//them in a document: | |
db.people.find({name: {$gt: "A", $regex: "k$"}}) | |
//N.B.: the js object that is passed to the find method is evaluated before being submitted. Therefore: | |
db.scores.find( { score : { $gt : 50 }, score : { $lt : 60 } } ); | |
// is equivalent to | |
db.scores.find( {score : { $lt : 60 } } ); | |
// search is polimorphic when applied to arrays, i.e. if it finds an array it matches any of the elements | |
// in the array; if it finds a single | |
// property, it matches the value of that single property. | |
// If the collection is: | |
{ "_id" : ObjectId("526523f693e6535ed27424c1"), "name" : "piero", "favorites" : [ "pasta", "pizza", "pane" ] } | |
{ "_id" : ObjectId("5265240b93e6535ed27424c2"), "name" : "ania", "favorites" : [ "lasagne", "pesce", "pane" ] } | |
{ "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [ "crocchette", "pesce", "grasso" ] } | |
{ "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" } | |
//then the query | |
db.accounts.find({favorites: "grasso"}) | |
//produces | |
{ "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [ "crocchette", "pesce", "grasso" ] } | |
{ "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" } | |
// match a set of values in the array. The content of the array must have at least the provided set: | |
db.accounts.find({favorites: {$all: ["pesce", "pane"]}}) | |
// matches the documents who contains in the attribute "favorites" at least one element contained in | |
// the provided list. | |
// This applies also to non-array types | |
db.accounts.find({favorites: {$in: ["pesce", "pane"]}}) | |
/** CURSORS **/ | |
//these actually modify the istrucitons sent to the DB server | |
cur = db.accounts.find() | |
cur.hasNext() | |
cur.next() | |
cur.limit(5) // --> limits the nr of results reacheable through this cursor to 5 | |
cur.sort({name: -1}) | |
cur.sort({name: -1}).limit(3) | |
cur.sort({name: -1}).skip(2).limit(3) //skip the first 2 and show the next 3 | |
while(cur.hasNext()) printjson(cur.next) | |
/** COUNTING **/ | |
//wherever find() is used, count() can be used | |
/** UPDATES **/ | |
//1. WHOLESALE UPDATE | |
//it replaces the whole content of the found record, with the whole content provided in the 2nd argument. | |
//N.B: IT DISCARDS ANY PREVIOUS CONTENT | |
db.people.update({name: "Piero"}, {name: "Pierangelo", salary: 50000}) | |
//2. SELECTIVE UPDATE | |
// N.B.: Default behavior of update is to update only the first document that matches the criteria | |
// If you want all matching docs to be updated, you must specify an extra option (3rd parameter) {multi: true} | |
//set one single field | |
db.users.update({username: "splunker"}, {$set: {"country" : "RU"}}) | |
//increment an attribute. If the attribute does not exist, it is created and added to 0 | |
db.users.update({username: "splunker"}, {$inc: {"contactsnr" : 1}}) | |
// remove an attribute | |
db.users.update({username: "splunker"}, {$unset: {"country" : 1}}) | |
// update single array element: | |
db.arrays.insert({"_id": 0, a: [1,2,3,4,5,6,7]}) | |
db.arrays.update({"_id": 0}, {$set: {"a.2": 100}}) // updates the 3rd element of the array "a" | |
//arrays push and pop | |
//$push() push on right | |
db.arrays.update({"_id": 0}, {$push: {"a": 2}}) | |
//pop one element on right, specify a > 0 argument (it doesn't matter if the arg is 1 or > 1) | |
db.arrays.update({"_id": 0}, {$pop: {"a": 2}}) | |
//pop on left, specify a < 0 argument | |
db.arrays.update({"_id": 0}, {$push: {"a": -2}}) | |
db.arrays.update({"_id": 0}, {$pushAll: {"a": [101, 102, 103, 104]}}) | |
//remove a single element from the array. | |
// N.B.: the element specified is not the position, but the value!!! If it occurs more than once, | |
// it removes them all | |
db.arrays.update({"_id": 0}, {$pull: {"a": 4}}) | |
db.arrays.update({"_id": 0}, {$pullAll: {"a": [4,5,6,7]}}) | |
//$addToSet treats the array as a set, so it does not add an element if it is already included it | |
db.arrays.update({"_id": 0}, {$addToSet: {"a": 4}}) | |
/** 3. REMOVE**/ | |
db.users.remove({/* criteria object*/}) | |
db.users.drop() // much faster than db.users.drop({}), but it does also remove the indexes | |
// to check the status of last operation, e.g. nr of docs updated: | |
db.runCommand({getLastError: 1}) | |
/**************** INDEXES ************************/ | |
// creates an increasing index on student_id | |
db.students.ensureIndex({student_id: 1}) | |
// creates a compound index that increasing on student_id, and decreasing on class. Indexes are used also to speed up sorting. Single index ascending | |
// can be used also for sorting in descending order | |
db.students.ensureIndex({student_id: 1, class: -1}) | |
db.students.dropIndex({student_id: 1, class: -1}) | |
// N.B.: cannot create compound indexes where 2 of the keys are composed by arrays | |
// Unique Index | |
db.students.ensureIndex({student_id: 1}, {unique: true}) | |
// Sparse Index: the meaning is to have an indexed property to refer only to the documents that have that property set, so the other ones (for | |
// which the property is null) are just ignored. If I add an unique index on a property only set for a few documents but missing for more than one, | |
// they key is not any more unique, because its value is 'null' for more than one. | |
// The solution is to mark the index as sparse, so it maps only to the documents that have the property set. | |
// Side effect: sorting on a property for which there is a sparse index, retrieves and sorts only the documents with the property set. The docs | |
// with property not set are just ignored | |
db.students.ensureIndex({primarySchoolName: 1}, {sparse: true}) | |
// HINT a query | |
// with .hint({...}) after the query we can instruct the DB to use a certain index | |
// Geospatial queries | |
// given documents with a property representing a location, 2 size array: | |
{location: [40.23, 50.34]} | |
// we can mark it as an index with a special property for 2d locations | |
db.positions.ensureIndex({location: '2d', type: 1}) | |
//and then find the locations near a certain position | |
db.positions.find({location: {$near: [x, y]}}) // -> shows them with increasing distance | |
// with spherical model we have [long, lat]. | |
/***** AGGREGATION *****/ | |
// aggregations work through processing blocks pipelines. Think of it as a sequence of transformations through the documents | |
db.products.aggregate([ { $group: { _id : "$category", num_products: {$sum: 1} } } ]) | |
// the array passed to the aggregate function represent the series of aggregation blocks that process the documents | |
// 1. $project: projection, reshape. This does not select nor aggregate anything ==> 1 : 1 | |
// 2. $match: filter, this does a filtering, a selection, corresponds to the WHERE ==> n : 1 | |
// 3. $group: aggregation, this does the aggregation ==> n : 1 | |
// 4. $sort: sorting ==> 1 : 1 | |
// 5. $skip: skips some documents (for paging) ==> n : 1 | |
// 6. $limit: limit the result (for paging) ==> n : 1 | |
// 7. $unwind: it normalizes the data, e.g. if an attribute contains a 'tags' key | |
// with value [1, 4, 6], it produces 3 documents one for value 1, one | |
// for 4, one for 6 | |
// to count the nr of products by manufacturer and category, in SQL we run: | |
// select manufacturer, category , count(*) from products group by manufacturer, category | |
// in MongoDB, we create the _id in the $group object containing both the $manufacturer and the $category: | |
db.products.aggregate([ { $group: { _id : { maker: "$manufacturer", "group name": "$category" }, "num_products": {$sum: 1} } } ]) | |
// this way the transformation process treats as belonging to the same result document all the docs that have the same pair (manufacturer, category) | |
// RECOMMENDATION: use always _id: { ... } instead of just _id: "$manufacturer" | |
// Group operations: | |
// $sum: to sum the attribute value, use {$sum: "$attribute_name" } | |
db.products.aggregate([ { $group: { _id : { maker: "$manufacturer", "group name": "$category" }, "aggregate price": {$sum: "$price"} } } ]) | |
// $avg | |
db.products.aggregate([ { $group: { _id : { category: "$category" }, "avg price" : {$avg: "$price"} } } ]) | |
// $addToSet: this is used to create an array for each grouping. Every time it finds an existing manufacturer, it adds the category to | |
// the categories set | |
db.products.aggregate([ { $group: { _id : { maker: "$manufacturer"}, categories: {$addToSet: "$category"}}}]) | |
// $push: it's like addToSet but it just push it on the array | |
// $max/$min: it updates the value only if the corresponding value found exploring the collection is bigger/smaller; | |
// $project: used to remove keys, add keys, rearrange keys, do simple manipulation such as toLowerCase. We can do reshape also before | |
// grouping, to simplify the processing | |
// to reshape: | |
db.products.aggregate([ | |
{$project: | |
{ | |
_id:0, | |
'maker': {$toLower:"$manufacturer"}, | |
'details': {'category': "$category", | |
'price' : {"$multiply":["$price",10]} | |
}, | |
'item':'$name' | |
} | |
} | |
]) | |
// $match : to filter the data before/after the aggregation pipe | |
db.zips.aggregate([{$match: {state: 'NY'}}]) | |
// it's equivalent to | |
db.zips.find({state: 'NY'}) | |
// $sort: it works the usual way. | |
// $skip, $limit work nicely with sort, for pagination | |
db.zips.aggregate([ | |
{$match: | |
{ | |
state:"NY" | |
} | |
}, | |
{$group: | |
{ | |
_id: "$city", | |
population: {$sum:"$pop"}, | |
} | |
}, | |
{$project: | |
{ | |
_id: 0, | |
city: "$_id", | |
population: 1, | |
} | |
}, | |
{$sort: | |
{ | |
population:-1 | |
} | |
}, | |
{$skip: 10}, | |
{$limit: 5} | |
]) | |
// $first and $last in Grouping are used to select the first document in partial grouping or last one: | |
db.zips.aggregate([ | |
/* get the population of every city in every state */ | |
{$group: | |
{ | |
_id: {state:"$state", city:"$city"}, | |
population: {$sum:"$pop"}, | |
} | |
}, | |
/* sort by state, population */ | |
{$sort: | |
{"_id.state":1, "population":-1} | |
}, | |
/* group by state, get the first item in each group */ | |
{$group: | |
{ | |
_id:"$_id.state", | |
city: {$first: "$_id.city"}, | |
population: {$first:"$population"} | |
} | |
}, | |
/* now sort by state again */ | |
{$sort: | |
{"_id":1} | |
} | |
]) | |
// $unwind: "$tags": it produces a new document for each value in the array, so that I can then group | |
// this double unwind creates a cartesian product of all the values of sizes and colors in the collections | |
db.inventory.aggregate([ | |
{$unwind: "$sizes"}, | |
{$unwind: "$colors"} | |
]) | |
// Note: $unwind is the reverse of a $push operation | |
db.inventory.aggregate([ | |
{$unwind: "$sizes"}, | |
{$unwind: "$colors"}, | |
/* create the color array */ | |
{$group: | |
{ | |
'_id': {name:"$name",size:"$sizes"}, | |
'colors': {$push: "$colors"}, | |
} | |
}, | |
/* create the size array */ | |
{$group: | |
{ | |
'_id': {'name':"$_id.name", | |
'colors' : "$colors"}, | |
'sizes': {$push: "$_id.size"} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment