Skip to content

Instantly share code, notes, and snippets.

@pierangeloc
Last active December 30, 2015 21:39
Show Gist options
  • Save pierangeloc/7888705 to your computer and use it in GitHub Desktop.
Save pierangeloc/7888705 to your computer and use it in GitHub Desktop.
mongoDB queries compendium. This is an overview of the capabilities of Mongo that I have been using during the M101J course
//find from people collection
// _id can be not only a scalar value but also a document:
db.people.insert({_id: {"bsn": "2322323", "phone_nr" : "0612313123123"}})
/** QUERIES **/
// find documents where profession exists
db.people.find({profession: {$exists: true}})
// find documents where name attribute has type String (for the correspondence between String and type
//number, refer to http://bsonspec.org/#/specification)
db.people.find({name: {$type: 2}})
// find where name attribute matches the regex (in this case ends with a)
db.people.find({name: {$regex: "a$"}})
// OR clause: the $or is an attribute of the find argument, and it contains the array of queries to form
// the or clause
db.people.find( { $or: [ {name: {$regex: "e$"} }, {age: {$exists: true} } ] } )
// find scores with score attribute > 90 OR < 50
db.scores.find({$or: [{score: {$gt: 90}}, {score: {$lt: 50}}]})
// AND clause
// find people with name lexicographically > A and ending with k
db.people.find({$and: [{name: {$gt: "A"} }, {name: {$regex: "k$"}}]})
//N.B.: No need to use AND most of the times, you can specify multiple conditions on an attribute putting
//them in a document:
db.people.find({name: {$gt: "A", $regex: "k$"}})
//N.B.: the js object that is passed to the find method is evaluated before being submitted. Therefore:
db.scores.find( { score : { $gt : 50 }, score : { $lt : 60 } } );
// is equivalent to
db.scores.find( {score : { $lt : 60 } } );
// search is polimorphic when applied to arrays, i.e. if it finds an array it matches any of the elements
// in the array; if it finds a single
// property, it matches the value of that single property.
// If the collection is:
{ "_id" : ObjectId("526523f693e6535ed27424c1"), "name" : "piero", "favorites" : [ "pasta", "pizza", "pane" ] }
{ "_id" : ObjectId("5265240b93e6535ed27424c2"), "name" : "ania", "favorites" : [ "lasagne", "pesce", "pane" ] }
{ "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [ "crocchette", "pesce", "grasso" ] }
{ "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" }
//then the query
db.accounts.find({favorites: "grasso"})
//produces
{ "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [ "crocchette", "pesce", "grasso" ] }
{ "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" }
// match a set of values in the array. The content of the array must have at least the provided set:
db.accounts.find({favorites: {$all: ["pesce", "pane"]}})
// matches the documents who contains in the attribute "favorites" at least one element contained in
// the provided list.
// This applies also to non-array types
db.accounts.find({favorites: {$in: ["pesce", "pane"]}})
/** CURSORS **/
//these actually modify the istrucitons sent to the DB server
cur = db.accounts.find()
cur.hasNext()
cur.next()
cur.limit(5) // --> limits the nr of results reacheable through this cursor to 5
cur.sort({name: -1})
cur.sort({name: -1}).limit(3)
cur.sort({name: -1}).skip(2).limit(3) //skip the first 2 and show the next 3
while(cur.hasNext()) printjson(cur.next)
/** COUNTING **/
//wherever find() is used, count() can be used
/** UPDATES **/
//1. WHOLESALE UPDATE
//it replaces the whole content of the found record, with the whole content provided in the 2nd argument.
//N.B: IT DISCARDS ANY PREVIOUS CONTENT
db.people.update({name: "Piero"}, {name: "Pierangelo", salary: 50000})
//2. SELECTIVE UPDATE
// N.B.: Default behavior of update is to update only the first document that matches the criteria
// If you want all matching docs to be updated, you must specify an extra option (3rd parameter) {multi: true}
//set one single field
db.users.update({username: "splunker"}, {$set: {"country" : "RU"}})
//increment an attribute. If the attribute does not exist, it is created and added to 0
db.users.update({username: "splunker"}, {$inc: {"contactsnr" : 1}})
// remove an attribute
db.users.update({username: "splunker"}, {$unset: {"country" : 1}})
// update single array element:
db.arrays.insert({"_id": 0, a: [1,2,3,4,5,6,7]})
db.arrays.update({"_id": 0}, {$set: {"a.2": 100}}) // updates the 3rd element of the array "a"
//arrays push and pop
//$push() push on right
db.arrays.update({"_id": 0}, {$push: {"a": 2}})
//pop one element on right, specify a > 0 argument (it doesn't matter if the arg is 1 or > 1)
db.arrays.update({"_id": 0}, {$pop: {"a": 2}})
//pop on left, specify a < 0 argument
db.arrays.update({"_id": 0}, {$push: {"a": -2}})
db.arrays.update({"_id": 0}, {$pushAll: {"a": [101, 102, 103, 104]}})
//remove a single element from the array.
// N.B.: the element specified is not the position, but the value!!! If it occurs more than once,
// it removes them all
db.arrays.update({"_id": 0}, {$pull: {"a": 4}})
db.arrays.update({"_id": 0}, {$pullAll: {"a": [4,5,6,7]}})
//$addToSet treats the array as a set, so it does not add an element if it is already included it
db.arrays.update({"_id": 0}, {$addToSet: {"a": 4}})
/** 3. REMOVE**/
db.users.remove({/* criteria object*/})
db.users.drop() // much faster than db.users.drop({}), but it does also remove the indexes
// to check the status of last operation, e.g. nr of docs updated:
db.runCommand({getLastError: 1})
/**************** INDEXES ************************/
// creates an increasing index on student_id
db.students.ensureIndex({student_id: 1})
// creates a compound index that increasing on student_id, and decreasing on class. Indexes are used also to speed up sorting. Single index ascending
// can be used also for sorting in descending order
db.students.ensureIndex({student_id: 1, class: -1})
db.students.dropIndex({student_id: 1, class: -1})
// N.B.: cannot create compound indexes where 2 of the keys are composed by arrays
// Unique Index
db.students.ensureIndex({student_id: 1}, {unique: true})
// Sparse Index: the meaning is to have an indexed property to refer only to the documents that have that property set, so the other ones (for
// which the property is null) are just ignored. If I add an unique index on a property only set for a few documents but missing for more than one,
// they key is not any more unique, because its value is 'null' for more than one.
// The solution is to mark the index as sparse, so it maps only to the documents that have the property set.
// Side effect: sorting on a property for which there is a sparse index, retrieves and sorts only the documents with the property set. The docs
// with property not set are just ignored
db.students.ensureIndex({primarySchoolName: 1}, {sparse: true})
// HINT a query
// with .hint({...}) after the query we can instruct the DB to use a certain index
// Geospatial queries
// given documents with a property representing a location, 2 size array:
{location: [40.23, 50.34]}
// we can mark it as an index with a special property for 2d locations
db.positions.ensureIndex({location: '2d', type: 1})
//and then find the locations near a certain position
db.positions.find({location: {$near: [x, y]}}) // -> shows them with increasing distance
// with spherical model we have [long, lat].
/***** AGGREGATION *****/
// aggregations work through processing blocks pipelines. Think of it as a sequence of transformations through the documents
db.products.aggregate([ { $group: { _id : "$category", num_products: {$sum: 1} } } ])
// the array passed to the aggregate function represent the series of aggregation blocks that process the documents
// 1. $project: projection, reshape. This does not select nor aggregate anything ==> 1 : 1
// 2. $match: filter, this does a filtering, a selection, corresponds to the WHERE ==> n : 1
// 3. $group: aggregation, this does the aggregation ==> n : 1
// 4. $sort: sorting ==> 1 : 1
// 5. $skip: skips some documents (for paging) ==> n : 1
// 6. $limit: limit the result (for paging) ==> n : 1
// 7. $unwind: it normalizes the data, e.g. if an attribute contains a 'tags' key
// with value [1, 4, 6], it produces 3 documents one for value 1, one
// for 4, one for 6
// to count the nr of products by manufacturer and category, in SQL we run:
// select manufacturer, category , count(*) from products group by manufacturer, category
// in MongoDB, we create the _id in the $group object containing both the $manufacturer and the $category:
db.products.aggregate([ { $group: { _id : { maker: "$manufacturer", "group name": "$category" }, "num_products": {$sum: 1} } } ])
// this way the transformation process treats as belonging to the same result document all the docs that have the same pair (manufacturer, category)
// RECOMMENDATION: use always _id: { ... } instead of just _id: "$manufacturer"
// Group operations:
// $sum: to sum the attribute value, use {$sum: "$attribute_name" }
db.products.aggregate([ { $group: { _id : { maker: "$manufacturer", "group name": "$category" }, "aggregate price": {$sum: "$price"} } } ])
// $avg
db.products.aggregate([ { $group: { _id : { category: "$category" }, "avg price" : {$avg: "$price"} } } ])
// $addToSet: this is used to create an array for each grouping. Every time it finds an existing manufacturer, it adds the category to
// the categories set
db.products.aggregate([ { $group: { _id : { maker: "$manufacturer"}, categories: {$addToSet: "$category"}}}])
// $push: it's like addToSet but it just push it on the array
// $max/$min: it updates the value only if the corresponding value found exploring the collection is bigger/smaller;
// $project: used to remove keys, add keys, rearrange keys, do simple manipulation such as toLowerCase. We can do reshape also before
// grouping, to simplify the processing
// to reshape:
db.products.aggregate([
{$project:
{
_id:0,
'maker': {$toLower:"$manufacturer"},
'details': {'category': "$category",
'price' : {"$multiply":["$price",10]}
},
'item':'$name'
}
}
])
// $match : to filter the data before/after the aggregation pipe
db.zips.aggregate([{$match: {state: 'NY'}}])
// it's equivalent to
db.zips.find({state: 'NY'})
// $sort: it works the usual way.
// $skip, $limit work nicely with sort, for pagination
db.zips.aggregate([
{$match:
{
state:"NY"
}
},
{$group:
{
_id: "$city",
population: {$sum:"$pop"},
}
},
{$project:
{
_id: 0,
city: "$_id",
population: 1,
}
},
{$sort:
{
population:-1
}
},
{$skip: 10},
{$limit: 5}
])
// $first and $last in Grouping are used to select the first document in partial grouping or last one:
db.zips.aggregate([
/* get the population of every city in every state */
{$group:
{
_id: {state:"$state", city:"$city"},
population: {$sum:"$pop"},
}
},
/* sort by state, population */
{$sort:
{"_id.state":1, "population":-1}
},
/* group by state, get the first item in each group */
{$group:
{
_id:"$_id.state",
city: {$first: "$_id.city"},
population: {$first:"$population"}
}
},
/* now sort by state again */
{$sort:
{"_id":1}
}
])
// $unwind: "$tags": it produces a new document for each value in the array, so that I can then group
// this double unwind creates a cartesian product of all the values of sizes and colors in the collections
db.inventory.aggregate([
{$unwind: "$sizes"},
{$unwind: "$colors"}
])
// Note: $unwind is the reverse of a $push operation
db.inventory.aggregate([
{$unwind: "$sizes"},
{$unwind: "$colors"},
/* create the color array */
{$group:
{
'_id': {name:"$name",size:"$sizes"},
'colors': {$push: "$colors"},
}
},
/* create the size array */
{$group:
{
'_id': {'name':"$_id.name",
'colors' : "$colors"},
'sizes': {$push: "$_id.size"}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment