pierangeloc · December 30, 2015 21:39
diff --git a/mongo-db-queries.js b/mongo-db-queries.js
 //find from people collection
 // _id  can be not only a scalar value but also a document:
 db.people.insert({_id: {"bsn": "2322323", "phone_nr" : "0612313123123"}})


 /** QUERIES **/
 // find documents where profession exists
 db.people.find({profession: {$exists: true}})
 // find documents where name attribute has type String (for the correspondence between String and type 
 //number, refer to http://bsonspec.org/#/specification)
 db.people.find({name: {$type: 2}})
 // find where name attribute matches the regex (in this case ends with a)
 db.people.find({name: {$regex: "a$"}})

 // OR clause: the $or is an attribute of the find argument, and it contains the array of queries to form 
 // the or clause 
 db.people.find( { $or: [ {name: {$regex: "e$"} }, {age: {$exists: true} } ] } )
 // find scores with score attribute > 90 OR < 50 
 db.scores.find({$or: [{score: {$gt: 90}}, {score: {$lt: 50}}]})

 // AND clause
 // find people with name lexicographically > A and ending with k
 db.people.find({$and: [{name: {$gt: "A"} }, {name: {$regex: "k$"}}]}) 
 //N.B.: No need to use AND most of the times, you can specify multiple conditions on an attribute putting 
 //them in a document:
 db.people.find({name: {$gt: "A", $regex: "k$"}})

 //N.B.: the js object that is passed to the find method is evaluated before being submitted. Therefore:
 db.scores.find( { score : { $gt : 50 }, score : { $lt : 60 } } );
 // is equivalent to
 db.scores.find( {score : { $lt : 60 } } );

 // search is polimorphic when applied to arrays, i.e. if it finds an array it matches any of the elements
 // in the array; if it finds a single 
 // property, it matches the value of that single property. 
 // If the collection is:
 { "_id" : ObjectId("526523f693e6535ed27424c1"), "name" : "piero", "favorites" : [  "pasta",  "pizza",  "pane" ] }
 { "_id" : ObjectId("5265240b93e6535ed27424c2"), "name" : "ania", "favorites" : [  "lasagne",  "pesce",  "pane" ] }
 { "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [  "crocchette",  "pesce",  "grasso" ] }
 { "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" }
 //then the query
 db.accounts.find({favorites: "grasso"})
 //produces
 { "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [  "crocchette",  "pesce",  "grasso" ] }
 { "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" }

 // match a set of values in the array. The content of the array must have at least the provided set:
 db.accounts.find({favorites: {$all: ["pesce", "pane"]}}) 
 // matches the documents who contains in the attribute "favorites" at least one element contained in 
 // the provided list.
 // This applies also to non-array types
 db.accounts.find({favorites: {$in: ["pesce", "pane"]}}) 

 /** CURSORS **/
 //these actually modify the istrucitons sent to the DB server
 cur = db.accounts.find()
 cur.hasNext()
 cur.next()
 cur.limit(5) // --> limits the nr of results reacheable through this cursor to 5
 cur.sort({name: -1})
 cur.sort({name: -1}).limit(3)

 cur.sort({name: -1}).skip(2).limit(3) //skip the first 2 and show the next 3

 while(cur.hasNext()) printjson(cur.next)

 /** COUNTING **/
 //wherever find() is used, count() can be used

 /** UPDATES **/
 //1. WHOLESALE UPDATE
 //it replaces the whole content of the found record, with the whole content provided in the 2nd argument.
 //N.B: IT DISCARDS ANY PREVIOUS CONTENT
 db.people.update({name: "Piero"}, {name: "Pierangelo", salary: 50000})

 //2. SELECTIVE UPDATE
 // N.B.: Default behavior of update is to update only the first document that matches the criteria
 // If you want all matching docs to be updated, you must specify an extra option (3rd parameter) {multi: true}

 //set one single field
 db.users.update({username: "splunker"}, {$set: {"country" : "RU"}})
 //increment an attribute. If the attribute does not exist, it is created and added to 0
 db.users.update({username: "splunker"}, {$inc: {"contactsnr" : 1}})
 // remove an attribute
 db.users.update({username: "splunker"}, {$unset: {"country" : 1}})

 // update single array element:
 db.arrays.insert({"_id": 0, a: [1,2,3,4,5,6,7]})
 db.arrays.update({"_id": 0}, {$set: {"a.2": 100}}) // updates the 3rd element of the array "a"
 //arrays push and pop
 //$push() push on right
 db.arrays.update({"_id": 0}, {$push: {"a": 2}})
 //pop one element on right, specify a > 0 argument (it doesn't matter if the arg is 1 or > 1)
 db.arrays.update({"_id": 0}, {$pop: {"a": 2}})
 //pop on left, specify a < 0 argument
 db.arrays.update({"_id": 0}, {$push: {"a": -2}})
 db.arrays.update({"_id": 0}, {$pushAll: {"a": [101, 102, 103, 104]}})
 //remove a single element from the array.
 // N.B.: the element specified is not the position, but the value!!! If it occurs more than once, 
 // it removes them all
 db.arrays.update({"_id": 0}, {$pull: {"a": 4}})
 db.arrays.update({"_id": 0}, {$pullAll: {"a": [4,5,6,7]}})
 //$addToSet treats the array as a set, so it does not add an element if it is already included it
 db.arrays.update({"_id": 0}, {$addToSet: {"a": 4}})

 /** 3. REMOVE**/ 
 db.users.remove({/* criteria object*/})
 db.users.drop() // much faster than db.users.drop({}), but it does also remove the indexes

 // to check the status of last operation, e.g. nr of docs updated:
 db.runCommand({getLastError: 1})



 /**************** INDEXES ************************/

 // creates an increasing index on student_id
 db.students.ensureIndex({student_id: 1})
 // creates a compound index that increasing on student_id, and decreasing on class. Indexes are used also to speed up sorting. Single index ascending
 // can be used also for sorting in descending order
 db.students.ensureIndex({student_id: 1, class: -1})
 db.students.dropIndex({student_id: 1, class: -1})

 // N.B.: cannot create compound indexes where 2 of the keys are composed by arrays

 // Unique Index
 db.students.ensureIndex({student_id: 1}, {unique: true})

 // Sparse Index: the meaning is to have an indexed property to refer only to the documents that have that property set, so the other ones (for 
 // which the property is null) are just ignored. If I add an unique index on a property only set for a few documents but missing for more than one,
 // they key is not any more unique, because its value is 'null' for more than one.
 // The solution is to mark the index as sparse, so it maps only to the documents that have the property set.
 // Side effect: sorting on a property for which there is a sparse index, retrieves and sorts only the documents with the property set. The docs
 // with property not set are just ignored
 db.students.ensureIndex({primarySchoolName: 1}, {sparse: true})

 // HINT a query
 // with .hint({...}) after the query we can instruct the DB to use a certain index

 // Geospatial queries
 // given documents with a property representing a location, 2 size array:
 {location: [40.23, 50.34]}
 // we can mark it as an index with a special property for 2d locations
 db.positions.ensureIndex({location: '2d', type: 1})
 //and then find the locations near a certain position
 db.positions.find({location: {$near: [x, y]}}) // -> shows them with increasing distance
 // with spherical model we have [long, lat]. 


 /***** AGGREGATION *****/
 // aggregations work through processing blocks pipelines. Think of it as a sequence of transformations through the documents
 db.products.aggregate([ { $group:  { _id : "$category", num_products: {$sum: 1} } } ])

 // the array passed to the aggregate function represent the series of aggregation blocks that process the documents
 // 1. $project: projection, reshape. This does not select nor aggregate anything   ==> 1 : 1
 // 2. $match: filter, this does a filtering, a selection, corresponds to the WHERE ==> n : 1
 // 3. $group: aggregation, this does the aggregation                               ==> n : 1
 // 4. $sort: sorting                                                               ==> 1 : 1
 // 5. $skip: skips some documents (for paging)                                     ==> n : 1
 // 6. $limit: limit the result (for paging)                                        ==> n : 1
 // 7. $unwind: it normalizes the data, e.g. if an attribute contains a 'tags' key
 //             with value [1, 4, 6], it produces 3 documents one for value 1, one
 //             for 4, one for 6


 // to count the nr of products by manufacturer and category, in SQL we run:
 // select manufacturer, category , count(*) from products group by manufacturer, category
 // in MongoDB, we create the _id in the $group object containing both the $manufacturer and the $category:
 db.products.aggregate([ { $group:  { _id : { maker: "$manufacturer", "group name": "$category" }, "num_products": {$sum: 1} } } ])
 // this way the transformation process treats as belonging to the same result document all the docs  that have the same pair (manufacturer, category)
 // RECOMMENDATION: use always _id: { ... } instead of just _id: "$manufacturer"

 // Group operations:
 // $sum: to sum the attribute value, use {$sum: "$attribute_name" }
 db.products.aggregate([ { $group:  { _id : { maker: "$manufacturer", "group name": "$category" }, "aggregate price": {$sum: "$price"} } } ])
 // $avg
 db.products.aggregate([ { $group:  { _id : { category: "$category" }, "avg price" : {$avg: "$price"} } } ])
 // $addToSet: this is used to create an array for each grouping. Every time it finds an existing manufacturer, it adds the category to 
 // the categories set
 db.products.aggregate([ { $group:  { _id : { maker: "$manufacturer"}, categories: {$addToSet: "$category"}}}])
 // $push: it's like addToSet but it just push it on the array
 // $max/$min: it updates the value only if the corresponding value found exploring the collection is bigger/smaller; 

 // $project: used to remove keys, add keys, rearrange keys, do simple manipulation such as toLowerCase. We can do reshape also before
 // grouping, to simplify the processing
 // to reshape: 
 db.products.aggregate([
    {$project:
     {
 	 _id:0,
 	 'maker': {$toLower:"$manufacturer"},
 	 'details': {'category': "$category",
 		     'price' : {"$multiply":["$price",10]}
 		    },
 	 'item':'$name'
     }
    }
 ])


 // $match : to filter the data before/after the aggregation pipe
 db.zips.aggregate([{$match: {state: 'NY'}}])
 // it's equivalent to
 db.zips.find({state: 'NY'})

 // $sort: it works the usual way.
 // $skip, $limit work nicely with sort, for pagination
 db.zips.aggregate([
    {$match:
     {
 	 state:"NY"
     }
    },
    {$group:
     {
 	 _id: "$city",
 	 population: {$sum:"$pop"},
     }
    },
    {$project:
     {
 	 _id: 0,
 	 city: "$_id",
 	 population: 1,
     }
    },
    {$sort:
     {
 	 population:-1
     }
    },
    {$skip: 10},
    {$limit: 5}
 ])

 // $first and $last in Grouping are used to select the first document in partial grouping or last one:
 db.zips.aggregate([
    /* get the population of every city in every state */
    {$group:
     {
 	 _id: {state:"$state", city:"$city"},
 	 population: {$sum:"$pop"},
     }
    },
     /* sort by state, population */
    {$sort: 
     {"_id.state":1, "population":-1}
    },

    /* group by state, get the first item in each group */
    {$group: 
     {
 	 _id:"$_id.state",
 	 city: {$first: "$_id.city"},
 	 population: {$first:"$population"}
     }
    },

    /* now sort by state again */
    {$sort:
     {"_id":1}
    }
 ])


 // $unwind: "$tags": it produces a new document for each value in the array, so that I can then group
 // this double unwind creates a cartesian product of all the values of sizes and colors in the collections
 db.inventory.aggregate([
  {$unwind: "$sizes"},
  {$unwind: "$colors"}
  ])

 // Note: $unwind is the reverse of a $push operation
 db.inventory.aggregate([
    {$unwind: "$sizes"},
    {$unwind: "$colors"},
    /* create the color array */
    {$group: 
     {
 	'_id': {name:"$name",size:"$sizes"},
 	 'colors': {$push: "$colors"},
     }
    },
    /* create the size array */
    {$group: 
     {
 	'_id': {'name':"$_id.name",
 		'colors' : "$colors"},
 	 'sizes': {$push: "$_id.size"}
     }
    }
	//find from people collection
	// _id can be not only a scalar value but also a document:
	db.people.insert({_id: {"bsn": "2322323", "phone_nr" : "0612313123123"}})


	/ QUERIES /
	// find documents where profession exists
	db.people.find({profession: {$exists: true}})
	// find documents where name attribute has type String (for the correspondence between String and type
	//number, refer to http://bsonspec.org/#/specification)
	db.people.find({name: {$type: 2}})
	// find where name attribute matches the regex (in this case ends with a)
	db.people.find({name: {$regex: "a$"}})

	// OR clause: the $or is an attribute of the find argument, and it contains the array of queries to form
	// the or clause
	db.people.find( { $or: [ {name: {$regex: "e$"} }, {age: {$exists: true} } ] } )
	// find scores with score attribute > 90 OR < 50
	db.scores.find({$or: [{score: {$gt: 90}}, {score: {$lt: 50}}]})

	// AND clause
	// find people with name lexicographically > A and ending with k
	db.people.find({$and: [{name: {$gt: "A"} }, {name: {$regex: "k$"}}]})
	//N.B.: No need to use AND most of the times, you can specify multiple conditions on an attribute putting
	//them in a document:
	db.people.find({name: {$gt: "A", $regex: "k$"}})

	//N.B.: the js object that is passed to the find method is evaluated before being submitted. Therefore:
	db.scores.find( { score : { $gt : 50 }, score : { $lt : 60 } } );
	// is equivalent to
	db.scores.find( {score : { $lt : 60 } } );

	// search is polimorphic when applied to arrays, i.e. if it finds an array it matches any of the elements
	// in the array; if it finds a single
	// property, it matches the value of that single property.
	// If the collection is:
	{ "_id" : ObjectId("526523f693e6535ed27424c1"), "name" : "piero", "favorites" : [ "pasta", "pizza", "pane" ] }
	{ "_id" : ObjectId("5265240b93e6535ed27424c2"), "name" : "ania", "favorites" : [ "lasagne", "pesce", "pane" ] }
	{ "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [ "crocchette", "pesce", "grasso" ] }
	{ "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" }
	//then the query
	db.accounts.find({favorites: "grasso"})
	//produces
	{ "_id" : ObjectId("5265242193e6535ed27424c3"), "name" : "moerka", "favorites" : [ "crocchette", "pesce", "grasso" ] }
	{ "_id" : ObjectId("5265259793e6535ed27424c4"), "name" : "tony rocky horror", "favorites" : "grasso" }

	// match a set of values in the array. The content of the array must have at least the provided set:
	db.accounts.find({favorites: {$all: ["pesce", "pane"]}})
	// matches the documents who contains in the attribute "favorites" at least one element contained in
	// the provided list.
	// This applies also to non-array types
	db.accounts.find({favorites: {$in: ["pesce", "pane"]}})

	/ CURSORS /
	//these actually modify the istrucitons sent to the DB server
	cur = db.accounts.find()
	cur.hasNext()
	cur.next()
	cur.limit(5) // --> limits the nr of results reacheable through this cursor to 5
	cur.sort({name: -1})
	cur.sort({name: -1}).limit(3)

	cur.sort({name: -1}).skip(2).limit(3) //skip the first 2 and show the next 3

	while(cur.hasNext()) printjson(cur.next)

	/ COUNTING /
	//wherever find() is used, count() can be used

	/ UPDATES /
	//1. WHOLESALE UPDATE
	//it replaces the whole content of the found record, with the whole content provided in the 2nd argument.
	//N.B: IT DISCARDS ANY PREVIOUS CONTENT
	db.people.update({name: "Piero"}, {name: "Pierangelo", salary: 50000})

	//2. SELECTIVE UPDATE
	// N.B.: Default behavior of update is to update only the first document that matches the criteria
	// If you want all matching docs to be updated, you must specify an extra option (3rd parameter) {multi: true}

	//set one single field
	db.users.update({username: "splunker"}, {$set: {"country" : "RU"}})
	//increment an attribute. If the attribute does not exist, it is created and added to 0
	db.users.update({username: "splunker"}, {$inc: {"contactsnr" : 1}})
	// remove an attribute
	db.users.update({username: "splunker"}, {$unset: {"country" : 1}})

	// update single array element:
	db.arrays.insert({"_id": 0, a: [1,2,3,4,5,6,7]})
	db.arrays.update({"_id": 0}, {$set: {"a.2": 100}}) // updates the 3rd element of the array "a"
	//arrays push and pop
	//$push() push on right
	db.arrays.update({"_id": 0}, {$push: {"a": 2}})
	//pop one element on right, specify a > 0 argument (it doesn't matter if the arg is 1 or > 1)
	db.arrays.update({"_id": 0}, {$pop: {"a": 2}})
	//pop on left, specify a < 0 argument
	db.arrays.update({"_id": 0}, {$push: {"a": -2}})
	db.arrays.update({"_id": 0}, {$pushAll: {"a": [101, 102, 103, 104]}})
	//remove a single element from the array.
	// N.B.: the element specified is not the position, but the value!!! If it occurs more than once,
	// it removes them all
	db.arrays.update({"_id": 0}, {$pull: {"a": 4}})
	db.arrays.update({"_id": 0}, {$pullAll: {"a": [4,5,6,7]}})
	//$addToSet treats the array as a set, so it does not add an element if it is already included it
	db.arrays.update({"_id": 0}, {$addToSet: {"a": 4}})

	/ 3. REMOVE/
	db.users.remove({/* criteria object*/})
	db.users.drop() // much faster than db.users.drop({}), but it does also remove the indexes

	// to check the status of last operation, e.g. nr of docs updated:
	db.runCommand({getLastError: 1})



	/************** INDEXES **********************/

	// creates an increasing index on student_id
	db.students.ensureIndex({student_id: 1})
	// creates a compound index that increasing on student_id, and decreasing on class. Indexes are used also to speed up sorting. Single index ascending
	// can be used also for sorting in descending order
	db.students.ensureIndex({student_id: 1, class: -1})
	db.students.dropIndex({student_id: 1, class: -1})

	// N.B.: cannot create compound indexes where 2 of the keys are composed by arrays

	// Unique Index
	db.students.ensureIndex({student_id: 1}, {unique: true})

	// Sparse Index: the meaning is to have an indexed property to refer only to the documents that have that property set, so the other ones (for
	// which the property is null) are just ignored. If I add an unique index on a property only set for a few documents but missing for more than one,
	// they key is not any more unique, because its value is 'null' for more than one.
	// The solution is to mark the index as sparse, so it maps only to the documents that have the property set.
	// Side effect: sorting on a property for which there is a sparse index, retrieves and sorts only the documents with the property set. The docs
	// with property not set are just ignored
	db.students.ensureIndex({primarySchoolName: 1}, {sparse: true})

	// HINT a query
	// with .hint({...}) after the query we can instruct the DB to use a certain index

	// Geospatial queries
	// given documents with a property representing a location, 2 size array:
	{location: [40.23, 50.34]}
	// we can mark it as an index with a special property for 2d locations
	db.positions.ensureIndex({location: '2d', type: 1})
	//and then find the locations near a certain position
	db.positions.find({location: {$near: [x, y]}}) // -> shows them with increasing distance
	// with spherical model we have [long, lat].


	/*** AGGREGATION ***/
	// aggregations work through processing blocks pipelines. Think of it as a sequence of transformations through the documents
	db.products.aggregate([ { $group: { _id : "$category", num_products: {$sum: 1} } } ])

	// the array passed to the aggregate function represent the series of aggregation blocks that process the documents
	// 1. $project: projection, reshape. This does not select nor aggregate anything ==> 1 : 1
	// 2. $match: filter, this does a filtering, a selection, corresponds to the WHERE ==> n : 1
	// 3. $group: aggregation, this does the aggregation ==> n : 1
	// 4. $sort: sorting ==> 1 : 1
	// 5. $skip: skips some documents (for paging) ==> n : 1
	// 6. $limit: limit the result (for paging) ==> n : 1
	// 7. $unwind: it normalizes the data, e.g. if an attribute contains a 'tags' key
	// with value [1, 4, 6], it produces 3 documents one for value 1, one
	// for 4, one for 6


	// to count the nr of products by manufacturer and category, in SQL we run:
	// select manufacturer, category , count(*) from products group by manufacturer, category
	// in MongoDB, we create the _id in the $group object containing both the $manufacturer and the $category:
	db.products.aggregate([ { $group: { _id : { maker: "$manufacturer", "group name": "$category" }, "num_products": {$sum: 1} } } ])
	// this way the transformation process treats as belonging to the same result document all the docs that have the same pair (manufacturer, category)
	// RECOMMENDATION: use always _id: { ... } instead of just _id: "$manufacturer"

	// Group operations:
	// $sum: to sum the attribute value, use {$sum: "$attribute_name" }
	db.products.aggregate([ { $group: { _id : { maker: "$manufacturer", "group name": "$category" }, "aggregate price": {$sum: "$price"} } } ])
	// $avg
	db.products.aggregate([ { $group: { _id : { category: "$category" }, "avg price" : {$avg: "$price"} } } ])
	// $addToSet: this is used to create an array for each grouping. Every time it finds an existing manufacturer, it adds the category to
	// the categories set
	db.products.aggregate([ { $group: { _id : { maker: "$manufacturer"}, categories: {$addToSet: "$category"}}}])
	// $push: it's like addToSet but it just push it on the array
	// $max/$min: it updates the value only if the corresponding value found exploring the collection is bigger/smaller;

	// $project: used to remove keys, add keys, rearrange keys, do simple manipulation such as toLowerCase. We can do reshape also before
	// grouping, to simplify the processing
	// to reshape:
	db.products.aggregate([
	{$project:
	{
	_id:0,
	'maker': {$toLower:"$manufacturer"},
	'details': {'category': "$category",
	'price' : {"$multiply":["$price",10]}
	},
	'item':'$name'
	}
	}
	])


	// $match : to filter the data before/after the aggregation pipe
	db.zips.aggregate([{$match: {state: 'NY'}}])
	// it's equivalent to
	db.zips.find({state: 'NY'})

	// $sort: it works the usual way.
	// $skip, $limit work nicely with sort, for pagination
	db.zips.aggregate([
	{$match:
	{
	state:"NY"
	}
	},
	{$group:
	{
	_id: "$city",
	population: {$sum:"$pop"},
	}
	},
	{$project:
	{
	_id: 0,
	city: "$_id",
	population: 1,
	}
	},
	{$sort:
	{
	population:-1
	}
	},
	{$skip: 10},
	{$limit: 5}
	])

	// $first and $last in Grouping are used to select the first document in partial grouping or last one:
	db.zips.aggregate([
	/* get the population of every city in every state */
	{$group:
	{
	_id: {state:"$state", city:"$city"},
	population: {$sum:"$pop"},
	}
	},
	/* sort by state, population */
	{$sort:
	{"_id.state":1, "population":-1}
	},

	/* group by state, get the first item in each group */
	{$group:
	{
	_id:"$_id.state",
	city: {$first: "$_id.city"},
	population: {$first:"$population"}
	}
	},

	/* now sort by state again */
	{$sort:
	{"_id":1}
	}
	])


	// $unwind: "$tags": it produces a new document for each value in the array, so that I can then group
	// this double unwind creates a cartesian product of all the values of sizes and colors in the collections
	db.inventory.aggregate([
	{$unwind: "$sizes"},
	{$unwind: "$colors"}
	])

	// Note: $unwind is the reverse of a $push operation
	db.inventory.aggregate([
	{$unwind: "$sizes"},
	{$unwind: "$colors"},
	/* create the color array */
	{$group:
	{
	'_id': {name:"$name",size:"$sizes"},
	'colors': {$push: "$colors"},
	}
	},
	/* create the size array */
	{$group:
	{
	'_id': {'name':"$_id.name",
	'colors' : "$colors"},
	'sizes': {$push: "$_id.size"}
	}
	}