Skip to content

Instantly share code, notes, and snippets.

@abhi11210646
Last active May 21, 2018 15:29
Show Gist options
  • Save abhi11210646/215309c2b041e58bc78cef06ee6443af to your computer and use it in GitHub Desktop.
Save abhi11210646/215309c2b041e58bc78cef06ee6443af to your computer and use it in GitHub Desktop.
All possible question's answer by aggregation framework of mongoDB for Movies DATASET. movies data | data science | aggregation | mongoDB
#### Format of Dataset
# Movies
{
"_id" : ObjectId("5aa7d4496b7a5451ba8ce741"),
"movie_id" : 1,
"title" : "Toy Story (1995)",
"genres" : "Animation|Children's|Comedy"
}
# Ratings
{
"_id" : ObjectId("5aa7d3f96b7a5451ba8b6ef5"),
"user_id" : 1,
"movie_id" : 3408,
"rating" : 4,
"ts" : 978300275
}
# Users
{
"_id" : ObjectId("5aa7d4656b7a5451ba8d7858"),
"user_id" : 1,
"gender" : "F",
"age" : 1,
"occupation" : 10,
"zip" : 48067
}
# Total movies released in this(YEAR) year:-
db.getCollection('movies').aggregate([
{$match:{"title":{$regex:"YEAR"}}},
{$project:{ "title":1,"length":{$strLenCP: "$title"} }},
{$project:{"title":1,"yearOfRelease":{$substr:["$title",{$subtract:["$length",5]},4]}}},
{$group:{_id:"$yearOfRelease",total:{"$sum":1}}}
])
# No of Movies released in year group by YEAR :-
db.getCollection('movies').aggregate([
{$project:{ "title":1,"length":{$strLenCP: "$title"} }},
{$project:{"title":1,"yearOfRelease":{$substr:["$title",{$subtract:["$length",5]},4]}}},
{$group:{_id:"$yearOfRelease",total:{"$sum":1}}}
])
# Movie status based on rating(Good or Bad)
db.getCollection('ratings').aggregate([
{
$project:{
summary:{
$cond:[{$gte:["$rating",2.5]},"Good Movie","Bad Movie"]
}
`}
}
],{allowDiskUse:true});
# Movie status based on rating(Good or Bad or Average)
db.getCollection('ratings').aggregate([
{
$project:{
summary:{
$switch:{
branches:[
{
case:{$gt:["$rating",2.5]},
then: "Good Movie"
},
{
case:{$eq:["$rating",2.5]},
then: "Average Movie"
},
{
case:{$lt:["$rating",2.5]},
then: "Bad Movie"
}
]
}
}}}
],{allowDiskUse:true});
##Number of Movies group by Geners
# Number of Movies group by Geners($group+$sort)
db.getCollection('movies').aggregate([
{
$project:{
"title":1,
"genres":{$split: ["$genres",'|']}
}
},
{
$unwind:"$genres"
},
{
$group:{
_id:"$genres",
total:{ $sum:1 }
}
}
])
# Number of Movies group by Geners($sortByCount)
db.getCollection('movies').aggregate([
{
$project:{
"title":1,
"genres":{$split: ["$genres",'|']}
}
},
{
$unwind:"$genres"
},
{ $sortByCount: "$genres" }
])
# Lowest, Highest and Average Rating of a Movie(MOVIEID)
db.getCollection('movies').aggregate([
{
$match:{"movie_id":MOVIEID}
},
{
$lookup:{
from:"ratings",
localField:"movie_id",
foreignField:"movie_id",
as:"movie_ratings"
}
},
{
$unwind:"$movie_ratings"},
{
$sort:{"movie_ratings.rating":1}},
{
$group:{
_id:"$title",
"lowestRating":{"$first":"$movie_ratings.rating"},
"highestRating":{"$last":"$movie_ratings.rating"},
"averageRating":{"$avg":"$movie_ratings.rating"}
}
}
])
# List Movies with Highest Reviews on Top
db.getCollection('movies').aggregate([
{
$lookup:{
from:"ratings",
localField:"movie_id",
foreignField:"movie_id",
as:"ratings"
}
},
{
$project:{
"movie_id":"$movie_id",
"movieName":"$title",
"noOfReviews":{$size:"$ratings"}
}
},
{
$sort:{"noOfReviews":-1}
}
],
{allowDiskUse:true})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment