$group with $lookup - mongodb

I have a question in MongoDB query.
I have the following users collection:
{
"_id" : ObjectId("5aa03bf97d6e1d28a020f488"),
"name":"A1",
"interests" : [
ObjectId("5aa03b877d6e1d28a020f484"),
ObjectId("5aa03bb47d6e1d28a020f485")
]
},
{
"_id" : ObjectId("5affd69339f67335303ddf77"),
"name":"A2",
"interests" : [
ObjectId("5aa03b877d6e1d28a020f484")
]
},
{
"_id" : ObjectId("5affd69339f673ddfjfhri45"),
"name":"A3",
"interests" : [
ObjectId("5aa03bb47d6e1d28a020f485"),
]
},
{
"_id" : ObjectId("5affd69339f67365656ddfg4f"),
"name":"A4",
"interests" : [
ObjectId("5aa16eb8890cbb4c582e8a38"),
]
}
The interests collection look li the following example:
{
"_id" : ObjectId("5aa16eb8890cbb4c582e8a38"),
"name" : "Swimming",
},
{
"_id" : ObjectId("5aa03bb47d6e1d28a020f485"),
"name" : "Basketball",
},
{
"_id" : ObjectId("5aa03b877d6e1d28a020f484"),
"name" : "Fishing",
}
I want to write a query that counts for the interests types of all users:
the expected result is like:
[
{
"name":"fishing"
"count":21
},
{
"name":"Basketball"
"count":15
}
]
Thanks for helpers :)

If you have mongodb 3.6 then you can try below aggregation
db.collection.aggregate([
{ "$lookup": {
"from": Intrest.collection.name,
"let": { "interests": "$interests" },
"pipeline": [
{ "$match": {
"$expr": { "$in": [ "$_id", "$$interests" ] },
"name": "Swimming",
}}
],
"as": "interests"
}},
{ "$unwind": "$interests" },
{ "$group": {
"_id": "$interests.name",
"count": { "$sum": 1 }
}},
{ "$project": {
"name": "$_id", "count": 1
}}
])
And if you want to group with interests name
db.collection.aggregate([
{ "$lookup": {
"from": Intrest.collection.name,
"let": { "interests": "$interests" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$interests" ] } }}
],
"as": "interests"
}},
{ "$unwind": "$interests" },
{ "$group": {
"_id": "$interests.name",
"count": { "$sum": 1 }
}},
{ "$project": {
"name": "$_id", "count": 1
}}
])

db.collection.aggregate(
{
$group: {
_id: '$interests'
}
},
{
$group: {
_id: '$name',
count: {
$sum: 1
}
}
}
)

Related

Simple MongoDB Aggregation

I'm a bit confused on how to group using aggregation but still be able to extract specific values from arrays:
db.collection.aggregate([
{ "$unwind": f"${stat_type}" },
{
"$group": {
"_id": "$userId",
"value" : { "$max" : f"${stat_type}.stat_value" },
"character" : f"${stat_type}.character_name", <-- how do I extract this value that matches where the $max from above is grabbed.
}
},
{ "$sort": { "value": -1 }},
{ '$limit' : 30 }
])
Sample Entries:
{
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 100243
]
}
{
'name' : "Jimmy",
'userId' : 12346,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 1020243
]
}
{
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "Lebron",
"stat_value" : 99900243
]
}
A sample output for what I'm looking for is below:
[
{
'_id':12345,
'user' : 'Tony'
'character_name' : 'Lebron',
'stat_value' : 99900243
},
{
'_id':12346,
'user' : 'Jimmy'
'character_name' : 'James',
'stat_value' : 1020243
}
]
You can use the $top accumulator to achieve the desired result. Like this:
db.collection.aggregate([
{
"$unwind": "$damage_dealt"
},
{
"$group": {
"_id": "$userId",
"value": {
$top: {
output: {
character_name: "$damage_dealt.character_name",
stat_value: "$damage_dealt.stat_value"
},
sortBy: {
"damage_dealt.stat_value": -1
}
}
},
}
},
{
"$project": {
character_name: "$value.character_name",
stat_value: "$value.stat_value"
}
},
{
"$sort": {
"stat_value": -1
}
},
{
"$limit": 30
}
])
Playground link.
Or collects all the group elements in an array, and the max stat_value, then pick the object from the array containing the max stat_value.
db.collection.aggregate([
{
"$unwind": "$damage_dealt"
},
{
"$group": {
"_id": "$userId",
"max_stat": {
"$max": "$damage_dealt.stat_value"
},
"damages": {
"$push": {
name: "$name",
damage_value: "$damage_dealt"
}
}
}
},
{
"$project": {
"damages": {
"$arrayElemAt": [
{
"$filter": {
"input": "$damages",
"as": "damage",
"cond": {
"$eq": [
"$$damage.damage_value.stat_value",
"$max_stat"
]
}
}
},
0
]
}
}
},
{
"$project": {
"character_name": "$damages.damage_value.character_name",
"stat_value": "$damages.damage_value.stat_value",
"name": "$damages.name"
}
},
{
"$sort": {
"stat_value": -1
}
},
{
"$limit": 30
}
])
Playground link.
Here's another way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$userId",
"user": {"$first": "$name"},
"damage_dealts": {"$push": "$damage_dealt"},
"maxStat": {"$max": {"$first": "$damage_dealt.stat_value"}}
}
},
{
"$set": {
"outChar": {
"$first": {
"$arrayElemAt": [
"$damage_dealts",
{"$indexOfArray": ["$damage_dealts.stat_value", "$maxStat"]}
]
}
}
}
},
{
"$project": {
"user": 1,
"character_name": "$outChar.character_name",
"stat_value": "$outChar.stat_value"
}
},
{"$sort": {"stat_value": -1}},
{"$limit": 30}
])
Try it on mongoplayground.net.

How to push all values in single array in mongodb

Colleges
/* 1 createdAt:5/9/2019, 7:00:04 PM*/
{
"_id" : ObjectId("5cd42b5c65b41027845938ae"),
"clgID" : "100",
"name" : "Anna University"
},
/* 2 createdAt:5/9/2019, 7:00:04 PM*/
{
"_id" : ObjectId("5cd42b5c65b41027845938ad"),
"clgID" : "200",
"name" : "National"
}
Subjects:
/* 1 createdAt:5/9/2019, 7:03:24 PM*/
{
"_id" : ObjectId("5cd42c2465b41027845938b0"),
"name" : "Hindi",
"members" : {
"student" : [
"123"
]
},
"college" : {
"collegeID" : "100"
}
},
/* 2 createdAt:5/9/2019, 7:03:24 PM*/
{
"_id" : ObjectId("5cd42c2465b41027845938af"),
"name" : "English",
"members" : {
"student" : [
"456",
"789"
]
},
"college" : {
"collegeID" : "100"
}
}
Here i am having two collection and i want to join Colleges table is clgID and Subjects table iscollege.collegeID , then i want to take members.student values and push into single array based on college.collegeID.
My Expected Output
{
"GroupDetails" : [ ],
"clgName" : "National"
},
{
"GroupDetails" : [
"123",
"456",
"789"
],
"clgName" : "Anna University"
}
My Code
db.Colleges.aggregate([
{ $match : { "clgID" : { $in : ["100", "200"] } } },
{ $lookup: { from: "Subjects", localField: "clgID", foreignField: "college.collegeID", as: "GroupDetails" } },
//{ $unwind: "$GroupDetails" },
{ $project: { '_id' : false, 'clgName' : '$name', 'GroupDetails.members.student' : true } }
])
I am getting like this
/* 1 */
{
"GroupDetails" : [ ],
"clgName" : "National"
},
/* 2 */
{
"GroupDetails" : [
{
"members" : {
"student" : [
"456"
]
}
},
{
"members" : {
"student" : [
"123"
]
}
}
],
"clgName" : "Anna University"
}
You can use below aggregation with mongodb 3.6 and above
db.Colleges.aggregate([
{ "$match": { "clgID": { "$in": ["100", "200"] } } },
{ "$lookup": {
"from": "Subjects",
"let": { "clgId": "$clgID" },
"pipeline": [
{ "$match": { "$expr": { "$eq": ["$$clgId", "$college.collegeID"] } } },
{ "$group": {
"_id": "$college.collegeID",
"groupDetails": { "$push": "$members.student" }
}},
{ "$project": {
"groupDetails": {
"$reduce": {
"input": "$groupDetails",
"initialValue": [],
"in": { "$concatArrays": ["$$this", "$$value"] }
}
}
}}
],
"as": "clg"
}},
{ "$unwind": { "path": "$clg", "preserveNullAndEmptyArrays": true } },
{ "$project": {
"clgName": "$name",
"groupDetails": { "$ifNull": ["$clg.groupDetails", []] }
}}
])
MongoPlayground
Or with the mongodb 3.4 and below
db.Colleges.aggregate([
{ "$match": { "clgID": { "$in": ["100", "200"] }}},
{ "$lookup": {
"from": "Subjects",
"localField": "clgID",
"foreignField": "college.collegeID",
"as": "clg"
}},
{ "$unwind": { "path": "$clg", "preserveNullAndEmptyArrays": true }},
{ "$group": {
"_id": { "clgId": "$clg.college.collegeID", "_id": "$_id" },
"groupDetails": { "$push": "$clg.members.student" },
"clgName": { "$first": "$name" }
}},
{ "$project": {
"_id": "$_id._id",
"clgName": 1,
"groupDetails": {
"$reduce": {
"input": "$groupDetails",
"initialValue": [],
"in": { "$concatArrays": ["$$this", "$$value"] }
}
}
}}
])
MongoPlayground

MongoDB aggregate nested grouping

I have Asset collection which has data like
{
"_id" : ObjectId("5bfb962ee2a301554915"),
"users" : [
"abc.abc#abc.com",
"abc.xyz#xyz.com"
],
"remote" : {
"source" : "dropbox",
"bytes" : 1234
}
{
"_id" : ObjectId("5bfb962ee2a301554915"),
"users" : [
"pqr.pqr#pqr.com",
],
"remote" : {
"source" : "google_drive",
"bytes" : 785
}
{
"_id" : ObjectId("5bfb962ee2a301554915"),
"users" : [
"abc.abc#abc.com",
"abc.xyz#xyz.com"
],
"remote" : {
"source" : "gmail",
"bytes" : 5647
}
What I am looking for is group by users and get the total of bytes according to its source like
{
"_id" : "abc.abc#abc.com",
"bytes" : {
"google_drive": 1458,
"dropbox" : 1254
}
}
I am not getting how to get the nested output using grouping.
I have tried with the query
db.asset.aggregate(
[
{$unwind : '$users'},
{$group:{
_id:
{'username': "$users",
'source': "$remote.source",
'total': {$sum: "$remote.bytes"}} }
}
]
)
This way I am getting the result with the repeated username.
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator within a $mergeObjects expression and a $replaceRoot pipeline to get the desired result.
You would need to run the following aggregate pipeline though:
db.asset.aggregate([
{ "$unwind": "$users" },
{ "$group": {
"_id": {
"users": "$users",
"source": "$remote.source"
},
"totalBytes": { "$sum": "$remote.bytes" }
} },
{ "$group": {
"_id": "$_id.users",
"counts": {
"$push": {
"k": "$_id.source",
"v": "$totalBytes"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "bytes": { "$arrayToObject": "$counts" } },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
])
which yields
/* 1 */
{
"bytes" : {
"gmail" : 5647.0,
"dropbox" : 1234.0
},
"_id" : "abc.abc#abc.com"
}
/* 2 */
{
"bytes" : {
"google_drive" : 785.0
},
"_id" : "pqr.pqr#pqr.com"
}
/* 3 */
{
"bytes" : {
"gmail" : 5647.0,
"dropbox" : 1234.0
},
"_id" : "abc.xyz#xyz.com"
}
using the above sample documents.
You have to use $group couple of times here. First with the users and the source and count the total number of bytes using $sum.
And second with the users and $push the source and the bytes into an array
db.collection.aggregate([
{ "$unwind": "$users" },
{ "$group": {
"_id": {
"users": "$users",
"source": "$remote.source"
},
"bytes": { "$sum": "$remote.bytes" }
}},
{ "$group": {
"_id": "$_id.users",
"data": {
"$push": {
"source": "$_id.source",
"bytes": "$bytes"
}
}
}}
])
And even if you want to convert the source and the bytes into key value format then replace the last $group stage with the below two stages.
{ "$group": {
"_id": "$_id.users",
"data": {
"$push": {
"k": "$_id.source",
"v": "$bytes"
}
}
}},
{ "$project": {
"_id": 0,
"username": "$_id",
"bytes": { "$arrayToObject": "$data" }
}}

Get Count of specific field mongodb

I am using below query to get combined data from users and project collections:
db.collection.aggregate([
{
"$group": {
"_id": "$userId",
"projectId": { "$push": "$projectId" }
}
},
{
"$lookup": {
"from": "users",
"let": { "userId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$userId" ] }}},
{ "$project": { "firstName": 1 }}
],
"as": "user"
}
},
{ "$unwind": "$user" },
{
"$lookup": {
"from": "projects",
"let": { "projectId": "$projectId" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$projectId" ] }}},
{ "$project": { "projectName": 1 }}
],
"as": "projects"
}
}
])
and it results like below:
[
{
"_id": "5c0a29e597e71a0d28b910aa",
"projectId": [
"5c0a2a8897e71a0d28b910ac",
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29e597e71a0d28b910aa",
"firstName": "Amit"
},
"projects": [
{
"_id": "5c0a2a8897e71a0d28b910ac",
"projectName": "LN-PM"
},
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery"
}
]
},
{
"_id": "5c0a29c697e71a0d28b910a9",
"projectId": [
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29c697e71a0d28b910a9",
"firstName": "Rajat"
},
"projects": [
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery"
}
]
}
]
Now i have another table "Worksheets" and want to include hours field in projects Array, which will be calculated from the worksheets table by specifying the projectId which is _id in the projects array. It will be find in worksheet table and hours will be incremented how many times this _id has in worksheets table. Below is my worksheet collection:
{
"_id" : ObjectId("5c0a4efa91b5021228681f7a"),
"projectId" : ObjectId("5c0a4083753a321c6c4ee024"),
"hours" : 8,
"userId" : ObjectId("5c0a29c697e71a0d28b910a9"),
"__v" : 0
}
{
"_id" : ObjectId("5c0a4f4191b5021228681f7c"),
"projectId" : ObjectId("5c0a2a8897e71a0d28b910ac"),
"hours" : 6,
"userId" : ObjectId("5c0a29e597e71a0d28b910aa"),
"__v" : 0
}
The result will look like below:
{
"_id": "5c0a29c697e71a0d28b910a9",
"projectId": [
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29c697e71a0d28b910a9",
"firstName": "Rajat"
},
"projects": [
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery",
"hours":8
}
]
}
You can use below aggregation
$lookup 3.6 nested syntax allows you to join nested collection inside the $lookup pipeline. You can perform all the aggregation inside the nested $lookup pipline
db.collection.aggregate([
{ "$group": {
"_id": "$userId",
"projectId": { "$push": "$projectId" }
}},
{ "$lookup": {
"from": "users",
"let": { "userId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$userId" ] }}},
{ "$project": { "firstName": 1 }}
],
"as": "user"
}},
{ "$unwind": "$user" },
{ "$lookup": {
"from": "projects",
"let": { "projectId": "$projectId" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$projectId" ] }}},
{ "$lookup": {
"from": "worksheets",
"let": { "projectId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$projectId", "$$projectId" ] }}},
{ "$group": {
"_id": "$projectId",
"totalHours": { "$sum": "$hours" }
}}
],
"as": "workHours"
}}
{ "$project": {
"projectName": 1,
"hours": { "$arrayElemAt": ["$workHours.totalHours", 0] }
}}
],
"as": "projects"
}}
])

Filter Array Content to a Query containing $concatArrays

Given this function, I have a data set that I am querying. The data looks like this:
db.activity.insert(
{
"_id" : ObjectId("5908e64e3b03ca372dc945d5"),
"startDate" : ISODate("2017-05-06T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("5908ebf96ae5003a4471c9b2"),
"walkDistance" : "03",
"jogDistance" : "01",
"runDistance" : "08",
"sprintDistance" : "01"
}
]
}
)
db.activity.insert(
{
"_id" : ObjectId("58f79163bebac50d5b2ae760"),
"startDate" : ISODate("2017-05-07T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("58f7948fbebac50d5b2ae7f2"),
"walkDistance" : "01",
"jogDistance" : "02",
"runDistance" : "09",
"sprintDistance" : ""
}
]
}
)
Using this function, thanks to Neil Lunn, I am able to get my desired output:
db.activity.aggregate([
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
However, I cannot add a match statement to the beginning.
db.activity.aggregate([
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{$unwind: '$details'},
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Because it gives an error message of:
> $concatArrays only supports arrays, not string
How can I modify this query so that a $match statement can be added?
Don't $unwind the array you are feeding to $concatArrays. Instead apply $filter to only extract the matching values. And as stated, we can just use $setUnion for the 'unique concatenation' instead:
db.activity.aggregate([
{ "$match": { "startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" } },
{ "$project": {
"_id": 0,
"unique": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$details",
"cond": { "$eq": [ "$$this.code", "2" ] }
}
}
},
"in": {
"$setDifference": [
{ "$setUnion": [
"$$filtered.walkDistance",
"$$filtered.jogDistance",
"$$filtered.runDistance",
"$$filtered.sprintDistance"
]},
[""]
]
}
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Using $let makes things a bit cleaner syntax wise since you don't need to specify multiple $map and $filter statements "inline" as the source for $setUnion