MongoDB: Get all $matched elements individually from an array

MongoDB: Get all $matched elements individually from an array - mongodb

I'm trying to get all matched elements individually, here is the sample data and the query.
// json
[
{
"name": "Mr Cool",
"ican": [
{
"subcategory": [
{
"id": "5bffdba824488b182ec86f8d", "name": "Cricket"
},
{
"id": "5bffdba824488b182ec86f8c", "name": "Footbal"
}
],
"category": "5bffdba824488b182ec86f88",
"name": "Sports"
}
]
}
]
// query
db.collection.aggregate([
{
"$match": {
"ican.subcategory.name": { $in: ["Cricket","Football"] }
}
},
{
"$project": { "_id": 1, "name": 1, }
}
])
I'm getting the combined result, I need the individual match record. I tried $all and $elementMatch but getting the same response. how can I get the results as below. I'm using $aggregate because I will be using $geoNear pipeline for getting the nearby users.
// current result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool"
}
]
// expected result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool",
"subcategory: "Cricket"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool",
"subcategory: "Footbal"
}
]
Thank you

Try this Mongo Playground
db.col.aggregate([
{"$unwind" : "$ican"},
{"$unwind" : "$ican.subcategory"},
{"$match" : {"ican.subcategory.name": { "$in": ["Cricket","Football"] }}},
{"$group" : {"_id" : null,"data" : {"$push" : {"_id" : "$_id","name" : "$name","subcategory" : "$ican.subcategory.name"}}}},
{"$unwind" : "$data"},
{"$replaceRoot" : {"newRoot" : "$data"}}
])

You can use below aggregation without the $unwind and for better performance
db.collection.aggregate([
{ "$match": { "ican.subcategory.name": { "$in": ["Cricket","Football"] }}},
{ "$project": {
"ican": {
"$reduce": {
"input": "$ican",
"initialValue": [],
"in": {
"$concatArrays": [
{ "$filter": {
"input": {
"$map": {
"input": "$$this.subcategory",
"as": "s",
"in": { "name": "$name", "subcategory": "$$s.name" }
}
},
"as": "fil",
"cond": { "$in": ["$$fil.subcategory", ["Football"]] }
}},
"$$value"
]
}
}
}
}},
{ "$unwind": "$ican" },
{ "$replaceRoot": { "newRoot": "$ican" }}
])

Related

MongoDB. Get every element from array in new field

I have a document with a nested array array_field:
{
"_id": {
"$oid": "1"
},
"id": "1",
"array_field": [
{
"data": [
{
"regions": [
{
"result": {
"item": [
"4",
"5",
"3"
]
}
},
{
"result": {
"item": [
"5"
]
}
},
{
"result": {
"item": [
"1"
]
}
}
]
}
]
}
]
}
I need add new field, new_added_field for example, with each array element from array_field.data.regions.result.item and remove array_field from document.
For example:
{
"_id": {
"$oid": "1"
},
"id": "1",
"new_added_field": [4,5,3,5,1]
}
I think i can do this with help of $unwind or $map but have difficulties and need dome hint, how i can do it with help op aggregation?

As you said,
db.collection.aggregate([
{
"$project": {
newField: {
"$map": {
"input": "$array_field",
"as": "m",
"in": "$$m.data.regions.result.item"
}
}
},
},
{ "$unwind": "$newField" },
{ "$unwind": "$newField" },
{ "$unwind": "$newField" },
{ "$unwind": "$newField" },
{
"$group": {
"_id": "$_id",
"newField": { "$push": "$newField" }
}
}
])
Working Mongo playground

Get Count of specific field mongodb

I am using below query to get combined data from users and project collections:
db.collection.aggregate([
{
"$group": {
"_id": "$userId",
"projectId": { "$push": "$projectId" }
}
},
{
"$lookup": {
"from": "users",
"let": { "userId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$userId" ] }}},
{ "$project": { "firstName": 1 }}
],
"as": "user"
}
},
{ "$unwind": "$user" },
{
"$lookup": {
"from": "projects",
"let": { "projectId": "$projectId" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$projectId" ] }}},
{ "$project": { "projectName": 1 }}
],
"as": "projects"
}
}
])
and it results like below:
[
{
"_id": "5c0a29e597e71a0d28b910aa",
"projectId": [
"5c0a2a8897e71a0d28b910ac",
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29e597e71a0d28b910aa",
"firstName": "Amit"
},
"projects": [
{
"_id": "5c0a2a8897e71a0d28b910ac",
"projectName": "LN-PM"
},
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery"
}
]
},
{
"_id": "5c0a29c697e71a0d28b910a9",
"projectId": [
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29c697e71a0d28b910a9",
"firstName": "Rajat"
},
"projects": [
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery"
}
]
}
]
Now i have another table "Worksheets" and want to include hours field in projects Array, which will be calculated from the worksheets table by specifying the projectId which is _id in the projects array. It will be find in worksheet table and hours will be incremented how many times this _id has in worksheets table. Below is my worksheet collection:
{
"_id" : ObjectId("5c0a4efa91b5021228681f7a"),
"projectId" : ObjectId("5c0a4083753a321c6c4ee024"),
"hours" : 8,
"userId" : ObjectId("5c0a29c697e71a0d28b910a9"),
"__v" : 0
}
{
"_id" : ObjectId("5c0a4f4191b5021228681f7c"),
"projectId" : ObjectId("5c0a2a8897e71a0d28b910ac"),
"hours" : 6,
"userId" : ObjectId("5c0a29e597e71a0d28b910aa"),
"__v" : 0
}
The result will look like below:
{
"_id": "5c0a29c697e71a0d28b910a9",
"projectId": [
"5c0a4083753a321c6c4ee024"
],
"user": {
"_id": "5c0a29c697e71a0d28b910a9",
"firstName": "Rajat"
},
"projects": [
{
"_id": "5c0a4083753a321c6c4ee024",
"projectName": "fallbrook winery",
"hours":8
}
]
}

You can use below aggregation
$lookup 3.6 nested syntax allows you to join nested collection inside the $lookup pipeline. You can perform all the aggregation inside the nested $lookup pipline
db.collection.aggregate([
{ "$group": {
"_id": "$userId",
"projectId": { "$push": "$projectId" }
}},
{ "$lookup": {
"from": "users",
"let": { "userId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$_id", "$$userId" ] }}},
{ "$project": { "firstName": 1 }}
],
"as": "user"
}},
{ "$unwind": "$user" },
{ "$lookup": {
"from": "projects",
"let": { "projectId": "$projectId" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$projectId" ] }}},
{ "$lookup": {
"from": "worksheets",
"let": { "projectId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$projectId", "$$projectId" ] }}},
{ "$group": {
"_id": "$projectId",
"totalHours": { "$sum": "$hours" }
}}
],
"as": "workHours"
}}
{ "$project": {
"projectName": 1,
"hours": { "$arrayElemAt": ["$workHours.totalHours", 0] }
}}
],
"as": "projects"
}}
])

Filter Array Content to a Query containing $concatArrays

Given this function, I have a data set that I am querying. The data looks like this:
db.activity.insert(
{
"_id" : ObjectId("5908e64e3b03ca372dc945d5"),
"startDate" : ISODate("2017-05-06T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("5908ebf96ae5003a4471c9b2"),
"walkDistance" : "03",
"jogDistance" : "01",
"runDistance" : "08",
"sprintDistance" : "01"
}
]
}
)
db.activity.insert(
{
"_id" : ObjectId("58f79163bebac50d5b2ae760"),
"startDate" : ISODate("2017-05-07T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("58f7948fbebac50d5b2ae7f2"),
"walkDistance" : "01",
"jogDistance" : "02",
"runDistance" : "09",
"sprintDistance" : ""
}
]
}
)
Using this function, thanks to Neil Lunn, I am able to get my desired output:
db.activity.aggregate([
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
However, I cannot add a match statement to the beginning.
db.activity.aggregate([
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{$unwind: '$details'},
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Because it gives an error message of:
> $concatArrays only supports arrays, not string
How can I modify this query so that a $match statement can be added?

Don't $unwind the array you are feeding to $concatArrays. Instead apply $filter to only extract the matching values. And as stated, we can just use $setUnion for the 'unique concatenation' instead:
db.activity.aggregate([
{ "$match": { "startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" } },
{ "$project": {
"_id": 0,
"unique": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$details",
"cond": { "$eq": [ "$$this.code", "2" ] }
}
}
},
"in": {
"$setDifference": [
{ "$setUnion": [
"$$filtered.walkDistance",
"$$filtered.jogDistance",
"$$filtered.runDistance",
"$$filtered.sprintDistance"
]},
[""]
]
}
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Using $let makes things a bit cleaner syntax wise since you don't need to specify multiple $map and $filter statements "inline" as the source for $setUnion

MongoDB aggregate/grouping by key-value pairs

My data looks something like this:
{
"_id" : "9aa072e4-b706-47e6-9607-1a39e904a05a",
"customerId" : "2164289-4",
"channelStatuses" : {
"FOO" : {
"status" : "done"
},
"BAR" : {
"status" : "error"
}
},
"channel" : "BAR",
}
My aggregate/group looks like this:
{
"_id" : {
"customerId" : "$customerId",
"channel" : "$channel",
"status" : "$channelStatuses[$channel].status"
},
"count" : {
"$sum" : 1
}
}
So basically with the example data the group should give me a group grouped by:
{"customerId": "2164289-4", "channel": "BAR", "status": "error"}
But I cannot use []-indexing in a aggregate/group. What should I do instead?

You cannot get the result you want with the current structure using .aggregate(). You "could" change the structure to use an array rather than named keys, and the operation is actually quite simple.
So with a document like:
{
"_id" : "9aa072e4-b706-47e6-9607-1a39e904a05a",
"customerId" : "2164289-4",
"channelStatuses" : [
{
"channel": "FOO",
"status" : "done"
},
{
"channel": "BAR",
"status" : "error"
}
],
"channel" : "BAR",
}
You can then do in modern releases with $filter, $map and $arrayElemAt:
{ "$group": {
"_id": {
"customerId" : "$customerId",
"channel" : "$channel",
"status": {
"$arrayElemAt": [
{ "$map": {
"input": { "$filter": {
"input": "$chanelStatuses",
"as": "el",
"cond": { "$eq": [ "$$el.channel", "$channel" ] }
}},
"as": "el",
"in": "$$el.status"
}},
0
]
}
},
"count": { "$sum": 1 }
}}
Older versions of MongoDB are going to going to require $unwind to access the matched array element.
In MongoDB 2.6 then you can still "pre-filter" the array before unwind:
[
{ "$project": {
"customerId": 1,
"channel": 1,
"status": {
"$setDifference": [
{ "$map": {
"input": "$channelStatuses",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.channel", "$channel" ] },
"$$el.status",
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$status" },
{ "$group": {
"_id": {
"customerId": "$customerId",
"channel": "$channel",
"status": "$status"
},
"count": { "$sum": 1 }
}}
]
And anything prior to that you "filter" after $unwind instead:
[
{ "$unwind": "$channelStatuses" },
{ "$project": {
"customerId": 1,
"channel": 1,
"status": "$channelStatuses.status",
"same": { "$eq": [ "$channelStatuses.status", "$channel" ] }
}},
{ "$match": { "same": true } },
{ "$group": {
"_id": "$_id",
"customerId": { "$first": "$customerId" },
"channel": { "$first": "$channel" },
"status": { "$first": "$status" }
}},
{ "$group": {
"_id": {
"customerId": "$customerId",
"channel": "$channel",
"status": "$status"
},
"count": { "$sum": 1 }
}}
]
In a lesser version than MongoDB 2.6 you also need to $project the result of the equality test between the two fields and then $match on the result in a seperate stage. You might also note the "two" $group stages, since the first one removes any possible duplicates of the "channel" values after the filter via the $first accumulators. The following $group is exactly the same as in the previous listing.
But if you cannot change the structure and need "flexible" matching of keys where you cannot supply every name, then you must use mapReduce:
db.collection.mapReduce(
function() {
emit({
"customerId": this.customerId,
"channel": this.channel,
"status": this.channelStatuses[this.channel].status
},1);
},
function(key,values) {
return Array.sum(values);
},
{ "out": { "inline": 1 } }
)
Where of course you can use that sort of notation

MongoDB nested query using aggregate function

I have a collection "superpack", which has the nested objects. The sample document looks like below.
{
"_id" : ObjectId("56038c8cca689261baca93eb"),
"name": "Test sub",
"packs": [
{
"id": "55fbc7f6b0ce97a309b3cead",
"name": "Classic",
"packDispVal": "PACK",
"billingPts": [
{
"id": "55fbc7f6b0ce97a309b3ceab",
"name": "Classic 1 month",
"expiryVal": 1,
"amount": 20,
"topUps": [
{
"id": "55fbc7f6b0ce97a309b3cea9",
"name": "1 extra",
"amount": 8
},
{
"id": "55fbc7f6b0ce97a309b3ceaa",
"name": "2 extra",
"amount": 12
}
]
},
{
"id": "55fbc7f6b0ce97a309b3ceac",
"name": "Classic 2 month",
"expiryVal": 1,
"amount": 30,
"topUps": [
{
"id": "55fbc7f6b0ce97a309b3cea8",
"name": "3 extra",
"amount": 16
}
]
}
]
}
]
}
I need to query for the nested object topups with the id field and result should have only the selected topup object and its associated parent. I am expecting the output to like below, when i query it on topup id 55fbc7f6b0ce97a309b3cea9.
{
"_id" : ObjectId("56038c8cca689261baca93eb"),
"name": "Test sub",
"packs": [
{
"id": "55fbc7f6b0ce97a309b3cead",
"name": "Classic",
"packDispVal": "PACK",
"billingPts": [
{
"id": "55fbc7f6b0ce97a309b3ceab",
"name": "Classic 1 month",
"expiryVal": 1,
"amount": 20,
"topUps": [
{
"id": "55fbc7f6b0ce97a309b3cea9",
"name": "1 extra",
"amount": 8
}
]
}
]
}
]
}
I tried with the below aggregate query for the same. However its not returning any result. Can you please help me, what is wrong in the query?
db.superpack.aggregate( [{ $match: { "id": "55fbc7f6b0ce97a309b3cea9" } }, { $redact: {$cond: { if: { $eq: [ "$id", "55fbc7f6b0ce97a309b3cea9" ] }, "then": "$$KEEP", else: "$$PRUNE" }}} ])

Unfortunately $redact is not a viable option here based on the fact that with the recursive $$DESCEND it is basically looking for a field called "id" at all levels of the document. You cannot possibly ask to do this only at a specific level of embedding as it's all or nothing.
This means you need alternate methods of filtering the content rather than $redact. All "id" values are unique so their is no problem filtering via "set" operations.
So the most efficient way to do this is via the following:
db.docs.aggregate([
{ "$match": {
"packs.billingPts.topUps.id": "55fbc7f6b0ce97a309b3cea9"
}},
{ "$project": {
"packs": {
"$setDifference": [
{ "$map": {
"input": "$packs",
"as": "pack",
"in": {
"$let": {
"vars": {
"billingPts": {
"$setDifference": [
{ "$map": {
"input": "$$pack.billingPts",
"as": "billing",
"in": {
"$let": {
"vars": {
"topUps": {
"$setDifference": [
{ "$map": {
"input": "$$billing.topUps",
"as": "topUp",
"in": {
"$cond": [
{ "$eq": [ "$$topUp.id", "55fbc7f6b0ce97a309b3cea9" ] },
"$$topUp",
false
]
}
}},
[false]
]
}
},
"in": {
"$cond": [
{ "$ne": [{ "$size": "$$topUps"}, 0] },
{
"id": "$$billing.id",
"name": "$$billing.name",
"expiryVal": "$$billing.expiryVal",
"amount": "$$billing.amount",
"topUps": "$$topUps"
},
false
]
}
}
}
}},
[false]
]
}
},
"in": {
"$cond": [
{ "$ne": [{ "$size": "$$billingPts"}, 0 ] },
{
"id": "$$pack.id",
"name": "$$pack.name",
"packDispVal": "$$pack.packDispVal",
"billingPts": "$$billingPts"
},
false
]
}
}
}
}},
[false]
]
}
}}
])
Where after digging down to the innermost array that is being filtered, that then the size of each resulting array going outwards is tested to see if it is zero, and omitted from results where it is.
It's a long listing but it is the most efficient way since each array is filtered down first and within each document.
A not so efficient way is to pull apart with $unwind and the $group back the results:
db.docs.aggregate([
{ "$match": {
"packs.billingPts.topUps.id": "55fbc7f6b0ce97a309b3cea9"
}},
{ "$unwind": "$packs" },
{ "$unwind": "$packs.billingPts" },
{ "$unwind": "$packs.billingPts.topUps"},
{ "$match": {
"packs.billingPts.topUps.id": "55fbc7f6b0ce97a309b3cea9"
}},
{ "$group": {
"_id": {
"_id": "$_id",
"packs": {
"id": "$packs.id",
"name": "$packs.name",
"packDispVal": "$packs.packDispVal",
"billingPts": {
"id": "$packs.billingPts.id",
"name": "$packs.billingPts.name",
"expiryVal": "$packs.billingPts.expiryVal",
"amount": "$packs.billingPts.amount"
}
}
},
"topUps": { "$push": "$packs.billingPts.topUps" }
}},
{ "$group": {
"_id": {
"_id": "$_id._id",
"packs": {
"id": "$_id.packs.id",
"name": "$_id.packs.name",
"packDispVal": "$_id.packs.packDispVal"
}
},
"billingPts": {
"$push": {
"id": "$_id.packs.billingPts.id",
"name": "$_id.packs.billingPts.name",
"expiryVal": "$_id.packs.billingPts.expiryVal",
"amount": "$_id.packs.billingPts.amount",
"topUps": "$topUps"
}
}
}},
{ "$group": {
"_id": "$_id._id",
"packs": {
"$push": {
"id": "$_id.packs.id",
"name": "$_id.packs.name",
"packDispVal": "$_id.packs.packDispVal",
"billingPts": "$billingPts"
}
}
}}
])
The listing looks a lot more simple but of course there is a lot of overhead introduced by $unwind here. The process of grouping back is basically keeping a copy of everything outside of the current array level being reconstructed, and then push that content back into the array in the next stage, until you get back to the root _id.
Please note that unless you intend such a search to match more than one document or if you are going to have significant gains from reduced network traffic by effectively reducing down the response size from a very large document, then it would be advised to do neither of these but follow much of the same design as the first pipeline example but in client code.
Whilst the first example would be still okay performance wise, it's still a mouthful to send to the server and as a general listing, that is typically written with the same operations in a cleaner way in client code to process and filter the resulting structure.
{
"_id" : ObjectId("56038c8cca689261baca93eb"),
"packs" : [
{
"id" : "55fbc7f6b0ce97a309b3cead",
"name" : "Classic",
"packDispVal" : "PACK",
"billingPts" : [
{
"id" : "55fbc7f6b0ce97a309b3ceab",
"name" : "Classic 1 month",
"expiryVal" : 1,
"amount" : 20,
"topUps" : [
{
"id" : "55fbc7f6b0ce97a309b3cea9",
"name" : "1 extra",
"amount" : 8
}
]
}
]
}
]
}