Related
Below are the projected Result and I want to get the sum of Expenses Amount where ExpenseType equal to "1" and the result should group by Type and Quarter. How to achieve this functionality without unwinding the Expenses Array.?
{
"Type" : "CreditCard",
"Quarter": "20201",
"Expenses" : [
{
"ExpenseType" : "1",
"Amount" : 123
},
{
"ExpenseType" : "2",
"Amount" : 183
}
]
}
{
"Type" : "Cash",
"Quarter": "20202",
"Expenses" : [
{
"ExpenseType" : "1",
"Amount" : 345
},
{
"ExpenseType" : "2",
"Amount" : 200
}
]
}
Expected Output:
{
"Type" : "CreditCard",
"Quarter": "20201",
"Total":"123"
}
{
"Type" : "Cash",
"Quarter": "20202",
"Total":"345"
}****
Mechanism
Group by Quarter and Tpy
Sum values
Pipeline
db.collection.aggregate({
$group: {
"_id": {
"Quarter": "$Quarter",
"Type": "$Type"
},
"Total": {
$push: {
$reduce: {
input: "$Expenses",
initialValue: 0,
in: {
$cond: [
{
$eq: [
"$$this.ExpenseType",
"1"
]
},
{
$add: [
"$$value",
"$$this.Amount"
]
},
{
$add: [
"$$value",
0
]
}
]
}
}
}
}
}
})
Playground
I'm using following aggregated query to get results (let say query1):
db.fb.aggregate(
[
{
$addFields : { noOfLikes : { $sum : { $map : { input : "$facebookEvents", as : "f", in : {$cond : [ { $eq : ["$$f.type" , "like"] }, 1, 0 ]} }}}}
},
{
$match : {"noOfLikes" : {$gte : 2}}
}
]
)
and another query to get some other results (let's say query2):
db.fb.aggregate(
[
{ $match : { author : "dave" } },
{ $match : { test : "test1" } }
]
)
Is it possible to get query1 OR query2 using a single aggregate query? In other words, I want to get results that match either query1 or query2 using a single query. Appreciate any help
mongo version: 3.4.4
Within a single aggregate query, you would need to run your pipeline as follows:
db.fb.aggregate([
{
"$addFields": {
"noOfLikes": {
"$sum" : {
"$map": {
"input": "$facebookEvents",
"as": "f",
"in": {
"$cond": [{ "$eq": ["$$f.type" , "like"] }, 1, 0 ]
}
}
}
}
}
},
{
"$match" : {
"$or": [
{ "noOfLikes" : { "$gte" : 2 } },
{ "author": "dave", "test": "test1" }
]
}
}
])
or using $redact as:
db.fb.aggregate([
{
"$redact": {
"$cond": [
{
"$or": [
{ "$gte": [
{
"$sum" : {
"$map": {
"input": "$facebookEvents",
"as": "f",
"in": {
"$cond": [
{ "$eq": ["$$f.type" , "like"] },
1,
0
]
}
}
}
}, 2 ]
},
{
"$and": [
{ "$eq": ["$author", "dave"] },
{ "$eq": ["$test", "test1"] }
]
}
]
},
"$$KEEP",
"$$PRUNE"
]
}
}
])
You can use $facet
db.fb.aggregate(
{
$facet : {
byLikes : [
{ $addFields : { noOfLikes : { $sum : { $map : { input : "$facebookEvents", as : "f", in : {$cond : [ { $eq : ["$$f.type" , "like"] }, 1, 0 ]} }}}} },
{ $match : {"noOfLikes" : {$gte : 2}}}
],
byAuthor : [
{ $match : { author : "dave" } },
{ $match : { test : "test1" } }
]
}
}
)
I use the following collection which represents sports > categories > tournaments.
{
"_id" : ObjectId("597846358bbbc4440895f2e8"),
"Name" : [
{ "k" : "en-US", "v" : "Soccer" },
{ "k" : "fr-FR", "v" : "Football" }
],
"Categories" : [
{
"Name" : [
{ "k" : "en-US", "v" : "France" },
{ "k" : "fr-FR", "v" : "France" }
],
"Tournaments" : [
{
"Name" : [
{ "k" : "en-US", "v" : "Ligue 1" },
{ "k" : "fr-FR", "v" : "Ligue 1" }
],
},
{
"Name" : [
{ "k" : "en-US", "v" : "Ligue 2" },
{ "k" : "fr-FR", "v" : "Ligue 2" }
],
}
]
},
{
"Name" : [
{ "k" : "en-US", "v" : "England" },
{ "k" : "fr-FR", "v" : "Angleterre" }
],
"Tournaments" : [
{
"Name" : [
{ "k" : "en-US", "v" : "Premier League" },
{ "k" : "fr-FR", "v" : "Premier League" }
],
},
{
"Name" : [
{ "k" : "en-US", "v" : "Championship" },
{ "k" : "fr-FR", "v" : "Championnat" }
],
}
]
},
]
}
I want to query the collection using the category’s name and the tournament’s name. I’ve successfully use “$elemMatch” with the following code:
db.getCollection('Sport').find({
Categories: {
$elemMatch: {
Name: {
$elemMatch: { v: "France" }
},
Tournaments: {
$elemMatch: {
Name: {
$elemMatch: { v: "Ligue 1" }
}
}
}
}
} },
{ "Categories.$": 1, Name: 1 })
However, I cannot receive only the matching tournament in the category object.
Using the answer in this question: MongoDB Projection of Nested Arrays, I’ve built an aggregation:
db.getCollection('Sport').aggregate([{
"$match": {
"Categories": {
"$elemMatch": {
"Name": {
"$elemMatch": {
"v": "France"
}
},
"Tournaments": {
"$elemMatch": {
"Name": {
"$elemMatch": {
"v": "Ligue 1"
}
}
}
}
}
}
}
}, {
"$addFields": {
"Categories": {
"$filter": {
"input": {
"$map": {
"input": "$Categories",
"as": "category",
"in": {
"Tournaments": {
"$filter": {
"input": "$$category.Tournaments",
"as": "tournament",
"cond": {
// stuck here
}
}
}
}
}
},
"as": "category",
"cond": {
// stuck here
}
}
}
}
}
])
I tried to use a condition but MongoDB doesn’t recognize (Use of undefined variable:) $$KEEP and $$PRUNE ($redact) when I use $anyElementTrue then $map on the “Name” property.
My question: how can I check that the collection of names contains my string?
I'm more surprised that on the answer you reference I did not not "strongly recommend you do not nest arrays" like this. Nesting in this way is impossible to update atomically until the next release of MongoDB, and they are notoriously difficult to query.
For this particular case you would do:
db.getCollection('Sport').aggregate([
{ "$match": {
"Categories": {
"$elemMatch": {
"Name.v": "France",
"Tournaments.Name.v": "Ligue 1"
}
}
}},
{ "$addFields": {
"Categories": {
"$filter": {
"input": {
"$map": {
"input": "$Categories",
"as": "c",
"in": {
"Name": {
"$filter": {
"input": "$$c.Name",
"as": "n",
"cond": { "$eq": [ "$$n.v", "France" ] }
}
},
"Tournaments": {
"$filter": {
"input": {
"$map": {
"input": "$$c.Tournaments",
"as": "t",
"in": {
"Name": {
"$filter": {
"input": "$$t.Name",
"as": "n",
"cond": {
"$eq": [ "$$n.v", "Ligue 1" ]
}
}
}
}
}
},
"as": "t",
"cond": {
"$ne": [{ "$size": "$$t.Name" }, 0]
}
}
}
}
}
},
"as": "c",
"cond": {
"$and": [
{ "$ne": [{ "$size": "$$c.Name" },0] },
{ "$ne": [{ "$size": "$$c.Tournaments" },0] }
]
}
}
}
}}
])
Which returns the result:
/* 1 */
{
"_id" : ObjectId("597846358bbbc4440895f2e8"),
"Name" : [
{
"k" : "en-US",
"v" : "Soccer"
},
{
"k" : "fr-FR",
"v" : "Football"
}
],
"Categories" : [
{
"Name" : [
{
"k" : "en-US",
"v" : "France"
},
{
"k" : "fr-FR",
"v" : "France"
}
],
"Tournaments" : [
{
"Name" : [
{
"k" : "en-US",
"v" : "Ligue 1"
},
{
"k" : "fr-FR",
"v" : "Ligue 1"
}
]
}
]
}
]
}
The whole point is that each array needs a $filter, and at the outer levels you are looking for $size not being 0 as a result of "inner" $filter operations on contained arrays.
Since the "inner" arrays can change in content as a result, the "outer" arrays need a $map in order to return the "changed" elements.
So in terms of the structure "Categories" needs a $map because it has inner elements. And the "inner" "Tournaments" needs a $map for the same reason. Every array all the way to the final properties need $filter, and each wrapping array with a $map has a $filter with a $size condition.
That's the general logic pattern, and it works by repeating that pattern for each nested level. As stated though, it's pretty horrible. Which is why you really should avoid "nesting" like this at all costs. The increased complexity just about always outweighs any perceived gains.
I should also note you went a little overboard with $elemMatch, You really only need it at the "Categories" array level since that's the only thing that has multiple conditions to be met for it's element.
The sub-elements can use plain "Dot Notation" since they are only "singular" conditions within their respective arrays. So that does cut down on the terse syntax somewhat and still matches exactly the same documents.
I've collections of documents as like as below:
{
"_id" : ObjectId("55d4410544c96d6f6578f893"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T08:40:47.049Z"),
"runStartTime" : ISODate("2015-08-19T08:40:37.621Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d44eb4c0422e7b8bffe76b"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T09:39:13.528Z"),
"runStartTime" : ISODate("2015-08-19T09:39:00.406Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d44f0bc0422e7b8bffe76f"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "FAIL"
}
],
"runEndTime" : ISODate("2015-08-19T09:46:31.108Z"),
"runStartTime" : ISODate("2015-08-19T09:40:27.377Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d463d0c0422e7b8bffe789"),
"executionProject" : "Project2",
"suiteList" : [
{
"suiteStatus" : "PASS"
},
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T11:09:52.537Z"),
"runStartTime" : ISODate("2015-08-19T11:09:04.539Z"),
"runStatus" : "FAIL",
"__v" : 1
}
{
"_id" : ObjectId("55d464ebc0422e7b8bffe7c2"),
"executionProject" : "Project3",
"suiteList" : [
{
"suiteStatus" : "FAIL"
}
],
"runEndTime" : ISODate("2015-08-19T11:18:41.460Z"),
"runStartTime" : ISODate("2015-08-19T11:13:47.268Z"),
"runStatus" : "FAIL",
"__v" : 10
}
And I'm expecting output as follows:
[
{
"executionProject": "Project1",
"suite-pass": 0,
"suite-fail": 1,
"runEndTime": ISODate("2015-08-19T09:46:31.108Z")
},
{
"executionProject": "Project2",
"suite-pass": 2,
"suite-fail": 0,
"runEndTime": ISODate("2015-08-19T11:09:52.537Z")
},
{
"executionProject": "Project3",
"suite-pass": 0,
"suite-fail": 1,
"runEndTime": ISODate("2015-08-19T11:18:41.460Z")
},
]
I want to group by project and order by runEndTime and show the pass and fail counts of suiteList.
I tried this as suggested by Blakes in Mongodb: Group by element and show the sub-document count based on condition and sort the document by date:
db.testruns.aggregate([
{ "$sort": { "runEndTime": 1 } },
{ "$group": {
"_id": "$executionProject",
"suite-pass": {
"$last": {
"$cond": [
{ "$anyElementTrue": {
"$map": {
"input": "$suiteList",
"as": "suite",
"in": {
"$eq": [ "$$suite.suiteStatus", "PASS" ]
}
}
}},
1,
0
]
}
},
"suite-fail": {
"$last": {
"$cond": [
{ "$anyElementTrue": {
"$map": {
"input": "$suiteList",
"as": "suite",
"in": {
"$eq": [ "$$suite.suiteStatus", "FAIL" ]
}
}
}},
1,
0
]
}
},
"runEndTime": { "$last": "$runEndTime" }
}},
{ "$sort": { "runEndTime": 1 } }
]);
I was expecting the suite-pass count for Project2 as 2 since there are 2 elements in suiteList, but it returns 1.
You should have read the answer properly, as there already was another alternate listing and explanation of why the expected result you want from the one you used would be different.
Instead you want this one, which respects the possible multiple "PASS" or "FAIL":
Model.aggregate(
[
{ "$sort": { "executionProject": 1, "runEndTime": 1 } },
{ "$group": {
"_id": "$executionProject",
"suiteList": { "$last": "$suiteList" },
"runEndTime": { "$last": "$runEndTime" }
}},
{ "$unwind": "$suiteList" },
{ "$group": {
"_id": "$_id",
"suite-pass": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "PASS" ] },
1,
0
]
}
},
"suite-fail": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "FAIL" ] },
1,
0
]
}
},
"runEndTime": {"$first": "$runEndTime"}
}},
{ "$sort": { "runEndTime": 1 }}
],
function(err,result) {
}
);
Which is sort of a "combination" of approaches. The first is to get the "last" by runTime as you were expecting. The next is to break down the array and this time actually "sum up" the possible occurances of pass or fail, rather than just record a 1 for either pass or fail in the array, the actual "pass" or "fail" are counted.
With results:
{
"_id" : "Project1",
"suite-pass" : 0,
"suite-fail" : 1,
"runEndTime" : ISODate("2015-08-19T09:46:31.108Z")
}
{
"_id" : "Project2",
"suite-pass" : 2,
"suite-fail" : 0,
"runEndTime" : ISODate("2015-08-19T11:09:52.537Z")
}
{
"_id" : "Project3",
"suite-pass" : 0,
"suite-fail" : 1,
"runEndTime" : ISODate("2015-08-19T11:18:41.460Z")
}
Unwind suiteList and used $sum in group as below :
db.testruns.aggregate({
"$unwind": "$suiteList"
}, {
"$group": {
"_id": "$executionProject",
"suite-pass": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$suiteList.suiteStatus", "PASS"]
},
"then": 1,
"else": 0
}
}
},
"suite-fail": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$suiteList.suiteStatus", "FAIL"]
},
"then": 1,
"else": 0
}
}
},
"runEndTime": {
"$last": "$runEndTime"
}
}
}, {
"$sort": {
"runEndTime": 1
}
})
I want to group the all field of a collection with unique total. Let's assume there is collection like this:
id country state operator
121 IN HR AIRTEL
212 IN MH AIRTEL
213 US LA AT&T
214 UK JK VODAFONE
Output should be like this:
{
"country": { "IN": 2, "US":1, "UK":1 },
"state": { "HR":1, "MH":1, "LA":1, "JK": 1 },
"operator": { "AIRTEL":2, "AT&T": 1, "VODAFONE": 1 }
}
I am trying to use mongo aggregation framework, but can't really think how to do this?
I find out some similar to your output using aggregation check below code
db.collectionName.aggregate({
"$group": {
"_id": null,
"countryOfIN": {
"$sum": {
"$cond": [{
$eq: ["$country", "IN"]
}, 1, 0]
}
},
"countryOfUK": {
"$sum": {
"$cond": [{
$eq: ["$country", "UK"]
}, 1, 0]
}
},
"countryOfUS": {
"$sum": {
"$cond": [{
$eq: ["$country", "US"]
}, 1, 0]
}
},
"stateOfHR": {
"$sum": {
"$cond": [{
$eq: ["$state", "HR"]
}, 1, 0]
}
},
"stateOfMH": {
"$sum": {
"$cond": [{
$eq: ["$state", "MH"]
}, 1, 0]
}
},
"stateOfLA": {
"$sum": {
"$cond": [{
$eq: ["$state", "LA"]
}, 1, 0]
}
},
"stateOfJK": {
"$sum": {
"$cond": [{
$eq: ["$state", "JK"]
}, 1, 0]
}
},
"operatorOfAIRTEL": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AIRTEL"]
}, 1, 0]
}
},
"operatorOfAT&T": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AT&T"]
}, 1, 0]
}
},
"operatorOfVODAFONE": {
"$sum": {
"$cond": [{
$eq: ["$operator", "VODAFONE"]
}, 1, 0]
}
}
}
}, {
"$group": {
"_id": null,
"country": {
"$push": {
"IN": "$countryOfIN",
"UK": "$countryOfUK",
"US": "$countryOfUS"
}
},
"STATE": {
"$push": {
"HR": "$stateOfHR",
"MH": "$stateOfMH",
"LA": "$stateOfLA",
"JK": "$stateOfJK"
}
},
"operator": {
"$push": {
"AIRTEL": "$operatorOfAIRTEL",
"AT&T": "$operatorOfAT&T",
"VODAFONE": "$operatorOfVODAFONE"
}
}
}
}, {
"$project": {
"_id": 0,
"country": 1,
"STATE": 1,
"operator": 1
}
})
using $cond created groups of matched data and pushed them in second groups to combine.
An output format like you are looking for is not really suited to the aggregation framework since you are tranforming part of your data in to "key" names. The aggregation framework does not do this but rather sticks to database "best practice" as does not transform "data" to "key" names in any way.
You can perform a mapReduce operation instead with allows more flexibilty with the manipulation, but not as good performance due to the need to use JavaScript code to perform the manipulation:
db.collection.mapReduce(
function () {
var obj = {},
doc = this;
delete doc._id;
Object.keys(doc).forEach(function(key) {
obj[key] = {};
obj[key][doc[key]] = 1;
});
emit( null, obj );
},
function (key,values) {
var result = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(outerKey) {
Object.keys(value[outerKey]).forEach(function(innerKey) {
if ( !result.hasOwnProperty(outerKey) ) {
result[outerKey] = {};
}
if ( result[outerKey].hasOwnProperty(innerKey) ) {
result[outerKey][innerKey] += value[outerKey][innerKey];
} else {
result[outerKey][innerKey] = value[outerKey][innerKey];
}
});
});
});
return result;
},
{ "out": { "inline": 1 } }
)
And in the stucture that applies to all mapReduce results:
{
"results" : [
{
"_id" : null,
"value" : {
"country" : {
"IN" : 2,
"US" : 1,
"UK" : 1
},
"state" : {
"HR" : 1,
"MH" : 1,
"LA" : 1,
"JK" : 1
},
"operator" : {
"AIRTEL" : 2,
"AT&T" : 1,
"VODAFONE" : 1
}
}
}
]
}
For the aggregation framework itself, it is better suited to producing aggregation results that are more consistently structured:
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
])
Which would output:
{ "_id" : { "type" : "state", "key" : "JK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "UK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "US" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AT&T" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "LA" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AIRTEL" }, "count" : 2 }
{ "_id" : { "type" : "state", "key" : "MH" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "HR" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "VODAFONE" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "IN" }, "count" : 2 }
But is fairly easy to transform in client code while iterating the results:
var result = {};
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
]).forEach(function(doc) {
if ( !result.hasOwnProperty(doc._id.type) )
result[doc._id.type] = {};
result[doc._id.type][doc._id.key] = doc.count;
})
Which gives the final structure in "result":
{
"state" : {
"JK" : 1,
"LA" : 1,
"MH" : 1,
"HR" : 1
},
"country" : {
"UK" : 1,
"US" : 1,
"IN" : 2
},
"operator" : {
"AT&T" : 1,
"AIRTEL" : 2,
"VODAFONE" : 1
}
}