Aggregate with count of sub documents matching the condition and grouping - mongodb

I have a collection of documents like the ones below:
{
"_id" : ObjectId("55d4410544c96d6f6578f893"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T08:40:47.049Z"),
"runStartTime" : ISODate("2015-08-19T08:40:37.621Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d44eb4c0422e7b8bffe76b"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T09:39:13.528Z"),
"runStartTime" : ISODate("2015-08-19T09:39:00.406Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d44f0bc0422e7b8bffe76f"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "FAIL"
}
],
"runEndTime" : ISODate("2015-08-19T09:46:31.108Z"),
"runStartTime" : ISODate("2015-08-19T09:40:27.377Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d463d0c0422e7b8bffe789"),
"executionProject" : "Project2",
"suiteList" : [
{
"suiteStatus" : "PASS"
},
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T11:09:52.537Z"),
"runStartTime" : ISODate("2015-08-19T11:09:04.539Z"),
"runStatus" : "FAIL",
"__v" : 1
}
{
"_id" : ObjectId("55d464ebc0422e7b8bffe7c2"),
"executionProject" : "Project3",
"suiteList" : [
{
"suiteStatus" : "FAIL"
}
],
"runEndTime" : ISODate("2015-08-19T11:18:41.460Z"),
"runStartTime" : ISODate("2015-08-19T11:13:47.268Z"),
"runStatus" : "FAIL",
"__v" : 10
}
And I'm expecting output as follows:
[
{
"executionProject": "Project1",
"suite-pass": 0,
"suite-fail": 1,
"runEndTime": ISODate("2015-08-19T09:46:31.108Z")
},
{
"executionProject": "Project2",
"suite-pass": 2,
"suite-fail": 0,
"runEndTime": ISODate("2015-08-19T11:09:52.537Z")
},
{
"executionProject": "Project3",
"suite-pass": 0,
"suite-fail": 1,
"runEndTime": ISODate("2015-08-19T11:18:41.460Z")
},
]
I want to group by project and order by runEndTime and show the pass and fail counts of suiteList.
I tried this as suggested by Blakes in Mongodb: Group by element and show the sub-document count based on condition and sort the document by date:
db.testruns.aggregate([
{ "$sort": { "runEndTime": 1 } },
{ "$group": {
"_id": "$executionProject",
"suite-pass": {
"$last": {
"$cond": [
{ "$anyElementTrue": {
"$map": {
"input": "$suiteList",
"as": "suite",
"in": {
"$eq": [ "$$suite.suiteStatus", "PASS" ]
}
}
}},
1,
0
]
}
},
"suite-fail": {
"$last": {
"$cond": [
{ "$anyElementTrue": {
"$map": {
"input": "$suiteList",
"as": "suite",
"in": {
"$eq": [ "$$suite.suiteStatus", "FAIL" ]
}
}
}},
1,
0
]
}
},
"runEndTime": { "$last": "$runEndTime" }
}},
{ "$sort": { "runEndTime": 1 } }
]);
I was expecting the suite-pass count for Project2 as 2 since there are 2 elements in suiteList, but it returns 1.

You should have read the answer properly, as there already was another alternate listing and explanation of why the expected result you want from the one you used would be different.
Instead you want this one, which respects the possible multiple "PASS" or "FAIL":
Model.aggregate(
[
{ "$sort": { "executionProject": 1, "runEndTime": 1 } },
{ "$group": {
"_id": "$executionProject",
"suiteList": { "$last": "$suiteList" },
"runEndTime": { "$last": "$runEndTime" }
}},
{ "$unwind": "$suiteList" },
{ "$group": {
"_id": "$_id",
"suite-pass": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "PASS" ] },
1,
0
]
}
},
"suite-fail": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "FAIL" ] },
1,
0
]
}
},
"runEndTime": {"$first": "$runEndTime"}
}},
{ "$sort": { "runEndTime": 1 }}
],
function(err,result) {
}
);
Which is sort of a "combination" of approaches. The first step is to get the "last" run by runEndTime, as you were expecting. The next is to break down the array and this time actually "sum up" the possible occurrences of pass or fail: rather than just recording a 1 if there is any pass or any fail in the array, the actual "pass" and "fail" entries are counted.
With results:
{
"_id" : "Project1",
"suite-pass" : 0,
"suite-fail" : 1,
"runEndTime" : ISODate("2015-08-19T09:46:31.108Z")
}
{
"_id" : "Project2",
"suite-pass" : 2,
"suite-fail" : 0,
"runEndTime" : ISODate("2015-08-19T11:09:52.537Z")
}
{
"_id" : "Project3",
"suite-pass" : 0,
"suite-fail" : 1,
"runEndTime" : ISODate("2015-08-19T11:18:41.460Z")
}

Unwind suiteList and use $sum in the $group stage, as below:
// Count PASS/FAIL suites per project and report each project's latest runEndTime.
// Note: every suiteList entry of EVERY run of a project is counted here, not
// only the entries of the most recent run.
db.testruns.aggregate([
  // $last in the $group below is only meaningful on ordered input, so order
  // the runs by end time before grouping.
  { "$sort": { "runEndTime": 1 } },
  // One document per suiteList entry.
  { "$unwind": "$suiteList" },
  {
    "$group": {
      "_id": "$executionProject",
      // Add 1 for each unwound suite whose status is PASS.
      "suite-pass": {
        "$sum": {
          "$cond": {
            "if": { "$eq": ["$suiteList.suiteStatus", "PASS"] },
            "then": 1,
            "else": 0
          }
        }
      },
      // Add 1 for each unwound suite whose status is FAIL.
      "suite-fail": {
        "$sum": {
          "$cond": {
            "if": { "$eq": ["$suiteList.suiteStatus", "FAIL"] },
            "then": 1,
            "else": 0
          }
        }
      },
      // Latest end time per project, thanks to the leading $sort.
      "runEndTime": { "$last": "$runEndTime" }
    }
  },
  // Present the projects in order of their latest run.
  { "$sort": { "runEndTime": 1 } }
])

Related

Aggregate data grouped by date but from different date fields

I'm trying to get a query where the output is grouped by date, but the next fields will be based on different date fields.
So, for the date 2018-11 (year-month), how many registers were, how many activations were, how many customers and how many cancels. But each register/activation/customer/cancel, has to be counted in the month when it happened.
My data is stored as follow:
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-08-21T14:32:53.929Z"),
"hasBeenCustomerAt" : ISODate("2019-02-26T07:21:06Z"),
"hasRegisteredAt" : ISODate("2018-08-09T10:17:38.329Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-04-29T13:56:04Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-08-26T15:04:58.854Z"),
"hasBeenCustomerAt" : ISODate("2018-11-24T10:37:14Z"),
"hasRegisteredAt" : ISODate("2018-08-25T11:12:36.309Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-05-30T18:11:04Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-09-24T23:21:55.733Z"),
"hasBeenCustomerAt" : ISODate("2019-03-12T10:26:01Z"),
"hasRegisteredAt" : ISODate("2018-09-22T17:56:57.256Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-04-12T10:22:03Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-10-18T15:08:15.351Z"),
"hasBeenCustomerAt" : ISODate("2018-12-22T21:37:01Z"),
"hasRegisteredAt" : ISODate("2018-10-16T03:54:16.056Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-01-22T21:39:03Z")
}
}
I have tried this:
db.user.aggregate(
[
{
$match:
{
projectId : "00001"
}
},
{
"$project": {
"createDate": {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasRegisteredAt"
}
},
activationAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasActivatedAt"
}
},
customerAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasBeenCustomerAt"
}
},
cancelAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasCanceledAt"
}
},
activations: {
"$sum": {
"$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]
}
},
customers: {
"$sum": {
"$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]
}
},
cancels: {
"$sum": {
"$cond": [{
"$and": [
{ "$eq": [ "$status", 3 ] },
{ "$eq": [ "$track.hasCanceled", true ] }
]},
1,
0
]
}
}
}
},
{
$group:
{
_id: "$createDate",
users: {$sum: 1},
activations: {$sum: "$activations"},
activationsM: {
"$sum": {
"$cond": [
{ "$eq": [ "$activationAt", "$createDate" ] },
1,
0
]
}
},
customers: {$sum: "$customers"},
customersM: {
"$sum": {
"$cond": [
{ "$eq": [ "$customerAt", "$createDate" ] },
1,
0
]
}
},
cancels: {$sum: "$cancels"},
cancelsM: {
"$sum": {
"$cond": [
{ "$eq": [ "$cancelAt", "$createDate" ] },
1,
0
]
}
},
}
},
{
$sort:
{
_id: 1
}
}
]
)
activationsM, customersM and cancelsM are supposed to be per-month counts, independent of the _id field, but I have realized that this query first restricts the results to those matching the _id, and only then checks the condition. I need them to be summed even when the _id doesn't match the hasActivatedAt, hasBeenCustomerAt or hasCanceledAt fields.
Hope I have explained it properly.
The desired output would be:
{ "_id" : "2018-06", "users" : 18, "activations" : 5, "activationsM" : 2, "customers" : 4, "customersM" : 0, "cancels" : 1, "cancelsM" : 0 }
{ "_id" : "2018-07", "users" : 78, "activations" : 39, "activationsM" : 31, "customers" : 11, "customersM" : 0, "cancels" : 7, "cancelsM" : 0 }
{ "_id" : "2018-08", "users" : 115, "activations" : 49, "activationsM" : 38, "customers" : 18, "customersM" : 0, "cancels" : 8, "cancelsM" : 0 }
Here each fieldM is the total count of documents whose corresponding date field falls in the month given by the _id.
Thanks.
Try as below:
db.collection.aggregate([
{
$facet: {
"TOTAL_ACTIVATION": [
{
$group: {
_id: "$track.hasActivated",
total: { "$sum": 1 },
"totalActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]}
},
"totalNonActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", false ] },
1,
0
]}
},
}
}
],
"TOTAL_CUSTOMERS": [
{
$group: {
_id: "$track.hasBeenCustomer",
total: { "$sum": 1 },
"totalCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]}
},
"totalNonCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", false ] },
1,
0
]}
},
}
}
],
"TOTAL_CANCELLED": [
{
$group: {
_id: "$track.hasCanceled",
total: { "$sum": 1 },
"totalCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", true ] },
1,
0
]}
},
"totalNonCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_ACTIVATION" : [
{
$group: {
_id: {
year: { $year: "$track.hasActivatedAt" },
month: { $month: "$track.hasActivatedAt" }
},
totalThisMonth: { $sum : 1},
"totalActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]}
},
"totalNonActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_CUSTOMER" : [
{
$group: {
_id: {
year: { $year: "$track.hasBeenCustomerAt" },
month: { $month: "$track.hasBeenCustomerAt" }
},
totalThisMonth: { $sum : 1},
"totalCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]}
},
"totalNonCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_CANCELLED" : [
{
$group: {
_id: {
year: { $year: "$track.hasCanceledAt" },
month: { $month: "$track.hasCanceledAt" }
},
totalThisMonth: { $sum : 1},
"totalCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", true ] },
1,
0
]}
},
"totalNonCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", false ] },
1,
0
]}
},
}
}
]
}
}
])
Result of this will be as below:
{
"TOTAL_ACTIVATION" : [
{
"_id" : true,
"total" : 4,
"totalActiveCustomer" : 4,
"totalNonActiveCustomer" : 0
}
],
"TOTAL_CUSTOMERS" : [
{
"_id" : true,
"total" : 4,
"totalCustomer" : 4,
"totalNonCustomer" : 0
}
],
"TOTAL_CANCELLED" : [
{
"_id" : true,
"total" : 4,
"totalCancelledCustomer" : 4,
"totalNonCancelledCustomer" : 0
}
],
"MONTHLY_ACTIVATION" : [
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(10)
},
"totalThisMonth" : 1,
"totalActiveCustomer" : 1,
"totalNonActiveCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(9)
},
"totalThisMonth" : 1,
"totalActiveCustomer" : 1,
"totalNonActiveCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(8)
},
"totalThisMonth" : 2,
"totalActiveCustomer" : 2,
"totalNonActiveCustomer" : 0
}
],
"MONTHLY_CUSTOMER" : [
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(12)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(3)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(11)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
}
],
"MONTHLY_CANCELLED" : [
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(1)
},
"totalThisMonth" : 1,
"totalCancelledCustomer" : 1,
"totalNonCancelledCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(5)
},
"totalThisMonth" : 1,
"totalCancelledCustomer" : 1,
"totalNonCancelledCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(4)
},
"totalThisMonth" : 2,
"totalCancelledCustomer" : 2,
"totalNonCancelledCustomer" : 0
}
]
}

MongoDB Group By count occurences of values and output as new field

I have a 3 Collections Assignments, Status, Assignee.
Assignments Fields : [_id, status, Assignee]
Assignee and Status Fields : [_id, name].
There can be many assignments associated with various Status and Assignee collections(linked via _id field), There is no nesting or complex data.
I need a query over all assignment ids where the Assignees are the rows, the Statuses are the columns, and each combined cell is the count, with total counts at the end.
To help you visualize, I am attaching below image. I am new to complex Mongo DB Aggregate framework, kindly guide me to achieve query.
Note: Data in Status and Assignee collection will be dynamic. Nothing is predetermined in the Query. So, the Rows and Columns are going to grow dynamically in future, If the query is given pagination, then it would be of great help. I cannot write a query with hard coded status names like 'pending', 'completed' etc. As data shall grow and existing data may change like 'pending task', 'completed work'.
Below is my query
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
]);
Below is the output format:
{
"_id" : "John",
"statuses" : {
"statusId" : "Pending",
"count" : 3.0
},
"count" : 3.0
}
{
"_id" : "Katrina",
"statuses" : [{
"statusId" : "Pending",
"count" : 1.0
},
{
"statusId" : "Completed",
"count" : 1.0
},
{
"statusId" : "Assigned",
"count" : 1.0
}],
"count" : 3.0
}
{
"_id" : "Collins",
"statuses" : {
"statusId" : "Pending",
"count" : 4.0
},
"count" : 4.0
}
Expected Output is:
{
"_id" : "Katrina",
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
"totalCount" : 3.0
}
Any idea how to map the various statusId values for each assignee to keys, rather than values, in a single document?
You need another $group stage after $unwind to count number of status based on statusId string value:
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
The final aggregate command:
// Per-assignee counts of assignments by status, one field per status name.
// The status names are hard-coded in the last stage.
db.getCollection('Assignments').aggregate([
  // Count assignments per (assignee, status) pair.
  {
    "$group": {
      "_id": {
        "assignee": "$assignee",
        "statusId": "$statusId"
      },
      "statusCount": { "$sum": 1 }
    }
  },
  // Collect the per-status counts of each assignee into an array.
  {
    "$group": {
      "_id": "$_id.assignee",
      "statuses": {
        "$push": {
          "statusId": "$_id.statusId",
          "count": "$statusCount"
        }
      },
      "count": { "$sum": "$statusCount" }
    }
  },
  // One document per (assignee, status) entry again, ready for pivoting.
  { "$unwind": "$statuses" },
  // Pivot: each counter adds the entry's count only when its statusId matches.
  {
    "$group": {
      "_id": "$_id",
      "Pending": {
        "$sum": {
          "$cond": [
            { "$eq": [ "$statuses.statusId", "Pending" ] },
            "$statuses.count",
            0
          ]
        }
      },
      "Completed": {
        "$sum": {
          "$cond": [
            { "$eq": [ "$statuses.statusId", "Completed" ] },
            "$statuses.count",
            0
          ]
        }
      },
      "Assigned": {
        "$sum": {
          "$cond": [
            { "$eq": [ "$statuses.statusId", "Assigned" ] },
            "$statuses.count",
            0
          ]
        }
      },
      // Sum the per-status counts to get the number of assignments;
      // { "$sum": 1 } would only count the distinct statuses per assignee.
      "totalCount": { "$sum": "$statuses.count" }
    }
  }
]);
Why not just keep statuses as an object, so that each status is a key/value pair? If that works for you, do the following:
// Per-assignee status counts as a dynamic { <statusId>: <count> } object,
// without hard-coding any status names.
db.getCollection('Assignments').aggregate(
  [
    // Count assignments per (assignee, status) pair.
    {
      "$group": {
        "_id": {
          "assignee": "$assignee",
          "statusId": "$statusId"
        },
        "statusCount": { "$sum": 1 }
      }
    },
    // Collect each assignee's counts as { k, v } pairs and keep a running total.
    {
      "$group": {
        "_id": "$_id.assignee",
        "statuses": {
          "$push": {
            "k": "$_id.statusId", // <- "k" as key value important for $arrayToObject Function
            "v": "$statusCount"   // <- "v" as key value important for $arrayToObject Function
          }
        },
        "count": { "$sum": "$statusCount" }
      }
    },
    // Turn the { k, v } pairs into a single object keyed by statusId.
    {
      "$project": {
        "_id": 1,
        "statuses": { "$arrayToObject": "$statuses" },
        "totalCount": "$count"
      }
    }
  ],
  {
    "allowDiskUse": false
  }
);
This gives you:
{
"_id" : "Katrina",
"statuses": {
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
},
"totalCount" : 3.0
}
A compromise having it one layer deeper but still the shape of statuses you wanted and dynamic with each new statusId added.

Mongodb use multiple group operator in single aggregation

I am using mongodb aggregation for getting counts of different fields. Here are some documents from the mobile collection:-
{
"title": "Moto G",
"manufacturer": "Motorola",
"releasing": ISODate("2011-03-00T10:26:48.424Z"),
"rating": "high"
}
{
"title": "Asus Zenfone 2",
"manufacturer": "Asus",
"releasing": ISODate("2014-10-00T10:26:48.424Z"),
"rating": "high"
}
{
"title": "Moto Z",
"manufacturer": "Motorola",
"releasing": ISODate("2016-10-12T10:26:48.424Z"),
"rating": "none"
}
{
"title": "Asus Zenfone 3",
"manufacturer": "Asus",
"releasing": ISODate("2016-08-00T10:26:48.424Z"),
"rating": "medium"
}
I can find manufacturer and rating counts but this fails:
db.mobile.aggregate([
{
$group: { _id: "$manufacturer", count: { $sum: 1 } }
}, {
$group: { _id: "$rating", count: { $sum: 1 } }
}
])
Output:-
{
"_id" : null,
"count" : 2.0
}
Expected Output something like:-
{
"_id":"Motorola",
"count" : 2.0
}
{
"_id":"Asus",
"count" : 2.0
}
{
"_id":"high",
"count" : 2.0
}
{
"_id":"none",
"count" : 1.0
}
{
"_id":"medium",
"count" : 1.0
}
I believe you are after an aggregation operation that groups the documents by the manufacturer and rating keys, then do a further group on the manufacturer while aggregating the ratings per manufacturer, something like the following pipeline:
// Per-manufacturer totals with a nested breakdown of counts by rating.
db.mobile.aggregate([
  // Count phones per (manufacturer, rating) pair.
  {
    "$group": {
      "_id": {
        "manufacturer": "$manufacturer",
        "rating": "$rating"
      },
      "count": { "$sum": 1 }
    }
  },
  // Roll the pairs up per manufacturer.
  {
    "$group": {
      "_id": "$_id.manufacturer",
      // Sum the pair counts to get the number of phones; { "$sum": 1 } would
      // only count the distinct ratings of the manufacturer.
      "total": { "$sum": "$count" },
      "counts": {
        "$push": {
          "rating": "$_id.rating",
          "count": "$count"
        }
      }
    }
  }
])
Sample Output
/* 1 */
{
"_id" : "Motorola",
"total" : 2,
"counts" : [
{
"rating" : "high",
"count" : 1
},
{
"rating" : "none",
"count" : 1
}
]
}
/* 2 */
{
"_id" : "Asus",
"total" : 2,
"counts" : [
{
"rating" : "high",
"count" : 1
},
{
"rating" : "medium",
"count" : 1
}
]
}
or if you are after a more "flat" or "denormalised" result, run this aggregate operation:
db.mobile.aggregate([
{
"$group": {
"_id": "$manufacturer",
"total": { "$sum": 1 },
"high_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "high" ] }, 1, 0 ]
}
},
"medium_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "medium" ] }, 1, 0 ]
}
},
"low_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "low" ] }, 1, 0 ]
}
},
"none_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "none" ] }, 1, 0 ]
}
}
}
}
])
Sample Output
/* 1 */
{
"_id" : "Motorola",
"total" : 2,
"high_ratings" : 1,
"medium_ratings" : 0,
"low_ratings" : 0,
"none_ratings" : 1
}
/* 2 */
{
"_id" : "Asus",
"total" : 2,
"high_ratings" : 1,
"medium_ratings" : 1,
"low_ratings" : 0,
"none_ratings" : 0
}

$push and $sum with the aggregation framework on sub-documents

I have data as follows:
{
"_id" : ObjectId("55d4410544c96d6f6578f893"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "PASS",
}
],
"runEndTime" : ISODate("2015-08-19T08:40:47.049Z")
}
{
"_id" : ObjectId("55d4410544c96d6f6578f894"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "FAIL",
}
],
"runEndTime" : ISODate("2015-08-19T08:50:47.049Z")
}
And I was trying to get the result like this:
{
"executionProject": "Project1",
"data": [
{
"date": "2015-08-19 08:40:47",
"suitePass": 2,
"suiteFail": 1
},
{
"date": "2015-08-19 08:50:47",
"suitePass": 1,
"suiteFail": 2
}
]
}
Here I'm trying to group by executionProject and push the runEndTime and the pass and fail counts of suites to the result.
I tried this, but it gives me the wrong projection:
db.testruns.aggregate([
{
$project: {
executionProject: "$executionProject",
runEndTime: "$runEndTime",
suiteList: "$suiteList"
}
},
{
$unwind: "$suiteList"
},
{
$group: {
_id: "$executionProject",
runEndTime: {
$addToSet: "$runEndTime"
},
suite_pass: {
$sum: {
$cond: {
"if": {
$eq: ["$suiteList.suiteStatus", "PASS"]
},
"then": 1,
"else": 0
}
}
}
}
},
{
$group: {
_id: "$_id",
runEndTime: { $push: {runTime: "$runEndTime", suite_pass: "$suite_pass"} }
}
},
{
$project: {
executionProject: "$_id",
runEndTime: "$runEndTime",
_id: 0
}
}
]);
First you need to group by the document to get the suite totals, then you add to the array as you group on the project. Also don't forget to "sort" if you want things in order:
[
{ "$unwind": "$suiteList" },
{ "$group": {
"_id": "$_id",
"executionProject": { "$first": "$executionProject" },
"suite-pass": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "PASS" ] },
1,
0
]
}
},
"suite-fail": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "FAIL" ] },
1,
0
]
}
},
"date": { "$first": "$runEndTime" }
}},
{ "$sort": { "executionProject": 1, "date": 1 } },
{ "$group": {
"_id": "$executionProject",
"data": {
"$push": {
"suite-pass": "$suite-pass",
"suite-fail": "$suite-fail",
"date": "$date"
}
}
}}
]
Produces:
{
"_id" : "Project1",
"data" : [
{
"suite-pass" : 2,
"suite-fail" : 1,
"date" : ISODate("2015-08-19T08:40:47.049Z")
},
{
"suite-pass" : 1,
"suite-fail" : 2,
"date" : ISODate("2015-08-19T08:50:47.049Z")
}
]
}

Group Multiple Values in Aggregation

I want to group every field of a collection and get a total for each unique value. Let's assume there is a collection like this:
id country state operator
121 IN HR AIRTEL
212 IN MH AIRTEL
213 US LA AT&T
214 UK JK VODAFONE
Output should be like this:
{
"country": { "IN": 2, "US":1, "UK":1 },
"state": { "HR":1, "MH":1, "LA":1, "JK": 1 },
"operator": { "AIRTEL":2, "AT&T": 1, "VODAFONE": 1 }
}
I am trying to use mongo aggregation framework, but can't really think how to do this?
I got something similar to your output using aggregation; check the code below:
// Count the occurrences of each known country/state/operator value and
// combine them under "country", "state" and "operator" keys.
// The set of values is hard-coded; new values need a new counter.
db.collectionName.aggregate([
  // Stage 1: a single group (_id: null) with one conditional counter per value.
  {
    "$group": {
      "_id": null,
      "countryOfIN": { "$sum": { "$cond": [ { "$eq": [ "$country", "IN" ] }, 1, 0 ] } },
      "countryOfUK": { "$sum": { "$cond": [ { "$eq": [ "$country", "UK" ] }, 1, 0 ] } },
      "countryOfUS": { "$sum": { "$cond": [ { "$eq": [ "$country", "US" ] }, 1, 0 ] } },
      "stateOfHR": { "$sum": { "$cond": [ { "$eq": [ "$state", "HR" ] }, 1, 0 ] } },
      "stateOfMH": { "$sum": { "$cond": [ { "$eq": [ "$state", "MH" ] }, 1, 0 ] } },
      "stateOfLA": { "$sum": { "$cond": [ { "$eq": [ "$state", "LA" ] }, 1, 0 ] } },
      "stateOfJK": { "$sum": { "$cond": [ { "$eq": [ "$state", "JK" ] }, 1, 0 ] } },
      "operatorOfAIRTEL": { "$sum": { "$cond": [ { "$eq": [ "$operator", "AIRTEL" ] }, 1, 0 ] } },
      "operatorOfAT&T": { "$sum": { "$cond": [ { "$eq": [ "$operator", "AT&T" ] }, 1, 0 ] } },
      "operatorOfVODAFONE": { "$sum": { "$cond": [ { "$eq": [ "$operator", "VODAFONE" ] }, 1, 0 ] } }
    }
  },
  // Stage 2: regroup the flat counters into one sub-document per field.
  // $push over the single stage-1 document yields one-element arrays.
  {
    "$group": {
      "_id": null,
      "country": {
        "$push": {
          "IN": "$countryOfIN",
          "UK": "$countryOfUK",
          "US": "$countryOfUS"
        }
      },
      // Lower-case key, matching the "state" key of the requested output.
      "state": {
        "$push": {
          "HR": "$stateOfHR",
          "MH": "$stateOfMH",
          "LA": "$stateOfLA",
          "JK": "$stateOfJK"
        }
      },
      "operator": {
        "$push": {
          "AIRTEL": "$operatorOfAIRTEL",
          "AT&T": "$operatorOfAT&T",
          "VODAFONE": "$operatorOfVODAFONE"
        }
      }
    }
  },
  // Stage 3: drop the _id and keep only the three combined fields.
  {
    "$project": {
      "_id": 0,
      "country": 1,
      "state": 1,
      "operator": 1
    }
  }
])
Using $cond, the first group creates counts of the matched data, and the second group pushes them together to combine them.
An output format like the one you are looking for is not really suited to the aggregation framework, since you are transforming part of your data into "key" names. The aggregation framework does not do this, but rather sticks to database "best practice", as it does not transform "data" into "key" names in any way.
You can perform a mapReduce operation instead, which allows more flexibility with the manipulation, but does not perform as well due to the need to use JavaScript code to perform the manipulation:
// Count the occurrences of every distinct value of every field (except _id),
// producing a nested { <field>: { <value>: <count> } } object.
db.collection.mapReduce(
// Map: for each document, emit under the single key null an object of the
// form { <field>: { <value>: 1 } } covering every field other than _id.
function () {
var obj = {},
doc = this;
// _id is dropped so it is not counted as a field.
delete doc._id;
Object.keys(doc).forEach(function(key) {
obj[key] = {};
obj[key][doc[key]] = 1;
});
emit( null, obj );
},
// Reduce: merge the emitted objects by adding up the counts found under each
// (field, value) pair. The result has the same shape as the emitted values.
function (key,values) {
var result = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(outerKey) {
Object.keys(value[outerKey]).forEach(function(innerKey) {
// First time this field is seen: start its value -> count map.
if ( !result.hasOwnProperty(outerKey) ) {
result[outerKey] = {};
}
// Accumulate when the value was already seen, otherwise initialise it.
if ( result[outerKey].hasOwnProperty(innerKey) ) {
result[outerKey][innerKey] += value[outerKey][innerKey];
} else {
result[outerKey][innerKey] = value[outerKey][innerKey];
}
});
});
});
return result;
},
// Inline output: the results come back in the command response.
{ "out": { "inline": 1 } }
)
And in the structure that applies to all mapReduce results:
{
"results" : [
{
"_id" : null,
"value" : {
"country" : {
"IN" : 2,
"US" : 1,
"UK" : 1
},
"state" : {
"HR" : 1,
"MH" : 1,
"LA" : 1,
"JK" : 1
},
"operator" : {
"AIRTEL" : 2,
"AT&T" : 1,
"VODAFONE" : 1
}
}
}
]
}
For the aggregation framework itself, it is better suited to producing aggregation results that are more consistently structured:
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
])
Which would output:
{ "_id" : { "type" : "state", "key" : "JK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "UK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "US" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AT&T" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "LA" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AIRTEL" }, "count" : 2 }
{ "_id" : { "type" : "state", "key" : "MH" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "HR" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "VODAFONE" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "IN" }, "count" : 2 }
But it is fairly easy to transform in client code while iterating the results:
var result = {};
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
]).forEach(function(doc) {
if ( !result.hasOwnProperty(doc._id.type) )
result[doc._id.type] = {};
result[doc._id.type][doc._id.key] = doc.count;
})
Which gives the final structure in "result":
{
"state" : {
"JK" : 1,
"LA" : 1,
"MH" : 1,
"HR" : 1
},
"country" : {
"UK" : 1,
"US" : 1,
"IN" : 2
},
"operator" : {
"AT&T" : 1,
"AIRTEL" : 2,
"VODAFONE" : 1
}
}