Aggregate data grouped by date but from different date fields - mongodb

I'm trying to get a query where the output is grouped by date, but the next fields will be based on different date fields.
So, for the date 2018-11 (year-month), how many registers were, how many activations were, how many customers and how many cancels. But each register/activation/customer/cancel, has to be counted in the month when it happened.
My data is stored as follow:
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-08-21T14:32:53.929Z"),
"hasBeenCustomerAt" : ISODate("2019-02-26T07:21:06Z"),
"hasRegisteredAt" : ISODate("2018-08-09T10:17:38.329Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-04-29T13:56:04Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-08-26T15:04:58.854Z"),
"hasBeenCustomerAt" : ISODate("2018-11-24T10:37:14Z"),
"hasRegisteredAt" : ISODate("2018-08-25T11:12:36.309Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-05-30T18:11:04Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-09-24T23:21:55.733Z"),
"hasBeenCustomerAt" : ISODate("2019-03-12T10:26:01Z"),
"hasRegisteredAt" : ISODate("2018-09-22T17:56:57.256Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-04-12T10:22:03Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-10-18T15:08:15.351Z"),
"hasBeenCustomerAt" : ISODate("2018-12-22T21:37:01Z"),
"hasRegisteredAt" : ISODate("2018-10-16T03:54:16.056Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-01-22T21:39:03Z")
}
}
I have tried this:
db.user.aggregate(
[
{
$match:
{
projectId : "00001"
}
},
{
"$project": {
"createDate": {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasRegisteredAt"
}
},
activationAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasActivatedAt"
}
},
customerAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasBeenCustomerAt"
}
},
cancelAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasCanceledAt"
}
},
activations: {
"$sum": {
"$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]
}
},
customers: {
"$sum": {
"$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]
}
},
cancels: {
"$sum": {
"$cond": [{
"$and": [
{ "$eq": [ "$status", 3 ] },
{ "$eq": [ "$track.hasCanceled", true ] }
]},
1,
0
]
}
}
}
},
{
$group:
{
_id: "$createDate",
users: {$sum: 1},
activations: {$sum: "$activations"},
activationsM: {
"$sum": {
"$cond": [
{ "$eq": [ "$activationAt", "$createDate" ] },
1,
0
]
}
},
customers: {$sum: "$customers"},
customersM: {
"$sum": {
"$cond": [
{ "$eq": [ "$customerAt", "$createDate" ] },
1,
0
]
}
},
cancels: {$sum: "$cancels"},
cancelsM: {
"$sum": {
"$cond": [
{ "$eq": [ "$cancelAt", "$createDate" ] },
1,
0
]
}
},
}
},
{
$sort:
{
_id: 1
}
}
]
)
activationsM, customersM, cancelsM, are supposed to be the counting per month, independently from the _id field, but I have realized that, this query relieves in results matching the _id, and once it matches it, then it check the condintion. I need it to be sum, even if the _id doesn't match the hasActivatedAt, hasBeenCustomerAt, hasCanceledAt fields.
Hope I have explained it properly.
The desired output would be:
{ "_id" : "2018-06", "users" : 18, "activations" : 5, "activationsM" : 2, "customers" : 4, "customersM" : 0, "cancels" : 1, "cancelsM" : 0 }
{ "_id" : "2018-07", "users" : 78, "activations" : 39, "activationsM" : 31, "customers" : 11, "customersM" : 0, "cancels" : 7, "cancelsM" : 0 }
{ "_id" : "2018-08", "users" : 115, "activations" : 49, "activationsM" : 38, "customers" : 18, "customersM" : 0, "cancels" : 8, "cancelsM" : 0 }
Being fiedlM the total count for the correspondient field date and the _id date field.
Thanks.

Try as below:
db.collection.aggregate([
{
$facet: {
"TOTAL_ACTIVATION": [
{
$group: {
_id: "$track.hasActivated",
total: { "$sum": 1 },
"totalActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]}
},
"totalNonActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", false ] },
1,
0
]}
},
}
}
],
"TOTAL_CUSTOMERS": [
{
$group: {
_id: "$track.hasBeenCustomer",
total: { "$sum": 1 },
"totalCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]}
},
"totalNonCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", false ] },
1,
0
]}
},
}
}
],
"TOTAL_CANCELLED": [
{
$group: {
_id: "$track.hasCanceled",
total: { "$sum": 1 },
"totalCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", true ] },
1,
0
]}
},
"totalNonCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_ACTIVATION" : [
{
$group: {
_id: {
year: { $year: "$track.hasActivatedAt" },
month: { $month: "$track.hasActivatedAt" }
},
totalThisMonth: { $sum : 1},
"totalActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]}
},
"totalNonActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_CUSTOMER" : [
{
$group: {
_id: {
year: { $year: "$track.hasBeenCustomerAt" },
month: { $month: "$track.hasBeenCustomerAt" }
},
totalThisMonth: { $sum : 1},
"totalCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]}
},
"totalNonCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_CANCELLED" : [
{
$group: {
_id: {
year: { $year: "$track.hasCanceledAt" },
month: { $month: "$track.hasCanceledAt" }
},
totalThisMonth: { $sum : 1},
"totalCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", true ] },
1,
0
]}
},
"totalNonCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", false ] },
1,
0
]}
},
}
}
]
}
}
])
Result of this will be as below:
{
"TOTAL_ACTIVATION" : [
{
"_id" : true,
"total" : 4,
"totalActiveCustomer" : 4,
"totalNonActiveCustomer" : 0
}
],
"TOTAL_CUSTOMERS" : [
{
"_id" : true,
"total" : 4,
"totalCustomer" : 4,
"totalNonCustomer" : 0
}
],
"TOTAL_CANCELLED" : [
{
"_id" : true,
"total" : 4,
"totalCancelledCustomer" : 4,
"totalNonCancelledCustomer" : 0
}
],
"MONTHLY_ACTIVATION" : [
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(10)
},
"totalThisMonth" : 1,
"totalActiveCustomer" : 1,
"totalNonActiveCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(9)
},
"totalThisMonth" : 1,
"totalActiveCustomer" : 1,
"totalNonActiveCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(8)
},
"totalThisMonth" : 2,
"totalActiveCustomer" : 2,
"totalNonActiveCustomer" : 0
}
],
"MONTHLY_CUSTOMER" : [
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(12)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(3)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(11)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
}
],
"MONTHLY_CANCELLED" : [
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(1)
},
"totalThisMonth" : 1,
"totalCancelledCustomer" : 1,
"totalNonCancelledCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(5)
},
"totalThisMonth" : 1,
"totalCancelledCustomer" : 1,
"totalNonCancelledCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(4)
},
"totalThisMonth" : 2,
"totalCancelledCustomer" : 2,
"totalNonCancelledCustomer" : 0
}
]
}

Related

MongoDB aggregate multiple group by top fields and array fields

My collection will look like this,
{
"_id" : ObjectId("591c5971240033283736860a"),
"status" : "Done",
"createdDate" : ISODate("2017-05-17T14:09:20.653Z")
"communications" : [
{
"communicationUUID" : "df07948e-4a14-468e-beb1-db55ff72b215",
"communicationType" : "CALL",
"recipientId" : 12345,
"createdDate" : ISODate("2017-05-18T14:09:20.653Z")
"callResponse" : {
"Status" : "completed",
"id" : "dsd45554545ds92a9bd2c12e0e6436d",
}
}
]}
{
"_id" : ObjectId("45sdsd59124003345121450a"),
"status" : "ToDo",
"createdDate" : ISODate("2017-05-17T14:09:20.653Z")
"communications" : [
{
"communicationUUID" : "45sds55-4a14-468e-beb1-db55ff72b215",
"communicationType" : "CALL",
"recipientId" : 1234,
"createdDate" : ISODate("2017-05-18T14:09:20.653Z")
"callResponse" : {
"Status" : "completed",
"id" : "84fe862f1924455dsds5556436d",
}
}
]}
Currently I am writing two aggregate query to achieve my requirement and my query will be below
db.collection.aggregate(
{ $project: {
dayMonthYear: { $dateToString: { format: "%d/%m/%Y", date: "$createdDate" } },
status: 1,
}},
{ $group: {
_id: "$dayMonthYear",
Pending: { $sum: { $cond : [{ $eq : ["$status", "ToDo"]}, 1, 0]} },
InProgress: { $sum: { $cond : [{ $eq : ["$status", "InProgress"]}, 1, 0]} },
Done: { $sum: { $cond : [{ $eq : ["$status", "Done"]}, 1, 0]} },
Total: { $sum: 1 }
}}
My output will be,
{"_id" : "17/05/2017", "Pending" : 1.0, "InProgress" : 0.0, "Done" : 1.0, "Total" : 2.0 }
Using above query I can able to get count but I need to find the count based on communication Status too so I am writing one more query to achieve,
db.collection.aggregate(
{"$unwind":"$communications"},
{ $project: {
dayMonthYear: { $dateToString: { format: "%d/%m/%Y", date: "$createdDate" } },
communications: 1
}},
{ "$group": {
_id: "$dayMonthYear",
"total_call": { $sum: { $cond : [{ $or : [ { $eq: [ "$communications.callResponse.Status", "failed"] },
{ $eq: [ "$communications.callResponse.Status", "busy"] },
{ $eq: [ "$communications.callResponse.Status", "completed"] },
{ $eq: [ "$communications.callResponse.Status", "no-answer"] }
]}, 1, 0 ] }},
"engaged": { $addToSet: { $cond : [{ $eq : ["$communications.callResponse.Status", "completed"]},
"$communications.recipientId", "null" ]} },
"not_engaged": { $addToSet: { $cond: [{ $or : [ { $eq: [ "$communications.callResponse.Status", "failed"] },
{ $eq: [ "$communications.callResponse.Status", "busy"] },
{ $eq: [ "$communications.callResponse.Status", "no-answer"] } ]},
"$communications.recipientId", "null" ] }}
}},
{ "$project": {
"_id": 1,
"total_call": 1,
"engaged": { "$setDifference": [ "$ngaged", ["null"] ] },
"not_engaged": { "$setDifference": [ "$not_engaged", ["null"] ] },
}},
{ "$project": {
"total_call": 1,
"engaged": { "$size": "$engaged" },
"not_engaged": { "$size": { "$setDifference": [ "$not_engaged", "$engaged" ] }},
}})
My output will be,
{"_id" : "18/05/2017", "total_call" : 2.0, "engaged" : 2, "not_engaged" : 0}
Using above query I can able to get count but I want to achieve it in single query
I am looking for output like
{"_id":"17/05/2017", "Pending" : 1.0, "InProgress" : 0.0, "Done" : 1.0, "total_call" : 0, "engaged" : 0, "not_engaged" : 0}
{"_id":"18/05/2017", "Pending" : 0.0, "InProgress" : 0.0, "Done" : 0.0, "total_call" : 2, "engaged" : 2, "not_engaged" : 0}
Can anyone suggest or provide me good way to get above result.
You can use $concatArrays to merge the status& createdDate documents followed by $group to count the occurrences.
db.collection.aggregate([
{
"$project": {
"statusandcreateddate": {
"$concatArrays": [
[
{
"status": "$status",
"createdDate": "$createdDate"
}
],
{
"$map": {
"input": "$communications",
"as": "l",
"in": {
"status": "$$l.callResponse.Status",
"createdDate": "$$l.createdDate"
}
}
}
]
}
}
},
{
"$unwind": "$statusandcreateddate"
},
{
"$group": {
"_id": {
"$dateToString": {
"format": "%d/%m/%Y",
"date": "$statusandcreateddate.createdDate"
}
},
"total_call": {
"$sum": {
"$cond": [
{
"$or": [
{
"$eq": [
"$statusandcreateddate.status",
"failed"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"busy"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"completed"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"no-answer"
]
}
]
},
1,
0
]
}
},
"engaged": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"completed"
]
},
1,
0
]
}
},
"not_engaged": {
"$sum": {
"$cond": [
{
"$or": [
{
"$eq": [
"$statusandcreateddate.status",
"failed"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"busy"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"no-answer"
]
}
]
},
1,
0
]
}
},
"Pending": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"ToDo"
]
},
1,
0
]
}
},
"InProgress": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"InProgress"
]
},
1,
0
]
}
},
"Done": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"Done"
]
},
1,
0
]
}
}
}
}
])

Mongodb use multiple group operator in single aggregation

I am using mongodb aggregation for getting counts of different fields. Here are some documents from the mobile collection:-
{
"title": "Moto G",
"manufacturer": "Motorola",
"releasing": ISODate("2011-03-00T10:26:48.424Z"),
"rating": "high"
}
{
"title": "Asus Zenfone 2",
"manufacturer": "Asus",
"releasing": ISODate("2014-10-00T10:26:48.424Z"),
"rating": "high"
}
{
"title": "Moto Z",
"manufacturer": "Motorola",
"releasing": ISODate("2016-10-12T10:26:48.424Z"),
"rating": "none"
}
{
"title": "Asus Zenfone 3",
"manufacturer": "Asus",
"releasing": ISODate("2016-08-00T10:26:48.424Z"),
"rating": "medium"
}
I can find manufacturer and rating counts but this fails:
db.mobile.aggregate([
{
$group: { _id: "$manufacturer", count: { $sum: 1 } }
}, {
$group: { _id: "$rating", count: { $sum: 1 } }
}
])
Output:-
{
"_id" : null,
"count" : 2.0
}
Expected Output something like:-
{
"_id":"Motorola",
"count" : 2.0
}
{
"_id":"Asus",
"count" : 2.0
}
{
"_id":"high",
"count" : 2.0
}
{
"_id":"none",
"count" : 1.0
}
{
"_id":"medium",
"count" : 1.0
}
I believe you are after an aggregation operation that groups the documents by the manufacturer and rating keys, then do a further group on the manufacturer while aggregating the ratings per manufacturer, something like the following pipeline:
db.mobile.aggregate([
{
"$group": {
"_id": {
"manufacturer": "$manufacturer",
"rating": "$rating"
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.manufacturer",
"total": { "$sum": 1 },
"counts": {
"$push": {
"rating": "$_id.rating",
"count": "$count"
}
}
}
}
])
Sample Output
/* 1 */
{
"_id" : "Motorola",
"total" : 2,
"counts" : [
{
"rating" : "high",
"count" : 1
},
{
"rating" : "none",
"count" : 1
}
]
}
/* 2 */
{
"_id" : "Asus",
"total" : 2,
"counts" : [
{
"rating" : "high",
"count" : 1
},
{
"rating" : "medium",
"count" : 1
}
]
}
or if you are after a more "flat" or "denormalised" result, run this aggregate operation:
db.mobile.aggregate([
{
"$group": {
"_id": "$manufacturer",
"total": { "$sum": 1 },
"high_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "high" ] }, 1, 0 ]
}
},
"medium_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "medium" ] }, 1, 0 ]
}
},
"low_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "low" ] }, 1, 0 ]
}
},
"none_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "none" ] }, 1, 0 ]
}
}
}
}
])
Sample Output
/* 1 */
{
"_id" : "Motorola",
"total" : 2,
"high_ratings" : 1,
"medium_ratings" : 0,
"low_ratings" : 0,
"none_ratings" : 1
}
/* 2 */
{
"_id" : "Asus",
"total" : 2,
"high_ratings" : 1,
"medium_ratings" : 1,
"low_ratings" : 0,
"none_ratings" : 0
}

$push and $sum with the aggregation framework on sub-documents

I've a data as follows:
{
"_id" : ObjectId("55d4410544c96d6f6578f893"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "PASS",
}
],
"runEndTime" : ISODate("2015-08-19T08:40:47.049Z")
}
{
"_id" : ObjectId("55d4410544c96d6f6578f894"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "FAIL",
}
],
"runEndTime" : ISODate("2015-08-19T08:50:47.049Z")
}
And I was trying to get the result like this:
{
"executionProject": "Project1",
"data": [
{
"date": "2015-08-19 08:40:47",
"suitePass": 2,
"suiteFail": 1
},
{
"date": "2015-08-19 08:50:47",
"suitePass": 1,
"suiteFail": 2
}
]
}
Here I'm trying to group by executionProject and push the runEndTime and the pass and fail counts of suites to the result.
I tried this, but giving me wrong way of projection:
db.testruns.aggregate([
{
$project: {
executionProject: "$executionProject",
runEndTime: "$runEndTime",
suiteList: "$suiteList"
}
},
{
$unwind: "$suiteList"
},
{
$group: {
_id: "$executionProject",
runEndTime: {
$addToSet: "$runEndTime"
},
suite_pass: {
$sum: {
$cond: {
"if": {
$eq: ["$suiteList.suiteStatus", "PASS"]
},
"then": 1,
"else": 0
}
}
}
}
},
{
$group: {
_id: "$_id",
runEndTime: { $push: {runTime: "$runEndTime", suite_pass: "$suite_pass"} }
}
},
{
$project: {
executionProject: "$_id",
runEndTime: "$runEndTime",
_id: 0
}
}
]);
First you need to group by the document to get the suite totals, then you add to the array as you group on the project. Also don't forget to "sort" if you want things in order:
[
{ "$unwind": "$suiteList" },
{ "$group": {
"_id": "$_id",
"executionProject": { "$first": "$executionProject" },
"suite-pass": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "PASS" ] },
1,
0
]
}
},
"suite-fail": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "FAIL" ] },
1,
0
]
}
},
"date": { "$first": "$runEndTime" }
}},
{ "$sort": { "executionProject": 1, "date": 1 } },
{ "$group": {
"_id": "$executionProject",
"data": {
"$push": {
"suite-pass": "$suite-pass",
"suite-fail": "$suite-fail",
"date": "$date"
}
}
}}
]
Produces:
{
"_id" : "Project1",
"data" : [
{
"suite-pass" : 2,
"suite-fail" : 1,
"date" : ISODate("2015-08-19T08:40:47.049Z")
},
{
"suite-pass" : 1,
"suite-fail" : 2,
"date" : ISODate("2015-08-19T08:50:47.049Z")
}
]
}

Aggregate with count of sub documents matching the condition and grouping

I've collections of documents as like as below:
{
"_id" : ObjectId("55d4410544c96d6f6578f893"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T08:40:47.049Z"),
"runStartTime" : ISODate("2015-08-19T08:40:37.621Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d44eb4c0422e7b8bffe76b"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T09:39:13.528Z"),
"runStartTime" : ISODate("2015-08-19T09:39:00.406Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d44f0bc0422e7b8bffe76f"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteStatus" : "FAIL"
}
],
"runEndTime" : ISODate("2015-08-19T09:46:31.108Z"),
"runStartTime" : ISODate("2015-08-19T09:40:27.377Z"),
"runStatus" : "PASS",
"__v" : 1
}
{
"_id" : ObjectId("55d463d0c0422e7b8bffe789"),
"executionProject" : "Project2",
"suiteList" : [
{
"suiteStatus" : "PASS"
},
{
"suiteStatus" : "PASS"
}
],
"runEndTime" : ISODate("2015-08-19T11:09:52.537Z"),
"runStartTime" : ISODate("2015-08-19T11:09:04.539Z"),
"runStatus" : "FAIL",
"__v" : 1
}
{
"_id" : ObjectId("55d464ebc0422e7b8bffe7c2"),
"executionProject" : "Project3",
"suiteList" : [
{
"suiteStatus" : "FAIL"
}
],
"runEndTime" : ISODate("2015-08-19T11:18:41.460Z"),
"runStartTime" : ISODate("2015-08-19T11:13:47.268Z"),
"runStatus" : "FAIL",
"__v" : 10
}
And I'm expecting output as follows:
[
{
"executionProject": "Project1",
"suite-pass": 0,
"suite-fail": 1,
"runEndTime": ISODate("2015-08-19T09:46:31.108Z")
},
{
"executionProject": "Project2",
"suite-pass": 2,
"suite-fail": 0,
"runEndTime": ISODate("2015-08-19T11:09:52.537Z")
},
{
"executionProject": "Project3",
"suite-pass": 0,
"suite-fail": 1,
"runEndTime": ISODate("2015-08-19T11:18:41.460Z")
},
]
I want to group by project and order by runEndTime and show the pass and fail counts of suiteList.
I tried this as suggested by Blakes in Mongodb: Group by element and show the sub-document count based on condition and sort the document by date:
db.testruns.aggregate([
{ "$sort": { "runEndTime": 1 } },
{ "$group": {
"_id": "$executionProject",
"suite-pass": {
"$last": {
"$cond": [
{ "$anyElementTrue": {
"$map": {
"input": "$suiteList",
"as": "suite",
"in": {
"$eq": [ "$$suite.suiteStatus", "PASS" ]
}
}
}},
1,
0
]
}
},
"suite-fail": {
"$last": {
"$cond": [
{ "$anyElementTrue": {
"$map": {
"input": "$suiteList",
"as": "suite",
"in": {
"$eq": [ "$$suite.suiteStatus", "FAIL" ]
}
}
}},
1,
0
]
}
},
"runEndTime": { "$last": "$runEndTime" }
}},
{ "$sort": { "runEndTime": 1 } }
]);
I was expecting the suite-pass count for Project2 as 2 since there are 2 elements in suiteList, but it returns 1.
You should have read the answer properly, as there already was another alternate listing and explanation of why the expected result you want from the one you used would be different.
Instead you want this one, which respects the possible multiple "PASS" or "FAIL":
Model.aggregate(
[
{ "$sort": { "executionProject": 1, "runEndTime": 1 } },
{ "$group": {
"_id": "$executionProject",
"suiteList": { "$last": "$suiteList" },
"runEndTime": { "$last": "$runEndTime" }
}},
{ "$unwind": "$suiteList" },
{ "$group": {
"_id": "$_id",
"suite-pass": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "PASS" ] },
1,
0
]
}
},
"suite-fail": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "FAIL" ] },
1,
0
]
}
},
"runEndTime": {"$first": "$runEndTime"}
}},
{ "$sort": { "runEndTime": 1 }}
],
function(err,result) {
}
);
Which is sort of a "combination" of approaches. The first is to get the "last" by runTime as you were expecting. The next is to break down the array and this time actually "sum up" the possible occurances of pass or fail, rather than just record a 1 for either pass or fail in the array, the actual "pass" or "fail" are counted.
With results:
{
"_id" : "Project1",
"suite-pass" : 0,
"suite-fail" : 1,
"runEndTime" : ISODate("2015-08-19T09:46:31.108Z")
}
{
"_id" : "Project2",
"suite-pass" : 2,
"suite-fail" : 0,
"runEndTime" : ISODate("2015-08-19T11:09:52.537Z")
}
{
"_id" : "Project3",
"suite-pass" : 0,
"suite-fail" : 1,
"runEndTime" : ISODate("2015-08-19T11:18:41.460Z")
}
Unwind suiteList and used $sum in group as below :
db.testruns.aggregate({
"$unwind": "$suiteList"
}, {
"$group": {
"_id": "$executionProject",
"suite-pass": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$suiteList.suiteStatus", "PASS"]
},
"then": 1,
"else": 0
}
}
},
"suite-fail": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$suiteList.suiteStatus", "FAIL"]
},
"then": 1,
"else": 0
}
}
},
"runEndTime": {
"$last": "$runEndTime"
}
}
}, {
"$sort": {
"runEndTime": 1
}
})

Group Multiple Values in Aggregation

I want to group the all field of a collection with unique total. Let's assume there is collection like this:
id country state operator
121 IN HR AIRTEL
212 IN MH AIRTEL
213 US LA AT&T
214 UK JK VODAFONE
Output should be like this:
{
"country": { "IN": 2, "US":1, "UK":1 },
"state": { "HR":1, "MH":1, "LA":1, "JK": 1 },
"operator": { "AIRTEL":2, "AT&T": 1, "VODAFONE": 1 }
}
I am trying to use mongo aggregation framework, but can't really think how to do this?
I find out some similar to your output using aggregation check below code
db.collectionName.aggregate({
"$group": {
"_id": null,
"countryOfIN": {
"$sum": {
"$cond": [{
$eq: ["$country", "IN"]
}, 1, 0]
}
},
"countryOfUK": {
"$sum": {
"$cond": [{
$eq: ["$country", "UK"]
}, 1, 0]
}
},
"countryOfUS": {
"$sum": {
"$cond": [{
$eq: ["$country", "US"]
}, 1, 0]
}
},
"stateOfHR": {
"$sum": {
"$cond": [{
$eq: ["$state", "HR"]
}, 1, 0]
}
},
"stateOfMH": {
"$sum": {
"$cond": [{
$eq: ["$state", "MH"]
}, 1, 0]
}
},
"stateOfLA": {
"$sum": {
"$cond": [{
$eq: ["$state", "LA"]
}, 1, 0]
}
},
"stateOfJK": {
"$sum": {
"$cond": [{
$eq: ["$state", "JK"]
}, 1, 0]
}
},
"operatorOfAIRTEL": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AIRTEL"]
}, 1, 0]
}
},
"operatorOfAT&T": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AT&T"]
}, 1, 0]
}
},
"operatorOfVODAFONE": {
"$sum": {
"$cond": [{
$eq: ["$operator", "VODAFONE"]
}, 1, 0]
}
}
}
}, {
"$group": {
"_id": null,
"country": {
"$push": {
"IN": "$countryOfIN",
"UK": "$countryOfUK",
"US": "$countryOfUS"
}
},
"STATE": {
"$push": {
"HR": "$stateOfHR",
"MH": "$stateOfMH",
"LA": "$stateOfLA",
"JK": "$stateOfJK"
}
},
"operator": {
"$push": {
"AIRTEL": "$operatorOfAIRTEL",
"AT&T": "$operatorOfAT&T",
"VODAFONE": "$operatorOfVODAFONE"
}
}
}
}, {
"$project": {
"_id": 0,
"country": 1,
"STATE": 1,
"operator": 1
}
})
using $cond created groups of matched data and pushed them in second groups to combine.
An output format like you are looking for is not really suited to the aggregation framework since you are tranforming part of your data in to "key" names. The aggregation framework does not do this but rather sticks to database "best practice" as does not transform "data" to "key" names in any way.
You can perform a mapReduce operation instead with allows more flexibilty with the manipulation, but not as good performance due to the need to use JavaScript code to perform the manipulation:
db.collection.mapReduce(
function () {
var obj = {},
doc = this;
delete doc._id;
Object.keys(doc).forEach(function(key) {
obj[key] = {};
obj[key][doc[key]] = 1;
});
emit( null, obj );
},
function (key,values) {
var result = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(outerKey) {
Object.keys(value[outerKey]).forEach(function(innerKey) {
if ( !result.hasOwnProperty(outerKey) ) {
result[outerKey] = {};
}
if ( result[outerKey].hasOwnProperty(innerKey) ) {
result[outerKey][innerKey] += value[outerKey][innerKey];
} else {
result[outerKey][innerKey] = value[outerKey][innerKey];
}
});
});
});
return result;
},
{ "out": { "inline": 1 } }
)
And in the stucture that applies to all mapReduce results:
{
"results" : [
{
"_id" : null,
"value" : {
"country" : {
"IN" : 2,
"US" : 1,
"UK" : 1
},
"state" : {
"HR" : 1,
"MH" : 1,
"LA" : 1,
"JK" : 1
},
"operator" : {
"AIRTEL" : 2,
"AT&T" : 1,
"VODAFONE" : 1
}
}
}
]
}
For the aggregation framework itself, it is better suited to producing aggregation results that are more consistently structured:
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
])
Which would output:
{ "_id" : { "type" : "state", "key" : "JK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "UK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "US" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AT&T" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "LA" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AIRTEL" }, "count" : 2 }
{ "_id" : { "type" : "state", "key" : "MH" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "HR" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "VODAFONE" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "IN" }, "count" : 2 }
But is fairly easy to transform in client code while iterating the results:
var result = {};
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
]).forEach(function(doc) {
if ( !result.hasOwnProperty(doc._id.type) )
result[doc._id.type] = {};
result[doc._id.type][doc._id.key] = doc.count;
})
Which gives the final structure in "result":
{
"state" : {
"JK" : 1,
"LA" : 1,
"MH" : 1,
"HR" : 1
},
"country" : {
"UK" : 1,
"US" : 1,
"IN" : 2
},
"operator" : {
"AT&T" : 1,
"AIRTEL" : 2,
"VODAFONE" : 1
}
}