Multiple conditional sums in mongodb aggregation - mongodb

I'm trying to return the total of requests by type based on their status:
If there is no status set, the request should be added to requested
If the status is ordered, the request should be added to ordered
If the status is arrived, the request should be added to arrived
caseRequest.aggregate([{
$group: {
_id: "$product",
suggested: {
$sum: {
$cond: [{
$ifNull: ["$status", true]
},
1, 0
]}
},
ordered: {
$sum: {
$cond: [{
$eq: ["$status", "ordered"]
},
1, 0
]
}
},
arrived: {
$sum: {
$cond: [{
$eq: ["$status", "arrived"]
},
1, 0
]
}
}
}
}
But for some reason it doesn't find any request status ordered or arrived. If in the database I have 48 requests, 45 of them without status, 2 with ordered and 1 with arrived, it returns:
[
{
_id: "xxx",
suggested: 48,
ordered: 0,
arrived: 0,
},
...
]

Try this approach,
Return the total number of requests by type based on their status
Now the simplest way to get the count of different status is to use aggregate pipeline with $group on the status field
db.stackoverflow.aggregate([{ $group: {_id: "$status", count: {$sum:1}} }])
We will be getting a result similar to this
{ "_id" : "", "count" : 2 }
{ "_id" : "arrived", "count" : 3 }
{ "_id" : "ordered", "count" : 4 }
The schema which is used to retrieve these records is very simple so that it will be easier to understand. The schema will have a parameter on the top level of the document and the value of status can be "ordered", "arrived" or empty
Schema
{ "_id" : ObjectId("5798c348d345404e7f9e0ced"), "status" : "ordered" }
The collection is populated with 9 records, with status as ordered, arrived and empty
db.stackoverflow.find()
{ "_id" : ObjectId("5798c348d345404e7f9e0ced"), "status" : "ordered" }
{ "_id" : ObjectId("5798c349d345404e7f9e0cee"), "status" : "ordered" }
{ "_id" : ObjectId("5798c34ad345404e7f9e0cef"), "status" : "ordered" }
{ "_id" : ObjectId("5798c356d345404e7f9e0cf0"), "status" : "arrived" }
{ "_id" : ObjectId("5798c357d345404e7f9e0cf1"), "status" : "arrived" }
{ "_id" : ObjectId("5798c358d345404e7f9e0cf2"), "status" : "arrived" }
{ "_id" : ObjectId("5798c35ad345404e7f9e0cf3"), "status" : "ordered" }
{ "_id" : ObjectId("5798c361d345404e7f9e0cf4"), "status" : "" }
{ "_id" : ObjectId("5798c362d345404e7f9e0cf5"), "status" : "" }
db.stackoverflow.count()
9
Hope it Helps!!

Related

How to count total of result aggregate mongodb returns?

My query use aggregate to summary all alert group by alert type, filter by houseId.
I can count only total alert of a group.
How I can count total alert of a house ( total of all group )
db.getCollection('alerts').aggregate([{
$match: {
houseId: ObjectId("609100a56ed9f8001351aee3")
}
}, {
$group: {
_id: '$type',
count: { $sum: 1 },
alerts: {
$push: {
_id: '$_id',
type: '$type'
}
}
}
}
])
Result:
[{
"_id" : "cool",
"count" : 1.0,
"alerts" : [
{
"_id" : ObjectId("61387e740dc3d853f1eee5b0"),
"type" : "cool"
}
]
}, {
"_id" : "hot",
"count" : 2.0,
"alerts" : [
{
"_id" : ObjectId("61387e740dc3d853f1eee5b0"),
"type" : "hot"
},
{
"_id" : ObjectId("61387e740dc3d853f1eee5b0"),
"type" : "hot"
}
]
}]
I expect a field total: 2 = cool alert + hot alert

Partition data around a match query during aggregation

What I have been trying to get my head around is to perform some kind of partitioning(split by predicate) in a mongo query. My current query looks like:
db.posts.aggregate([
{"$match": { $and:[ {$or:[{"toggled":false},{"toggled":true, "status":"INACTIVE"}]} , {"updatedAt":{$gte:1549786260000}} ] }},
{"$unwind" :"$interests"},
{"$group" : {"_id": {"iid": "$interests", "pid":"$publisher"}, "count": {"$sum" : 1}}},
{"$project":{ _id: 0, "iid": "$_id.iid", "pid": "$_id.pid", "count": 1 }}
])
This results in the following output:
{
"count" : 3.0,
"iid" : "INT456",
"pid" : "P789"
}
{
"count" : 2.0,
"iid" : "INT789",
"pid" : "P789"
}
{
"count" : 1.0,
"iid" : "INT123",
"pid" : "P789"
}
{
"count" : 1.0,
"iid" : "INT123",
"pid" : "P123"
}
All good so far, but then I had realized that for the documents that match the specific filter {"toggled":true, "status":"INACTIVE"}, I would rather decrement the count (-1). (considering the eventual value can be negative as well.)
Is there a way to somehow partition the data after match to make sure different grouping operations are performed for both the collection of documents?
Something that sounds similar to what I am looking for is
$mergeObjects, or maybe $reduce, but not much that I can relate from the documentation examples.
Note: I can sense, one straightforward way to deal with this would be to perform two queries, but I am looking for a single query to perform the operation.
Sample documents for the above output would be:
/* 1 */
{
"_id" : ObjectId("5d1f7******"),
"id" : "CON123",
"title" : "Game",
"content" : {},
"status" : "ACTIVE",
"toggle":false,
"publisher" : "P789",
"interests" : [
"INT456"
],
"updatedAt" : NumberLong(1582078628264)
}
/* 2 */
{
"_id" : ObjectId("5d1f8******"),
"id" : "CON456",
"title" : "Home",
"content" : {},
"status" : "INACTIVE",
"toggle":true,
"publisher" : "P789",
"interests" : [
"INT456",
"INT789"
],
"updatedAt" : NumberLong(1582078628264)
}
/* 3 */
{
"_id" : ObjectId("5d0e9******"),
"id" : "CON654",
"title" : "School",
"content" : {},
"status" : "ACTIVE",
"toggle":false,
"publisher" : "P789",
"interests" : [
"INT123",
"INT456",
"INT789"
],
"updatedAt" : NumberLong(1582078628264)
}
/* 4 */
{
"_id" : ObjectId("5d207*******"),
"id" : "CON789",
"title":"Stack",
"content" : { },
"status" : "ACTIVE",
"toggle":false,
"publisher" : "P123",
"interests" : [
"INT123"
],
"updatedAt" : NumberLong(1582078628264)
}
What I am looking forward to as a result though is
{
"count" : 1.0, (2-1)
"iid" : "INT456",
"pid" : "P789"
}
{
"count" : 0.0, (1-1)
"iid" : "INT789",
"pid" : "P789"
}
{
"count" : 1.0,
"iid" : "INT123",
"pid" : "P789"
}
{
"count" : 1.0,
"iid" : "INT123",
"pid" : "P123"
}
This aggregation gives the desired result.
db.posts.aggregate( [
{ $match: { updatedAt: { $gte: 1549786260000 } } },
{ $facet: {
FALSE: [
{ $match: { toggle: false } },
{ $unwind : "$interests" },
{ $group : { _id : { iid: "$interests", pid: "$publisher" }, count: { $sum : 1 } } },
],
TRUE: [
{ $match: { toggle: true, status: "INACTIVE" } },
{ $unwind : "$interests" },
{ $group : { _id : { iid: "$interests", pid: "$publisher" }, count: { $sum : -1 } } },
]
} },
{ $project: { result: { $concatArrays: [ "$FALSE", "$TRUE" ] } } },
{ $unwind: "$result" },
{ $replaceRoot: { newRoot: "$result" } },
{ $group : { _id : "$_id", count: { $sum : "$count" } } },
{ $project:{ _id: 0, iid: "$_id.iid", pid: "$_id.pid", count: 1 } }
] )
[ EDIT ADD ]
The output from the query using the input data from the question post:
{ "count" : 1, "iid" : "INT123", "pid" : "P789" }
{ "count" : 1, "iid" : "INT123", "pid" : "P123" }
{ "count" : 0, "iid" : "INT789", "pid" : "P789" }
{ "count" : 1, "iid" : "INT456", "pid" : "P789" }
[ EDIT ADD 2 ]
This query gets the same result with different approach (code):
db.posts.aggregate( [
{
$match: { updatedAt: { $gte: 1549786260000 } }
},
{
$unwind : "$interests"
},
{
$group : {
_id : {
iid: "$interests",
pid: "$publisher"
},
count: {
$sum: {
$switch: {
branches: [
{ case: { $eq: [ "$toggle", false ] },
then: 1 },
{ case: { $and: [ { $eq: [ "$toggle", true] }, { $eq: [ "$status", "INACTIVE" ] } ] },
then: -1 }
]
}
}
}
}
},
{
$project:{
_id: 0,
iid: "$_id.iid",
pid: "$_id.pid",
count: 1
}
}
] )
[ EDIT ADD 3 ]
NOTE:
The facet query runs the two facets (TRUE and FALSE) on the same set of documents; it is like two queries running in parallel. But, there is some duplication of code as well as additional stages for shaping the documents down the pipeline to get the desired output.
The second query avoids the code duplication, and there are much lesser stages in the aggregation pipeline. This will make difference when the input dataset has a large number of documents to process - in terms of performance. In general, lesser stages means lesser iterations of the documents (as a stage has to scan the documents which are output from the previous stage).

mongodb with groupby with multi filed count

MongoDB, I have a collection like this
/* 1 */
{
"_id" : ObjectId("5d02308e129aab55b3df814a"),
"title" : "fgdfg",
"user_id" : "5bc5dc03f6d24d29077dd362",
"click" : false,
"type_id" : "5d00a304430fee3160ac881f",
"type" : "user_notification",
"__v" : 0
}
/* 2 */
{
"_id" : ObjectId("5d02308e129aab55b3df8149"),
"title" : "fgdfg",
"user_id" : "5bc5dc03f6d24d29077dd362",
"click" : true,
"type_id" : "5d00a304430fee3160ac881f",
"type" : "user_notification",
"__v" : 0
}
/* 3 */
{
"_id" : ObjectId("5d02308e129aab55b3df8148"),
"title" : "fgdfg",
"user_id" : "5bc5dc03f6d24d29077dd362",
"click" : true,
"type_id" : "5d00a304430fee3160ac881f",
"type" : "user_notification",
"__v" : 0
}
I want with the "type_id" group by query and same query I want how many "click" field true and false of type_id.
Conclusion: I want total type_id count and total "click" field "true" count in each type id in a single query.
You can use MongoDB Group,
let query = [{
$group: {
_id: "$type_id",
"true": { $sum: { $cond: [{ $eq: ['$click', true] }, 1, 0] } },
"false": { $sum: { $cond: [{ $eq: ['$click', false] }, 1, 0] } },
},
}, {
$addFields: {
"addition": { $add: ["$true", "$false"] }
}
}]

Count of MongoDB aggregation match results

I'm working with a MongoDB collection that has a lot of duplicate keys. I regularly do aggregation queries to find out what those duplicates are, so that I can dig in and find out what is and isn't different about them.
Unfortunately the database is huge and duplicates are often intentional. What I'd like to do is to find the count of keys that have duplicates, instead of printing a result with thousands of lines of output. Is this possible?
(Side Note: I do all of my querying through the shell, so solutions that don't require external tools or a lot of code would be preferred, but I understand that's not always possible.)
Example Records:
{ "_id" : 1, "type" : "example", "key" : "111111", "value" : "abc" }
{ "_id" : 2, "type" : "example", "key" : "222222", "value" : "def" }
{ "_id" : 3, "type" : "example", "key" : "222222", "value" : "ghi" }
{ "_id" : 4, "type" : "example", "key" : "333333", "value" : "jkl" }
{ "_id" : 5, "type" : "example", "key" : "333333", "value" : "mno" }
{ "_id" : 6, "type" : "example", "key" : "333333", "value" : "pqr" }
{ "_id" : 7, "type" : "example", "key" : "444444", "value" : "stu" }
{ "_id" : 8, "type" : "example", "key" : "444444", "value" : "vwx" }
{ "_id" : 9, "type" : "example", "key" : "444444", "value" : "yz1" }
{ "_id" : 10, "type" : "example", "key" : "444444", "value" : "234" }
Here is the query that I've been using to find duplicates based on key:
db.collection.aggregate([
{
$match: {
type: "example"
}
},
{
$group: {
_id: "$key",
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
}
])
Which gives me an output of:
{
"_id": "222222",
"count": 2
},
{
"_id": "333333",
"count": 3
},
{
"_id": "444444",
"count": 4
}
The result I want to get instead:
3
You are almost there, just missing the last $count:
db.collection.aggregate([
{
$match: {
type: "example"
}
},
{
$group: {
_id: "$key",
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
},
{
$count: "count"
}
])
Akrion's answer seems to be correct, but I can't test it because we're on an older version of MongoDB. A coworker gave me an alternative solution that works on 3.2 (not sure about other versions).
Adding .toArray() will convert the results to an array, and you can then get the size of the array using .length.
db.collection.aggregate([
{
$match: {
type: "example"
}
},
{
$group: {
_id: "$key",
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
}
]).toArray().length

Mongodb aggregate by day and delete duplicate value

I'm trying to clean a huge database.
Sample DB :
{
"_id" : ObjectId("59fc5249d5ab401d99f3de7f"),
"addedAt" : ISODate("2017-11-03T11:26:01.744Z"),
"__v" : 0,
"check" : 17602,
"lastCheck" : ISODate("2018-04-05T11:47:00.609Z"),
"tracking" : [
{
"timeCheck" : ISODate("2017-11-06T13:17:20.861Z"),
"_id" : ObjectId("5a0060e00f3c330012bafe39"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:22:31.254Z"),
"_id" : ObjectId("5a0062170f3c330012bafe77"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:27:40.551Z"),
"_id" : ObjectId("5a00634c0f3c330012bafebe"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-06T13:32:41.084Z"),
"_id" : ObjectId("5a0064790f3c330012baff03"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff32"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-07T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff34"),
"rank" : 2379,
}]
}
I have a lot of duplicate value but I need to clean only by day.
To obtain this for example :
{
"_id" : ObjectId("59fc5249d5ab401d99f3de7f"),
"addedAt" : ISODate("2017-11-03T11:26:01.744Z"),
"__v" : 0,
"check" : 17602,
"lastCheck" : ISODate("2018-04-05T11:47:00.609Z"),
"tracking" : [
{
"timeCheck" : ISODate("2017-11-06T13:17:20.861Z"),
"_id" : ObjectId("5a0060e00f3c330012bafe39"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:27:40.551Z"),
"_id" : ObjectId("5a00634c0f3c330012bafebe"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-07T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff34"),
"rank" : 2379,
}]
}
How can I aggregate by day and after delete last value duplicate?
I need to keep the values per day even if they are identical with another day.
The aggregation framework cannot update data at this stage. However, you can use the following aggregation pipeline in order to get the desired output and then use e.g. a bulk replace to update all your documents:
db.collection.aggregate({
$unwind: "$tracking" // flatten the "tracking" array into separate documents
}, {
$sort: {
"tracking.timeCheck": 1 // sort by timeCheck to allow us to use the $first operator in the next stage reliably
}
}, {
$group: {
_id: { // group by
"_id": "$_id", // "_id" and
"rank": "$tracking.rank", // "rank" and
"date": { // the "date" part of the "timeCheck" field
$dateFromParts : {
year: { $year: "$tracking.timeCheck" },
month: { $month: "$tracking.timeCheck" },
day: { $dayOfWeek: "$tracking.timeCheck" }
}
}
},
"doc": { $first: "$$ROOT" } // only keep the first document per group
}
}, {
$sort: {
"doc.tracking.timeCheck": 1 // restore ascending sort order - may or may not be needed...
}
}, {
$group: {
_id: "$_id._id", // merge everything again per "_id"
"addedAt": { $first: "$doc.addedAt" },
"__v": { $first: "$doc.__v" },
"check": { $first: "$doc.check" },
"lastCheck": { $first: "$doc.lastCheck" },
"tracking": { $push: "$doc.tracking" } // in order to join the tracking values into an array again
}
})