Compare 2 count aggregations - mongodb

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the match aggregation incorrectly because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?

To compare two fields inside $match stage you need $expr which is available in MongoDB 3.6:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
$expr: { $ne: [ "$countStart", "$countEnd" ] }
}
}
])
If you're using older version of MongoDB you can use $redact:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$redact: {
$cond: { if: { $ne: [ "$countStart", "$countEnd" ] }, then: "$$KEEP", else: "$$PRUNE" }
}
}
])

You run do the following pipeline to get this - no need to use $expr or $redact or anything special really:
db.mycollection.aggregate({
$group: {
_id: {
"userid": "$userid",
"placementid": "$placementid"
},
"sum": {
$sum: {
$cond: {
if: { $eq: [ "$type", "start" ] },
then: 1, // +1 for start
else: -1 // -1 for anything else
}
}
}
}
}, {
$match: {
"sum": { $ne: 0 } // only return the non matching-up ones
}
})

Related

Need help to MongoDB aggregate $group state

I have a collection of 1000 documents like this:
{
"_id" : ObjectId("628b63d66a5951db6bb79905"),
"index" : 0,
"name" : "Aurelia Gonzales",
"isActive" : false,
"registered" : ISODate("2015-02-11T04:22:39.000+0000"),
"age" : 41,
"gender" : "female",
"eyeColor" : "green",
"favoriteFruit" : "banana",
"company" : {
"title" : "YURTURE",
"email" : "aureliagonzales#yurture.com",
"phone" : "+1 (940) 501-3963",
"location" : {
"country" : "USA",
"address" : "694 Hewes Street"
}
},
"tags" : [
"enim",
"id",
"velit",
"ad",
"consequat"
]
}
I want to group those by year and gender. Like In 2014 male registration 105 and female registration 131. And finally return documents like this:
{
_id:2014,
male:105,
female:131,
total:236
},
{
_id:2015,
male:136,
female:128,
total:264
}
I have tried till group by registered and gender like this:
db.persons.aggregate([
{ $group: { _id: { year: { $year: "$registered" }, gender: "$gender" }, total: { $sum: NumberInt(1) } } },
{ $sort: { "_id.year": 1,"_id.gender":1 } }
])
which is return document like this:
{
"_id" : {
"year" : 2014,
"gender" : "female"
},
"total" : 131
}
{
"_id" : {
"year" : 2014,
"gender" : "male"
},
"total" : 105
}
Please guide to figure out from this whole.
db.collection.aggregate([
{
"$group": { //Group things
"_id": "$_id.year",
"gender": {
"$addToSet": {
k: "$_id.gender",
v: "$total"
}
},
sum: { //Sum it
$sum: "$total"
}
}
},
{
"$project": {//Reshape it
g: {
"$arrayToObject": "$gender"
},
_id: 1,
sum: 1
}
},
{
"$project": { //Reshape it
_id: 1,
"g.female": 1,
"g.male": 1,
sum: 1
}
}
])
Play
Just add one more group stage to your aggregation pipeline, like this:
db.persons.aggregate([
{ $group: { _id: { year: { $year: "$registered" }, gender: "$gender" }, total: { $sum: NumberInt(1) } } },
{ $sort: { "_id.year": 1,"_id.gender":1 } },
{
$group: {
_id: "$_id.year",
male: {
$sum: {
$cond: {
if: {
$eq: [
"$_id.gender",
"male"
]
},
then: "$total",
else: 0
}
}
},
female: {
$sum: {
$cond: {
if: {
$eq: [
"$_id.gender",
"female"
]
},
then: "$total",
else: 0
}
}
},
total: {
$sum: "$total"
}
},
}
]);
Here's the working link. We are grouping by year in this last step, and calculating the counts for gender conditionally and the total is just the total of the counts irrespective of the gender.
Besides #Gibbs mentioned in the comment which proposes the solution with 2 $group stages,
You can achieve the result as below:
$group - Group by year of registered. Add gender value into genders array.
$sort - Order by _id.
$project - Decorate output documents.
3.1. male - Get the size of array from $filter the value of "male" in "genders" array.
3.2. female - Get the size of array from $filter the value of "female" in "genders" array.
3.3. total - Get the size of "genders" array.
Propose this method if you are expected to count and return the "male" and "female" gender fields.
db.collection.aggregate([
{
$group: {
_id: {
$year: "$registered"
},
genders: {
$push: "$gender"
}
}
},
{
$sort: {
"_id": 1
}
},
{
$project: {
_id: 1,
male: {
$size: {
$filter: {
input: "$genders",
cond: {
$eq: [
"$$this",
"male"
]
}
}
}
},
female: {
$size: {
$filter: {
input: "$genders",
cond: {
$eq: [
"$$this",
"female"
]
}
}
}
},
total: {
$size: "$genders"
}
}
}
])
Sample Mongo Playground

How to use aggregate in mongodb

I have data like this in mongodb
{
"_id" : 1,
"data" : "ARIN",
"status" : "CLOSED",
"createdDate" : Date("2020-02-16T17:32:28+07:00")
},
{
"_id" : 2,
"source" : "ARIN",
"status" : "NEW",
"createdDate" : Date("2020-02-16T17:32:28+07:00")
},
{
"_id" : 3,
"data" : "APNIC",
"status" : "ONPROGRESS",
"createdDate" : Date("2020-02-17T17:32:28+07:00")
},
{
"_id" : 4,
"data" : "RIPE",
"status" : "NEW",
"createdDate" : Date("2020-02-17T17:32:28+07:00")
}
I want to result like this
{
statusNew : 2,
statusOnProgress : 1,
statusClosed : 1
statusTicketClosedDate1602 : 1,
statusTicketNewdDate1602 : 1,
statusTicketOnProgressDate1602 : 0
}
I have try use group and cond in mongodb, but to no avail. How can I write this query?
You can use this query,
db.collection.aggregate([{
$group: {
_id: null,
statusNew: { $sum: { $cond: [{ "$eq": ["$status", "NEW"] }, 1, 0] } },
statusOnProgress: { $sum: { $cond: [{ "$eq": ["$status", "ONPROGRESS"] }, 1, 0] } },
statusClosed: { $sum: { $cond: [{ "$eq": ["$status", "CLOSED"] }, 1, 0] } },
statusTicketClosedDate1602: {
$sum: {
$cond: [{
$and: [{ "$eq": ["$status", "CLOSED"] },
{ "$gte": ["$createdDate", ISODate("2020-02-16T00:00:00Z")] },
{ "$lt": ["$createdDate", ISODate("2020-02-17T00:00:00Z")] }]
}, 1, 0]
}
},
statusTicketNewdDate1602: {
$sum: {
$cond: [{
$and: [{ "$eq": ["$status", "NEW"] },
{ "$gte": ["$createdDate", ISODate("2020-02-16T00:00:00Z")] },
{ "$lt": ["$createdDate", ISODate("2020-02-17T00:00:00Z")] }]
}, 1, 0]
}
},
statusTicketOnProgressDate1602: {
$sum: {
$cond: [{
$and: [{ "$eq": ["$status", "ONPROGRESS"] },
{ "$gte": ["$createdDate", ISODate("2020-02-16T00:00:00Z")] },
{ "$lt": ["$createdDate", ISODate("2020-02-17T00:00:00Z")] }]
}, 1, 0]
}
}
}
}])

$unwind, $aggregation manipulation in mongodb nodejs

please check this query
db.billsummaryofthedays.aggregate([
{
'$match': {
'userId': ObjectId('5e43de778b57693cd46859eb'),
'adminId': ObjectId('5e43e5cdc11f750864f46820'),
'date': ISODate("2020-02-11T16:30:00Z"),
}
},
{
$lookup:
{
from: "paymentreceivables",
let: { userId: '$userId', adminId: '$adminId' },
pipeline: [
{
$match:
{
paymentReceivedOnDate:ISODate("2020-02-11T16:30:00Z"),
$expr:
{
$and:
[
{ $eq: ["$userId", "$$userId"] },
{ $eq: ["$adminId", "$$adminId"] }
]
}
}
},
{ $project: { amount: 1, _id: 0 } }
],
as: "totalPayment"
}
}, {'$unwind':'$totalPayment'},
{ $group:
{ _id:
{ date: '$date',
userId: '$userId',
adminId: '$adminId' },
totalBill:
{
$sum: '$billOfTheDay'
},
totalPayment:
{
$sum: '$totalPayment.amount'
}
}
},
}
}])
this is the result i am getting in the shell
{
"_id" : {
"date" : ISODate("2020-02-11T18:30:00Z"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820")
},
"totalBill" : 1595.6799999999998,
"totalPayments" : 100
}
now this is not what i expected,i assume due to {'$unwind':'$totalPayment'} it takes out all the values from the array and because of which every document is getting counted 2 times. When i remove {'$unwind':'$totalPayment'} then totalBill sum turns out to be correct but totalPayment is 0.
I have tried several other ways but not able to achieve the desired result
Below are my collections:
// collection:billsummaryofthedays//
{
"_id" : ObjectId("5e54f784f4032c1694535c0e"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"date" : ISODate("2020-02-11T16:30:00Z"),
"UID":"acex01"
"billOfTheDay" : 468,
}
{
"_id" : ObjectId("5e54f784f4032c1694535c0f"),
"UID":"bdex02"
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"date" : ISODate("2020-02-11T16:30:00Z"),
"billOfTheDay" : 329.84,
}
// collection:paymentreceivables//
{
"_id" : ObjectId("5e43e73169fe1e3fc07eb7c5"),
"paymentReceivedOnDate" : ISODate("2020-02-11T16:30:00Z"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"amount" : 20,
}
{
"_id" : ObjectId("5e43e73b69fe1e3fc07eb7c6"),
"paymentReceivedOnDate" : ISODate("2020-02-11T16:30:00Z"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"amount" : 30,
}
desired result should be totalBill:797.83 i.e[468+329.84,] and totalPayment:50 i.e[30+20,] but i am getting double the expected result and even if i am able to calculate one of the value correctly the other one result 0.How to tackle this??
Since you've multiple documents with same data in billsummaryofthedays collection then you can group first & then do $lookup - that way JOIN between two collections would be 1-Vs-many rather than many-Vs-many as like it's currently written, So you can try below query for desired o/p & performance gains :
db.billsummaryofthedays.aggregate([
{
"$match": {
"userId": ObjectId("5e43de778b57693cd46859eb"),
"adminId": ObjectId("5e43e5cdc11f750864f46820"),
"date": ISODate("2020-02-11T16:30:00Z"),
}
},
{
$group: {
_id: {
date: "$date",
userId: "$userId",
adminId: "$adminId"
},
totalBill: {
$sum: "$billOfTheDay"
}
}
},
{
$lookup: {
from: "paymentreceivables",
let: {
userId: "$_id.userId",
adminId: "$_id.adminId"
},
pipeline: [
{
$match: {
paymentReceivedOnDate: ISODate("2020-02-11T16:30:00Z"),
$expr: {
$and: [
{
$eq: [
"$userId",
"$$userId"
]
},
{
$eq: [
"$adminId",
"$$adminId"
]
}
]
}
}
},
{
$project: {
amount: 1,
_id: 0
}
}
],
as: "totalPayment"
}
},
{
$addFields: {
totalPayment: {
$reduce: {
input: "$totalPayment",
initialValue: 0,
in: {
$add: [
"$$value",
"$$this.amount"
]
}
}
}
}
}
])
Test : MongoDB-Playground

Count Both Outer and Inner embedded array in a single query

{
_id: ObjectId("5dbdacc28cffef0b94580dbd"),
"comments" : [
{
"_id" : ObjectId("5dbdacc78cffef0b94580dbf"),
"replies" : [
{
"_id" : ObjectId("5dbdacd78cffef0b94580dc0")
},
]
},
]
}
How to count the number of element in comments and sum with number of relies
My approach is do 2 query like this:
1. total elements of replies
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{ $unwind: "$comments",},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
2. count total elements of comments
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
Then sum up both, do we have any better solution to write the query like return the sum of of total element comments + replies
You can use $reduce and $concatArrays to "merge" an inner "array of arrays" into a single list and measure the $size of that. Then simply $add the two results together:
db.posts.aggregate([
{ "$match": { _id:ObjectId("5dbdacc28cffef0b94580dbd") } },
{ "$addFields": {
"totalBoth": {
"$add": [
{ "$size": "$comments" },
{ "$size": {
"$reduce": {
"input": "$comments.replies",
"initialValue": [],
"in": {
"$concatArrays": [ "$$value", "$$this" ]
}
}
}}
]
}
}}
])
Noting that an "array of arrays" is the effect of an expression like $comments.replies, so hence the operation to make these into a single array where you can measure all elements.
Try using the $unwind to flatten the list you get from the $project before using $count.
This is another way of getting the result.
Input documents:
{ "_id" : 1, "array1" : [ { "array2" : [ { id: "This is a test!"}, { id: "test1" } ] }, { "array2" : [ { id: "This is 2222!"}, { id: "test 222" }, { id: "222222" } ] } ] }
{ "_id" : 2, "array1" : [ { "array2" : [ { id: "aaaa" }, { id: "bbbb" } ] } ] }
The query:
db.arrsizes2.aggregate( [
{ $facet: {
array1Sizes: [
{ $project: { array1Size: { $size: "$array1" } } }
],
array2Sizes: [
{ $unwind: "$array1" },
{ $project: { array2Size: { $size: "$array1.array2" } } },
],
} },
{ $project: { result: { $concatArrays: [ "$array1Sizes", "$array2Sizes" ] } } },
{ $unwind: "$result" },
{ $group: { _id: "$result._id", total1: { $sum: "$result.array1Size" }, total2: { $sum: "$result.array2Size" } } },
{ $addFields: { total: { $add: [ "$total1", "$total2" ] } } },
] )
The output:
{ "_id" : 2, "total1" : 1, "total2" : 2, "total" : 3 }
{ "_id" : 1, "total1" : 2, "total2" : 5, "total" : 7 }

Aggregate query in MongoDB

I'm trying to create some daily stats from a MongoDB table. The document contains messages that have a create-date, state (Warn, Error, Complete). I'd like to product a query that results in one record per - Date,Count of Warn, Count of Error, Count of Complete. I'm a newbie with Mongo and just learning the query language. I've tried aggregation with mixed results:
db.TransactionLogs.aggregate(
{ $group : {
_id : {
category: {$substr:["$startDate",0,10]},
term: "$Status",
},
total: { $sum : 2 }
}
})
results in multiple records per date by status:
"result" : [
{
"_id" : {
"category" : "2015-02-10",
"term" : "Completed",
},
"total" : 532
},
{
"_id" : {
"category" : "2015-02-10",
"term" : "Error",
},
"total" : 616
},
Message:
{ "_id" : "2ceda481-3dd3-480d-800d-95288edce6f2", "MID" : "02de5194-7a1d-4854-922c-934902840136", "Status" : "Completed", "firstName" : "Willy", "lastName" : "Wire", "allocation" : "100", "initEvent" : "Marriage", "system" : "Oracle", "startDate" : "2015-02-06T19:03:34.237Z", "stopDate" : "2015-02-06T19:23:34.237Z", "plan" : "445-A" }
I'm sure that its a lack of understanding of aggregation on my part. Any help or direction is greatly appreciated!
I figured it out. I needed to look at how to "pivot" in Mongo. This works:
db.TransactionLogs.aggregate([ { $project: { startdate: {$substr:["$startDate",0,10]},
cnt_e1: { $cond: [ { $eq: [ "$Status", "Error" ] }, "$count", 1 ] },
cnt_e2: { $cond: [ { $eq: [ "$Status", "Warning" ] }, "$count", 1 ] },
cnt_e3: { $cond: [ { $eq: [ "$Status", "Completed" ] }, "$count", 1 ] },
} },
{ $group: { _id: "$startdate", cnt_e1: { $sum: "$cnt_e1" }, cnt_e2: { $sum: "$cnt_e2" }, cnt_e3: { $sum: "$cnt_e3" } } },
{ $sort: { _id: 1 } },
Here's the code...
db.TransactionLogs.aggregate([ { $project: { startdate: {$substr:["$startDate",0,10]},
cnt_e1: { $cond: [ { $eq: [ "$Status", "Error" ] }, "$count", 1 ] },
cnt_e2: { $cond: [ { $eq: [ "$Status", "Warning" ] }, "$count", 1 ] },
cnt_e3: { $cond: [ { $eq: [ "$Status", "Completed" ] }, "$count", 1 ] },
} },
{ $group: { _id: "$startdate", cnt_e1: { $sum: "$cnt_e1" }, cnt_e2: { $sum: "$cnt_e2" }, cnt_e3: { $sum: "$cnt_e3" } } },
{ $sort: { _id: 1 } },