mongodb aggregate multiple arrays - mongodb

I am using MongoDB version v3.4. I have a documents collection and sample datas are like this:
{
"mlVoters" : [
{"email" : "a#b.com", "isApproved" : false}
],
"egVoters" : [
{"email" : "a#b.com", "isApproved" : false},
{"email" : "c#d.com", "isApproved" : true}
]
},{
"mlVoters" : [
{"email" : "a#b.com", "isApproved" : false},
{"email" : "e#f.com", "isApproved" : true}
],
"egVoters" : [
{"email" : "e#f.com", "isApproved" : true}
]
}
Now if i want the count of distinct email addresses for mlVoters:
db.documents.aggregate([
{$project: { mlVoters: 1 } },
{$unwind: "$mlVoters" },
{$group: { _id: "$mlVoters.email", mlCount: { $sum: 1 } }},
{$project: { _id: 0, email: "$_id", mlCount: 1 } },
{$sort: { mlCount: -1 } }
])
Result of the query is:
{"mlCount" : 2.0,"email" : "a#b.com"}
{"mlCount" : 1.0,"email" : "e#f.com"}
And if i want the count of distinct email addresses for egVoters i do the same for egVoters field. And the result of that query would be:
{"egCount" : 1.0,"email" : "a#b.com"}
{"egCount" : 1.0,"email" : "c#d.com"}
{"egCount" : 1.0,"email" : "e#f.com"}
So, I want to combine these two aggregation and get the result as following (sorted by totalCount):
{"email" : "a#b.com", "mlCount" : 2, "egCount" : 1, "totalCount":3}
{"email" : "e#f.com", "mlCount" : 1, "egCount" : 1, "totalCount":2}
{"email" : "c#d.com", "mlCount" : 0, "egCount" : 1, "totalCount":1}
How can I do this? How should the query be like? Thanks.

First you add a field voteType in each vote. This field indicates its type. Having this field, you don't need to keep the votes in two separate arrays mlVoters and egVoters; you can instead concatenate those arrays into a single array per document, and unwind afterwards.
At this point you have one document per vote, with a field that indicates which type it is. Now you simply need to group by email and, in the group stage, perform two conditional sums to count how many votes of each type there are for every email.
Finally you add a field totalCount as the sum of the other two counts.
db.documents.aggregate([
{
$addFields: {
mlVoters: {
$ifNull: [ "$mlVoters", []]
},
egVoters: {
$ifNull: [ "$egVoters", []]
}
}
},
{
$addFields: {
"mlVoters.voteType": "ml",
"egVoters.voteType": "eg"
}
},
{
$project: {
voters: { $concatArrays: ["$mlVoters", "$egVoters"] }
}
},
{
$unwind: "$voters"
},
{
$project: {
email: "$voters.email",
voteType: "$voters.voteType"
}
},
{
$group: {
_id: "$email",
mlCount: {
$sum: {
$cond: {
"if": { $eq: ["$voteType", "ml"] },
"then": 1,
"else": 0
}
}
},
egCount: {
$sum: {
$cond: {
"if": { $eq: ["$voteType", "eg"] },
"then": 1,
"else": 0
}
}
}
}
},
{
$addFields: {
totalCount: {
$sum: ["$mlCount", "$egCount"]
}
}
}
])

Related

Mongodb aggregation count by nested object key

db.artists.insertMany([
{ "_id" : 1, "achievements" : {"third_record":true, "second_record": true} },
{ "_id" : 3, "achievements" : {"sixth_record":true, "second_record": true} },
{ "_id" : 2, "achievements" : {"first_record":true, "fifth_record": true} },
{ "_id" : 4, "achievements" : {"first_record":true, "second_record": true} },
])
I would like to count how many first_record, second_record, etc achievements have been obtained, I don't know beforehand the names of the achievements. I just want it to count all the achievements matched in the first stage. How do I use aggregation to count this? I saw another question suggest using unwind but that seems to be for arrays only and not objects?
May be this:
db.collection.aggregate([
{
$project: {
as: {
$objectToArray: "$achievements"
}
}
},
{
$unwind: "$as"
},
{
$group: {
_id: "$as.k",
number: {
$sum: {
"$cond": [
{
$eq: [
"$as.v",
true
]
},
1,
0
]
}
}
}
}
])
Idea
convert object to array
unwind to get them separate
group by id, adding 1 for true, 0 for false.

$unwind, $aggregation manipulation in mongodb nodejs

please check this query
db.billsummaryofthedays.aggregate([
{
'$match': {
'userId': ObjectId('5e43de778b57693cd46859eb'),
'adminId': ObjectId('5e43e5cdc11f750864f46820'),
'date': ISODate("2020-02-11T16:30:00Z"),
}
},
{
$lookup:
{
from: "paymentreceivables",
let: { userId: '$userId', adminId: '$adminId' },
pipeline: [
{
$match:
{
paymentReceivedOnDate:ISODate("2020-02-11T16:30:00Z"),
$expr:
{
$and:
[
{ $eq: ["$userId", "$$userId"] },
{ $eq: ["$adminId", "$$adminId"] }
]
}
}
},
{ $project: { amount: 1, _id: 0 } }
],
as: "totalPayment"
}
}, {'$unwind':'$totalPayment'},
{ $group:
{ _id:
{ date: '$date',
userId: '$userId',
adminId: '$adminId' },
totalBill:
{
$sum: '$billOfTheDay'
},
totalPayment:
{
$sum: '$totalPayment.amount'
}
}
},
}
}])
this is the result i am getting in the shell
{
"_id" : {
"date" : ISODate("2020-02-11T18:30:00Z"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820")
},
"totalBill" : 1595.6799999999998,
"totalPayments" : 100
}
now this is not what i expected,i assume due to {'$unwind':'$totalPayment'} it takes out all the values from the array and because of which every document is getting counted 2 times. When i remove {'$unwind':'$totalPayment'} then totalBill sum turns out to be correct but totalPayment is 0.
I have tried several other ways but not able to achieve the desired result
Below are my collections:
// collection:billsummaryofthedays//
{
"_id" : ObjectId("5e54f784f4032c1694535c0e"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"date" : ISODate("2020-02-11T16:30:00Z"),
"UID":"acex01"
"billOfTheDay" : 468,
}
{
"_id" : ObjectId("5e54f784f4032c1694535c0f"),
"UID":"bdex02"
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"date" : ISODate("2020-02-11T16:30:00Z"),
"billOfTheDay" : 329.84,
}
// collection:paymentreceivables//
{
"_id" : ObjectId("5e43e73169fe1e3fc07eb7c5"),
"paymentReceivedOnDate" : ISODate("2020-02-11T16:30:00Z"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"amount" : 20,
}
{
"_id" : ObjectId("5e43e73b69fe1e3fc07eb7c6"),
"paymentReceivedOnDate" : ISODate("2020-02-11T16:30:00Z"),
"adminId" : ObjectId("5e43e5cdc11f750864f46820"),
"userId" : ObjectId("5e43de778b57693cd46859eb"),
"amount" : 30,
}
desired result should be totalBill:797.83 i.e[468+329.84,] and totalPayment:50 i.e[30+20,] but i am getting double the expected result and even if i am able to calculate one of the value correctly the other one result 0.How to tackle this??
Since you've multiple documents with same data in billsummaryofthedays collection then you can group first & then do $lookup - that way JOIN between two collections would be 1-Vs-many rather than many-Vs-many as like it's currently written, So you can try below query for desired o/p & performance gains :
db.billsummaryofthedays.aggregate([
{
"$match": {
"userId": ObjectId("5e43de778b57693cd46859eb"),
"adminId": ObjectId("5e43e5cdc11f750864f46820"),
"date": ISODate("2020-02-11T16:30:00Z"),
}
},
{
$group: {
_id: {
date: "$date",
userId: "$userId",
adminId: "$adminId"
},
totalBill: {
$sum: "$billOfTheDay"
}
}
},
{
$lookup: {
from: "paymentreceivables",
let: {
userId: "$_id.userId",
adminId: "$_id.adminId"
},
pipeline: [
{
$match: {
paymentReceivedOnDate: ISODate("2020-02-11T16:30:00Z"),
$expr: {
$and: [
{
$eq: [
"$userId",
"$$userId"
]
},
{
$eq: [
"$adminId",
"$$adminId"
]
}
]
}
}
},
{
$project: {
amount: 1,
_id: 0
}
}
],
as: "totalPayment"
}
},
{
$addFields: {
totalPayment: {
$reduce: {
input: "$totalPayment",
initialValue: 0,
in: {
$add: [
"$$value",
"$$this.amount"
]
}
}
}
}
}
])
Test : MongoDB-Playground

How can i count total documents and also grouped counts simultanously in mongodb aggregation?

I have a dataset in mongodb collection named visitorsSession like
{ip : 192.2.1.1,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.3.1.8,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.5.1.4,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.8.1.7,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.1.1.3,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'}
I am using this mongodb aggregation
[{$match: {
nsp : "/hrm.sbtjapan.com",
creationDate : {
$gte: "2019-12-15T00:00:00.359Z",
$lte: "2019-12-20T23:00:00.359Z"
},
type : "Visitors"
}}, {$group: {
_id : "$country",
totalSessions : {
$sum: 1
}
}}, {$project: {
_id : 0,
country : "$_id",
totalSessions : 1
}}, {$sort: {
country: -1
}}]
using above aggregation i am getting results like this
[{country : 'US',totalSessions : 3},{country : 'UK',totalSessions : 2}]
But i also total visitors also along with result like totalVisitors : 5
How can i do this in mongodb aggregation ?
You can use $facet aggregation stage to calculate total visitors as well as visitors by country in a single pass:
db.visitorsSession.aggregate( [
{
$match: {
nsp : "/hrm.sbtjapan.com",
creationDate : {
$gte: "2019-12-15T00:00:00.359Z",
$lte: "2019-12-20T23:00:00.359Z"
},
type : "Visitors"
}
},
{
$facet: {
totalVisitors: [
{
$count: "count"
}
],
countrySessions: [
{
$group: {
_id : "$country",
sessions : { $sum: 1 }
}
},
{
$project: {
country: "$_id",
_id: 0,
sessions: 1
}
}
],
}
},
{
$addFields: {
totalVisitors: { $arrayElemAt: [ "$totalVisitors.count" , 0 ] },
}
}
] )
The output:
{
"totalVisitors" : 5,
"countrySessions" : [
{
"sessions" : 2,
"country" : "UK"
},
{
"sessions" : 3,
"country" : "US"
}
]
}
You could be better off with two queries to do this.
To save the two db round trips following aggregation can be used which IMO is kinda verbose (and might be little expensive if documents are very large) to just count the documents.
Idea: Is to have a $group at the top to count documents and preserve the original documents using $push and $$ROOT. And then before other matches/filter ops $unwind the created array of original docs.
db.collection.aggregate([
{
$group: {
_id: null,
docsCount: {
$sum: 1
},
originals: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$originals"
},
{ $match: "..." }, //and other stages on `originals` which contains the source documents
{
$group: {
_id: "$originals.country",
totalSessions: {
$sum: 1
},
totalVisitors: {
$first: "$docsCount"
}
}
}
]);
Sample O/P: Playground Link
[
{
"_id": "UK",
"totalSessions": 2,
"totalVisitors": 5
},
{
"_id": "US",
"totalSessions": 3,
"totalVisitors": 5
}
]

Compare 2 count aggregations

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the match aggregation incorrectly because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?
To compare two fields inside $match stage you need $expr which is available in MongoDB 3.6:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
$expr: { $ne: [ "$countStart", "$countEnd" ] }
}
}
])
If you're using older version of MongoDB you can use $redact:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$redact: {
$cond: { if: { $ne: [ "$countStart", "$countEnd" ] }, then: "$$KEEP", else: "$$PRUNE" }
}
}
])
You run do the following pipeline to get this - no need to use $expr or $redact or anything special really:
db.mycollection.aggregate({
$group: {
_id: {
"userid": "$userid",
"placementid": "$placementid"
},
"sum": {
$sum: {
$cond: {
if: { $eq: [ "$type", "start" ] },
then: 1, // +1 for start
else: -1 // -1 for anything else
}
}
}
}
}, {
$match: {
"sum": { $ne: 0 } // only return the non matching-up ones
}
})

using mongo aggregation how to replace the fields names [duplicate]

I have large collection of documents which represent some kind of events. Collection contains events for different userId.
{
"_id" : ObjectId("57fd7d00e4b011cafdb90d22"),
"userId" : "123123123",
"userType" : "mobile",
"event_type" : "clicked_ok",
"country" : "US",
"timestamp" : ISODate("2016-10-12T00:00:00.308Z")
}
{
"_id" : ObjectId("57fd7d00e4b011cafdb90d22"),
"userId" : "123123123",
"userType" : "mobile",
"event_type" : "clicked_cancel",
"country" : "US",
"timestamp" : ISODate("2016-10-12T00:00:00.308Z")
}
At midnight I need to run aggregation for all documents for the previous day. Documents need to aggregated in the way so I could get number of different events for particular userId.
{
"userId" : "123123123",
"userType" : "mobile",
"country" : "US",
"clicked_ok" : 23,
"send_message" : 14,
"clicked_cancel" : 100,
"date" : "2016-11-24",
}
During aggregation I need to perform two things:
calculate number of events for particular userId
add "date" text fields with date
Any help is greatly appreciated! :)
you can do this with aggregation like this :
db.user.aggregate([
{
$match:{
$and:[
{
timestamp:{
$gte: ISODate("2016-10-12T00:00:00.000Z")
}
},
{
timestamp:{
$lt: ISODate("2016-10-13T00:00:00.000Z")
}
}
]
}
},
{
$group:{
_id:"$userId",
timestamp:{
$first:"$timestamp"
},
send_message:{
$sum:{
$cond:[
{
$eq:[
"$event_type",
"send_message"
]
},
1,
0
]
}
},
clicked_cancel:{
$sum:{
$cond:[
{
$eq:[
"$event_type",
"clicked_cancel"
]
},
1,
0
]
}
},
clicked_ok:{
$sum:{
$cond:[
{
$eq:[
"$event_type",
"clicked_ok"
]
},
1,
0
]
}
}
}
},
{
$project:{
date:{
$dateToString:{
format:"%Y-%m-%d",
date:"$timestamp"
}
},
userId:1,
clicked_cancel:1,
send_message:1,
clicked_ok:1
}
}
])
explanation:
keep only document for a specific day in $match stage
group doc by userId and count occurrences for each event in $group stage
finally format the timestamp field into yyyy_MM-dd format in $project stage
for the data you provided, this will output
{
"_id":"123123123",
"send_message":0,
"clicked_cancel":1,
"clicked_ok":1,
"date":"2016-10-12"
}
Check the following query
db.sandbox.aggregate([{
$group: {
_id: {
userId: "$userId",
date: {
$dateToString: { format: "%Y-%m-%d", date: "$timestamp" }}
},
send_message: {
$sum: {
$cond: { if: { $eq: ["$event_type", "send_message"] }, then: 1, else: 0 } }
},
clicked_cancel: {
$sum: {
$cond: { if: { $eq: ["$event_type", "clicked_cancel"] }, then: 1, else: 0 }
}
},
clicked_ok: {
$sum: {
$cond: { if: { $eq: ["$event_type", "clicked_ok"] }, then: 1, else: 0 }
}
}
}
}])