MongoDB. Aggregate the sum of two arrays sizes - mongodb

With MongoDB 3.4.10 and mongoose 4.13.6 I'm able to count sizes of two arrays on the User model:
User.aggregate()
.project({
'_id': 1,
'leftVotesCount': { '$size': '$leftVoted' },
'rightVotesCount': { '$size': '$rightVoted' }
})
where my Users are (per db.users.find())
{ "_id" : ObjectId("5a2b21e63023c6117085c240"), "rightVoted" : [ 2 ],
"leftVoted" : [ 1, 6 ] }
{ "_id" : ObjectId("5a2c0d68efde3416bc8b7020"), "rightVoted" : [ 2 ],
"leftVoted" : [ 1 ] }
Here I'm getting expected result:
[ { _id: '5a2b21e63023c6117085c240', leftVotesCount: 2, rightVotesCount: 1 },
{ _id: '5a2c0d68efde3416bc8b7020', leftVotesCount: 1, rightVotesCount: 1 } ]
Question. How can I get a cumulative value of leftVotesCount and rightVotesCount data? I tried folowing:
User.aggregate()
.project({
'_id': 1,
'leftVotesCount': { '$size': '$leftVoted' },
'rightVotesCount': { '$size': '$rightVoted' },
'votesCount': { '$add': [ '$leftVotesCount', '$rightVotesCount' ] },
'votesCount2': { '$sum': [ '$leftVotesCount', '$rightVotesCount' ] }
})
But votesCount is null and votesCount2 is 0 for both users. I'm expecting votesCount = 3 for User 1 and votesCount = 2 for User 2.

$leftVotesCount, $rightVotesCount become available only on the next stage. Try something like:
User.aggregate()
.project({
'_id': 1,
'leftVotesCount': { '$size': '$leftVoted' },
'rightVotesCount': { '$size': '$rightVoted' }
})
.project({
'_id': 1,
'leftVotesCount': 1,
'rightVotesCount': 1
'votesCount': { '$add': [ '$leftVotesCount', '$rightVotesCount' ] },
'votesCount2': { '$sum': [ '$leftVotesCount', '$rightVotesCount' ] }
})

You can't reference the project variables created in the same project stage.
You can wrap the variables in a $let expression.
User.aggregate().project({
"$let": {
"vars": {
"leftVotesCount": {
"$size": "$leftVoted"
},
"rightVotesCount": {
"$size": "$rightVoted"
}
},
"in": {
"votesCount": {
"$add": [
"$$leftVotesCount",
"$$rightVotesCount"
]
},
"leftVotesCount": "$$leftVotesCount",
"rightVotesCount": "$$rightVotesCount"
}
}
})

It turned out that $add supports nested expressions, so I was able to solve the issue by excluding intermediate variables:
User.aggregate().project({
'_id': 1,
'votesCount': { '$add': [ { '$size': '$leftVoted' }, { '$size': '$rightVoted' } ] }
});
// [ {_id: '...', votesCount: 3}, {_id: '...', votesCount: 2} ]

Related

MongoDB - Update a parent array field using another child array field

I've a collection like this
db.aa1.insertMany([
{ parentArr: [] },
{ parentArr: [
{childArr: [ {childField: 2}, {childField: 4} ]}
] },
{ parentArr: [
{childArr: []}
] },
{ parentArr: [
{childArr: [ {childField: 3}, {childField: 5} ]}
] },
])
Now I want the end result to be like
[
{ parentArr: [] },
{ parentArr: [ { childArr: [] } ] },
{ parentArr: [
{
childArr: [ {childField: 2}, {childField: 4} ],
parentField: [2, 4]
},
] },
{ parentArr: [
{
childArr: [ {childField: 3}, {childField: 5} ],
parentField: [3, 5]
}
] },
]
Here I've copied the childArr.childField values in the parentArr.parentField.
Now in plain JS, I could do something like this
parentArr.forEach(p => p.parentField = p.childArr ? p.childArr.map(c => c.childField) : [])
How can I achieve this using a MongoDB Query?
I've tried the following $push $set combinations, of course, one at a time.
For the example sake, I've written all push and set together.
db.myCollection.update(
{
"parentArr.childArr.0": {$exists: true}
},
{
$set: {"parentArr.$[].parentField": ["$parentArr.$[].childArr.$[].childField"]}
$set: {"parentArr.parentField": ["$parentArr.childArr.childField"]}
$push: {
"parentArr.$[].parentField": {$each: ["$parentArr.$[].childArr.$[].childField"]}
}
$push: {
"parentArr.parentField": {$each: ["$parentArr.childArr.childField"]}
}
},
{
upsert: true,
multi: true
}
)
If you're using Mongo version 4.2+ they have introduced pipeline'd updates meaning we now have more power when updating:
db.aa1.updateMany(
{
"parentArr.childArr.childField": {$exists: true}
},
[
{
$set: {
"parentArr.parentField": {
$reduce: {
input: {
$map: {
input: "$parentArr",
as: "parent",
in: {
$map: {
input: "$$parent.childArr",
as: "child",
in: "$$child.childField"
}
}
}
},
initialValue: [],
in: {$setUnion: ["$$value", "$$this"]}
}
}
}
}
]
)
If you're on an older Mongo version then you'll have to do it in code, as you already posted a relevant snippet I have no more to add.

Count Both Outer and Inner embedded array in a single query

{
_id: ObjectId("5dbdacc28cffef0b94580dbd"),
"comments" : [
{
"_id" : ObjectId("5dbdacc78cffef0b94580dbf"),
"replies" : [
{
"_id" : ObjectId("5dbdacd78cffef0b94580dc0")
},
]
},
]
}
How to count the number of element in comments and sum with number of relies
My approach is do 2 query like this:
1. total elements of replies
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{ $unwind: "$comments",},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
2. count total elements of comments
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
Then sum up both, do we have any better solution to write the query like return the sum of of total element comments + replies
You can use $reduce and $concatArrays to "merge" an inner "array of arrays" into a single list and measure the $size of that. Then simply $add the two results together:
db.posts.aggregate([
{ "$match": { _id:ObjectId("5dbdacc28cffef0b94580dbd") } },
{ "$addFields": {
"totalBoth": {
"$add": [
{ "$size": "$comments" },
{ "$size": {
"$reduce": {
"input": "$comments.replies",
"initialValue": [],
"in": {
"$concatArrays": [ "$$value", "$$this" ]
}
}
}}
]
}
}}
])
Noting that an "array of arrays" is the effect of an expression like $comments.replies, so hence the operation to make these into a single array where you can measure all elements.
Try using the $unwind to flatten the list you get from the $project before using $count.
This is another way of getting the result.
Input documents:
{ "_id" : 1, "array1" : [ { "array2" : [ { id: "This is a test!"}, { id: "test1" } ] }, { "array2" : [ { id: "This is 2222!"}, { id: "test 222" }, { id: "222222" } ] } ] }
{ "_id" : 2, "array1" : [ { "array2" : [ { id: "aaaa" }, { id: "bbbb" } ] } ] }
The query:
db.arrsizes2.aggregate( [
{ $facet: {
array1Sizes: [
{ $project: { array1Size: { $size: "$array1" } } }
],
array2Sizes: [
{ $unwind: "$array1" },
{ $project: { array2Size: { $size: "$array1.array2" } } },
],
} },
{ $project: { result: { $concatArrays: [ "$array1Sizes", "$array2Sizes" ] } } },
{ $unwind: "$result" },
{ $group: { _id: "$result._id", total1: { $sum: "$result.array1Size" }, total2: { $sum: "$result.array2Size" } } },
{ $addFields: { total: { $add: [ "$total1", "$total2" ] } } },
] )
The output:
{ "_id" : 2, "total1" : 1, "total2" : 2, "total" : 3 }
{ "_id" : 1, "total1" : 2, "total2" : 5, "total" : 7 }

Compare 2 count aggregations

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the match aggregation incorrectly because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?
To compare two fields inside $match stage you need $expr which is available in MongoDB 3.6:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
$expr: { $ne: [ "$countStart", "$countEnd" ] }
}
}
])
If you're using older version of MongoDB you can use $redact:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$redact: {
$cond: { if: { $ne: [ "$countStart", "$countEnd" ] }, then: "$$KEEP", else: "$$PRUNE" }
}
}
])
You run do the following pipeline to get this - no need to use $expr or $redact or anything special really:
db.mycollection.aggregate({
$group: {
_id: {
"userid": "$userid",
"placementid": "$placementid"
},
"sum": {
$sum: {
$cond: {
if: { $eq: [ "$type", "start" ] },
then: 1, // +1 for start
else: -1 // -1 for anything else
}
}
}
}
}, {
$match: {
"sum": { $ne: 0 } // only return the non matching-up ones
}
})

aggregate operation coming as null in mongodb

Inventors
.aggregate([{
$match: filter
},
{
$group: {
"_id": {
"store_id": "$store_id"
},
stockAmount: {
$sum: {
$multiply: ["$intProductQty", "$dblMRP"]
}
},
storeValue: {
$sum: "$intProductQty"
},
}
},
])
.exec(function(err, stock) {
return res.send(stock);
});
schema
{
"store_id" : "BST000433",
"strProductCode" : "9000000064775",
"dblMRP" : 25,
"intProductQty" : 1,
}
I initailized these fields(intProductQty, dblMRP, strPurchasePrice) as integer. But when I execute above command, I'm getting that three values(stockAmount, purchaseAmount, storeValue) as null.
If it is still possible that some of those values are not set, you could check if they are null with $ifNull and set them to 0 for those calculations in a $project step after the $match:
$project: {
intProductQty: { $ifNull: [ "$intProductQty", 0 ] },
dblMRP: { $ifNull: [ "$dblMRP", 0 ] },
strPurchasePrice: { $ifNull: [ "$strPurchasePrice", 0 ] }
},
Also, I guess it's not your case, but you could filter out those that are not numeric with $type:
$match: {
intProductQty: { $type: "number" },
dblMRP: { $type: "number" },
strPurchasePrice: { $type: "number" }
},

Mongo projection field that field not same

there
My question may be confuse. Let's me explain further more.
I have document for aggregation like this.
{
"metric" : "user_act",
"stream_id" : "f00001",
"values" : {
"likes" : 57,
"comments" : 0,
"shares" : 0
}
}
{
"metric" : "user_act",
"stream_id" : "f00002",
"values" : {
"likes" : 28,
"comments" : 0,
"shares" : 1
}
}
{
"metric" : "user_act",
"stream_id" : "t00001",
"values" : {
"favorites" : 5,
"retweets" : 15
}
}
I would like to calculate engagement by sum likes, comments and shares together. So before calculating, I have to projection data before grouping. I would like to mapping values.favorites to likes, values.retweets to shares and comments if don't have any data set default to 0.
I try projection like below but does not work because value of second line of $ifNull override the first line.
{
$project: {
"stream_id" : 1,
"shares": {
$ifNull: ["$values.retweets",0],
$ifNull: ["$values.shares", 0]
} ,
"likes": {
$ifNull: ["$values.favorites",0],
$ifNull: ["$values.likes", 0]
} ,
"comments": {
$ifNull: ["$values.replys",0],
$ifNull: ["$values.comments", 0]
}
}
}
Anyone any idea? Thank you in advanced.
[Update]
I try to projection like this but not work in case: how can I check the field exists?
{
$project: {
"stream_id" : 1,
"shares": {
$switch: {
branches: [
{ case: {$ne: ["$values.retweets", 0]},
then: {$ifNull: ["$values.retweets", "$values.retweets"]}
},
{ case: {$ne: ["$values.shares", 0]},
then: {$ifNull: ["$values.shares", "$values.shares"]}
}
],
default: 0
}
}
}
}
You can try $cond projection.
db.collection.aggregate({
$project: {
"stream_id" : 1,
"shares": { $cond: [ { $eq:[ { $ifNull: [ "$values.shares", 0 ] }, 0 ] },{ $ifNull: [ "$values.retweets", 0 ] }, "$values.shares" ] },
"likes": { $cond: [ { $eq:[ { $ifNull: [ "$values.likes", 0 ] }, 0 ] }, { $ifNull: [ "$values.favorites", 0 ] }, "$values.likes" ] },
"comments": { $cond: [ { $eq:[ { $ifNull: [ "$values.comments", 0] }, 0 ] }, { $ifNull: [ "$values.replys", 0 ] }, "$values.comments" ] }
}})
Using $switch
db.collection.aggregate([{
$project: {
"stream_id" : 1,
"shares": {
$switch: {
branches: [
{ case: { $ifNull: [ "$values.shares", false ] },
then: "$values.shares"
},
{ case: { $ifNull: [ "$values.retweets", false ] },
then: "$values.retweets"
}],
default: 0
}
}
}
}])