Count both outer and inner embedded arrays in a single query - mongodb

{
_id: ObjectId("5dbdacc28cffef0b94580dbd"),
"comments" : [
{
"_id" : ObjectId("5dbdacc78cffef0b94580dbf"),
"replies" : [
{
"_id" : ObjectId("5dbdacd78cffef0b94580dc0")
},
]
},
]
}
How can I count the number of elements in comments and sum it with the number of replies?
My approach is to do 2 queries like this:
1. count total elements of replies
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{ $unwind: "$comments",},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
2. count total elements of comments
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{$project:{total:{$size:"$comments"} , _id: 0} }
])
Then I sum up both results. Is there a better way to write the query so that it returns the sum of the total number of comments and replies?

You can use $reduce and $concatArrays to "merge" an inner "array of arrays" into a single list and measure the $size of that. Then simply $add the two results together:
db.posts.aggregate([
{ "$match": { _id:ObjectId("5dbdacc28cffef0b94580dbd") } },
{ "$addFields": {
"totalBoth": {
"$add": [
{ "$size": "$comments" },
{ "$size": {
"$reduce": {
"input": "$comments.replies",
"initialValue": [],
"in": {
"$concatArrays": [ "$$value", "$$this" ]
}
}
}}
]
}
}}
])
Note that an "array of arrays" is the effect of an expression like $comments.replies, hence the $reduce operation to merge these into a single array whose elements can all be measured.
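For the sample document above, the expression "$comments.replies" evaluates to an array of arrays, roughly:
[ [ { "_id" : ObjectId("5dbdacd78cffef0b94580dc0") } ] ]
one inner array per comment, which $reduce flattens into a single list before $size counts its elements.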

Try using $unwind to flatten the list you get from the $project before using $count.
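One way to read that suggestion, as a minimal sketch that counts the replies alone (assuming the posts collection from the question; the comments count would still need its own stage or query):
db.posts.aggregate([
  { $match: { _id: ObjectId("5dbdacc28cffef0b94580dbd") } },
  { $unwind: "$comments" },
  { $unwind: "$comments.replies" },
  { $count: "totalReplies" }
])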

This is another way of getting the result.
Input documents:
{ "_id" : 1, "array1" : [ { "array2" : [ { id: "This is a test!"}, { id: "test1" } ] }, { "array2" : [ { id: "This is 2222!"}, { id: "test 222" }, { id: "222222" } ] } ] }
{ "_id" : 2, "array1" : [ { "array2" : [ { id: "aaaa" }, { id: "bbbb" } ] } ] }
The query:
db.arrsizes2.aggregate( [
{ $facet: {
array1Sizes: [
{ $project: { array1Size: { $size: "$array1" } } }
],
array2Sizes: [
{ $unwind: "$array1" },
{ $project: { array2Size: { $size: "$array1.array2" } } },
],
} },
{ $project: { result: { $concatArrays: [ "$array1Sizes", "$array2Sizes" ] } } },
{ $unwind: "$result" },
{ $group: { _id: "$result._id", total1: { $sum: "$result.array1Size" }, total2: { $sum: "$result.array2Size" } } },
{ $addFields: { total: { $add: [ "$total1", "$total2" ] } } },
] )
The output:
{ "_id" : 2, "total1" : 1, "total2" : 2, "total" : 3 }
{ "_id" : 1, "total1" : 2, "total2" : 5, "total" : 7 }

Related

In MongoDB aggregation pipeline, how to project indices of embedded array that matched?

In a mongodb aggregation pipeline, I want to $project the indices of an embedded array (a sub-document) that matches a previous $match stage.
Say, I have the following docs.
{_id: '1', tags: ['aaa', 'bbb', 'ccc']},
{_id: '2', tags: ['baa', 'aaa', 'aaa']},
{_id: '3', tags: ['aac', 'cbb', 'aca']},
Now, if I query with {tags: 'aaa'}, I want to output something similar to
{_id: '1', tags: [0]},
{_id: '2', tags: [1, 2]}
db.inventory.aggregate([
{ $match : {tags : 'aaa' }},
{ $unwind : { path: "$tags", includeArrayIndex: "arrayIndex"}},
{ $match : {tags : 'aaa' }},
{ $group : {
_id : '$_id',
tags : { $push : '$arrayIndex'}
}
}
])
Output:
{ "_id" : "2", "tags" : [ NumberLong(1), NumberLong(2) ] }
{ "_id" : "1", "tags" : [ NumberLong(0) ] }
Another way:
db.inventory.aggregate([
{ $match : {tags : 'aaa' }},
{ $project : {
tags: {
$filter: {
input: {
$zip: {
inputs: [ "$tags", { $range: [0, { $size: "$tags" }] } ]
}
},
as: "tagWithIndex",
cond: {
$let: {
vars: {
tag : { $arrayElemAt: [ "$$tagWithIndex", 0 ] }
},
in: { $eq: [ "$$tag", 'aaa' ] }
}
}
}
}
}},
{ $unwind : '$tags'},
{ $group : {
_id : '$_id',
tags : {
$push : { $arrayElemAt: [ "$tags", 1]}
}
}
}
])
Output:
{ "_id" : "2", "tags" : [ 1, 2 ] }
{ "_id" : "1", "tags" : [ 0 ] }
Hope this helps.
You need to $map over a $range built from the $size of the tags array to pair each element with its index, and then use $filter to keep only the elements whose value equals 'aaa':
db.collection.aggregate([
{ "$match": { "tags": "aaa" }},
{ "$project": {
"tags": {
"$filter": {
"input": {
"$map": {
"input": { "$range": [0, { "$size": "$tags" }] },
"in": {
"string": { "$arrayElemAt": ["$tags", "$$this"] },
"index": "$$this"
}
}
},
"cond": { "$eq": ["$$this.string", "aaa"] }
}
}
}},
{ "$project": { "tags": "$tags.index" }}
])
Output
[
{
"_id": "1",
"tags": [0]
},
{
"_id": "2",
"tags": [1, 2]
}
]
If you're searching an array, you should use $in.
db.inventory.find( { tags: { $in: [ 'aaa' ] } } )
You can also write the same condition in a $match stage; the syntax is the same.
See the documentation below for more detail; this should be what you're looking for.
Source: https://docs.mongodb.com/manual/reference/operator/query/in/
db.inventory.find( { "tags": { $in: 'aaa' } },
{ "tags.$": 1 } )
This is probably what you want.

MongoDB , getting the minimum & maximum of array subset

I am trying to find a way to get the minimum number of orders between
2019-03-17 and 2019-03-19, excluding 2019-03-15 from the results.
{
"_id" : ObjectId("5c8ffdadde62bf097d54ec47"),
"productId" : "32886845998",
"orders" : [
{
"date" : ISODate("2019-03-15T00:00:00.000+0000"),
"orders" : NumberInt(9)
},
{
"date" : ISODate("2019-03-17T00:00:00.000+0000"),
"orders" : NumberInt(21)
},
{
"date" : ISODate("2019-03-18T00:00:00.000+0000"),
"orders" : NumberInt(20)
},
{
"date" : ISODate("2019-03-19T00:00:00.000+0000"),
"orders" : NumberInt(30)
}
]
}
I tried using the $min and $max operators but that didn't help, because it iterated through the full array to find the maximum & minimum:
db.products.aggregate([
{
$project: {
maximum: {
$reduce: {
input: "$orders",
initialValue: 0,
in: {
$max: [
"$$value",
{
$cond: [
{ $gte: [ "$$this.date", ISODate("2019-03-17T00:00:00.000+0000") ] },
"$$this.orders",
0
]
}
]
}
}
}
}
}
])
You can use $filter to filter by orders.date and then apply $min and $max to the filtered set:
db.col.aggregate([
{
$project: {
filteredOrders: {
$filter: {
input: "$orders",
cond: {
$and: [
{ $gte: [ "$$this.date", ISODate("2019-03-17T00:00:00.000+0000") ] },
{ $lte: [ "$$this.date", ISODate("2019-03-19T00:00:00.000+0000") ] },
]
}
}
}
}
},
{
$project: {
min: { $min: "$filteredOrders.orders" },
max: { $max: "$filteredOrders.orders" },
}
}
])
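For the sample document above, the filter keeps the orders from 2019-03-17 through 2019-03-19 (21, 20 and 30), so the output should look roughly like:
{ "_id" : ObjectId("5c8ffdadde62bf097d54ec47"), "min" : 20, "max" : 30 }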

Compare 2 count aggregations

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the $match stage incorrectly, because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?
To compare two fields inside a $match stage you need $expr, which is available in MongoDB 3.6:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
$expr: { $ne: [ "$countStart", "$countEnd" ] }
}
}
])
If you're using an older version of MongoDB you can use $redact:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$redact: {
$cond: { if: { $ne: [ "$countStart", "$countEnd" ] }, then: "$$KEEP", else: "$$PRUNE" }
}
}
])
You can run the following pipeline to get this - no need to use $expr or $redact or anything special really:
db.mycollection.aggregate({
$group: {
_id: {
"userid": "$userid",
"placementid": "$placementid"
},
"sum": {
$sum: {
$cond: {
if: { $eq: [ "$type", "start" ] },
then: 1, // +1 for start
else: -1 // -1 for anything else
}
}
}
}
}, {
$match: {
"sum": { $ne: 0 } // only return the non matching-up ones
}
})
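For the sample documents above, only the placement whose starts and ends don't match up should come back, roughly:
{ "_id" : { "userid" : "101", "placementid" : 3 }, "sum" : 1 }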

MongoDb - Pop array element based on if condition

I am trying to update my mongo database, which has the following structure.
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"values" : [
{
"date" : "2018-01-22",
"Price" : "1289.4075"
},
{
"date" : "2018-01-22",
"Price" : "1289.4075"
},
{
"date" : "2015-05-18",
"Price" : 1289.41
}
],
"Code" : 123456,
"schemeStatus" : "Inactive"
}
I want to compare the first 2 array elements' date values, i.e. values[0].date and values[1].date. If both match, I want to delete values[0] so that there is only 1 entry with that date.
You can use the aggregation framework's pipeline with $out as the last stage to update your collection:
db.collection.aggregate([
{
$addFields: {
sameDate: {
$let: {
vars: {
fst: { $arrayElemAt: [ "$values", 0 ] },
snd: { $arrayElemAt: [ "$values", 1 ] }
},
in: { $cond: { if: { $eq: [ "$$fst.date", "$$snd.date" ] }, then: 1, else: 0 } }
}
}
}
},
{
$project: {
_id: 1,
values : { $cond: { if: { $eq: [ "$sameDate", 0 ] }, then: "$values", else: { $slice: [ "$values", 1, { $size: "$values" } ] } } },
Code: 1,
schemeStatus: 1
}
},
{ $out: "collection" }
])
Some more important operators used here:
$cond to handle if-else logic
$let to define some helper variables
$arrayElemAt to get the first and second elements
$slice to drop the first element
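For the sample document above, the first two dates match ("2018-01-22"), so after the pipeline writes back with $out the document should look roughly like:
{
  "_id" : ObjectId("5a64d076bfd103df081967ae"),
  "values" : [
    { "date" : "2018-01-22", "Price" : "1289.4075" },
    { "date" : "2015-05-18", "Price" : 1289.41 }
  ],
  "Code" : 123456,
  "schemeStatus" : "Inactive"
}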

Intersection of several arrays

I have some documents with an array property, items.
I want to get the intersection between n documents.
db.things.insert({name:"A", items:[1,2,3,4,5]})
db.things.insert({name:"B", items:[2,4,6,8]})
db.things.insert({name:"C", items:[1,2]})
db.things.insert({name:"D", items:[5,6]})
db.things.insert({name:"E", items:[9,10]})
db.things.insert({name:"F", items:[1,5]})
Data:
{ "_id" : ObjectId("57974a0d356baff265710a1c"), "name" : "A", "items" : [ 1, 2, 3, 4, 5 ] },
{ "_id" : ObjectId("57974a0d356baff265710a1d"), "name" : "B", "items" : [ 2, 4, 6, 8 ] },
{ "_id" : ObjectId("57974a0d356baff265710a1e"), "name" : "C", "items" : [ 1, 2 ] },
{ "_id" : ObjectId("57974a0d356baff265710a1f"), "name" : "D", "items" : [ 5, 6 ] },
{ "_id" : ObjectId("57974a0d356baff265710a20"), "name" : "E", "items" : [ 9, 10 ] },
{ "_id" : ObjectId("57974a1a356baff265710a21"), "name" : "F", "items" : [ 1, 5 ] }
For example:
things.name.A intersect things.name.C intersect things.name.F:
[ 1, 2, 3, 4, 5 ] intersect [ 1, 2 ] intersect [ 1, 5 ]
Must be: [1]
I think it's doable using $setIntersection but I can't find the way.
I can do it with two documents, but how do I do it with more?
db.things.aggregate({$match:{"name":{$in:["A", "F"]}}},
{$group:{_id:null, "setA":{$first:"$items"}, "setF":{$last:"$items"} } },
{
"$project": {
"set1": 1,
"set2": 1,
"commonToBoth": { "$setIntersection": [ "$setA", "$setF" ] },
"_id": 0
}
}
)
{ "commonToBoth" : [ 5, 1 ] }
A solution which is not specific to the number of input items could look like this:
db.things.aggregate(
{
$match: {
"name": {
$in: ["A", "F"]
}
}
},
{
$group: {
_id: "$items",
count: {
$sum: 1
}
}
},
{
$group: {
_id: null,
totalCount: {
$sum: "$count"
},
items: {
$push: "$_id"
}
}
},
{
$unwind: {
path: "$items"
}
},
{
$unwind: {
path: "$items"
}
},
{
$group: {
_id: "$items",
totalCount: {
$first: "$totalCount"
},
count: {
$sum: 1
}
}
},
{
$project: {
_id: 1,
presentInAllDocs: {
$eq: ["$totalCount", "$count"]
}
}
},
{
$match: {
presentInAllDocs: true
}
},
{
$group: {
_id: null,
items: {
$push: "$_id"
}
}
}
)
which will output this:
{
"_id" : null,
"items" : [
5,
1
]
}
Of course you can add a last $project stage to bring the result into the desired shape.
Explanation
The basic idea behind this is that when we count the number of documents and we count the number of occurrences of each item, then the items with a count equal to the total document count appeared in each document and are therefore in the intersection result.
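As a quick check against the sample data: matching names A and F gives 2 documents in total; items 1 and 5 each occur in both (count 2 = total count 2) and survive, while 2, 3 and 4 occur only once and are dropped, which matches the [5, 1] intersection shown earlier.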
This idea has one important assumption: your items arrays have no duplicates in it (i.e. they are sets). If this assumption is wrong, then you would have to insert an additional stage at the beginning of the pipeline to turn the arrays into sets.
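A minimal sketch of such a stage, assuming you just want to deduplicate each items array in place ($setUnion of an array with an empty array yields its distinct elements; order is not preserved, which doesn't matter here since only counts are used):
{ $addFields: { items: { $setUnion: [ "$items", [] ] } } }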
One could also build this pipeline in a different and probably shorter way but I tried to keep the resource usage as low as possible and therefore added possibly unnecessary (from the functional point of view) stages. For example, the second stage groups by the items array as my assumption is that there are far fewer different values/arrays than documents so the rest of the pipeline has to work with a fraction of the initial document count. However, from the functional point of view, we just need the total count of documents and therefore we could skip that stage and just make a $group stage counting all documents and pushing them into an array for later usage - which of course is a big hit for memory consumption as we have now an array of all possible documents.
If you are using MongoDB 3.2, you could use $arrayElemAt to specify all arguments of $setIntersection:
db.things.aggregate([{
$match: {
"name": {
$in: ["A", "B", "C"]
}
}
}, {
$group: {
_id: 0,
elements: {
$push: "$items"
}
}
}, {
$project: {
intersect: {
$setIntersection: [{
"$arrayElemAt": ["$elements", 0]
}, {
"$arrayElemAt": ["$elements", 1]
}, {
"$arrayElemAt": ["$elements", 2]
}]
},
}
}]);
You would have to dynamically add the required number of objects with an index, such as:
{
"$arrayElemAt": ["$elements", <index>]
}
The number of entries should match the number of names in your input filter ["A", "B", "C"].
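As a rough sketch (not part of the original answer), you could build that argument list in the shell before running the pipeline; names here is a hypothetical variable holding the names you match on:
var names = ["A", "B", "C"];
var intersectArgs = names.map(function (name, index) {
  // one { "$arrayElemAt": [ "$elements", <index> ] } entry per matched name
  return { "$arrayElemAt": [ "$elements", index ] };
});
// then use it as: { $project: { intersect: { $setIntersection: intersectArgs } } }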
If you want to deal with duplicates (some names are present multiple times), regroup all your items by name, $unwind twice, and $addToSet to merge all arrays for a specific name before executing the previous aggregation:
db.things.aggregate([{
$match: {
"name": {
$in: ["A", "B", "C"]
}
}
}, {
$group: {
_id: "$name",
"items": {
"$push": "$items"
}
}
}, {
"$unwind": "$items"
}, {
"$unwind": "$items"
}, {
$group: {
_id: "$_id",
items: {
$addToSet: "$items"
}
}
}, {
$group: {
_id: 0,
elements: {
$push: "$items"
}
}
}, {
$project: {
intersect: {
$setIntersection: [{
"$arrayElemAt": ["$elements", 0]
}, {
"$arrayElemAt": ["$elements", 1]
}, {
"$arrayElemAt": ["$elements", 2]
}]
},
}
}]);
It isn't a clean solution, but it works.