aggregate with unwind, how to limit per document and not globally? (mongodb) - mongodb

If I have a collection with 300 documents, each document has a array field called items (each item of the array is an object), something like this:
*DOCUMENT 1:*
_id: **********,
title: "test",
desc: "test desc",
items (array)
0: (object)
title: (string)
tags: (array of strings)
1: (object)
etc.
and I need to retrieve items by tags, what I'm using is this query below. I have to $limit results to something like 200 or the query is too big, the problem is if the first document has more than 200 items what it returns are only items of that document, what I'd need is to limit results PER document, for instance I'd need to retrieve 5 items for each different document where tags match ($all) tags provided.
const foundItems = await db.collection('store').aggregate([
{
$unwind: '$items'
},
{
$match: {
'items.tags': { $all : tagsArray }
}
},
{
$project: {
myitem: '$items',
desc: 1,
title: 1
}
},
{
$limit: 200
}
]).toArray()
to make it more clear and simple what I'd need in a ideal world would be something like:
{
$limit: 5,
$per: _id,
$totalLimit: 200
}
instead of $limit: 200 , is this achievable somehow? I didn't find any explanation about it in the official documentation.
What I tried is to add $sort right before $limit which would make sense if it had the behaviour I'm looking for put it that way and maybe not if placed AFTER the limit, but unfortunately it doesn't work that way and placed before or after the limit doesn't make any difference.
And I can't really use $sample since results are more than the 5%

Updated demo - https://mongoplayground.net/p/nM6T9XVa-XK
// Returns at most 2 matching items per source document, one output row per kept item.
db.collection.aggregate([
// One row per element of `items`.
{ $unwind: "$items" },
{
// Keep only items whose tags contain every listed tag.
$match: {
"items.tags": {
$all: [ "a","b" ]
}
}
},
{
// Regroup the surviving items by their source document, carrying desc/title along.
"$group": {
"_id": "$_id",
"myitem": { "$push": "$items" },
desc: { "$first": "$desc" },
title: { "$first": "$title" }
}
},
{
"$project": {
"_id": 1,
desc: 1,
title: 1,
// Per-document limit: keep only the first 2 collected items.
"myitem": { $slice: [ "$myitem", 2 ]
}
}
},
{
// Flatten again so each kept item becomes its own row.
$unwind: "$myitem"
}
])
Demo - https://mongoplayground.net/p/BESptnyUfSS
After matching the records, you can $group them by _id, then $project them and limit the items per document using $slice.
// Returns, per source document, an array holding at most 1 matching item.
// NOTE: the $group stage below emits only _id and myitem, so the desc/title
// fields projected before it are not part of the final output.
db.collection.aggregate([
// One row per element of `items`.
{ $unwind: "$items" },
{
// Keep only items whose tags contain every listed tag.
$match: {
"items.tags": { $all: [ "a", "b" ]
}
}
},
{
// Rename the unwound item to `myitem`.
$project: {
_id: 1, myitem: "$items", desc: 1,title: 1
}
},
{
// Collect the matching items back into one array per source document.
"$group": {
"_id": "$_id",
"myitem": { "$push": "$myitem" }
}
},
{
"$project": {
"_id": 1,
"myitem": {
$slice: [ "$myitem", 1 ] // limit records here per group / id
}
}
}
])

Related

How to remove duplicate objects with different parameters in an aggregation in mongo db

[
{ id:1,month:5,year:2020,text:"Completed" },
{ id:2,month:2,year:2021,text:"Pending" },
{ id:3,month:3,year:2020,text:"Completed" },
{ id:4,month:5,year:2020,text:"Pending" },
{ id:5,month:4,year:2022,text:"Pending" },
]
These are the documents in my collection. I need to remove the duplicate objects with the same year & month using aggregation in MongoDB, so that I get
[
{ id:1,month:5,year:2020,text:"Completed" },
{ id:2,month:2,year:2021,text:"Pending" },
{ id:3,month:3,year:2020,text:"Completed" },
{ id:5,month:4,year:2022,text:"Pending" },
]
Maybe something like this:
// Removes documents that share the same (month, year) pair, keeping one document
// per pair and copying every removed document into `backup` before deleting it.
db.collection.aggregate([
{
// Bucket documents by month/year; keep the full originals so they can be removed later.
$group: {
_id: {
month: "$month",
year: "$year"
},
cnt: {
$sum: 1
},
doc: {
$push: "$$ROOT"
}
}
},
{
// Only buckets holding more than one document contain duplicates.
$match: {
cnt: {
$gt: 1
}
}
},
{
$project: {
// Skip the first document of each bucket (the one that stays); the rest are duplicates.
docsTodelete: {
$slice: [
"$doc",
1,
{
"$size": "$doc"
}
]
}
}
},
{
// One row per duplicate document.
$unwind: "$docsTodelete"
}
]).forEach(function(doc){
// Back up the duplicate before removing it from the source collection.
db.backup.save(doc.docsTodelete);
// The filter must be an object literal, and the projected field is spelled
// `docsTodelete` (lower-case "d"), matching the $project stage above.
db.collection.remove({ _id: doc.docsTodelete._id });
})
explained:
Group the documents by month-year and push the originals to array doc
Match only the documents that have duplicates
Slice the documents array to leave 1x document in the collection
Unwind the array with documents to be removed
Do forEach loop to remove the duplicated documents from the collection and store the removed in backup collection just in case you have doubts later.

How to query an array and retrieve it from MongoDB

Updated:
I have a document on the database that looks like this:
My question is the following:
How can I retrieve the first 10 elements from the friendsArray from database and sort it descending or ascending based on the lastTimestamp value.
I don't want to download all values to my API and then sort them in Python because that is wasting my resources.
I have tried it using this code (Python):
listOfUsers = db.user_relations.find_one({'userId': '123'}, {'friendsArray' : {'$orderBy': {'lastTimestamp': 1}}}).limit(10)
but it just gives me this error pymongo.errors.OperationFailure: Unknown expression $orderBy
Any answer at this point would be really helpful! Thank You!
Use aggregate:
first $unwind the array,
then $sort by the timestamp,
$group by _id to rebuild the sorted array,
and use $addFields with $filter to keep the first 10 items of the array.
// Returns the matched user's friendsArray sorted by lastTimeStamp (ascending),
// trimmed to its first 10 elements.
db.collection.aggregate([
{ $match:{userId:"123"}},
{
// One row per friendsArray element so the elements can be sorted.
"$unwind": "$friendsArray"
},
{
$sort: {
"friendsArray.lastTimeStamp": 1
}
},
{
// Rebuild the array per document from the sorted rows.
$group: {
_id: "$_id",
friendsArray: {
$push: "$friendsArray"
}
},
},
{
$addFields: {
friendsArray: {
$filter: {
input: "$friendsArray",
as: "z",
// Keep an element only when its position in the array is below 10.
cond: {
$lt: [
{
$indexOfArray: [
"$friendsArray",
"$$z"
]
},
10
]
}// 10 is the number of leading items to keep (positions 0-9)
}
}
},
}
])
https://mongoplayground.net/p/2Usk5sRY2L2
and for pagination use this
// Returns the second page (zero-based positions 10..19) of the matched user's
// friendsArray, sorted by lastTimeStamp (ascending).
db.collection.aggregate([
{ $match:{userId:"123"}},
{
// One row per friendsArray element so the elements can be sorted.
"$unwind": "$friendsArray"
},
{
$sort: {
"friendsArray.lastTimeStamp": 1
}
},
{
// Rebuild the array per document from the sorted rows.
$group: {
_id: "$_id",
friendsArray: {
$push: "$friendsArray"
}
},
},
{
$addFields: {
friendsArray: {
$filter: {
input: "$friendsArray",
as: "z",
cond: {
$and: [
{
// $gte (not $gt): position 10 is the first element of this page; with $gt it
// would be skipped, since the previous page ends at position 9.
$gte: [
{
$indexOfArray: [
"$friendsArray",
"$$z"
]
},
10
]
},
{
$lt: [
{
$indexOfArray: [
"$friendsArray",
"$$z"
]
},
20
]
},
]
}// keeps positions 10..19; change both bounds together to move to another page
}
}
},
}
])
The translation of your find to an aggregation (we need $unwind, which is why aggregation is used) would be like the query below.
Test code here
Query (for descending replace 1 with -1)
// Returns up to 10 friendsArray elements of the matched user as top-level documents,
// ordered by lastTimeStamp ascending.
db.collection.aggregate([
{
"$match": {
"userId": "123"
}
},
{
// One row per friendsArray element.
"$unwind": {
"path": "$friendsArray"
}
},
{
"$sort": {
"friendsArray.lastTimeStamp": 1
}
},
{
// Keep only the first 10 sorted rows.
"$limit": 10
},
{
// Promote the array element itself to be the output document.
"$replaceRoot": {
"newRoot": "$friendsArray"
}
}
])
If you want to skip some before limit add one stage also
{
"$skip" : 10
}
To take the 10-20 messages for example.

Mongodb find maximum based on nested object key

I have below schema where I need to identify the object which has highest rank.
{ "team" : {
"member1" : [ { "rank": 2, "goal": 50 } ],
"member2" : [ { "rank": 5, "goal": 30 } ],
"member3" : [ { "rank": 1, "goal": 80 } ]
}}
$unwind will not work on the nested objects. Tried to convert this object as Array and tried to find the max of rank key. Any help would be appreciated.
If the intent is to only find the maximum rank that exists: The idea is a two stage aggregation query using $project and using $objectToArray to have common keys from which $max on required attribute can be applied.
Query: playground link
// Computes, per document, the highest `rank` found across the members of `team`.
db.collection.aggregate([
{
$project: {
// Turn the `team` object into an array of { k, v } pairs so its members share common keys.
teamsData: {
$objectToArray: "$team"
}
}
},
{
$project: {
// Take the maximum over the rank values collected from every member.
maxRank: {
$max: "$teamsData.v.rank"
}
}
}
]);
To get the object details that has the maximum rank: Use $unwind on the array projected from the previous stage to help in sorting by rank with $sort, and then pick the first item with $first at the $group stage.
Query: playground link
// Returns the single team member entry (as a { k, v } pair) that has the highest rank.
db.collection.aggregate([
{
$project: {
// Turn the `team` object into an array of { k, v } pairs.
team: {
$objectToArray: "$team"
}
}
},
{
// One row per team member.
$unwind: "$team"
},
{
// Highest rank first.
$sort: {
"team.v.rank": -1
}
},
{
// Collapse everything into one group and keep the first (highest-ranked) row.
$group: {
_id: null,
maxRankObj: {
$first: "$$ROOT"
}
}
}
]);
Sample O/P:
[
{
"_id": null,
"maxRankObj": {
"_id": ObjectId("5a934e000102030405000000"),
"team": {
"k": "member2",
"v": [
{
"goal": 30,
"rank": 5
}
]
}
}
}
]

How to know that aggregated group has previous/next values?

Suppose I have the following aggregation pipeline:
db.getCollection('posts').aggregate([
{ $match: { _id: { $gt: "some id" }, tag: 'some tag' } },
{ $limit: 5 },
{ $group: { _id: null, hasNextPage: {??}, hasPreviousPage: {??} } }
])
As a result $match and $limit stages would result in a subset of all the posts with a tag some tag. How can I know that there're posts before and after my subSet?
One of the possible ways, I guess, is to have expression (with $let) inside hasPreviousPage and hasNextPage that would search for one post with _id less than "some id" and greater than $last: "$_id" respectively. But I'm not sure how I can reference my group as a variable in $let. Also, maybe there're some other more effective ways.
You can use below aggregation:
// Returns one page of posts for a tag together with boolean hasPreviousPage / hasNextPage flags.
db.posts.aggregate([
{ $match: { tag: 'some tag' } },
// Sort so that the page boundaries are deterministic.
{ $sort: { _id: 1 } },
{
$facet: {
// The requested page: up to 5 posts after the cursor id.
data: [
{ $match: { _id: { $gt: 'some id' } } },
{ $limit: 5 }
],
// Counts the posts at or before the cursor id.
hasPreviousPage: [
{ $match: { _id: { $lte: 'some id' } } },
{ $count: "totalPrev" }
],
// Counts whether at least one post exists beyond the current page of 5.
hasNextPage: [
{ $match: { _id: { $gt: 'some id' } } },
{ $skip: 5 },
{ $limit: 1 }, // just to check if there's any element
{ $count: "totalNext" }
]
}
},
{
// Turn the one-element facet array into a nested document; keep the row even when the array is empty.
$unwind: { path: "$hasPreviousPage", preserveNullAndEmptyArrays: true }
},
{
$unwind: { path: "$hasNextPage", preserveNullAndEmptyArrays: true }
},
{
$project: {
data: 1,
// Convert the counts into booleans.
hasPreviousPage: { $gt: [ "$hasPreviousPage.totalPrev", 0 ] },
hasNextPage: { $gt: [ "$hasNextPage.totalNext", 0 ] }
}
}
])
To apply any paging you have to $sort your collection to get results in deterministic order. On a set that's sorted and filtered by tag you can run $facet which allows you to apply multiple subaggregations. Pipelines that are representing previous and nextPage can be ended with $count. Every subaggregation in $facet will return an array so we can run $unwind to get nested document instead of array for hasPreviousPage and hasNextPage. Option preserveNullAndEmptyArrays is required here cause otherwise MongoDB will remove whole document from aggregation pipeline if there are no prev / next documents. In the last step we can just convert subaggregations to boolean values.

total of all groups totals using mongodb

i did this Aggregate pipeline , and i want add a field contains the Global Total of all groups total.
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: {
_id:"$_id",
value:"$value",
transaction:"$transaction",
paymentMethod:"$paymentMethod",
createdAt:"$createdAt",
...
}
},
count:{$sum:1},
total:{$sum:"$value"}
}}
{
//i want to get
...project groups , goupsTotal , groupsCount
}
,{
"$skip":cursor.skip
},{
"$limit":cursor.limit
},
])
You need to use $facet (available from MongoDB 3.4) to apply multiple pipelines on the same set of docs:
first pipeline: skip and limit docs
second pipeline: calculate total of all groups
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: "$$CURRENT"
},
count:{$sum:1},
total:{$sum:"$value"}
}
},
{
$facet: {
docs: [
{ $skip:cursor.skip },
{ $limit:cursor.limit }
],
overall: [
{$group: {
_id: null,
groupsTotal: {$sum: '$total'},
groupsCount:{ $sum: '$count'}
}
}
]
}
the final output will be
{
docs: [ .... ], // array of {_id, items, count, total}
overall: [ { } ] // array holding one object with properties groupsTotal, groupsCount
}
PS: I've replaced the items in the third pipe stage with $$CURRENT which adds the whole document for the sake of simplicity, if you need custom properties then specify them.
i did it in this way , project the $group result in new field doc and $sum the sub totals.
// Pipeline stages appended after the per-payer $group: wrap each group in `doc`,
// then collapse all groups into one document carrying the global total.
{
$project: {
// Nest each group's fields under `doc` so the whole group can be pushed as one value.
"doc": {
"_id": "$_id",
"total": "$total",
"items":"$items",
"count":"$count"
}
}
},{
$group: {
"_id": null,
// Sum of every group's subtotal.
"globalTotal": {
$sum: "$doc.total"
},
// All groups collected into a single array.
"result": {
$push: "$doc"
}
}
},
{
$project: {
"result": 1,
//paging "result": {$slice: [ "$result", cursor.skip,cursor.limit ] },
"_id": 0,
"globalTotal": 1
}
}
the output
[
{
globalTotal: 121500,
result: [ [group1], [group2], [group3], ... ]
}
]