Get MongoDB (aggregate) matched document total before pagination plus results? - mongodb

I'm aware there are a few questions very similar to this, but I haven't been able to find one which clearly outlines how to keep both the total count after match (in this case geoNear), but before skip/limit operations, as well as the skip/limit results.
My current aggregation pipeline is:
[ { '$geoNear':
{ near: [Object],
maxDistance: 4000,
distanceField: 'distance',
spherical: true,
query: {} } },
{ '$sort': { distance: 1 } },
{ '$skip': 0 },
{ '$limit': 20 } ]
Ideally I'd want something like this returned:
{
total: 123,
results: [<results respecting $skip & $limit>]
}

You can do :
1 $group to sum result of previous $geonear match and push the $$ROOT document to keep the record
1 $unwind necessary to remove the array
your $skip
your $limit
1 last $group to format the final result with only the sum amount and the JSON array
Query is :
db.coll.aggregate([{
$geoNear: {
near: [Object],
maxDistance: 4000,
distanceField: 'distance',
spherical: true,
query: {}
}
}, {
$sort: {
distance: 1
}
}, {
$group: {
_id: 0,
count: {
$sum: 1
},
document: {
$push: "$$ROOT"
}
}
}, {
$unwind: "$document"
}, {
$skip: 0
}, {
$limit: 20
}, {
$group: {
_id: 0,
total: {
$first: "$count"
},
results: {
$push: "$document"
}
}
}])
You can use $slice instead of $skip and $limit and preserveNullAndEmptyArrays for the $unwind part to assure an empty result array :
db.coll.aggregate([{
$geoNear: {
near: [Object],
maxDistance: 4000,
distanceField: 'distance',
spherical: true,
query: {}
}
}, {
$sort: {
distance: 1
}
}, {
$group: {
_id: 0,
count: {
$sum: 1
},
document: {
$push: "$$ROOT"
}
}
}, {
$project: {
count: 1,
document: { $slice: ["$document", 0, 20] }
}
}, {
$unwind: { path: "$document", preserveNullAndEmptyArrays: true }
}, {
$group: {
_id: 0,
total: {
$first: "$count"
},
results: {
$push: "$document"
}
}
}])

Related

How to $sum by index of sub-array

How to $sum by index of sub-array
the code did not work.
db.getCollection("test").aggregate([
{ $unwind: "$asks" },
{
$groups: {
_id: 0,
total: { $sum: { $arrayElemAt: ["$asks", 1] } },
count: { $sum: 1 },
},
},
]);
Thank you so much
There is a typo in your code. Change $groups to $group
Convert the string value to a double value and then calculate sum
Based on your image, you should use index 0, not index 1
Test Here
db.collection.aggregate([
{
$unwind: "$asks"
},
{
$group: {
_id: 0,
total: {
$sum: {
"$toDouble": {
"$arrayElemAt": [ "$asks", 0 ]
}
},
},
},
}
])

Mongo query on last sorted array item

Would like to query the following to obtain all item documents such that the last sale (ordered by soldDate) has a status of 2.
db.items.insertMany([
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 }
]
},
{ item: 2,
sales: [
{ soldDate: ISODate("2021-09-29"), status: 3 },
{ soldDate: ISODate("2021-09-24"), status: 1 }
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 },
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
]);
So in this example, the query would return the following two documents:
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 } // triggered by this
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 }, // triggered by this
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
Thanks for any help.
You stated: ordered by soldDate which can actually mean two things. Perhaps you want the documents sorted by the array, or perhaps you mean the array is sorted. I assumed the later.
Solution (Array sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": 1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": 1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 }
]
}
]
But, to be complete here is a solution if you want the documents sorted (and the array not necessarily sorted).
Solution (Documents sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $sort: { "sales.soldDate": 1} }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $sort: { "sales.soldDate": 1} }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
}
]
EDIT - After re-reading I believe you want only where the record having a status of 2 is also has the greatest date in the array
Solution (Only last having status of value 2 - docs and array unsorted)
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": -1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
... { $match : { "sales.0.status" : 2 } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 }
]
}
]
EDIT - Add Self Referencing Lookup
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } },
{ $lookup : {
from: "items",
localField: "_id",
foreignField: "_id",
as: "results"
}
},
{ $unwind: "$results" },
{ $replaceRoot: { "newRoot": "$results" } }
])
With the self-referencing lookup we are treating MongoDB as a relational database. We find the documents that meet our requirements, but in doing so we have destroyed the original shape and content. By performing a lookup on the same records we can restore the shape but at a performance penalty.
Retain Copy
Rather than performing a lookup, which has a performance concern, a different approach is to leverage memory on the server. Keep a copy of the original while moving through the pipeline and manipulating the original to identify desired records...
db.items.aggregate([
{ $addFields: { "_original": "$$ROOT" } },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "_original": { $first: "$_original" }, "sales_status": { $push: "$sales.status" } } },
{ $match : { "sales_status.0" : 2 } },
{ $replaceRoot: { "newRoot": "$_original" } }
])
In this example we keep a copy of the original in the field _original then once we have identified the records we want we pivot the root back to _original. This may put pressure on the WiredTiger cache as we are keeping a duplicate of all selected records in memory during the execution of the pipeline. A $lookup approach also has this memory concern. Two queries would eliminate the cache pressure issues, but behaves like a $lookup and would not perform as well.

MongoDB to return formatted object when no results can be found

I have the following stage in my MongoDB aggregation pipeline that returns the qty and sum of sales, which works fine:
{
$lookup: {
from: 'sales',
let: { part: '$_id' },
pipeline: [
{ $match: { $and: [{ $expr: { $eq: ['$partner', '$$part'] } }] } },
{ $group: { _id: null, qty: { $sum: 1 }, soldFor: { $sum: '$soldFor' } } },
{ $project: { _id: 0, qty: 1, soldFor: 1 } }],
as: 'sales'}},
{ $unwind: { path: '$sales', preserveNullAndEmptyArrays: true } },
{ $project: { _id: 1, sales: 1 }
}
However, if there are no sales, then the $project projection returns an empty sales object, but what I'd really like is it to return a completed object, but with 0 - like this:
{
sales: {
qty: 0,
soldFor: 0
}
}
You can use $cond operator here
{
"$project": {
"_id": 1,
"sales": {
"$cond": [
{ "$eq": [{ "$size": "$sales" }, 0] },
{
"sales": {
"qty": 0,
"soldFor": 0
}
},
"$sales"
]
}
}
}

Mongodb Aggregate - Count fields that equals value in array, but keep both arrays

I need to calculate the percentage of finalized/total items. The problem I have is calculating how many fields in the array equal to 'finished'. With my current solution I get finished items correctly, but total items are the same number as finished.
This is what I'm doing:
Items.aggregate([
{
$match: {
status: {
$ne: ['cancelled','pending']
}
}
},
{
$group: {
_id: '$person',
items: {
$push: {
total: '$status',
finished: {
$cond: [
{
$eq: ['$status', 'finished']
},
'$status',
null
]
}
}
}
}
},
{
$unwind: '$items'
},
{
$match: {
'items.finished': {
$ne: null
},
}
},
{
$group: {
_id: '$_id',
success: {
$push : '$items.finished'
},
total: {
$push: '$items.total'
}
}
},
{
$project: {
successCount: {
$size: '$success'
},
totalCount: {
$size: '$total'
}
}
},
{
$project: {
successScore: {
$divide: [ "$successCount", "$totalCount"]
}
}
}
]);
I also tried simpler solution, but can't figure how to keep total count field in the loop after doing $unwind
Items.aggregate([
{
$group: {
_id: '$_id',
totalCount: {$sum: 1},
finished: { $cond : [ {$eg: ['status', 'finished']}, $status, null] }
}
},
{ $unwind: '$finished'},
...
Then I can't access totalCount later

MongoDB: elemMatch match the last element in an array

I have the data like below:
{
"order_id" : 1234567,
"order_pay_time" : 1437373297,
"pay_info" : [
{
"pay_type" : 0,
"pay_time" : 1437369046
},
{
"pay_type" : 0,
"pay_time" : 1437369123
},
{
"pay_type" : 0,
"pay_time" : 1437369348
}
]}
what I want to get is the last payment is of type 1, but $elemMatch just match the list where pay_type:1 exists, how can I match the orders which last payment is of "pay_type" : 1
You can use aggregation to get expected output. The query will be like following:
db.collection.aggregate({
$unwind: "$pay_info"
}, {
$match: {
"pay_info.pay_type": 1
}
}, {
$group: {
_id: "$_id",
"pay_info": {
$push: "$pay_info"
},
"order_id": {
$first: "$order_id"
},
"order_pay_time": {
$first: "$order_pay_time"
}
}
})
Moreover if you want latest pay_info.pay_time then you can sort it by descending order with limit 1, some what like following:
db.collection.aggregate({
$unwind: "$pay_info"
}, {
$match: {
"pay_info.pay_type": 1
}
}, {
$sort: {
"pay_info.pay_time": -1
}
}, {
$limit: 1
}, {
$group: {
_id: "$_id",
"pay_info": {
$push: "$pay_info"
},
"order_id": {
$first: "$order_id"
},
"order_pay_time": {
$first: "$order_pay_time"
}
}
})
Edit
Also you can use $redact to avoid $unwind like following:
db.collection.aggregate({
$match: {
"pay_info": {
$elemMatch: {
"pay_type": 1
}
}
}
}, {
$sort: {
"pay_info.pay_time": -1
}
}, {
$limit: 1
}, {
$redact: {
$cond: {
if: {
$eq: [{
"$ifNull": ["$pay_type", 1]
}, 1]
},
then: "$$DESCEND",
else: "$$PRUNE"
}
}
}).pretty()
Just found this thread for a similar problem I've had.
I ended up doing this, maybe that will be of interest to someone:
db.collection.find({
$where: function(){
return this.pay_info[this.pay_info.length-1].pay_type === 1
}
})