How do you get the middle result from a mongodb query - mongodb

I have a MongoDB database, which has a collection that contains all of the addresses from a country. Sometimes when I execute a query on that I have a chance that I receive about 200 results (house numbers within that street). I want to get the middle item of that result.
When I do that in my coding like this for example:
// Fetch every address on the given street.
// NOTE(review): Address looks like a Mongoose model; presumably the real code awaits this call — confirm.
const result = Address.find({ street: "fooStreet" })
// results in an array with a length of let's say 200 (could also be 20, 49, 103, etc) items
I could split it in my coding like below:
// Zero-based index of the middle element. Math.floor (rather than Math.round)
// keeps the index in range: a single result yields 0 instead of 1, and an odd
// length such as 3 yields 1 (the true middle) instead of 2 (the last item).
const middleIndex = Math.floor(result.length / 2);
// `house` is undefined when `result` is empty.
const house = result[middleIndex];
But this means that the other records go to waste and use unnecessary bandwidth + computing power which should be handled by the database. Since the database OS is optimized for working with collections etc, I was wondering if I could achieve the same result in a mongodb query? See pseudo below:
// Pseudocode: helpMeHere() is a placeholder for the operation being asked about,
// not an existing cursor method.
db.getCollection("addresses")
.find({ street: "fooStreet" })
.helpMeHere()
// ^ do something to get the middle result from the N items

You can do as below
db.collection.aggregate([
  // Stage 1: filter. Replace the empty filter with any match condition.
  { $match: {} },

  // Stage 2: collect every matching document into a single array. Adding 0.5
  // per document leaves `count` at HALF the number of matches.
  {
    $group: {
      _id: null,
      data: { $push: "$$ROOT" },
      count: { $sum: 0.5 }
    }
  },

  // Stage 3: keep the second half. The half count is converted to an integer
  // and negated, because a negative $slice length takes elements from the end
  // of the array.
  {
    $project: {
      result: {
        $slice: [
          "$data",
          { $toInt: { $multiply: [{ $toInt: "$count" }, -1] } }
        ]
      }
    }
  }
])
playground
To get one element:
playground
db.collection.aggregate([
  // Stage 1: filter. Replace the empty filter with any match condition.
  { $match: {} },

  // Stage 2: collect every matching document into a single array. Adding 0.5
  // per document leaves `count` at half the number of matches.
  {
    $group: {
      _id: null,
      data: { $push: "$$ROOT" },
      count: { $sum: 0.5 }
    }
  },

  // Stage 3: pick a single element, using the half count (converted to an
  // integer) as the array index.
  {
    $project: {
      result: {
        $arrayElemAt: ["$data", { $toInt: "$count" }]
      }
    }
  }
])

Related

MongoDB aggregate to get stats

I've done this sometime last year, but now I really can't recall and can't find any helpful resources.
I want to get the statistics of my collection based on types.
This is my data object
{
"_id": {
"$oid": "63bfc374378c59a5328f229e"
},
"amountEarned": 11500,
"amountPaid": 10350,
"relianceCommission": 1150,
"receiverType": "RESTAURANT",
"__v": 0
}
I just need the sum of amountPaid for each receiverType, it could be STORE, RESTAURANT or SHOPPER. Then I also need the sum of relianceCommission for all. Resulting in a shape like
{
storeEarnings: 500,
restaurantEarnings: 30,
shopperEarnings: 40,
totalRelianceCommission: 45
}
I've tried
aggregate([
  {
    // Field paths need the "$" prefix. The bare string "amountPaid" is a
    // non-numeric constant, which $sum ignores, so every total came out as 0.
    $group: { _id: "$receiverType", total: { $sum: "$amountPaid" } }
  }
])
And then joining with another pipeline to calculate totalRelianceCommission, but I feel there should be a neater way to do it. I'm also not sure how to do the projections to result in the desired shape. Please help.
You need conditional sum.
db.collection.aggregate([
  {
    // A single output document for the whole collection (_id: null). Each
    // *Earnings field adds amountPaid only for rows of its receiverType and
    // adds 0 for every other row.
    $group: {
      _id: null,
      storeEarnings: {
        $sum: {
          $cond: {
            if: { $eq: ["$receiverType", "STORE"] },
            then: "$amountPaid",
            else: 0
          }
        }
      },
      restaurantEarnings: {
        $sum: {
          $cond: {
            if: { $eq: ["$receiverType", "RESTAURANT"] },
            then: "$amountPaid",
            else: 0
          }
        }
      },
      shopperEarnings: {
        $sum: {
          $cond: {
            if: { $eq: ["$receiverType", "SHOPPER"] },
            then: "$amountPaid",
            else: 0
          }
        }
      },
      // Commission is summed across every row regardless of receiverType.
      totalRelianceCommission: { $sum: "$relianceCommission" }
    }
  }
])
Demo
query:
// A single $group pipeline stage: one output document per receiverType.
{
$group: {
_id: "$receiverType",
// Sum of amountPaid within the group.
total: {
$sum: "$amountPaid"
},
// Sum of relianceCommission within the group.
commissions: {
$sum: "$relianceCommission"
}
}
}
result:[
{
"_id": "STORE",
"commissions": 1150,
"total": 10350
},
{
"_id": "RESTAURANT",
"commissions": 2300,
"total": 20700
}
]
loop through the array to get a sum of commissions

How to find date wise sum and total sum of all date at a time in mongodb?

I have a collection like the one below, and I need to find the date-wise total cost as well as the sum of all costs in this collection. I can find the total cost of a day, but I fail to get the sum of all costs from the collection.
[{
"date":"12-2-2015",
"cost":100
},
{
"date":"13-2-2015",
"cost":10
},
{
"date":"12-2-2015",
"cost":40
},
{
"date":"13-2-2015",
"cost":30
},
{
"date":"13-2-2015",
"cost":80
}]
I can find output like
[{
"day": "12-2-2015",
"cost": 140
},{
"day": "13-2-2015",
"cost": 120
}]
But I want output like this.
{
"day": "12-2-2015",
"cost": 140,
"total": 260
}
Use the aggregation below. I did not add a $match stage; you can add one to filter by date.
db.collection.aggregate([
  // Stage 1: compute the grand total once, keeping every source document in
  // `orig` so the per-date sums can still be derived in the same pipeline.
  // All documents end up inside one grouped document here, so this only works
  // while that document stays within MongoDB's document size limit.
  {
    $group: {
      _id: null,
      orig: { $push: "$$ROOT" },
      total: { $sum: "$cost" }
    }
  },

  // Stage 2: back to one document per source row, each now carrying `total`.
  { $unwind: "$orig" },

  // Stage 3: one document per date. `total` is identical on every row, so
  // $first carries it through directly. This replaces pushing it into an
  // array once per row and then unwinding and re-grouping to de-duplicate.
  {
    $group: {
      _id: "$orig.date",
      cost: { $sum: "$orig.cost" },
      total: { $first: "$total" }
    }
  },

  // Stage 4: final shape { date, cost, total } without _id.
  {
    $project: {
      date: "$_id",
      cost: "$cost",
      total: "$total",
      _id: 0
    }
  }
])
https://mongoplayground.net/p/eN-pDg2Zz7u
It is like 2 queries.
There are 3 solutions that i can think of
2 queries (works no matter the collection size)
1 query and facet (the solution below)
group and pack each group in an array
(limitation = ngroups(distinct day dates) small enough to fit in 1 array 16MB distinct dates,
(which is true for like 200.000? distinct days see this)
1 query no facet
for example group and pack all collection into 1 array
(limitation = all collection must fit in 100MB memory
because of $push see this)
*Regarding the limits: I think they are as stated above, based on what I have understood.
Query
Test code here
db.collection.aggregate([
  // Stage 1: run two independent sub-pipelines over the same input.
  //   total -> a one-element array holding the sum of every cost
  //   coll  -> one element per date holding that date's cost sum
  {
    $facet: {
      total: [
        { $group: { _id: null, total: { $sum: "$cost" } } }
      ],
      coll: [
        { $group: { _id: "$date", cost: { $sum: "$cost" } } }
      ]
    }
  },

  // Stage 2: one document per date entry; each still carries the `total` array.
  { $unwind: { path: "$coll" } },

  // Stage 3: flatten. Read the grand total from the first element of `total`
  // and lift the date and cost out of `coll`.
  {
    $project: {
      total: {
        $let: {
          vars: { grand: { $arrayElemAt: ["$total", 0] } },
          in: "$$grand.total"
        }
      },
      date: "$coll._id",
      cost: "$coll.cost"
    }
  }
])
I would do one query to get a cursor, then iterate the cursor and at the same time sum the total cost and push the relevant doc, then add the total to each group. In this way you perform only one query to mongodb and let your server do the rest while keeping the code simple.
// Step 1: ask the server for the per-date sums only.
const grouped = db.data.aggregate([
  { $group: { _id: "$date", cost: { $sum: "$cost" } } }
]);

// Step 2: walk the cursor once, collecting each group while accumulating the
// grand total.
const result = [];
let total = 0;
grouped.forEach((entry) => {
  result.push(entry); // push as much as your limit
  total += entry.cost;
});

// Step 3: once the grand total is complete, stamp it onto every collected group.
for (const entry of result) {
  entry.total = total;
}

aggregate with unwind, how to limit per document and not globally? (mongodb)

If I have a collection with 300 documents, each document has a array field called items (each item of the array is an object), something like this:
*DOCUMENT 1:*
_id: **********,
title: "test",
desc: "test desc",
items (array)
0: (object)
title: (string)
tags: (array of strings)
1: (object)
etc.
and I need to retrieve items by tags, what I'm using is this query below. I have to $limit results to something like 200 or the query is too big, the problem is if the first document has more than 200 items what it returns are only items of that document, what I'd need is to limit results PER document, for instance I'd need to retrieve 5 items for each different document where tags match ($all) tags provided.
const foundItems = await db
  .collection('store')
  .aggregate([
    // One output document per element of `items`.
    { $unwind: '$items' },
    // Keep only the items that carry every tag in tagsArray.
    { $match: { 'items.tags': { $all: tagsArray } } },
    // Expose the matched item as `myitem` alongside the parent's desc and title.
    {
      $project: {
        myitem: '$items',
        desc: 1,
        title: 1
      }
    },
    // Global cap: applies to the whole result stream, not per source document.
    { $limit: 200 }
  ])
  .toArray()
to make it more clear and simple what I'd need in a ideal world would be something like:
{
$limit: 5,
$per: _id,
$totalLimit: 200
}
instead of $limit: 200 , is this achievable somehow? I didn't find any explanation about it in the official documentation.
What I tried is to add $sort right before $limit which would make sense if it had the behaviour I'm looking for put it that way and maybe not if placed AFTER the limit, but unfortunately it doesn't work that way and placed before or after the limit doesn't make any difference.
And I can't really use $sample since results are more than the 5%
Updated demo - https://mongoplayground.net/p/nM6T9XVa-XK
db.collection.aggregate([
  // One document per element of `items`.
  { $unwind: "$items" },
  // Keep only the items tagged with both "a" and "b".
  { $match: { "items.tags": { $all: ["a", "b"] } } },
  // Re-assemble the matching items per source document, keeping desc and title.
  {
    $group: {
      _id: "$_id",
      myitem: { $push: "$items" },
      desc: { $first: "$desc" },
      title: { $first: "$title" }
    }
  },
  // Per-document limit: keep at most the first 2 matching items.
  {
    $project: {
      _id: 1,
      desc: 1,
      title: 1,
      myitem: { $slice: ["$myitem", 2] }
    }
  },
  // Flatten again so each output document holds a single item.
  { $unwind: "$myitem" }
])
Demo - https://mongoplayground.net/p/BESptnyUfSS
After matching the records you can $group them by _id, $project them, and limit the items per group using $slice.
db.collection.aggregate([
  // One document per element of `items`.
  { $unwind: "$items" },
  // Keep only the items tagged with both "a" and "b".
  { $match: { "items.tags": { $all: ["a", "b"] } } },
  // Rename the matched item to `myitem`.
  {
    $project: {
      _id: 1,
      myitem: "$items",
      desc: 1,
      title: 1
    }
  },
  // Collect the matching items per source document. Only _id and myitem are
  // carried forward by this stage.
  {
    $group: {
      _id: "$_id",
      myitem: { $push: "$myitem" }
    }
  },
  {
    $project: {
      _id: 1,
      myitem: { $slice: ["$myitem", 1] } // limit records here per group / id
    }
  }
])

Mongoose aggregate until sum of documents is equal to a certain value

What pipeline can i use to select all ids until i hit the sum of 180 and get the _ids. Below is a sample of the data that i've filtered out already. In this case it should select the first two items.
[
{
"_id": "6048b2b190422d0066d90740",
"Code": "A0ABI61YH",
"Amount": 100
},
{
"_id": "6048b3cc7e4b350072424f4c",
"Code": "A0ABEAXX6",
"Amount": 100
},
{
"_id": "6048b5167e4b350072424f50",
"Code": "A0ABCENPD",
"Amount": 100
}
]
I don't think there is any straightforward way to achieve this. If you really want to, try the pipeline below. It will only work when your data size is below 16MB, because all of your documents are grouped into an array inside a single document; it may also cause performance issues.
$group by null and group all documents and required fields (_id, Amount) in result
$reduce to iterate loop of result array,
initialValue declares the initial value: Amount is 0 and _ids is []
in check condition if initialValue's Amount is less than 180 then concat current object's _id and initialValue's _ids using $concatArrays and sum current object's Amount and initialValue's Amount, otherwise return same value
db.collection.aggregate([
  // Stage 1: gather the (_id, Amount) pair of every document into one array.
  {
    $group: {
      _id: null,
      result: { $push: { _id: "$_id", Amount: "$Amount" } }
    }
  },

  // Stage 2: fold over the array. While the running Amount is still below 180,
  // append the current _id and add its Amount; once the threshold is reached,
  // pass the accumulator through unchanged.
  {
    $project: {
      _id: 0,
      result: {
        $reduce: {
          input: "$result",
          initialValue: { Amount: 0, _ids: [] },
          in: {
            $cond: {
              if: { $lt: ["$$value.Amount", 180] },
              then: {
                _ids: { $concatArrays: ["$$value._ids", ["$$this._id"]] },
                Amount: { $sum: ["$$value.Amount", "$$this.Amount"] }
              },
              else: "$$value"
            }
          }
        }
      }
    }
  }
])
Playground

Retrieving a count that matches specified criteria in a $group aggregation

So I am looking to group documents in my collection on a specific field, and for the output results of each group, I am looking to include the following:
A count of all documents in the group that match a specific query (i.e. a count of documents that satisfy some expression { "$Property": "Value" })
The total number of documents in the group
(Bonus, as I suspect that this is not easily accomplished) Properties of a document that correspond to a $min/$max accumulator
I am very new to the syntax used to query in mongo and don't quite understand how it all works, but after some research, I've managed to get it down to the following query (please note, I am currently using version 3.0.12 for my mongo db, but I believe we will upgrade in a couple of months time):
db.getCollection('myCollection').aggregate([
  // Stage 1: one document per (GroupID, Status) pair with its document count
  // and the date extremes inside that pair.
  {
    $group: {
      _id: { GroupID: "$GroupID", Status: "$Status" },
      total: { $sum: 1 },
      GroupName: { $first: "$GroupName" },
      EarliestCreatedDate: { $min: "$DateCreated" },
      LastModifiedDate: { $max: "$LastModifiedDate" }
    }
  },

  // Stage 2: roll the per-status rows up to one document per GroupID,
  // listing each status with its count and re-reducing the date extremes.
  {
    $group: {
      _id: "$_id.GroupID",
      Statuses: { $push: { Status: "$_id.Status", Count: "$total" } },
      TotalCount: { $sum: "$total" },
      GroupName: { $first: "$GroupName" },
      EarliestCreatedDate: { $min: "$EarliestCreatedDate" },
      LastModifiedDate: { $max: "$LastModifiedDate" }
    }
  }
])
Essentially what I am looking to retrieve is the Count for specific Status values, and project them into one final result document that looks like the following:
{
GroupName,
EarliestCreatedDate,
EarliestCreatedBy,
LastModifiedDate,
LastModifiedBy,
TotalCount,
PendingCount,
ClosedCount
}
Where PendingCount and ClosedCount are the total number of documents in each group that have a status Pending/Closed. I suspect I need to use $project with some other expression to extract this value, but I don't really understand the aggregation pipeline well enough to figure this out.
Also the EarliestCreatedBy and LastModifiedBy are the users who created/modified the document(s) corresponding to the EarliestCreatedDate and LastModifiedDate respectively. As I mentioned, I think retrieving these values will add another layer of complexity, so if there is no practical solution, I am willing to forgo this requirement.
Any suggestions/tips would be very much appreciated.
You can try below aggregation stages.
$group
Calculate all the necessary counts TotalCount, PendingCount and ClosedCount for each GroupID
Calculate $min and $max for EarliestCreatedDate and LastModifiedDate respectively and push all the fields to CreatedByLastModifiedBy to be compared later for fetching EarliestCreatedBy and LastModifiedBy for each GroupID
$project
Project all the fields for response
$filter the EarliestCreatedDate value against the data in the CreatedByLastModifiedBy and $map the matching CreatedBy to the EarliestCreatedBy and $arrayElemAt to convert the array to object.
Similar steps for calculating LastModifiedBy
db.getCollection('myCollection').aggregate([
  // Stage 1: one document per GroupID holding the counts, the date extremes,
  // and every (author, date) tuple needed to resolve the "...By" fields later.
  {
    $group: {
      _id: "$GroupID",
      TotalCount: { $sum: 1 },
      // "$Status" (with the "$" prefix) reads the document field. Without the
      // prefix, $eq compares two string literals and the count is always 0.
      PendingCount: {
        $sum: {
          $cond: {
            if: { $eq: ["$Status", "Pending"] },
            then: 1,
            else: 0
          }
        }
      },
      ClosedCount: {
        $sum: {
          $cond: {
            // No trailing space in "Closed", so the literal matches the status value.
            if: { $eq: ["$Status", "Closed"] },
            then: 1,
            else: 0
          }
        }
      },
      GroupName: { $first: "$GroupName" },
      EarliestCreatedDate: { $min: "$DateCreated" },
      LastModifiedDate: { $max: "$LastModifiedDate" },
      CreatedByLastModifiedBy: {
        $push: {
          CreatedBy: "$CreatedBy",
          LastModifiedBy: "$LastModifiedBy",
          DateCreated: "$DateCreated",
          LastModifiedDate: "$LastModifiedDate"
        }
      }
    }
  },

  // Stage 2: shape the response. For each "...By" field: $filter the tuples
  // down to those whose date equals the group's extreme, $map them to the
  // author, and take the first match with $arrayElemAt.
  {
    $project: {
      _id: 0,
      GroupName: 1,
      EarliestCreatedDate: 1,
      EarliestCreatedBy: {
        $arrayElemAt: [
          {
            $map: {
              input: {
                $filter: {
                  input: "$CreatedByLastModifiedBy",
                  as: "CrBy",
                  cond: { $eq: ["$EarliestCreatedDate", "$$CrBy.DateCreated"] }
                }
              },
              as: "EaCrBy",
              // `in` takes an expression directly; wrapping the bare string in
              // braces is not valid JavaScript.
              in: "$$EaCrBy.CreatedBy"
            }
          },
          0
        ]
      },
      LastModifiedDate: 1,
      LastModifiedBy: {
        $arrayElemAt: [
          {
            $map: {
              input: {
                $filter: {
                  input: "$CreatedByLastModifiedBy",
                  as: "MoBy",
                  cond: { $eq: ["$LastModifiedDate", "$$MoBy.LastModifiedDate"] }
                }
              },
              as: "LaMoBy",
              in: "$$LaMoBy.LastModifiedBy"
            }
          },
          0
        ]
      },
      TotalCount: 1,
      PendingCount: 1,
      ClosedCount: 1
    }
  }
])
Update for Version < 3.2
$filter is also not available in your version. Below is the equivalent.
The comparison logic is the same: $map creates an array that holds LastModifiedBy for every matching entry and false for every non-matching entry.
Next step is to use $setDifference to compare the previous array values with array [false] which returns the elements that only exist in the first set.
// Field fragment for the $project stage: resolves LastModifiedBy without $filter.
LastModifiedBy: {
// Remove the `false` placeholders, leaving only the matched authors.
$setDifference: [{
$map: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
in: {
// Emit the author when the entry's date equals the group's LastModifiedDate, otherwise false.
$cond: [{
$eq: ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
},
"$$MoBy.LastModifiedBy",
false
]
}
}
},
[false]
]
}
Add $unwind stage after $project stage to change to object
{$unwind:"$LastModifiedBy"}
Similar steps for calculating EarliestCreatedBy