MongoDB - Group by number, and then match by max of all groups - mongodb

In my MongoDB collection, I would like to group my data by a number (machine_quality) and then compare each group's value with the maximum machine_quality across ALL groups, not just the maximum within each single group.
My nonworking query:
db.records.aggregate([
{
'$group': {
'_id': '$machine_quality',
'total': {'$sum': 1}
}
},
{
'$match': {
'_id': {
'$gte': {
'$subtract': [{'$max': '$_id'}, 3]
}
}
}
}
])
Question:
The {'$max': '$_id'} part of the query only refers to each group separately, and will therefore always be equal to that group's _id. However, I would like to compare against the maximum _id across ALL groups. Is there any convenient way to do that?
Any thoughts appreciated.

One way to do this is to use $facet, which lets you run two "parallel" groupings in a single pipeline: the first groups by null to find the global max, and the second is your own grouping by machine_quality.
Test code here
Query (after the $facet stage, you can $unwind your groups):
db.collection.aggregate([
{
"$facet": {
"global_max": [
{
"$group": {
"_id": null,
"m": {
"$max": "$machine_quality"
}
}
},
{
"$project": {
"_id": 0
}
}
],
"groups": [
{
"$group": {
"_id": "$machine_quality",
"names": {
"$push": "$name"
}
}
},
{
"$addFields": {
"machine_quality": "$_id"
}
},
{
"$project": {
"_id": 0
}
}
]
}
},
{
"$project": {
"global_max": {
"$let": {
"vars": {
"v": {
"$arrayElemAt": [
"$global_max",
0
]
}
},
"in": "$$v.m"
}
},
"groups": 1
}
}
])
This approach is subject to the limitation of $facet: its output is a single document, so it must fit within the 16MB BSON document size limit.

Related

MongoDB: count both matching documents and matching subdocuments, grouped by property of document

Given a collection of documents each containing an array of subdocuments (among other properties):
{
"prop1": False,
"prop2": "unique_value",
"subdocuments": [
{
"subprop1": 1,
"subprop2": 10
},
{
"subprop1": 30,
"subprop2": 40
},
{
"subprop1": 10,
"subprop2": 1
}
]
}
And a $match query covering both documents and subdocuments:
{
"prop1": False,
"$or": [
{"subdocuments.subprop1": {"$lt": 3}},
{"subdocuments.subprop2": {"$lt": 5}}
]
}
How can I create an aggregate query that returns the number of matching subdocuments and matching documents, grouped by a specific property of the root documents?
Just counting total subdocuments and matching documents is simple, but I'm struggling to also get the right count of matching subdocuments.
Ideally I'd like to have a result like this (if we consider the sample document, only subdoc 1 and 3 match the $or conditions):
{
"unique_value": {
"documents": 1,
"subdocuments": 2
}
}
In this case the results are being grouped by the value of "prop2".
You can use $size and $filter to get the count for matching subdocuments first. Then do a $sum to get the documentCount and subdocumentCount.
db.collection.aggregate([
{
"$match": {
"prop1": false,
"$or": [
{
"subdocuments.subprop1": {
"$lt": 3
}
},
{
"subdocuments.subprop2": {
"$lt": 5
}
}
]
}
},
{
"$addFields": {
"subdocumentCount": {
$size: {
"$filter": {
"input": "$subdocuments",
"as": "s",
"cond": {
"$or": [
{
$lt: [
"$$s.subprop1",
3
]
},
{
$lt: [
"$$s.subprop2",
5
]
}
]
}
}
}
}
}
},
{
$group: {
_id: "$prop2",
documentCount: {
$sum: 1
},
subdocumentCount: {
$sum: "$subdocumentCount"
}
}
},
{
$project: {
_id: 0,
k: "$_id",
v: {
documentCount: "$documentCount",
subdocumentCount: "$subdocumentCount"
}
}
},
{
$group: {
_id: null,
docs: {
$push: "$$ROOT"
}
}
},
{
"$addFields": {
"docs": {
"$arrayToObject": "$docs"
}
}
},
{
"$replaceRoot": {
"newRoot": "$docs"
}
}
])
Here is the Mongo playground for your reference.

summing count result in two group

My collection's data are something like this :
[
{
ANumberAreaCode: "+98",
BNumberAreaCode: "+1",
AccountingTime: 1629754886,
Length: 123
},
{
ANumberAreaCode: "+44",
BNumberAreaCode: "+98",
AccountingTime: 1629754786,
Length: 123
},
{
ANumberAreaCode: "+98",
BNumberAreaCode: "+96",
AccountingTime: 1629754886,
Length: 998
}
]
I want to group by country code and count the results, summing the occurrences of each country code across both ANumberAreaCode and BNumberAreaCode.
This is my group sample :
{ "$group": {
"_id": {
"ANumberAreaCode": "$ANumberAreaCode",
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": {
"BNumberAreaCode": "$BNumberAreaCode",
},
"count": { "$sum": 1 }
}},
Now, how can I sum the counts from the two queries above for the countries they have in common?
I'm looking for a query that give me this result :
+98 : 3
+44 : 1
+1 :1
+96 :1
You can use this aggregation pipeline:
$facet to compute both groupings, by A and by B. This creates two fields, groupA and groupB, each holding an array of results.
Then, using $concatArrays in a $project stage, concatenate the two outputs.
Deconstructs the array using $unwind
And $group again by values using $sum to get the total.
db.collection.aggregate([
{
"$facet": {
"groupA": [
{
"$group": {
"_id": "$ANumberAreaCode",
"total": {
"$sum": 1
}
}
}
],
"groupB": [
{
"$group": {
"_id": "$BNumberAreaCode",
"total": {
"$sum": 1
}
}
}
]
}
},
{
"$project": {
"result": {
"$concatArrays": [
"$groupA",
"$groupB"
]
}
}
},
{
"$unwind": "$result"
},
{
"$group": {
"_id": "$result._id",
"total": {
"$sum": "$result.total"
}
}
}
])
Example here

Best way to partial group by for grouping last/first X docs

Is there any Aggregation Pipeline Stage that can help with grouping some of the results, as following:
{ "_id": "some-id-1", "sum": 14.49 }
{ "_id": "some-id-2", "sum": 12.49 }
{ "_id": "some-id-3", "sum": 9.99 }
{ "_id": "some-id-4", "sum": 8.49 }
{ "_id": "some-id-5", "sum": 7.49 }
so it will group only the documents coming after record #3? for ex.:
{ "_id": "some-id-1", "sum": 14.49 }
{ "_id": "some-id-2", "sum": 12.49 }
{ "_id": "some-id-3", "sum": 9.99 }
{ "_id": "grouped", "sum": 15.98 } <---- partial group by
I was looking at $bucket but it seems to be helpful just if the sum is known in advance.
The idea is to reduce the amount of data mongodb has to ship back when there's a huge result set.
I ended up using $facet:
{
$facet: {
list: [ {
$limit: 3
} ],
other: [
{ $skip: 3 },
{ $group: {
_id: 'other',
sum: {$sum: '$sum'},
} },
],
}
}

total of all groups totals using mongodb

I wrote this aggregation pipeline, and I want to add a field that contains the global total of all the groups' totals.
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: {
_id:"$_id",
value:"$value",
transaction:"$transaction",
paymentMethod:"$paymentMethod",
createdAt:"$createdAt",
...
}
},
count:{$sum:1},
total:{$sum:"$value"}
}}
{
//i want to get
...project groups , goupsTotal , groupsCount
}
,{
"$skip":cursor.skip
},{
"$limit":cursor.limit
},
])
You need to use $facet (available from MongoDB 3.4) to apply multiple pipelines to the same set of docs:
first pipeline: skip and limit docs
second pipeline: calculate total of all groups
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: "$$CURRENT"
},
count:{$sum:1},
total:{$sum:"$value"}
}
},
{
$facet: {
docs: [
{ $skip:cursor.skip },
{ $limit:cursor.limit }
],
overall: [
{$group: {
_id: null,
groupsTotal: {$sum: '$total'},
groupsCount:{ $sum: '$count'}
}
}
]
}
the final output will be
{
docs: [ .... ], // array of {_id, items, count, total}
overall: [ { } ] // single-element array whose object has properties groupsTotal, groupsCount
}
PS: I've replaced the items in the third pipe stage with $$CURRENT which adds the whole document for the sake of simplicity, if you need custom properties then specify them.
I did it this way: project the $group result into a new field doc, then $sum the subtotals.
{
$project: {
"doc": {
"_id": "$_id",
"total": "$total",
"items":"$items",
"count":"$count"
}
}
},{
$group: {
"_id": null,
"globalTotal": {
$sum: "$doc.total"
},
"result": {
$push: "$doc"
}
}
},
{
$project: {
"result": 1,
//paging "result": {$slice: [ "$result", cursor.skip,cursor.limit ] },
"_id": 0,
"globalTotal": 1
}
}
the output
[
{
globalTotal: 121500,
result: [ [group1], [group2], [group3], ... ]
}
]

Correct query for group by user, per month

I have a MongoDB collection that stores documents in this format:
"name" : "Username",
"timeOfError" : ISODate("...")
I'm using this collection to keep track of who got an error and when it occurred.
What I want to do now is create a query that retrieves errors per user, per month or something similar. Something like this:
{
"result": [
{
"_id": "$name",
"errorsPerMonth": [
{
"month": "0",
"errorsThisMonth": 10
},
{
"month": "1",
"errorsThisMonth": 20
}
]
}
]
}
I have tried several different queries, but none have given the desired result. The closest result came from this query:
db.collection.aggregate(
[
{
$group:
{
_id: { $month: "$timeOfError"},
name: { $push: "$name" },
totalErrorsThisMonth: { $sum: 1 }
}
}
]
);
The problem here is that the $push just adds the username for each error. So I get an array with duplicate names.
You need to compound the _id value in $group:
db.collection.aggregate([
{ "$group": {
"_id": {
"name": "$name",
"month": { "$month": "$timeOfError" }
},
"totalErrors": { "$sum": 1 }
}}
])
The _id is essentially the "grouping key", so whatever elements you want to group by need to be a part of that.
If you want a different order then you can change the grouping key precedence:
db.collection.aggregate([
{ "$group": {
"_id": {
"month": { "$month": "$timeOfError" },
"name": "$name"
},
"totalErrors": { "$sum": 1 }
}}
])
Or, if you want a specific order or have other conditions in your pipeline involving different fields, just add a $sort pipeline stage at the end:
db.collection.aggregate([
{ "$group": {
"_id": {
"month": { "$month": "$timeOfError" },
"name": "$name"
},
"totalErrors": { "$sum": 1 }
}},
{ "$sort": { "_id.name": 1, "_id.month": 1 } }
])
Where you can essentially $sort on whatever you want.