MongoDB aggregation framework to get frequencies of fields' values

MongoDB aggregation framework to get frequencies of fields' values - mongodb

I have a collection that is populated with documents that conform to the following schema:
{
_id,
name: String,
actionTime: Date,
n1: Number, // 1<=n1<=10
n2: Number, // 1<=n2<=10
n3: Number // 1<=n3<=20
}
I want to get the frequencies of each possible numbers of n1,n2,n3. So, for example if we have the following documents:
{
_id: 1,
name: 'label1',
actionTime: Date.now,
n1: 4,
n2: 9,
n3: 18
},
{
_id: 2,
name: 'label2',
actionTime: Date.now,
n1: 1,
n2: 6,
n3: 11
},
{
_id: 3,
name: 'label3',
actionTime: Date.now,
n1: 4,
n2: 2,
n3: 5
}
I would like to have a result document of the form (or like this):
{
"n1": {
"_id": 1, "total": 1,
"_id": 2, "total": 0,
...
"_id": 4, "total": 2,
...
},
"n2": {
"_id": 1, "total": 0,
"_id": 2, "total": 1,
...
"_id": 6, "total": 1,
...
_id: 9, 'total': 1,
...
},
"n3": {
"_id": 1, "total": 0,
...
"_id": 5, "total": 1,
...
"_id": 11, "total": 1,
...
"_id": 18, "total": 1,
...
}
}
Right now, I have used the aggregation framework with the following command:
db.col.aggregate( [ { $group: { _id: "$n1", total: { $sum: 1 } } }, { $sort: { _id: 1 } } ] )
To get desired result but only for one field (n1). I could iterate this process for all interesting fields, but I would like to know if there is a more compact query to get all at once.

Related

Mongodb aggregation pipeline, combine result from two facet pipeline

I'm having a claim type:
type TClaim: {
insuredId: number,
treatmentInfo: { amount: number }[]
}
and a list of claims:
[
{
insuredId: 1,
treatmentInfo: [{amount: 1}, {amount: 2}]
},
{
insuredId: 1,
treatmentInfo: [{amount: 3}, {amount: 4}]
},
{
insuredId: 2,
treatmentInfo: [{amount: 1}, {amount: 2}]
}
]
I want to get the result like:
[{insuredId: 1, numberOfClaims: 2, amount: 10},{insuredId: 2, numberOfClaims: 1, amount: 3}]
I'm using the $facet operator in mongodb aggregation, one for counting numberOfClaims and one for calculating the amount of each insurer. But I can't combine it to get the result that I want.
$facet: {
totalClaims: [ { $group: { _id: '$insuredId', totalClaims: { $count: {} } } } ],
amount: [ { $unwind: { path: '$treatmentInfo'}},
{ $group:
{ _id: '$insuredId',
amount: { $sum: '$treatmentInfo.amount',
},
},
},
]

Is there a reason why you want to use $facet? - I am just curious
You just need to add a new fields that sums up all the amount in the array first and then do a group stage by insuredId. The query is pretty much self-explanatory.
db.collection.aggregate([
{
"$addFields": {
"totalAmount": {
"$sum": "$treatmentInfo.amount"
}
}
},
{
"$group": {
"_id": "$insuredId",
"numberOfClaims": {
"$sum": 1
},
"amount": {
"$sum": "$totalAmount"
}
}
}
])
Result:
[
{
"_id": 1,
"amount": 10,
"numberOfClaims": 2
},
{
"_id": 2,
"amount": 3,
"numberOfClaims": 1
}
]
MongoDB Playground

Mongo db aggregation - $push and $slice top results

I have the following documents in my db:
{uid: 1, score: 10}
{uid: 2, score: 11}
{uid: 3, score: 1}
{uid: 4, score: 6}
{uid: 5, score: 2}
{uid: 6, score: 3}
{uid: 7, score: 8}
{uid: 8, score: 10}
I want to split them into buckets by score - i.e.:
score
uids
(bucket name in aggregation)
[0,4)
3,5,6
0
[4,7)
4
4
[7,inf
1,2,7,8
7
For this, I created the following aggregation which works just fine:
db.scores.aggregation(
[
{
$bucket:
{
groupBy: "$score",
boundaries: [0, 4, 7],
default: 7,
output:
{
"total": {$sum: 1},
"top_frustrated":
{
$push: {
"uid": "$uid", "score": "$score"
}
},
},
}
},
]
)
However, I would like to return only the top 3 of every bucket - i.e, buckets 0, 4 should be the same, but bucket 7 should have only uids 1,2,8 returned (as uid 7 has the lowest score) - but to include the total count of documents as well, i.e. output of bucket "7" should look like:
{ "total" : 4, "top_scores" :
[
{"uid" : 2, "score" : 11},
{"uid" : 1, "score" : 10},
{"uid" : 8, "score" : 10},
]
}
I tried using $addFields with $sortArray and $slice, but it either won't work or return errors.
I can of course use $project but I was wondering if there is a more efficient way.
I am using Amazon DocumentDB.

You can use the $topN accumulator, instead of $push, like this:
db.collection.aggregate([
{
"$bucket": {
"groupBy": "$score",
"boundaries": [
0,
4,
7
],
"default": 7,
"output": {
"total": {
"$sum": 1
},
"top_frustrated": {
"$topN": {
"n": 3,
"sortBy": {
"score": -1
},
"output": {
"uid": "$uid",
"score": "$score"
}
}
}
},
}
},
])
Playground link.
The only catch here is this operator is present in MongoDB 5.2 and above.
For older versions, this will work:
db.collection.aggregate([
{
"$sort": {
score: -1
}
},
{
$bucket: {
groupBy: "$score",
boundaries: [
0,
4,
7
],
default: 7,
output: {
"total": {
$sum: 1
},
"top_frustrated": {
$push: {
"uid": "$uid",
"score": "$score"
}
},
},
}
},
{
"$project": {
total: 1,
top_frustrated: {
"$slice": [
"$top_frustrated",
3
]
}
}
}
])
Playground link.

mongoDB filter, sort, and rank result

I have a collection like this
{
id: 1,
category: "food",
score: 20
}
{
id: 2,
category: "drink",
score: 19
}
{
id: 3,
category: "food",
score: 50
}
{
id: 4,
category: "food",
score: 30
}
id is not unique btw.
but it is unique in that category.
so it is possible to have
{id: 1, category: "food"}
{id: 1, category: "drink"}
but not
{id: 1, category: "food"}
{id: 1, category: "food"}
here's what I want to do
find all category == "food"
-> it will give id: 1, 3, 4
// I can add some other filter here before sort happen
// like id less than 100
sort them by score
-> it will give id: 3, 4, 1 // highest score must be the first entry
then what is the rank of id: [4, 1]
-> it should give me {id: 4, rank: 2}, {id: 1, rank: 3}
how can I achieve this?
please give me some snippets or idea

db.collection.aggregate([
{
"$match": { //Filter conditions
"category": "food"
}
},
{
"$sort": {//Sorting
"score": -1
}
},
{
"$group": { //Group by null to get array index
"_id": "null",
"data": {
"$push": "$$ROOT",
}
}
},
{
"$unwind": { //Unwind and get index
path: "$data",
"includeArrayIndex": "index"
}
},
{
"$match": {
"data.id": { //Filter require ids
$in: [
3,
4
]
}
}
}
])
Sample

Mongodb aggregate $group for non-existing items

I have document like this :
Documents :
{score: 1, value: 10}
{score: 3, value: 10}
{score: 1, value: 10}
{score: 4, value: 10}
{score: 1, value: 10}
{score: 5, value: 10}
{score: 5, value: 10}
{score: 10, value: 10}
In this collection, there is no score for 2,6,7,8,9 but I need output like below.
Output :
{score: 1, avg: 10}
{score: 2, avg: 0}
{score: 3, avg: 10}
{score: 4, avg: 10}
{score: 5, avg: 10}
{score: 6, avg: 0}
{score: 7, avg: 0}
{score: 8, avg: 0}
{score: 9, avg: 0}
{score: 10, avg: 10}
Any option in Mongo aggregate which will generate this. Please assist

You can try that using aggregation :
db.collection.aggregate([
{ $group: { _id: '$score', avg: { $avg: '$value' } } },
{ $group: { _id: '', min: { $min: '$_id' }, max: { $max: '$_id' }, data: { $push: '$$ROOT' } } },
{ $project: { _id: 0, data: 1, nums: { $range: ['$min', "$max", 1] } } },
{ $project: { data: { $concatArrays: ["$data", { $map: { input: { $setDifference: ["$nums", "$data._id"] }, in: { _id: '$$this', avg: 0 } } }] } } },
{ $unwind: '$data' }, { $replaceRoot: { newRoot: "$data" } }
])
Test : MongoDB-Playground

Assuming you know the range of scores, there's a trick to achieve exactly what you want :
1 - Insert in your collection a document for each score, with value field not set or set to null :
db.collection.insertMany([
{
score: 1,
},
{
score: 2,
},
{
score: 3,
},
{
score: 4,
},
{
score: 5,
},
{
score: 6,
},
{
score: 7,
},
{
score: 8,
},
{
score: 9,
},
{
score: 10,
}
]);
It's important for value field not to be set, because a value set at 0 will affect average calculation
Of course this operation must be performed only once.
Then you can apply the following aggregation, which will output exactly what you need :
db.collection.aggregate([
{
$bucket: {
groupBy: "$score",
boundaries: [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11
],
output: {
avg: {
$avg: "$value"
}
}
}
},
{
$project: {
score: "$_id",
avg: {
$ifNull: [
"$avg",
0
]
},
_id: 0
}
}
])
Will output :
[
{
"avg": 10,
"score": 1
},
{
"avg": 0,
"score": 2
},
{
"avg": 10,
"score": 3
},
{
"avg": 10,
"score": 4
},
{
"avg": 10,
"score": 5
},
{
"avg": 0,
"score": 6
},
{
"avg": 0,
"score": 7
},
{
"avg": 0,
"score": 8
},
{
"avg": 0,
"score": 9
},
{
"avg": 10,
"score": 10
}
]
You can test it here.

Add empty buckets in mongo aggregation

I have the following aggregation:
const buckets = await StatisticModel.aggregate([
{
$bucket: {
groupBy: '$ranking',
boundaries: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11],
},
},
])
Which returns the following object:
[
{ _id: 3, count: 6 },
{ _id: 4, count: 98 },
{ _id: 5, count: 81 },
{ _id: 6, count: 25 },
{ _id: 7, count: 4 }
]
How can I add the missing (empty) buckets?
This is a simple example but I have more complexe ones where I generate the boundaries and I want to return to the front-end all the buckets and not only the filled ones.

You can use below aggregation
db.collection.aggregate([
{ "$facet": {
"data": [
{ "$bucket": {
"groupBy": "$ranking",
"boundaries": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
}}
]
}},
{ "$addFields": {
"data": {
"$map": {
"input": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11],
"as": "i",
"in": {
"_id": "$$i",
"count": {
"$cond": [
{ "$eq": [{ "$indexOfArray": ["$data._id", "$$i"] }, -1] },
0,
{ "$arrayElemAt": ["$data.count", { "$indexOfArray": ["$data._id", "$$i"] }] }
]
}
}
}
}
}},
{ "$unwind": "$data" },
{ "$replaceRoot": { "newRoot": "$data" }}
])
But better to do with javascript
const array = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
const array2 = [
{ "_id": 3, "count": 6 },
{ "_id": 4, "count": 98 },
{ "_id": 5, "count": 81 },
{ "_id": 6, "count": 25 },
{ "_id": 7, "count": 4 }
]
array.map((ar) => {
const index = array2.map((e) => { return e._id }).indexOf(ar)
if (index === -1) {
array2.push({ _id: ar, count: 0 })
}
})
console.log(array2)

Starting in Mongo 5.1, it's a perfect use case for the new $densify aggregation operator:
// { ranking: 3, count: 6 }
// { ranking: 4, count: 98 }
// { ranking: 6, count: 25 }
// { ranking: 7, count: 4 }
db.collection.aggregate([
{ $densify: {
field: "ranking",
range: { step: 1, bounds: [0, 12] }
}},
{ $set: { count: { $cond: [ { $not: ["$count"] }, 0, "$count" ] } } }
])
// { ranking: 0, count: 0 } <=
// { ranking: 1, count: 0 } <=
// { ranking: 2, count: 0 } <=
// { ranking: 3, count: 6 }
// { ranking: 4, count: 98 }
// { ranking: 5, count: 0 } <=
// { ranking: 6, count: 25 }
// { ranking: 7, count: 4 }
// { ranking: 8, count: 0 } <=
// { ranking: 9, count: 0 } <=
// { ranking: 10, count: 0 } <=
// { ranking: 11, count: 0 } <=
This:
densifies documents ($densify) by creating new documents in a sequence of documents where certain values for a field (in our case field: "ranking") are missing:
the step for our densification is 1: range: { step: 1, ... } since our buckets are following each other with a size of 1.
and we densify within the range [0, 12]: bounds: [0, 12]
finally sets ($set) count to 0 only for new documents included during the densify stage ({ count: { $cond: [ { $not: ["$count"] }, 0, "$count" ] } })
Note that I'm assuming your buckets are of equal size (assuming the missing 10 in your list is an oversight).