MongoDB Aggregation with Sum and Multiple Group Results - mongodb

Let's say I have these collections members and positions
[
{
"church":"60dbb265a75a610d90b45c6b", "parentId":"60dbb265a75a610d90b45c6b", name: "Jonah John", status: 1, birth: "1983-01-01", position: "60f56f59-08be-49ec-814a-2a421f21bc08"
},
{
"church":"60dbb265a75a610d90b45c6b", "parentId":"60dbb265a75a610d90b45c6b", name: "March John", status: 1, birth: "1981-01-23", position: "60f56f59-08be-49ec-814a-2a421f21bc08"
},
{
"church":"60dbb265a75a610d90b45c6b", "parentId":"60dbb265a75a610d90b45c6b",name: "Jessy John", status: 0, birth: "1984-08-01", position: "e5bba609-082c-435a-94e3-0997fd229851"
}
]
[
{_id: "60f56f59-08be-49ec-814a-2a421f21bc08", name: "Receptionist"},
{_id: "5c78ba5a-3e6c-4d74-8d4a-fa23d02b8003", name: "Curtain"},
{_id: "e5bba609-082c-435a-94e3-0997fd229851", name: "Doorman"}
]
I want to aggregate in a way I can get:
inactiveMembers
activeMembers
totalMembers
totalPositionsOcuppied
And two arrays with:
positionsOcuppied {name, quantity}
birthdays {month, quantity.
I need an output like this:
{
"_id": {
"church":"60dbb265a75a610d90b45c6b",
"parentId":"60dbb265a75a610d90b45c6b"
},
"inactiveMembers":1,
"activeMembers":2,
"totalMembers":3,
"birthdays": [
{january:2}, {august:1}
],
"positionsOcuppied": [
{Doorman: 1}, {Receptionist:2}
],
"totalPositionsOcuppied": 3
}
How can I do that?
PS.: Very sorry for unclear values...

Update:
$addFields with birthMonth string
$lookup to add positions
$facet to $group by birthdays, positionsOcuppied, and all docs tougher as other
$map to format birthdays and positionsOcuppied
Format the answer
db.people.aggregate([
{$addFields: {
birthMonth: {
$arrayElemAt: [
["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],
{$month: {$toDate: "$birth"}}
]
}
}
},
{$lookup: {from: "positions", localField: "position", foreignField: "_id",
as: "position"}},
{$facet: {
birthdays: [{$group: {_id: "$birthMonth", count: {$sum: 1}}}],
positionsOcuppied: [{$group: {_id: {$first: "$position.name"}, count: {$sum: 1}}}],
other: [
{$group: {_id: 0,
activeMembers: {$sum: "$status"},
totalMembers: {$sum: 1},
church: {$first: "$church"},
parentId: {$first: "$parentId"},
totalPositionsOcuppied: {$sum: {$size: "$position"}}
}
}
]
}
},
{$set: {
birthdays: {
$map: {input: "$birthdays", in: [{k: "$$this._id", v: "$$this.count"}]}
},
positionsOcuppied: {
$map: {input: "$positionsOcuppied", in: [{k: "$$this._id", v: "$$this.count"}]}
},
other: {$first: "$other"}
}
},
{$set: {
"other.birthdays": {
$map: {input: "$birthdays", in: {$arrayToObject: "$$this"}}
},
"other.positionsOcuppied": {
$map: {input: "$positionsOcuppied", in: {$arrayToObject: "$$this"}}
},
"other.inactiveMembers": {
$subtract: ["$other.totalMembers", "$other.activeMembers"]
},
"other._id": {church: "$other.church", parentId: "$other.parentId"},
birthdays: "$$REMOVE",
"other.church": "$$REMOVE",
"other.parentId": "$$REMOVE",
positionsOcuppied: "$$REMOVE"
}
},
{$replaceRoot: {newRoot: "$other"}}
])
See how it works on the playground example

Related

Fill data with NULL value if it is not present in the timeperiod using mongodb aggregation pipeline

I have to write an aggreagtion pipeline in which I will pass:
Timestamps of start date and end data for a day
I have to divide the data into 30min buckets and find data in between that buckets like:
2023-01-16T00:30:00.000+00:00 , 2023-01-16T01:00:00.000+00:00, 2023-01-16T01:30:00.000+00:00 and so on.
If data is not present in any particular bucket fill the values of that bucketa with zero but give the timestamp like:
2023-01-16T01:00:00.000+00:00 ther is no data give {timestamp:2023-01-16T01:00:00.000+00:00,a:0,b:0,c:0}
I have done the following:
[{
$match: {
$and: [
{
timestamp: {
$gte: ISODate('2023-01-16T00:00:00.000Z'),
$lt: ISODate('2023-01-16T23:59:59.000Z')
}
}
]
}
}, {
$group: {
_id: {
$toDate: {
$subtract: [
{
$toLong: '$timestamp'
},
{
$mod: [
{
$toLong: '$timestamp'
},
1800000
]
}
]
}
},
in: {
$sum: '$a'
},
out: {
$sum: '$b'
},
Count: {
$sum: 1
}
}
}, {
$addFields: {
totalIn: {
$add: [
'$in',
'$out'
]
},{
$sort: {
_id: 1
}
}]
Result is:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T01:30:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
expected result:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T12:30:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T01:00:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
One option is to use $range with $dateAdd:
db.collection.aggregate([
{$match: {timestamp: {
$gte: startDate,
$lt: endDate
}}},
{$group: {
_id: {$dateTrunc: {date: "$timestamp", unit: "minute", binSize: 30}},
in: {$sum: "$a"},
out: {$sum: "$b"},
count: {$sum: 1}
}},
{$group: {
_id: 0,
data: {$push: {
timestamp: "$_id",
totalIn: {$add: ["$in", "$out"]},
count: "$count"
}}
}},
{$project: {
_id: 0, data: 1,
bins: {$map: {
input: {$range: [
0,
{$multiply: [
{$dateDiff: {
startDate: startDate,
endDate: endDate,
unit: "hour"
}},
2
]}
]},
in: {$dateAdd: {
startDate: startDate,
unit: "minute",
amount: {$multiply: ["$$this", 30]}
}}
}}
}},
{$unwind: "$bins"},
{$set: {data: {$filter: {
input: "$data",
cond: {$eq: ["$bins", "$$this.timestamp"]}
}}}},
{$project: {
_id: "$bins",
count: {$ifNull: [{$first: "$data.count"}, 0]},
totalIn: {$ifNull: [{$first: "$data.totalIn"}, 0]}
}}
])
See how it works on the playground example

MongoDB aggregation: group and push multiple attributes

I am having a problem with my query using MongoDB aggregation.
I have a collection as follows:
[
{
id: 1,
name: 'cash',
amount: 10,
},
{
id: 2,
name: 'IPT',
amount: 10,
terminal_type: 'inside',
card: {
id: 1,
name: 'visa',
},
},
{
id: 2,
name: 'IPT',
amount: 10,
terminal_type: 'outside',
card: {
id: 1,
name: 'visa',
},
},
]
Expected result:
[
{
id: 1,
name: 'cash',
amount: 10,
},
{
id: 2,
name: 'IPT',
amount: 20,
cards: [
{
id: 1,
name: 'visa',
amount: 20,
},
],
terminals: [
{
name: 'inside',
amount: 10,
},
{
name: 'outside',
amount: 10,
},
],
},
]
What I have tried:
{
$group: {
_id: {
id: '$id',
card_id: '$card.id',
terminal_type: '$terminal_type',
},
name: {$first: '$name'},
amount: {$sum: '$amount'},
card_name: {$sum: '$card.name'},
}
},
{
$group: {
_id: {
id: '$id',
card_id: '$_id.card_id',
},
name: {$first: '$name'},
amount: {$sum: '$amount'},
card_name: {$first: '$card_name'},
terminals: {
$push: {
{ $cond: [
{$ifnull: ['$terminal_type', false]},
{
type: '$terminal_type',
amount: '$amount',
},
'$$REMOVE',
]
}
}
}
}
},
{
$group: {
_id: '$_id.id',
name: {$first: '$name'},
amount: {$sum: '$amount'},
cards: {
$push: {
{ $cond: [
{$ifnull: ['$id.card_id', false]},
{
id: '$_id.card_id',
name: '$card_name',
amount: '$amount',
},
'$$REMOVE',
],
},
},
terminals: // This is the problem where I can't figure out how get this value
}
}
I considered to unwind terminals before the last group pipeline but I ended up getting duplicate documents which make the sum for the amount incorrect. Can anyone help me to solve this or point me to where I can read and understand more about this? Thank you very much.
One option to go is to $group and then $reduce:
db.collection.aggregate([
{$group: {
_id: "$id",
name: {$first: "$name"},
amount: {$sum: "$amount"},
terminals: {$push: {name: "$terminal_type", amount: "$amount"}},
cards: {$push: {id: "$card.id", name: "$card.name", amount: "$amount"}},
cardIds: {$addToSet: "$card.id"},
terminalNames: {$addToSet: "$terminal_type"}
}},
{ $project: {
_id: 0, id: "$_id", name: 1, amount: 1,
cards: {
$map: {
input: "$cardIds",
as: "card",
in: {
id: "$$card",
amount: {$reduce: {
input: "$cards",
initialValue: 0,
in: {$add: [
"$$value",
{$cond: [{$eq: ["$$card", "$$this.id"]},"$$this.amount", 0]}
]}
}},
name: {$reduce: {
input: "$cards",
initialValue: "",
in: {$cond: [{$eq: ["$$this.id", "$$card"]}, "$$this.name", "$$value"]}
}}
}
}},
terminals: {
$map: {
input: "$terminalNames",
as: "terminal",
in: {
name: "$$terminal",
amount: {$reduce: {
input: "$terminals",
initialValue: 0,
in: {$add: [
"$$value",
{$cond: [{$eq: ["$$terminal", "$$this.name"]}, "$$this.amount", 0]}
]}
}}
}
}
}
}}
])
See how it works on the playground example
Another option may be to use $unwind as you mentioned:
db.collection.aggregate([
{$group: {
_id: "$id",
name: {$first: "$name"},
amount: {$sum: "$amount"},
terminals: {$push: {
type: "terminal",
name: "$terminal_type",
key: "$terminal_type",
amount: "$amount"
}},
cards: {$push: {
type: "card",
id: "$card.id",
key: "$card.id",
name: "$card.name",
amount: "$amount"
}}
}},
{$project: {amount: 1, name: 1, docs: {$concatArrays: ["$cards", "$terminals"]}}},
{$unwind: "$docs"},
{$group: {
_id: {_id: "$_id", type: "$docs.type", key: "$docs.key"},
amount: {$sum: "$docs.amount"},
name: {$first: "$docs.name"},
dataAmount: {$first: "$amount"},
dataName: {$first: "$name"}
}},
{$group: {
_id: "$_id._id",
amount: {$first: "$dataAmount"},
name: {$first: "$dataName"},
terminals: {$push: {
$cond: [
{$and: [{$eq: ["$_id.type", "terminal"]}, {$gt: ["$name", null]}]},
{amount: "$amount", name: "$name"},
"$$REMOVE"
]
}},
cards: {$push: {
$cond: [
{$and: [{$eq: ["$_id.type", "card"]}, {$gt: ["$name", null]}]},
{amount: "$amount", name: "$name", id: "$_id.key"},
"$$REMOVE"
]
}}
}}
])
See how it works on the playground example

How do I group and count values by value range in MongoDB

I have the following documents in my MongoDB:
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 1
_id: ObjectId(...)
'timestamp': 2022-11-03T09:00:00.000+00:00
score: 3
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 6
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 10
I want to make an aggregation that counts the score within the range of (gte)1-(lt)5 as poor, (gte)5-(lt)7 as ok, (gte)7-(lt)8.5 as good and (gte)8.5-(lte)10 as excellent.
So the result would look like this:
{
"data": [
{
"name": "excellent",
"count": 1
},
{
"name": "good",
"count": 0
},
{
"name": "ok",
"count": 1
},
{
"name": "poor",
"count": 2
}
]
}
How do I achieve that?
If you accept an answer only with documents that have a count, you can do:
db.collection.aggregate([
{$project: {
_id: {
$arrayElemAt: [
["poor", "ok", "good", "excellent"],
{$floor: {$divide: ["$score", 10]}}
]}
}},
{$group: {_id: "$_id", count: {$sum: 1}}}
])
Otherwise you need to create all categories:
db.collection.aggregate([
{$group: {
_id: 0,
excellent: {$sum: {$cond: [{$gte: ["$score", 30]}, 1, 0]}},
good: {$sum: {$cond: [{$and: [{$gte: ["$score", 20]}, {$lt: ["$score", 30]}]}, 1, 0]}},
ok: {$sum: {$cond: [{$and: [{$gte: ["$score", 10]}, {$lt: ["$score", 20]}]}, 1, 0]}},
poor: {$sum: {$cond: [{$lt: ["$score", 10]}, 1, 0]}}
}},
{$unset: "_id"},
{$project: {data: {$objectToArray: "$$ROOT"}}},
{$project: {
data: {$map: {
input: "$data",
in: {nmae: "$$this.k", count: "$$this.v"}
}}
}}
])
See how it works on the playground example

Projecting data after doing a $facet

After doing a $facet I receive this output:
[
{
"confirmed": [
{
"confirmed": 100
}
],
"denied": [
{
"denied": 50
}
],
"pending": [
{
"pending": 20
}
]
}
]
how can I project it into something like this?
[
{
category: "confirmed", count: 100,
category: "denied", count: 50,
category: "pending", count: 20
}
]
I need the faucet part because to extract those numbers I have to do several $match to the same data. Dont know if there is a better option.
Thank you!
What you ask is not a valid format. This is an object with duplicate keys. You may want:
[{"confirmed": 100, "denied": 50, "pending": 20}]
or
[
{category: "confirmed", count: 100},
{category: "denied", count: 50},
{category: "pending", count: 20}
]
which are both valid options
I guess you want the second option. If you want the generic solution, one option is:
db.collection.aggregate([
{$project: {res: {$objectToArray: "$$ROOT"}}},
{$project: {
res: {$map: {
input: "$res",
in: {category: "$$this.k", count: {$objectToArray: {$first: "$$this.v"}}}
}}
}},
{$project: {
res: {$map: {
input: "$res",
in: {category: "$$this.category", count: {$first: "$$this.count.v"}}
}}
}},
{$unwind: "$res"},
{$replaceRoot: {newRoot: "$res"}}
])
See how it works on the playground example - generic
If you want the literal option, just use:
db.collection.aggregate([
{$project: {
res: [
{category: "confirmed", count: {$first: "$confirmed.confirmed"}},
{category: "denied", count: {$first: "$denied.denied"}},
{category: "pending", count: {$first: "$pending.pending"}}
]
}
},
{$unwind: "$res"},
{$replaceRoot: {newRoot: "$res"}}
])
See how it works on the playground example - literal

Group count 2 item object array mongodb

I have the following array:
[
{
“counter”: 123456,
“user”: “USER1”,
“last”: “USER1”
},
{
“counter”: 123,
“user”: “USER1”,
“last”: “USER2”
},
{
“counter”: 111,
“user”: “USER2”,
“last”: “USER2”
},
{
“counter”: 1122,
“user”: “USER2”,
“last”: “USER2”
},
{
“counter”: 112233,
“user”: “USER1”,
“last”: “USER2”
},
]
I'm doing the following query on mongodb:
{$group: {
_id: “$user”,
total: {$sum: 1},
last: {$sum: {$cond: [
{$eq: ['$last’, '$user']}, 1, 0
]}}
}}
I would like to get the following result:
[
{
_id: “USER1”
total: 3,
last: 1
},
{
_id: “USER2”
total: 2,
last: 4
}
]
But I get this:
[
{
_id: “USER1”
total: 3,
last: 1
},
{
_id: “USER2”
total: 2,
last: 2
}
]
When I make the group I can not count the last item satisfactorily
How can I get the expected result? Thank you for your help.
You have only 2 "USER2" then can't $sum 4 $last on that _id.
try using compose _id if you need by index in user.
db.teste.aggregate([
{$group:{
_id: {user:"$user",last:"$last"},
total: {$sum:1},
last: {
$sum: {
$cond:[ {$eq: ["$last", user]}, 1, 0]
}
}
}}
])
If you need to add string or other occurrences in different fields. Query is:
db.teste.aggregate([
{$group:{
_id: "$user"
}},
{$lookup:{
from: "teste",
let: { indice: "$_id" },
pipeline: [
{$group:{
_id: null,
user:{$sum:{$cond:[
{$eq:["$user", "$$indice"]}, 1, 0
]}},
last:{$sum:{$cond:[
{$eq:["$last", "$$indice"]}, 1, 0
]}}
}},
{$project:{
_id: 0
}}
],
as: "res"
}}
])
From zero to hero:
//First we'll extract only what we need find, on this case distinct users
db.teste.aggregate([
{$group:{
_id: "$user"
}}
])
//Here we will get the indexes of the search and reinsert in our filter using $let (nodejs var)
db.teste.aggregate([
{$group:{
_id: "$user"
}},
{$lookup:{
from: "teste",
let: { indice: "$_id" },
pipeline: [],
as: "res"
}}
])
//Let's counter the total of reinserted elements
db.teste.aggregate([
{$group:{
_id: "$user"
}},
{$lookup:{
from: "teste",
let: { indice: "$_id" },
pipeline: [
{$group:{
_id: null,
total:{$sum:1}
}}
],
as: "res"
}}
])
//Now let's test a true condition
db.teste.aggregate([
{$group:{
_id: "$user"
}},
{$lookup:{
from: "teste",
let: { indice: "$_id" },
pipeline: [
{$group:{
_id: null,
user:{$sum:{$cond:[
{$eq:[true, true]}, 1, 0
]}}
}}
],
as: "res"
}}
])
//cond tested let's extract what we want
db.teste.aggregate([
{$group:{
_id: "$user"
}},
{$lookup:{
from: "teste",
let: { indice: "$_id" },
pipeline: [
{$group:{
_id: null,
user:{$sum:{$cond:[
{$eq:["$user", "$$indice"]}, 1, 0
]}},
last:{$sum:{$cond:[
{$eq:["$last", "$$indice"]}, 1, 0
]}}
}}
],
as: "res"
}}
])
//Let's take the _id of the sub-colection because we do not need it
db.teste.aggregate([
{$group:{
_id: "$user"
}},
{$lookup:{
from: "teste",
let: { indice: "$_id" },
pipeline: [
{$group:{
_id: null,
user:{$sum:{$cond:[
{$eq:["$user", "$$indice"]}, 1, 0
]}},
last:{$sum:{$cond:[
{$eq:["$last", "$$indice"]}, 1, 0
]}}
}},
{$project:{
_id: 0
}}
],
as: "res"
}}
])