Mongodb aggregation and nested grouping - mongodb

I have the following MongoDB data model:
{
"_id": {
"$oid": "5ffd62eedf2075dfc5a5b0b8"
},
"portfolio": "4086_ClearCreek",
"ruleDescription": "Maximum Moody's Rating Factor Test",
"failureLevel": 3,
"failureCategory": "",
"hasHoldings": true,
"summaryStatus": 0,
"summaryStatusLabel": "Failure",
"ruleType": 1,
"ruleSource": 0,
"ruleValueType": 0,
"testValue": "3673",
"limitValue": "2400",
"limitOperator": "<=",
"testRoom": "-1273",
"numeratorValue": "185278281.19",
"denominatorValue": "680407923070.46",
"ruleCategory": "Collateral Quality",
"topLevelFilter": {
"id": "5fd1bd7868d7ac4e211a7642",
"type": "WSO CMP Dataset",
"name": "Clear Creek CLO, LTD._Clear Creek Test Data Set_Initial"
},
"executionDateTime": "2021-01-12T08:50:54.103"
}
I want to see results like this:-
{
"_id" : "Concentration Limitations",
"pass" : 1,
"warn" : 0,
"fail" : 0,
"portfolio" : [
{
"id" : "5fd1bd7868d7ac4e211a7642",
"name" : "Clear Creek CLO, LTD._Clear Creek Test Data Set_Initial",
"pass" : 1,
"warn" : 0,
"fail" : 0
}
]
}
This is my code that i am trying to achive the above result set:
db.rule_execution_result.aggregate([{
$group: {
_id: '$ruleCategory',
pass: {
$sum: {
$cond: [{
$eq: ["$summaryStatus", 1]
}, 1, 0]
}
},
warn: {
$sum: {
$cond: [{
$eq: ["$summaryStatus", 2]
}, 1, 0]
}
},
fail: {
$sum: {
$cond: [{
$eq: ['$summaryStatus', 0]
}, 1, 0]
}
},
portfolio: {
$push: {
id: '$$ROOT.topLevelFilter.id',
name: '$$ROOT.topLevelFilter.name',
category: '$$ROOT.ruleCategory',
summary:'$$ROOT.summaryStatus',
pass: '',
warn: '',
fail: ''
}
}
}
}
]).pretty()
I want to make one another nested group for portfolio on the basis of topLevelFilter.id and calculate sum of summaryStatus, but unable to do so.
So Can anyone please help me to achive the result set?.

first $group by both ruleCategory and topLevelFilter.id, get required count and fields
second $group by ruleCategory and sum counts and make array of portfolio
db.collection.aggregate([
{
$group: {
_id: {
ruleCategory: "$ruleCategory",
topLevelFilter: "$topLevelFilter.id"
},
name: { $first: "$topLevelFilter.name" },
summaryStatus: { $first: "$summaryStatus" },
pass: { $sum: { $cond: [{ $eq: ["$summaryStatus", 1] }, 1, 0] } },
warn: { $sum: { $cond: [{ $eq: ["$summaryStatus", 2] }, 1, 0] } },
fail: { $sum: { $cond: [{ $eq: ["$summaryStatus", 0]}, 1, 0] } }
}
},
{
$group: {
_id: "$_id.ruleCategory",
pass: { $sum: "$pass" },
warn: { $sum: "$warn" },
fail: { $sum: "$fail" },
portfolio: {
$push: {
id: "$_id.topLevelFilter",
name: "$name",
summary: "$summaryStatus",
pass: "$pass",
warn: "$warn",
fail: "$fail"
}
}
}
}
])
Playground

Related

MongoDB get count of field per season from MM/DD/YYYY date field

I am facing a problem in MongoDB. Suppose, I have the following collection.
{ id: 1, issueDate: "07/05/2021", code: "31" },
{ id: 2, issueDate: "12/11/2020", code: "14" },
{ id: 3, issueDate: "02/11/2021", code: "98" },
{ id: 4, issueDate: "01/02/2021", code: "14" },
{ id: 5, issueDate: "06/23/2020", code: "14" },
{ id: 6, issueDate: "07/01/2020", code: "31" },
{ id: 7, issueDate: "07/05/2022", code: "14" },
{ id: 8, issueDate: "07/02/2022", code: "20" },
{ id: 9, issueDate: "07/02/2022", code: "14" }
The date field is in the format MM/DD/YYYY. My goal is to get the count of items with each season (spring (March-May), summer (June-August), autumn (September-November) and winter (December-February).
The result I'm expecting is:
count of fields for each season:
{ "_id" : "Summer", "count" : 6 }
{ "_id" : "Winter", "count" : 3 }
top 2 codes (first and second most recurring) per season:
{ "_id" : "Summer", "codes" : {14, 31} }
{ "_id" : "Winter", "codes" : {14, 98} }
How can this be done?
You should never store date/time values as string, store always proper Date objects.
You can use $setWindowFields opedrator for that:
db.collection.aggregate([
// Convert string into Date
{ $set: { issueDate: { $dateFromString: { dateString: "$issueDate", format: "%m/%d/%Y" } } } },
// Determine the season (0..3)
{
$set: {
season: { $mod: [{ $toInt: { $divide: [{ $add: [{ $subtract: [{ $month: "$issueDate" }, 1] }, 1] }, 3] } }, 4] }
}
},
// Count codes per season
{
$group: {
_id: { season: "$season", code: "$code" },
count: { $count: {} },
}
},
// Rank occurrence of codes per season
{
$setWindowFields: {
partitionBy: "$_id.season",
sortBy: { count: -1 },
output: {
rank: { $denseRank: {} },
count: { $sum: "$count" }
}
}
},
// Get only top 2 ranks
{ $match: { rank: { $lte: 2 } } },
// Final grouping
{
$group: {
_id: "$_id.season",
count: { $first: "$count" },
codes: { $push: "$_id.code" }
}
},
// Some cosmetic for output
{
$set: {
season: {
$switch: {
branches: [
{ case: { $eq: ["$_id", 0] }, then: 'Winter' },
{ case: { $eq: ["$_id", 1] }, then: 'Spring' },
{ case: { $eq: ["$_id", 2] }, then: 'Summer' },
{ case: { $eq: ["$_id", 3] }, then: 'Autumn' },
]
}
}
}
}
])
Mongo Playground
I will give you clues,
You need to use $group with _id as $month on issueDate, use accumulator $sum to get month wise count.
You can divide month by 3, to get modulo, using $toInt, $divide, then put them into category using $cond.
Another option:
db.collection.aggregate([
{
$addFields: {
"season": {
$switch: {
branches: [
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"06",
"07",
"08"
]
]
},
then: "Summer"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"03",
"04",
"05"
]
]
},
then: "Spring"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"12",
"01",
"02"
]
]
},
then: "Winter"
}
],
default: "No date found."
}
}
}
},
{
$group: {
_id: {
s: "$season",
c: "$code"
},
cnt1: {
$sum: 1
}
}
},
{
$sort: {
cnt1: -1
}
},
{
$group: {
_id: "$_id.s",
codes: {
$push: "$_id.c"
},
cnt: {
$sum: "$cnt1"
}
}
},
{
$project: {
_id: 0,
season: "$_id",
count: "$cnt",
codes: {
"$slice": [
"$codes",
2
]
}
}
}
])
Explained:
Add one more field for season based on $switch per month(extracted from issueDate string)
Group to collect per season/code.
$sort per code DESCENDING
group per season to form an array with most recurring codes in descending order.
Project the fields to the desired output and $slice the codes to limit only to the fist two most recurring.
Comment:
Indeed keeping dates in string is not a good idea in general ...
Playground

MongoDB Count With Condition within Project with $eq

I'm trying to count my "$attendance.status" with aggregation mongodb.
I've get my data with relations. then i want to count by relation columns like 'present', 'off', etc.
code
Employee.aggregate([
{
$lookup: {
from: "Attendance",
let: { employeeId: "$_id" },
pipeline: [
{
$match: {
$and: [
{ $expr: { $eq: ["$employeeId", "$$employeeId"] } },
{ isApproved: true },
{
createdAt: {
$gte: startOfMonth.toDate(),
$lte: endOfMonth.toDate(),
},
},
],
},
},
],
as: "attendance",
},
},
{
$project: {
_id: 1,
username: 1,
name: 1,
attendance: 1,
present: { $sum: { $eq: ["$attendance.status", "present"] } },
},
},
]);
But why cannot count my column?
i use $eq, with $sum then count the result. but the result is 0
{
"username": "Ethyl",
"name": "Kuhn",
"id": "614d43cde735f3e601dea165",
"attendance": [
{
"_id": "614d43cde735f3e601dea16f",
"status": "present",
"start": "2021-09-24T03:19:41.645Z",
"employeeId": "614d43cde735f3e601dea165",
"isApproved": true
},
],
"present": 0,
"sick": 0,
"off": 0,
},

Compare 2 count aggregations

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the match aggregation incorrectly because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?
To compare two fields inside $match stage you need $expr which is available in MongoDB 3.6:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
$expr: { $ne: [ "$countStart", "$countEnd" ] }
}
}
])
If you're using older version of MongoDB you can use $redact:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$redact: {
$cond: { if: { $ne: [ "$countStart", "$countEnd" ] }, then: "$$KEEP", else: "$$PRUNE" }
}
}
])
You run do the following pipeline to get this - no need to use $expr or $redact or anything special really:
db.mycollection.aggregate({
$group: {
_id: {
"userid": "$userid",
"placementid": "$placementid"
},
"sum": {
$sum: {
$cond: {
if: { $eq: [ "$type", "start" ] },
then: 1, // +1 for start
else: -1 // -1 for anything else
}
}
}
}
}, {
$match: {
"sum": { $ne: 0 } // only return the non matching-up ones
}
})

mongoDB aggregate with two percent by $group

My dataset :
{
"codepostal": 84000,
"siren": 520010234,
"type": "home"
},
{
"codepostal": 84000,
"siren": 0,
"type": "home"
},
{
"codepostal": 84000,
"siren": 450123003,
"type": "appt"
} ...
My pipeline (total is an integer) :
var pipeline = [
{
$match: { codepostal: 84000 }
},
{
$group: {
_id: { type: "$type" },
count: { $sum: 1 }
}
},
{
$project: {
percentage: { $multiply: ["$count", 100 / total] }
}
},
{
$sort: { _id: 1 }
}
];
Results :
[ { _id: { type: 'appt' }, percentage: 66 },
{ _id: { type: 'home' }, percentage: 34 } ]
Expected results is to count when "siren" is set to 0 or another number.
Count siren=0 => part
Count siren!=0 => pro
[ { _id: { type: 'appt' }, totalPercent: 66, proPercent: 20, partPercent: 80},
{ _id: { type: 'home' }, totalPercent: 34, proPercent: 45, partPercent: 55 } ]
Thanks a lot for your help !!
You can use $cond to get 0 or 1 for pro/part documents depending o value of siren field. Then it's easy to calculate totals for each type of document:
[
{
$match: { codepostal: 84000 }
},
{
$group: {
_id: { type: "$type" },
count: { $sum: 1 },
countPro: { $sum: {$cond: [{$eq:["$siren",0]}, 0, 1]} },
countPart: {$sum: {$cond: [{$eq:["$siren",0]}, 1, 0]} }
}
},
{
$project: {
totalPercent: { $multiply: ["$count", 100 / total] },
proPercent: { $multiply: ["$countPro", {$divide: [100, "$count"]}] },
partPercent: { $multiply: ["$countPart", {$divide: [100, "$count"]}] }
}
},
{
$sort: { _id: 1 }
}
]
Note that I used $divide to calculate pro/part percentage relative to the count of document within type group.
For your sample documents (total = 3) output will be:
[
{
"_id" : { "type" : "appt" },
"totalPercent" : 33.3333333333333,
"proPercent" : 100,
"partPercent" : 0
},
{
"_id" : { "type" : "home" },
"totalPercent" : 66.6666666666667,
"proPercent" : 50,
"partPercent" : 50
}
]

Grouping and counting across documents?

I have a collection with documents similar to the following format:
{
departure:{name: "abe"},
arrival:{name: "tom"}
},
{
departure:{name: "bob"},
arrival:{name: "abe"}
}
And to get output like so:
{
name: "abe",
departureCount: 1,
arrivalCount: 1
},
{
name: "bob",
departureCount: 1,
arrivalCount: 0
},
{
name: "tom",
departureCount: 0,
arrivalCount: 1
}
I'm able to get the counts individually by doing a query for the specific data like so:
db.sched.aggregate([
{
"$group":{
_id: "$departure.name",
departureCount: {$sum: 1}
}
}
])
But I haven't figured out how to merge the arrival and departure name into one document along with counts for both. Any suggestions on how to accomplish this?
You should use a $map to split your doc into 2, then $unwind and $group..
[
{
$project: {
dep: '$departure.name',
arr: '$arrival.name'
}
},
{
$project: {
f: {
$map: {
input: {
$literal: ['dep', 'arr']
},
as: 'el',
in : {
type: '$$el',
name: {
$cond: [{
$eq: ['$$el', 'dep']
}, '$dep', '$arr']
}
}
}
}
}
},
{
$unwind: '$f'
}, {
$group: {
_id: {
'name': '$f.name'
},
departureCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'dep']
}, 1, 0]
}
},
arrivalCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'arr']
}, 1, 0]
}
}
}
}, {
$project: {
_id: 0,
name: '$_id.name',
departureCount: 1,
arrivalCount: 1
}
}
]