MongoDB - Complex Grouping Query - mongodb

I have the following MongoDB aggregation query which groups by IDC, type and cluster - which works perfectly.
I would like to additionally group "environment", inside this existing grouping. Please see my query below, my existing output, and what I would like to see (desired output).
If you have any questions or wish to see the source (I didn't think it was necessary, as it would take up room on the question, then please comment).
Thanks
Example Source (around 1000 documents):
{
"_id":"55d5dc40281077b6d8af1bfa",
"hostname":"1",
"domain":"domain",
"description":"VMWare ESXi 5",
"cluster":1,
"type":"Physical",
"os":"EXSi",
"idc":"AMS",
"environment":"DR",
"deviceclass":"host",
"cores":64,
"memory":256,
"clusters":0,
"customer":"MnS",
"mounts":[],
"roles":["ESX-HOST"],
"ipset":{"backnet":"1"},
"frontnet":[],
"created":"2015-09-28T11:12:36.526Z"
}
Query:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$project": {
"Physical": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Physical" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
},
"Virtual": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Virtual" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$Physical" },
{ "$unwind": "$Virtual"},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
Which gives me the following output:
{
"_id" : {
"cluster" : 1,
"idc" : "LH5"
},
"Physical" : {
"SumCores" : 192,
"SumMemory" : 768
},
"Virtual" : {
"SumCores" : 112,
"SumMemory" : 384
}
}
My desired output is:
[
{
"_id": {
"cluster": 1,
"idc": "LH8"
},
"Physical": [
{
"environment": "DR",
"SumCores": 256,
"SumMemory": 1024
},
{
"environment": "PROD",
"SumCores": 256,
"SumMemory": 1024
}
],
"Virtual": [
{
"environment": "DR",
"SumCores": 232,
"SumMemory": 469
},
{
"environment": "PROD",
"SumCores": 232,
"SumMemory": 469
}
]
}
]
Essentially, I want to group the sums based on the environment

Very much as in your initial query ( actually written by myself ), all you really need to do is add in that field detail to the initial _id of $group and then carry that through into the subsequent array entries:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type",
"environment": "$environment"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"environment": "$_id.environment",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$project": {
"Physical": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Physical" ] },
{
"environment": "$$el.environment",
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
},
"Virtual": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Virtual" ] },
{
"environment": "$$el.environment",
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$Physical" },
{ "$unwind": "$Virtual"},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
But you also "really" should be using the query form I recommended you did in the first place, since it is clear that all you want to do is diplay this in a template, and looping array content should be very simple:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type",
"environment": "$environment"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"environment": "$_id.environment",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);

Related

Filter out items from mongoDb nested array and add new field

There is a mongoDb collection, looks like this:
[
{
"_id": {
"$oid": "63110728d74738cdc48a7de0"
},
"listName": "list_name",
"alloweUidList": [
{
"uid": "prQUKkIxljVqbHlCKah7T1Rh7l22",
"role": "creator",
"boolId": 1,
"crDate": "2022-09-01 21:25",
"modDate": null
}
],
"offerModelList": [
{
"offerListenerEntity": {
"_id": "6311072ed74738cdc48a7de1",
"uid": "prQUKkIxljVqbHlCKah7T1Rh7l22",
"itemName": "sometehing",
"crDate": "2022-09-01 21:25",
"boolId": 1,
"modDate": null,
"imageColorIndex": 3,
"shoppingListId": "63110728d74738cdc48a7de0",
"checkFlag": 0,
"itemCount": 1
},
"offers": [
{
"id": "62fa7983b7f32cc089864a3b",
"itemId": 127382,
"itemName": "item_1",
"itemCleanName": "item_clean_name",
"imageUrl": "item.png",
"price": 10,
"measure": "measure",
"salesStart": "N.a",
"source": "source",
"runDate": "2022.08.15-14:11:15",
"shopName": "shop_name",
"isSales": 1,
"insertType": "automate",
"timeKey": "2022_08_15_18_51",
"imageColorIndex": 0,
"isSelectedFlag": 1,
"selectedBy": "not_selected",
"itemCount": 1
},
{
"id": "62fa7983b7f32cc089864a3b",
"itemId": 127382,
"itemName": "item_2",
"itemCleanName": "item_clean_name",
"imageUrl": "image.png",
"price": 20,
"measure": "measure",
"salesStart": "N.a",
"source": "source",
"runDate": "2022.08.15-14:11:15",
"shopName": "shop_name",
"isSales": 1,
"insertType": "automate",
"timeKey": "2022_08_15_18_51",
"imageColorIndex": 0,
"isSelectedFlag": 0,
"selectedBy": "not_selected",
"itemCount": 1
}
]
},
{
"offerListenerEntity": {
"_id": "6311a5c0d74738cdc48a7de2",
"uid": "prQUKkIxljVqbHlCKah7T1Rh7l22",
"itemName": "anything",
"crDate": "2022-09-02 08:42",
"boolId": 1,
"modDate": null,
"imageColorIndex": 1,
"shoppingListId": "63110728d74738cdc48a7de0",
"checkFlag": 0,
"itemCount": 2
},
"offers": []
}
],
"crDate": "2022-09-01 21:25",
"modDate": "2022-09-01 21:25",
"boolId": 1,
"imageColorIndex": 1
}
]
So it has an array, with a nested array.
I would like to filter out the entire item from the offerModelList array, if the offerModelList.offerListenerEntity.boolId == 0 It's working with this aggregate query:
[
{
"$match": {
"alloweUidList": {
"$elemMatch": {
"uid": "prQUKkIxljVqbHlCKah7T1Rh7l22",
"boolId": 1
}
},
"boolId": 1,
}
},
{
"$addFields": {
"offerModelList": {
"$filter": {
"input": "$offerModelList",
"as": "i",
"cond": {
"$eq": [
"$$i.offerListenerEntity.boolId",
1
]
}
}
}
},
}
]
The problem comes, when I try to filter out items from the offerModelList.offers array based on isSelectedFlag field.
I modified my query to this:
db.collection.aggregate([
{
"$match": {
"alloweUidList": {
"$elemMatch": {
"uid": "prQUKkIxljVqbHlCKah7T1Rh7l22",
"boolId": 1
}
},
"boolId": 1,
}
},
{
"$addFields": {
"offerModelList": {
"$filter": {
"input": "$offerModelList",
"as": "i",
"cond": {
"$eq": [
"$$i.offerListenerEntity.boolId",
1
]
}
}
}
},
},
{
"$addFields": {
"offerModelList.offers": {
"$filter": {
"input": "$offerModelList.offers",
"as": "x",
"cond": {
"$eq": [
"$$x.isSelectedFlag",
1
]
}
}
}
},
}
])
The problem is, it alwas return empty offers array.
Here comes an example: https://mongoplayground.net/p/kksRpoNKr1k in this specific case the offers array should cointains only 1 item.
Don't think that you are able to directly filter from offerModelList.offers.
Instead, for the last stage,
$set - Set offerModelList field.
1.1. $map - Iterate element in offerModelList array and return a new array.
1.1.1. $mergeObjects - Merge current iterated document with the document resulted from 1.1.1.1.
1.1.1.1. Document with offers array. Via $filter to filter the document(s) with isSelectedFlag: 1.
db.collection.aggregate([
{
"$match": {
"alloweUidList": {
"$elemMatch": {
"uid": "prQUKkIxljVqbHlCKah7T1Rh7l22",
"boolId": 1
}
},
"boolId": 1,
}
},
{
"$addFields": {
"offerModelList": {
"$filter": {
"input": "$offerModelList",
"as": "i",
"cond": {
"$eq": [
"$$i.offerListenerEntity.boolId",
1
]
}
}
}
},
},
{
"$set": {
"offerModelList": {
$map: {
input: "$offerModelList",
as: "offerModel",
in: {
$mergeObjects: [
"$$offerModel",
{
offers: {
$filter: {
input: "$$offerModel.offers",
as: "x",
cond: {
$eq: [
"$$x.isSelectedFlag",
1
]
}
}
}
}
]
}
}
}
}
}
])
Demo # Mongo Playground

How to get only multiple counts in Mongodb?

Im trying to get multiple count values only from multiple documents in a collection which looks like this,( basically I want to get a count of how many are from the 4 directions)
{
"empno": 1500,
"province": "North"
}
{
"empno": 1600,
"province": "West"
}
early I found a solution and implemented following query;
([
{ "$facet": {
"N": [
{ "$match": { "province": "North" }},
{ "$count": "N" }
],
"E": [
{ "$match": { "province": "East" }},
{ "$count": "E" }
],
"S": [
{ "$match": { "province": "South" }},
{ "$count": "S" }
],
"W": [
{ "$match": { "province": "West" }},
{ "$count": "W" }
]
}},
{ "$project": {
"N": { "$arrayElemAt": ["$N.N", 0] },
"E": { "$arrayElemAt": ["$E.E", 0] },
"S": { "$arrayElemAt": ["$S.S", 0] },
"W": { "$arrayElemAt": ["$W.W", 0] },
}}
])
The output I get is
{ N: 1, W: 1 }
How can I get the values only like without the keys and also I want the blank fields that are empty to be with a 0. Like this;
{1, 0, 0, 1}
Facet
Query
group by null, is the thing that you needed to add to get the count
Test code here
db.collection.aggregate([
{
"$facet": {
"g0": [
{
"$match": {
"province": {
"$eq": "North"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
],
"g1": [
{
"$match": {
"province": {
"$eq": "East"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
],
"g2": [
{
"$match": {
"province": {
"$eq": "South"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
],
"g3": [
{
"$match": {
"province": {
"$eq": "West"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
]
}
},
{
"$set": {
"data": {
"$map": {
"input": {
"$objectToArray": "$$ROOT"
},
"in": {
"$cond": [
{
"$eq": [
"$$d.v",
[]
]
},
0,
{
"$let": {
"vars": {
"m": {
"$arrayElemAt": [
"$$d.v",
0
]
}
},
"in": "$$m.count"
}
}
]
},
"as": "d"
}
}
}
},
{
"$project": {
"data": 1
}
}
])
Group
Query
group is used instead of facet (facet is like 1 aggregation per field)
each group have its index (from the array), some indexes will be missing (because no documents exist)
add a zero-data field that has all indexes and count=0 (see bellow)
add to zero-data, the data found (the ones that existed in the collection,and we have groups for them) the rest keep the count=0
Test code here
db.collection.aggregate([
{
"$group": {
"_id": {
"$switch": {
"branches": [
{
"case": {
"$eq": [
"$province",
"North"
]
},
"then": {
"index": 0,
"province": "North"
}
},
{
"case": {
"$eq": [
"$province",
"East"
]
},
"then": {
"index": 1,
"province": "East"
}
},
{
"case": {
"$eq": [
"$province",
"South"
]
},
"then": {
"index": 2,
"province": "South"
}
},
{
"case": {
"$eq": [
"$province",
"West"
]
},
"then": {
"index": 3,
"province": "West"
}
}
],
"default": {
"index": 5
}
}
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": null,
"data": {
"$push": {
"index": "$_id.index",
"province": "$province",
"count": "$count"
}
}
}
},
{
"$project": {
"_id": 0
}
},
{
"$set": {
"zero-data": [
{
"index": 0,
"count": 0
},
{
"index": 1,
"count": 0
},
{
"index": 2,
"count": 0
},
{
"index": 3,
"count": 0
}
]
}
},
{
"$set": {
"data": {
"$reduce": {
"input": "$zero-data",
"initialValue": [],
"in": {
"$let": {
"vars": {
"all_data": "$$value",
"d": "$$this"
},
"in": {
"$let": {
"vars": {
"found_data": {
"$filter": {
"input": "$data",
"cond": {
"$eq": [
"$$d.index",
"$$d1.index"
]
},
"as": "d1"
}
}
},
"in": {
"$concatArrays": [
"$$all_data",
[
{
"$cond": [
{
"$eq": [
"$$found_data",
[]
]
},
{
"index": "$$d.index",
"count": 0
},
{
"$arrayElemAt": [
"$$found_data",
0
]
}
]
}
]
]
}
}
}
}
}
}
}
}
},
{
"$project": {
"data": {
"$map": {
"input": "$data",
"in": "$$this.count"
}
}
}
}
])

How to add a summary header/footer of a dataset within the same query using Mongodb

Let the following dataset (_id ommited for clarity sakes)
{ "model":"Nissan", "regId": 1230, "status": "active", "regCost" :100},
{ "model":"Nissan", "regId": 1231, "status": "active", "regCost" :100 },
{ "model":"Nissan", "regId": 1232, "status": "inactive", "regCost" :0},
{ "model":"Honda", "regId": 1233, "status": "active", "regCost" :90},
{ "model":"Honda", "regId": 1234, "status": "active", "regCost" :90},
{ "model":"Toyota", "regId": 1235, "status": "active", "regCost" :80}
Running the following query in Mongo
[
{
"$group": {
"_id": "$model",
"TotalActive": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$status", "active"]
},
"then": 1,
"else": 0
}
}
},
"TotalCost" : {"$sum" : "$regCost"}
}
}
]
will give this above result:
The question is how can I modify my query in order to add a summary row like:
You can use below aggregation
db.collection.aggregate([
{ "$group": {
"_id": "$model",
"TotalActive": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$status", "active"]
},
"then": 1,
"else": 0
}
}
},
"TotalCost": { "$sum": "$regCost" }
}},
{ "$facet": {
"total": [
{ "$group": {
"_id": "Total",
"TotalActive": { "$sum": "$TotalActive" },
"TotalCost": { "$sum": "$TotalCost" }
}}
],
"data": [{ "$match": {} }]
}},
{ "$project": {
"data": {
"$concatArrays": ["$data", "$total"]
}
}},
{ "$unwind": "$data" },
{ "$replaceRoot": { "newRoot": "$data" } }
])
MongoPlayground

can't convert from BSON type missing to Date

I have a mongoDB aggregation which was working great when I tried it on MongoDB 4.0 but now I need to use it on MongoDB 3.4 and it's not working, I can't find why. All I suppose it that the bug occurs in the $project stage.
Here's the aggregation query :
{
"aggregate": true,
"pipeline": [
{
"$match": {
"field": {
"$exists": true
},
"field.objects": {
"$exists": true,
"$ne": []
},
"created_at": {
"$gte": {
"sec": 1551398400,
"usec": 0
},
"$lte": {
"sec": 1554076799,
"usec": 0
}
}
}
},
{
"$unwind": "$field.objects"
},
{
"$lookup": {
"from": "Object",
"localField": "field.objects.id",
"foreignField": "_id",
"as": "objects"
}
},
{
"$match": { // Some match clauses here
}
},
{
"$group": {
"_id": "$_id"
}
},
{
"$project": {
"year": {
"$year": "$created_at"
},
"month": {
"$month": "$created_at"
}
}
},
{
"$group": {
"_id": {
"date": {
"$concat": [
{
"$substr": [
"$year",
0,
4
]
},
"-",
{
"$cond": [
{
"$lte": [
"$month",
9
]
},
{
"$concat": [
"0",
{
"$substr": [
"$month",
0,
2
]
}
]
},
{
"$substr": [
"$month",
0,
2
]
}
]
},
"-01"
]
}
},
"total": {
"$sum": 1
}
}
}
],
"options": {
"cursor": true
},
"db": "db",
"collection": "Collection"
}
So, with MongoDB 4.0, I get the right result, but MongoDB 3.4 throws the following : can't convert from BSON type missing to Date. I looked a bit at changelogs but I didn't find anything.

MongoDB - Aggregate muliple rows

Suppose the following aggregation query:
Machine.aggregate( [ { $match : { $and: [ {"idc": req.query.idc }, {"customer":req.query.customer} ] } } ,{"$group":{_id: {"cluster":"$cluster","idc":"$idc","type":"$type"},"SumCores":{"$sum":"$cores"},"SumMemory": { "$sum":"$memory" }}}, { $sort : { idc : -1, cluster: 1 } } ]);
Which returns:
[
{
"_id": {
"cluster": 1,
"idc": "LH5",
"type": "Virtual"
},
"SumCores": 112,
"SumMemory": 384
},
{
"_id": {
"cluster": 1,
"idc": "LH5",
"type": "Physical"
},
"SumCores": 192,
"SumMemory": 768
},
{
"_id": {
"cluster": 1,
"idc": "LH8",
"type": "Virtual"
},
"SumCores": 232,
"SumMemory": 469
},
{
"_id": {
"cluster": 1,
"idc": "LH8",
"type": "Physical"
},
"SumCores": 256,
"SumMemory": 1024
}
]
Is there a way to change the aggregation to retrieve this desired output:
[
{
"_id": {
"cluster": 1,
"idc": "LH5"
},
"Virtual": {
"SumCores": 112,
"SumMemory": 384
},
"Physical": {
"SumCores": 192,
"SumMemory": 768
}
},
{
"_id": {
"cluster": 1,
"idc": "LH8"
},
"Virtual": {
"SumCores": 232,
"SumMemory": 469
},
"Physical": {
"idc": "LH8",
"type": "Physical"
}
}
]
Assumptions:
There will always be a Physical and Virtual "pair" per IDC/Cluster
I am happy to receive solutions which:
a) Change the aggregation query
b) Receive the existing data and change it into this format by way of a library and/or an algorithm
You are doing all the right things in the query already as you need to $group at the level you have in order to get the correct sums. The only thing remaining is to bring it all together.
Personally I would stick with the "pair" in an array as the final output:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
Which would give you:
{
"_id" : {
"cluster" : 1,
"idc" : "LH8"
},
"data" : [
{
"type" : "Virtual",
"SumCores" : 232,
"SumMemory" : 469
},
{
"type" : "Physical",
"SumCores" : 256,
"SumMemory" : 1024
}
]
}
{
"_id" : {
"cluster" : 1,
"idc" : "LH5"
},
"data" : [
{
"type" : "Virtual",
"SumCores" : 112,
"SumMemory" : 384
},
{
"type" : "Physical",
"SumCores" : 192,
"SumMemory" : 768
}
]
}
But if you really must, then you can filter out the matched elements from the array and put them in their own properties:
Machine.aggregate([
{ "$match": {
"idc": req.query.idc, "customer": req.query.customer}
} ,
{ "$group": {
"_id": {
"cluster": "$cluster",
"idc":"$idc",
"type": "$type"
},
"SumCores": { "$sum":"$cores" },
"SumMemory": { "$sum":"$memory" }
}},
{ "$group": {
"_id": {
"cluster": "$_id.cluster",
"idc": "$_id.idc"
},
"data": {
"$push": {
"type": "$_id.type",
"SumCores": "$SumCores",
"SumMemory": "$SumMemory"
}
}
}},
{ "$project": {
"Physical": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Physical" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
},
"Virtual": {
"$setDifference": [
{ "$map": {
"input": "$data",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.type", "Virtual" ] },
{
"SumCores": "$$el.SumCores",
"SumMemory": "$$el.SumMemory"
},
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$Physical" },
{ "$unwind": "$Virtual"},
{ "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);
Which gives you your result:
{
"_id" : {
"cluster" : 1,
"idc" : "LH8"
},
"Physical" : {
"SumCores" : 256,
"SumMemory" : 1024
},
"Virtual" : {
"SumCores" : 232,
"SumMemory" : 469
}
}
{
"_id" : {
"cluster" : 1,
"idc" : "LH5"
},
"Physical" : {
"SumCores" : 192,
"SumMemory" : 768
},
"Virtual" : {
"SumCores" : 112,
"SumMemory" : 384
}
}
But the first is just going to give you the same essential data without needing an extra pass through the results.
At any rate it's really just one more $group to bring it all together and then the optional stages if you really must have that data format. But I would personally handle any accessing of the "pair" in the code that needs to deal with it.