MongoDB Complex Aggregation - Combined Sum & Count - mongodb

I have a DB in which each document has an array of many different objects, of which I'm interested in working with only 6 specific ones. 5 of which are integers and 1 is categorical (text).
In order to leave only the fields I need for the aggregation, I've used $unwind on the fields array - multiplying each document by the number of fields it has. After this I filtered the specific fields I want using a basic $match.
This is where I hit some trouble - I've managed to write two queries where each gives me half of the end result I need. But I'm unable to combine them together into one general query. Specifically, I have one query that gives me 5 integer fields, each is the $sum of each integer fields, and another query that uses the categorical field in order to $count the number of times each category appears.
The desired output would give me a single document that has 5 k:v fields (1 for each sum calculation), and an additional object that includes k:v fields (where each key is a category and the value is the number of times it appears. this must be its own object because the categories that appear may vary).
The sample data I've added has been striped of most of it's structure and includes only the crucial parts relevant for this query. This is in order to secure our clients privacy.
I've tried solving this from every angle I could think of - and would greatly appreciate any feedback!
The first query:
[{$match: {
fields: {
$elemMatch: {
field_id: 174196148,
'values.start': {
$gte: ISODate('2022-02-01T00:00:00.000Z'),
$lt: ISODate('2022-02-03T00:00:00.000Z')
}
}
}
}}, {$unwind: {
path: '$fields'
}}, {$match: {
$or: [
{
'fields.field_id': 226577699
},
{
'fields.field_id': 225330844
},
{
'fields.field_id': 158472699
},
{
'fields.field_id': 191195626
},
{
'fields.field_id': 219444876
}
]
}}, {$unwind: {
path: '$fields.values'
}}, {$addFields: {
'Specific - Field Value': {
$round: [
{
$toDecimal: '$fields.values.value'
}
]
}
}}, {$group: {
_id: '$fields.label',
SumCalculation: {
$sum: {
$toDecimal: '$Specific - Field Value'
}
}
}}, {$group: {
_id: null,
SumArray: {
$push: {
k: '$_id',
v: '$SumCalculation'
}
}
}}, {$project: {
_id: 0,
Final: {
$arrayToObject: '$SumArray'
}
}}]
The second query:
[{$match: {
fields: {
$elemMatch: {
field_id: 174196148,
'values.start': {
$gte: ISODate('2022-01-01T00:00:00.000Z'),
$lt: ISODate('2022-03-31T00:00:00.000Z')
}
}
}
}}, {$unwind: {
path: '$fields'
}}, {$match: {
'fields.field_id': 177278285
}}, {$unwind: {
path: '$fields.values'
}}, {$group: {
_id: '$fields.values.value.text',
ModelCount: {
$sum: 1
}
}}, {$group: {
_id: null,
Full: {
$push: {
k: '$_id',
v: '$ModelCount'
}
}
}}, {$project: {
_id: 0,
Final: {
$arrayToObject: '$Full'
}
}}]
The desired output:
{
"Final": {
"Business Model": [
{
"K": "Solar Lease",
"V": 3
},
{
"K": "Solar Purchase",
"V": 112
}
],
"System Size - Signed Contract": 73,
"Additional Payment for O&M": 2000,
"O&M Years Included (Paid)": 2,
"Total Price Including VAT": 396660,
"1st Milestone - Down Payment": 30280
}
}
Sample data:
{
"_id": 1946794344,
"fields": [
{
"type": "money",
"field_id": 226577699,
"label": "1st Milestone - Down Payment ",
"values": [
{
"currency": "ILS",
"value": "6120.0000"
}
],
"config": {
"settings": {
"allowed_currencies": [
"ILS"
]
},
"mapping": null,
"label": "1st Milestone - Down Payment "
},
"external_id": "1st-milestone-down-payment-2"
},
{
"type": "money",
"field_id": 225330844,
"label": "Additional Payment for O&M",
"values": [
{
"currency": "ILS",
"value": "0.0000"
}
],
"config": {
"settings": {
"allowed_currencies": [
"ILS"
]
},
"mapping": null,
"label": "Additional Payment for O&M"
},
"external_id": "additional-payment-for-om"
},
{
"type": "money",
"field_id": 158472699,
"label": "Total Price Including VAT",
"values": [
{
"currency": "ILS",
"value": "61270.0000"
}
],
"config": {
"settings": {
"allowed_currencies": [
"ILS"
]
},
"mapping": null,
"label": "Total Price Including VAT"
},
"external_id": "money"
},
{
"type": "number",
"field_id": 191195626,
"label": "System Size - Signed Contract",
"values": [
{
"value": "11.6600"
}
],
"config": {
"settings": {
"decimals": 2
},
"mapping": null,
"label": "System Size - Signed Contract"
},
"external_id": "hspq-hmrkt"
},
{
"type": "number",
"field_id": 219444876,
"label": "O&M Years Included (Paid)",
"values": [
{
"value": "0.0000"
}
],
"config": {
"settings": {
"decimals": 0
},
"mapping": null,
"label": "O&M Years Included (Paid)"
},
"external_id": "om-years-gifted-for-free"
},
{
"type": "category",
"field_id": 177278285,
"label": "Business Model",
"values": [
{
"value": {
"status": "active",
"text": "Solar Purchase",
"id": 6,
"color": "DCEBD8"
}
}
],
"external_id": "mvdl-sqy"
}
]
}

Related

Nested arrays $unwind and $group back together in mongoDB

We have three nested arrays:
principalCredits with 2 objects
credits with 2 objects each
awardNominations.edges with variable totals from 0 to 3
The task is to add a field to the third array of objects awardNominations.edges based on a lookup from eventsCollection.
Here's the data I have (simplified, can copy and paste into MongoDB Compass):
[{
"principalCredits": [
{
"category": {
"id": "director",
"text": "Directors"
},
"totalCredits": 2,
"credits": [
{
"name": {
"id": "nm11813828",
"nameText": {
"text": "Pippa Ehrlich"
},
"awardNominations": {
"total": 2,
"edges": [
{
"node": {
"id": "an1393007",
"isWinner": true,
"award": {
"id": "an1393007",
"year": 2020,
"text": "Green Warsaw Award",
"event": {
"id": "ev0003786",
"text": "Millennium Docs Against Gravity"
},
"category": {
"text": null
}
}
}
},
{
"node": {
"id": "an1428940",
"isWinner": false,
"award": {
"id": "an1428940",
"year": 2021,
"text": "IDA Award",
"event": {
"id": "ev0000351",
"text": "International Documentary Association"
},
"category": {
"text": "Best Writing"
}
}
}
},
]
}
},
"category": {
"id": "director",
"text": "Director"
}
},
{
"name": {
"id": "nm1624755",
"nameText": {
"text": "James Reed"
},
"awardNominations": {
"total": 3,
"edges": [
{
"node": {
"id": "an0694012",
"isWinner": true,
"award": {
"id": "an0694012",
"year": 2015,
"text": "Best of Festival",
"event": {
"id": "ev0001486",
"text": "Jackson Wild Media Awards"
},
"category": {
"text": "Best of Festival"
}
}
}
},
{
"node": {
"id": "an0975779",
"isWinner": true,
"award": {
"id": "an0975779",
"year": 2017,
"text": "RTS West Television Award",
"event": {
"id": "ev0000571",
"text": "Royal Television Society, UK"
},
"category": {
"text": "Documentary"
}
}
}
},
{
"node": {
"id": "an0975781",
"isWinner": true,
"award": {
"id": "an0975781",
"year": 2015,
"text": "Grand Teton Prize",
"event": {
"id": "ev0001356",
"text": "Jackson Hole Film Festival"
},
"category": {
"text": "Best in Festival"
}
}
}
}
]
}
},
"category": {
"id": "director",
"text": "Director"
}
}
]
},
{
"category": {
"id": "writer",
"text": "Writers"
},
"totalCredits": 2,
"credits": [
{
"name": {
"id": "nm11813828",
"nameText": {
"text": "Pippa Ehrlich"
},
"awardNominations": {
"total": 2,
"edges": [
{
"node": {
"id": "an1393007",
"isWinner": true,
"award": {
"id": "an1393007",
"year": 2020,
"text": "Green Warsaw Award",
"event": {
"id": "ev0003786",
"text": "Millennium Docs Against Gravity"
},
"category": {
"text": null
}
}
}
},
{
"node": {
"id": "an1428940",
"isWinner": false,
"award": {
"id": "an1428940",
"year": 2021,
"text": "IDA Award",
"event": {
"id": "ev0000351",
"text": "International Documentary Association"
},
"category": {
"text": "Best Writing"
}
}
}
}
]
}
},
"category": {
"id": "writer",
"text": "Writer"
},
},
{
"name": {
"id": "nm1624755",
"nameText": {
"text": "James Reed"
},
"awardNominations": {
"total": 0,
"edges": []
}
},
"category": {
"id": "writer",
"text": "Writer"
},
}
]
}
]
}]
An example scored award should look like this:
{
"id": "an0975781",
"isWinner": true,
"award": { ... },
"score": 1.5
}
Once all the manipulation is done, the data needs to be in exactly the same shape as it was initially and with no null values. So in the case of the last array awardsNominations.edges it should be [] as it was, and not { node: { score: null }} or anything else.
To achieve this I have created an aggregation pipeline:
[
{
'$unwind': {
'path': '$principalCredits',
'preserveNullAndEmptyArrays': true
}
}, {
'$unwind': {
'path': '$principalCredits.credits',
'preserveNullAndEmptyArrays': true
}
}, {
'$unwind': {
'path': '$principalCredits.credits.name.awardNominations.edges',
'preserveNullAndEmptyArrays': true
}
}, {
'$lookup': {
'from': 'eventsCollection',
'localField': 'principalCredits.credits.name.awardNominations.edges.node.award.event.id',
'foreignField': 'id',
'as': 'matchingEvent'
}
}, {
'$unwind': {
'path': '$matchingEvent',
'preserveNullAndEmptyArrays': true
}
}, {
'$addFields': {
'principalCredits.credits.name.awardNominations.edges.node.score': {
'$multiply': [
'$matchingEvent.importance', {
'$cond': {
'if': '$principalCredits.credits.name.awardNominations.edges.node.isWinner',
'then': 1.5,
'else': 1.2
}
}
]
}
}
}
]
The above pipeline assigns the score to each award. However, the null values are still there and I have absolutely no idea how to group it back together. I have tried to group with:
{
'$group': {
'_id': '$id',
'titleDoc': {
'$first': '$$ROOT'
},
'allPrincipalCredits': {
'$push': '$principalCredits'
}
}
}
To keep the root and then somehow sort all the records back into shape but could not get back to the orginal object structure.
Any help in putting it all together will be much appriciated!
I'm fairly good with simple aggregations, but this seems to be too much for me currently and would love to learn how to $group things back properly.
I've tried and put together all the knowledge I have so far from different sources and similar answers but can't seem to get it to work.
Lookup collection eventsCollection contains objects like this:
{
"_id": { "$oid": "62c57125d6943d92f83f6fff" },
"id": "ev0030197",
"text": "#AmLatino Film Festival",
"importance": 1
}
So the "rule" in restoring to original structure is that for each $unwind you did to "deconstruct" the document you now have to do a $group to restore it.
As you can imagine in such a pipeline this could be VERY cumbersome. but definitely doable.
However let me propose a different approach that is still very messy but much easier compared to the alternative, additionally it is more efficient from a performance perspective.
(just minor sidenot the reason your score is still null is because you have a syntax error in your $multiply function)
Anyways, The idea is to first gather all the unique event ids that exist in the in nested documents.
Then execute one lookup to fetch all the relevant events.
And finally adding the score field using $map and $mergeDocuments instead of $unwinding and $grouping, like so:
Mongo Playground
db.collection.aggregate([
{
$addFields: {
allEvents: {
$reduce: {
input: {
$map: {
input: "$principalCredits",
in: {
$map: {
input: "$$this.credits",
as: "credit",
in: {
$map: {
input: "$$credit.name.awardNominations.edges",
as: "edge",
in: "$$edge.node.award.event.id"
}
}
}
}
}
},
initialValue: [],
in: {
"$concatArrays": [
{
"$reduce": {
input: "$$this",
initialValue: [],
in: {
"$concatArrays": [
"$$this",
"$$value"
]
}
}
},
"$$value"
]
}
}
}
}
},
{
"$lookup": {
"from": "eventsCollection",
"localField": "allEvents",
"foreignField": "id",
"as": "matchingEvents"
}
},
{
$addFields: {
principalCredits: {
$map: {
input: "$principalCredits",
in: {
$mergeObjects: [
"$$this",
{
credits: {
$map: {
input: "$$this.credits",
as: "credit",
in: {
$mergeObjects: [
"$$credit",
{
name: {
"$mergeObjects": [
"$$credit.name",
{
"awardNominations": {
"$mergeObjects": [
"$$credit.name.awardNominations",
{
edges: {
$map: {
input: "$$credit.name.awardNominations.edges",
as: "edge",
in: {
node: {
$mergeObjects: [
"$$edge.node",
{
score: {
"$multiply": [
{
$cond: [
"$$edge.node.isWinner",
1.5,
1.2
]
},
{
$first: {
$map: {
input: {
$filter: {
input: "$matchingEvents",
as: "matchedEvent",
cond: {
$eq: [
"$$matchedEvent.id",
"$$edge.node.award.event.id"
]
}
}
},
as: "matched",
in: "$$matched.importance"
}
}
}
]
}
}
]
}
}
}
}
}
]
}
}
]
}
}
]
}
}
}
}
]
}
}
}
}
},
{
$unset: [
"allEvents",
"matchingEvents"
]
}
])
Mongo Playground
I will just mention that you can make this much much much cleaner by involving some code while keeping the same approach suggested. first getting unique eventid with distinct. then fetching the matching importance for each event. Finally execute a single query using arrayFilters you can construct with this information.
Final side not is that the provided pipeline did not deal with null or missing values. So if an array is missing an error will be thrown as $map expects input to be a valid array.
This can easily be solved by just wrapping each of these expressions with $ifNull, like so:
{
$map: {
input: {$ifNull: ["$$this.credits",[]]}
}
}
This will also replace null values with an empty []
The deep buried keys (...award.event.id) in arrays confounds an easy approach without 1) messing up the structure as the OP has noted 2) incurring potentially very expensive multiple $unwind calls.
Recommendation: Two pass approach. Get the necessary importance values for the principalCredits objects in question, then go back and manually iterate over the collection, diving into the structure and applying the logic score = importance * isWinner? 1.2 : 1.5
PASS 1: Get the ev data
c=db.foo.aggregate([
{$project: {
XX: {$reduce: {
// Rapidly get to things we need to lookup:
input: '$principalCredits.credits.name.awardNominations.edges.node.award.event.id',
// We end up with a mess incl. empty arrays...
// [ [[ev1,ev2], [ev3,ev4]], [], [[ev1,...], [] ... ] ]
// Need to collapse all those arrays of arrays of arrays into
// a single list of ev values, hence a reduce within a reduce:
initialValue: [],
in: {$concatArrays: [
'$$value',
{$reduce: {
input: '$$this',
initialValue: [],
in: {$concatArrays: [ '$$value', '$$this' ] }
}} ]}
}}
}}
// XX is now [ ev1,ev2,ev3,ev4,ev1 ... ]
// The empty arrays are ignored. Don't worry about dupes.
,{$lookup: {
from: "Xev",
let: { evids: "$XX" },
pipeline: [
{$match: {$expr: {$in: ["$id","$$evids"]} } }
],
as: 'XX' // overwrite XX...
}}
]);
evdict = {}
c.forEach(function(d) {
d['XX'].forEach(function(ww) {
evdict[ww['id']] = ww;
});
});
{
"ev0003786" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb17f"),
"id" : "ev0003786",
"text" : "Millennium Docs Against Gravity",
"importance" : 1
},
"ev0000351" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb180"),
"id" : "ev0000351",
"text" : "International Documentary Association",
"importance" : 2
},
"ev0000571" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb181"),
"id" : "ev0000571",
"text" : "Royal Television Society, UK",
"importance" : 3
}
}
PASS 2: Iterate main collection
Left as exercise to reader.
Note that if
The number of events is small.
There is no need or value in performing $match on the initial principalCredits collection (i.e. before the fancy $project/$reduce) to significantly reduce the lookup set into events
then this whole thing is unnecessary. Simply slurp all events into evdict with a quick find and proceed to pass 2.
There is potentially a very cool solution that can do this in one pass
UPDATED
See Tom's answer below.
Note to MongoDB 5.0 users: The new $getField function allows you to pluck out fields by name instead of having to use the standard trick of using dot notation in the $in clause to access the field. This might be clearer to some:
{$getField: {
"field": "importance",
"input": {
$first: {
$filter: {
input: "$matchingEvents",
as: "matchedEvent",
cond: {
$eq: [
"$$matchedEvent.id",
"$$edge.node.award.event.id"
]
}
}
}
}
}
}

How to transform MongoDB document to tree structure by Aggregation? [duplicate]

I'm working in a REST api with ExpressJS and Mongo and I have a collection with N quantity of levels.
So to solve this problem I'm using an recursive table (or collection) in mongo where a field is the id and every register has a parent_id which is at the same level as it's childs.
To explain better this, here is an E-R representation
So as you se, mongo will save the data like this json (accounts level 0 has null parent)
[
{ "id": "45TYYU", "parent_id": null, "name":"account 1", "type": 1, "category": 1 },
{ "id": "45TYYXT", "parent_id": "45TYYU", "name":"account 2", "type": 1, "category": 1 },
{ "id": "45TYYPZ", "parent_id": "45TYYU", "name":"account 3", "type": 1, "category": 1 },
{ "id": "45TYYPZRE", "parent_id": "45TYYPZ", "name":"account 4", "type": 1, "category": 1 },
{ "id": "45TYYPZSX", "parent_id": "45TYYPZ", "name":"account 5", "type": 1, "category": 1 },
{ "id": "45TYYPZGP", "parent_id": "45TYYXT", "name":"account 6", "type": 1, "category": 1 }
]
account 2 and account 3 are children of account 1, while account 4 and account 5 are children of account tree and account 6 is child of account 2 ... but every register is at the same logical level only identifying through parent_id.
so I need to transform this data into a GET method to restructure it like this:
[
{
"id": "45TYYU",
"parent_id": null,
"name":"account 1",
"type": 1,
"category": 1,
"children": [
{
"id": "45TYYXT",
"parent_id": "45TYYU",
"name":"account 2",
"type": 1,
"category": 1,
"children": [
{ "id": "45TYYPZGP", "parent_id": "45TYYXT", "name":"account 6", "type": 1, "category": 1 }
]
},
{
"id": "45TYYPZ",
"parent_id": "45TYYU",
"name":"account 3",
"type": 1,
"category": 1,
"children": [
{ "id": "45TYYPZRE", "parent_id": "45TYYPZ", "name":"account 4", "type": 1, "category": 1 },
{ "id": "45TYYPZSX", "parent_id": "45TYYPZ", "name":"account 5", "type": 1, "category": 1 }
]
}
]
},
{
"id": "45TFJK",
"parent_id": null,
"name":"account 7",
"type": 1,
"category": 1,
"children": [
{
"id": "47HJJT",
"parent_id": "45TFJK",
"name":"account 8",
"type": 1,
"category": 1
},
{
"id": "47YHJU",
"parent_id": "45TFJK",
"name":"account 8",
"type": 1,
"category": 1
}
]
}
]
Yes... the parents level 0 has null parent_id and I want to put it's children inside an array called "children" and then send like this in the GET response to my UI
What is the best way to do this in expressJS?
Is there a library or component out there that allows me to do this?
Thank you
You can use $graphLookup and other useful array operators,
$match filter that records only have parent_id is null
$graphLookup to get child records and depth number in depthField level
$unwind deconstruct children array and allow to not remove empty children
$sort by depth level field level in descending order
$group by id field and reconstruct children array
db.collection.aggregate([
{ $match: { parent_id: null } },
{
$graphLookup: {
from: "collection",
startWith: "$id",
connectFromField: "id",
connectToField: "parent_id",
depthField: "level",
as: "children"
}
},
{
$unwind: {
path: "$children",
preserveNullAndEmptyArrays: true
}
},
{ $sort: { "children.level": -1 } },
{
$group: {
_id: "$id",
parent_id: { $first: "$parent_id" },
name: { $first: "$name" },
type: { $first: "$type" },
category: { $first: 1 },
children: { $push: "$children" }
}
},
$addFields now find the nested level children and allocate to its level,
$reduce to iterate loop of children array.
initialize default field level default value is -1, presentChild is [], prevChild is [] for the conditions purpose
$let to initialize fields:
prev as per condition if both level are equal then return prevChild otherwise return presentChild
current as per condition if both level are equal then return presentChild otherwise []
in to return level field and prevChild field from initialized fields
presentChild $filter children from prev array and return, merge current objects with children array using $mergeObjects and concat with current array of let using $concatArrays
$addFields to return only presentChild array because we only required that processed array
{
$addFields: {
children: {
$reduce: {
input: "$children",
initialValue: { level: -1, presentChild: [], prevChild: [] },
in: {
$let: {
vars: {
prev: {
$cond: [
{ $eq: ["$$value.level", "$$this.level"] },
"$$value.prevChild",
"$$value.presentChild"
]
},
current: {
$cond: [{ $eq: ["$$value.level", "$$this.level"] }, "$$value.presentChild", []]
}
},
in: {
level: "$$this.level",
prevChild: "$$prev",
presentChild: {
$concatArrays: [
"$$current",
[
{
$mergeObjects: [
"$$this",
{
children: {
$filter: {
input: "$$prev",
as: "e",
cond: { $eq: ["$$e.parent_id", "$$this.id"] }
}
}
}
]
}
]
]
}
}
}
}
}
}
}
},
{
$addFields: {
id: "$_id",
children: "$children.presentChild"
}
}
])
Playground
#turivishal Im using same schema in backend nodejs im getting only show the
null object not for a parent child relation using same aggregration
this.tickets.aggregate([
{
$match: {
parent_id: null
}
},
{
$graphLookup: {
from: "collection",
startWith: "$id",
connectFromField: "id",
connectToField: "parent_id",
depthField: "level",
as: "children"
}
},
{
$unwind: {
path: "$children",
preserveNullAndEmptyArrays: true
}
},
{
$sort: {
"children.level": -1
}
},
{
$group: {
_id: "$id",
parent_id: {
$first: "$parent_id"
},
name: {
$first: "$name"
},
type: {
$first: "$type"
},
category: {
$first: 1
},
children: {
$push: "$children"
}
}
},
{
$addFields: {
children: {
$reduce: {
input: "$children",
initialValue: {
level: -1,
presentChild: [],
prevChild: []
},
in: {
$let: {
vars: {
prev: {
$cond: [
{
$eq: [
"$$value.level",
"$$this.level"
]
},
"$$value.prevChild",
"$$value.presentChild"
]
},
current: {
$cond: [
{
$eq: [
"$$value.level",
"$$this.level"
]
},
"$$value.presentChild",
[]
]
}
},
in: {
level: "$$this.level",
prevChild: "$$prev",
presentChild: {
$concatArrays: [
"$$current",
[
{
$mergeObjects: [
"$$this",
{
children: {
$filter: {
input: "$$prev",
as: "e",
cond: {
$eq: [
"$$e.parent_id",
"$$this.id"
]
}
}
}
}
]
}
]
]
}
}
}
}
}
}
}
},
{
$addFields: {
children: "$children.presentChild"
}
}
]).then((result) => {
console.log('test',result);
// callback(result);
}).catch((error) => {
callback(error);
});
output:
[
{
_id: '45TYYU',
parent_id: null,
name: 'account 1',
type: 1,
category: 1,
children: []
},
{
_id: '45TYYUA',
parent_id: null,
name: 'account 1',
type: 1,
category: 1,
children: []
}
]

Query maximum N records of each group base on a condition in MongoDB?

I have a question regarding querying data in MongoDB. Here is my sample data:
{
"_id": 1,
"category": "fruit",
"userId": 1,
"name": "Banana"
},
{
"_id": 2,
"category": "fruit",
"userId": 2,
"name": "Apple"
},
{
"_id": 3,
"category": "fresh-food",
"userId": 1,
"name": "Fish"
},
{
"_id": 4,
"category": "fresh-food",
"userId": 2,
"name": "Shrimp"
},
{
"_id": 5,
"category": "vegetable",
"userId": 1,
"name": "Salad"
},
{
"_id": 6,
"category": "vegetable",
"userId": 2,
"name": "carrot"
}
The requirements:
If the category is fruit, returns all the records match
If the category is NOT fruit, returns maximum 10 records of each category grouped by user
The category is known and stable, so we can hard-coded in our query.
I want to get it done in a single query. So the result expected should be:
{
"fruit": [
... // All records of
],
"fresh-food": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "fresh-food"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "fresh-food"
]
},
...
],
"vegetable": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "vegetable"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "vegetable"
]
},
]
}
I've found the guideline to group by each group using $group and $slice, but I can't apply the requirement number #1.
Any help would be appreciated.
You need to use aggregation for this
$facet to categorize incoming data, we categorized into two. 1. Fruit and 2. non_fruit
$match to match the condition
$group first group to group the data based on category and user. Second group to group by its category only
$objectToArray to make the object into key value pair
$replaceRoot to make the non_fruit to root with fruit
Here is the code
db.collection.aggregate([
{
"$facet": {
"fruit": [
{ $match: { "category": "fruit" } }
],
"non_fruit": [
{
$match: {
$expr: {
$ne: [ "$category", "fruit" ]
}
}
},
{
$group: {
_id: { c: "$category", u: "$userId" },
data: { $push: "$$ROOT" }
}
},
{
$group: {
_id: "$_id.c",
v: {
$push: {
uerId: "$_id.u",
data: { "$slice": [ "$data", 3 ] }
}
}
}
},
{ $addFields: { "k": "$_id", _id: "$$REMOVE" } }
]
}
},
{ $addFields: { non_fruit: { "$arrayToObject": "$non_fruit" } }},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [ "$$ROOT", "$non_fruit" ]
}
}
},
{ $project: { non_fruit: 0 } }
])
Working Mongo playground

Sort records by array field values in MongoDb

I have a collection which has documents like;
{
"name": "Subject1",
"attributes": [{
"_id": "security_level1",
"level": {
"value": "100",
"valueKey": "ABC"
}
}, {
"_id": "security_score1",
"level": {
"value": "1000",
"valueKey": "CDE"
}
}
]
},
{
"name": "Subject2",
"attributes": [{
"_id": "security_level1",
"level": {
"value": "99",
"valueKey": "XYZ"
}
}, {
"_id": "security_score1",
"level": {
"value": "2000",
"valueKey": "EDF"
}
}
]
},
......
Each document will have so many attributes generated dynamically, can be different in size.
Is it possible to sort records based on level.value of security_level1? (security_level1 is _id field value)
As per above example, the second document ("name": "Subject2") should come first as the value ('level.value') of _id:security_level1 is 99, which is less than of Subject1's security_level1 value (100) - (Ascending order)
Use $filter and $arrayElemAt to get security_level1 item. Then you can use $toInt to convert that value to an integer so that $sort can be applied:
db.collection.aggregate([
{
$addFields: {
level: {
$let: {
vars: {
level_1: { $arrayElemAt: [ { $filter: { input: "$attributes", cond: { $eq: [ "$$this._id", "security_level1" ] } } } ,0] }
},
in: {
$toInt: "$$level_1.level.value"
}
}
}
}
},
{
$sort: {
level: 1
}
}
])
Mongo Playground

MongoDb aggregation with arrays inside an array possible

I am struggling to find some examples of using the mongo aggregation framework to process documents which has an array of items where each item also has an array of other obejects (array containing an array)
In the example document below what I would really like is an example that sums the itemValue in the results array of all cases in the document and accross the collection where the result.decision was 'accepted'and group by the document locationCode
However, even an example that found all documents where the result.decision was 'accepted' to show or that summmed the itemValue for the same would help
Many thanks
{
"_id": "333212",
"data": {
"locationCode": "UK-555-5566",
"mode": "retail",
"caseHandler": "A N Other",
"cases": [{
"caseId": "CSE525666",
"items": [{
"id": "333212-CSE525666-1",
"type": "hardware",
"subType": "print cartridge",
"targetDate": "2020-06-15",
"itemDetail": {
"description": "acme print cartridge",
"quantity": 2,
"weight": "1.5"
},
"result": {
"decision": "rejected",
"decisionDate": "2019-02-02"
},
"isPriority": true
},
{
"id": "333212-CSE525666-2",
"type": "Stationery",
"subType": "other",
"targetDate": "2020-06-15",
"itemDetail": {
"description": "staples box",
"quantity": 3,
"weight": "1.66"
},
"result": {
"decision": "accepted",
"decisionDate": "2020-03-03",
"itemValue": "23.01"
},
"isPriority": true
}
]
},
{
"caseId": "CSE885655",
"items": [{
"id": "333212-CSE885655-1",
"type": "marine goods",
"subType": "fish food",
"targetDate": "2020-06-04",
"itemDetail": {
"description": "fish bait",
"quantity": 5,
"weight": "0.65"
},
"result": {
"decision": "accepted",
"decisionDate": "2020-03-02"
},
"isPriority": false
},
{
"id": "333212-CSE885655-4",
"type": "tobacco products",
"subType": "cigarettes",
"deadlineDate": "2020-06-15",
"itemDetail": {
"description": "rolling tobbaco",
"quantity": 42,
"weight": "2.25"
},
"result": {
"decision": "accepted",
"decisionDate": "2020-02-02",
"itemValue": "48.15"
},
"isPriority": true
}
]
}
]
},
"state": "open"
}
You're probably looking for $unwind. It takes an array within a document and creates a separate document for each array member.
{ foos: [1, 2] } -> { foos: 1 }, { foos: 2}
With that you can create a flat document structure and match & group as normal.
db.collection.aggregate([
{
$unwind: "$data.cases"
},
{
$unwind: "$data.cases.items"
},
{
$match: {
"data.cases.items.result.decision": "accepted"
}
},
{
$group: {
_id: "$data.locationCode",
value: {
$sum: {
$toDecimal: "$data.cases.items.result.itemValue"
}
}
}
},
{
$project: {
_id: 0,
locationCode: "$_id",
value: "$value"
}
}
])
https://mongoplayground.net/p/Xr2WfFyPZS3
Alternative solution...
We group by data.locationCode and sum all items with this condition:
cases[*].items[*].result.decision" == "accepted"
db.collection.aggregate([
{
$group: {
_id: "$data.locationCode",
itemValue: {
$sum: {
$reduce: {
input: "$data.cases",
initialValue: 0,
in: {
$sum: {
$concatArrays: [
[ "$$value" ],
{
$map: {
input: {
$filter: {
input: "$$this.items",
as: "f",
cond: {
$eq: [ "$$f.result.decision", "accepted" ]
}
}
},
as: "item",
in: {
$toDouble: {
$ifNull: [ "$$item.result.itemValue", 0 ]
}
}
}
}
]
}
}
}
}
}
}
}
])
MongoPlayground