Mongodb Aggregation: how to model a collection to return a tree structure? - mongodb

https://www.mongodb.com/docs/manual/tutorial/model-tree-structures-with-parent-references/ describes 5 ways to model tree-like structures in MongoDB and recommends using $graphLookup to traverse the tree.
All examples of $graphLookup return a flat array of the referenced documents, though, and it's not clear how I should use it to return a tree structure of subdocuments nested within subdocuments, and so on.
I am flexible with the data structure, as long as I can get each document by _id regardless of how far it is from the root, and can retrieve the whole tree of dereferenced/embedded documents:
[
{
"_id": "a",
"label": "Cat",
"children": [
{
"_id": "b",
"label": "Big cat",
"children": [
{
"_id": "c",
"label": "Lion"
},
{
"_id": "d",
"label": "Jaguar"
},
{
"_id": "e",
"label": "Tiger"
}
]
},
{
"_id": "f",
"label": "Small cat",
"children": [
{
"_id": "g",
"label": "Bay cat"
},
{
"_id": "h",
"label": "Desert lynx"
},
{
"_id": "i",
"label": "Leopardus",
"children": [
{
"_id": "j",
"label": "Guina"
},
{
"_id": "k",
"label": "Tigrillo"
},
{
"_id": "l",
"label": "Ocelot"
}
]
},
{
"_id": "m",
"label": "Lynx"
},
{
"_id": "n",
"label": "Felis",
"children": [
{
"_id": "o",
"label": "Jungle cat"
},
{
"_id": "p",
"label": "Sand cat"
},
{
"_id": "q",
"label": "Wildcat",
"children": [
{
"_id": "r",
"label": "African wildcat"
},
{
"_id": "s",
"label": "European wildcat"
},
{
"_id": "t",
"label": "Domestic cat"
}
]
}
]
}
]
}
]
}
]
How should I store all these animals, and what pipeline operators can recursively populate the nested structure?
I have created the flat list of these cats in https://mongoplayground.net/p/7D8SgIh-85W to play with.

It's possible to create a flat collection and still be able to reconstruct the tree by modeling each document as a graph node with a connection list. Something like:
db={
"catTree": [
{
"_id": "a",
"label": "Cat",
"children": ["b", "f"],
"parent": null
},
{
"_id": "b",
"label": "Panthera",
"children": ["c", "d", "e"],
"parent": "a"
},
{
"_id": "c",
"label": "Lion",
"children": [],
"parent": "b"
},
{
"_id": "d",
"label": "Jaguar",
"children": [],
"parent": "b"
},
{
"_id": "e",
"label": "Tiger",
"children": [],
"parent": "b"
},
{
"_id": "f",
"label": "Small cats",
"children": ["g", "h", "i", "m", "n"],
"parent": "a"
},
{
"_id": "g",
"label": "Bay cat",
"children": [],
"parent": "f"
},
{
"_id": "h",
"label": "Desert lynx",
"children": [],
"parent": "f"
},
{
"_id": "i",
"label": "Leopardus",
"children": ["j", "k", "l"],
"parent": "f"
},
{
"_id": "j",
"label": "Guina",
"children": [],
"parent": "i"
},
{
"_id": "k",
"label": "Tigrillo",
"children": [],
"parent": "i"
},
{
"_id": "l",
"label": "Ocelot",
"children": [],
"parent": "i"
},
{
"_id": "m",
"label": "Lynx",
"children": [],
"parent": "f"
},
{
"_id": "n",
"label": "Felis",
"children": ["o", "p", "q"],
"parent": "f"
},
{
"_id": "o",
"label": "Jungle cat",
"children": [],
"parent": "n"
},
{
"_id": "p",
"label": "Sand cat",
"children": [],
"parent": "n"
},
{
"_id": "q",
"label": "Wildcat",
"children": ["r", "s", "t"],
"parent": "n"
},
{
"_id": "r",
"label": "African wildcat",
"children": [],
"parent": "q"
},
{
"_id": "s",
"label": "European wildcat",
"children": [],
"parent": "q"
},
{
"_id": "t",
"label": "Domestic cat",
"children": [],
"parent": "q"
}
]
}
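Using "$graphLookup", a "root" document can be specified and all of its "children" will be returned, to any depth, as a flat array. Adapting list_to_tree from another JavaScript answer, the tree can then be reconstructed from that flat array with "$function". (Note: "$sortArray" requires MongoDB 5.2 or newer.)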
// Rebuild the nested tree below one document of the flat "catTree" collection.
db.catTree.aggregate([
{
// Pick the document that will act as the root of the returned tree.
"$match": {
"_id": "a"
}
},
{
// Starting from the root's "children" ids, repeatedly follow each matched
// document's own "children" ids to the documents with those _id values.
// Every document reached is collected into the flat "theKids" array, with
// its recursion depth stored in "level" (0 for the root's direct children).
"$graphLookup": {
"from": "catTree",
"startWith": "$children",
"connectFromField": "children",
"connectToField": "_id",
"as": "theKids",
"depthField": "level"
}
},
{
// Replace the root's list of child ids with the nested child documents.
"$set": {
"children": {
"$function": {
// For each node in list order: remember its index by _id, reset its
// "children" to an empty array, then push it onto its parent's "children"
// (or onto the returned "roots" array when its parent is the root _id).
// The parent is looked up through the index map, so every parent must
// appear in the list before its children.
"body": "function(root, list) {let map = {}, node, roots = [], i;for (i = 0; i < list.length; i += 1) {map[list[i]._id] = i;list[i].children = [];node = list[i];if (node.parent !== root) {list[map[node.parent]].children.push(node);} else {roots.push(node);}}return roots}",
"args": [
// First argument ("root"): the _id of the matched root document.
"$_id",
{
// Second argument ("list"): the descendants ordered by ascending depth,
// which guarantees the parents-before-children order the function needs.
"$sortArray": {
"input": "$theKids",
"sortBy": {"level": 1}
}
}
],
"lang": "js"
}
}
}
},
// Drop the temporary flat array now that the tree has been built.
{"$unset": "theKids"}
])
Example output:
[
{
"_id": "a",
"children": [
{
"_id": "b",
"children": [
{
"_id": "c",
"children": [],
"label": "Lion",
"level": NumberLong(1),
"parent": "b"
},
{
"_id": "d",
"children": [],
"label": "Jaguar",
"level": NumberLong(1),
"parent": "b"
},
{
"_id": "e",
"children": [],
"label": "Tiger",
"level": NumberLong(1),
"parent": "b"
}
],
"label": "Panthera",
"level": NumberLong(0),
"parent": "a"
},
{
"_id": "f",
"children": [
{
"_id": "h",
"children": [],
"label": "Desert lynx",
"level": NumberLong(1),
"parent": "f"
},
{
"_id": "g",
"children": [],
"label": "Bay cat",
"level": NumberLong(1),
"parent": "f"
},
{
"_id": "m",
"children": [],
"label": "Lynx",
"level": NumberLong(1),
"parent": "f"
},
{
"_id": "n",
"children": [
{
"_id": "q",
"children": [
{
"_id": "t",
"children": [],
"label": "Domestic cat",
"level": NumberLong(3),
"parent": "q"
},
{
"_id": "r",
"children": [],
"label": "African wildcat",
"level": NumberLong(3),
"parent": "q"
},
{
"_id": "s",
"children": [],
"label": "European wildcat",
"level": NumberLong(3),
"parent": "q"
}
],
"label": "Wildcat",
"level": NumberLong(2),
"parent": "n"
},
{
"_id": "p",
"children": [],
"label": "Sand cat",
"level": NumberLong(2),
"parent": "n"
},
{
"_id": "o",
"children": [],
"label": "Jungle cat",
"level": NumberLong(2),
"parent": "n"
}
],
"label": "Felis",
"level": NumberLong(1),
"parent": "f"
},
{
"_id": "i",
"children": [
{
"_id": "k",
"children": [],
"label": "Tigrillo",
"level": NumberLong(2),
"parent": "i"
},
{
"_id": "j",
"children": [],
"label": "Guina",
"level": NumberLong(2),
"parent": "i"
},
{
"_id": "l",
"children": [],
"label": "Ocelot",
"level": NumberLong(2),
"parent": "i"
}
],
"label": "Leopardus",
"level": NumberLong(1),
"parent": "f"
}
],
"label": "Small cats",
"level": NumberLong(0),
"parent": "a"
}
],
"label": "Cat",
"parent": null
}
]
Try it on mongoplayground.net.

Related

MongoDB distinct returns empty

I can't get collection.distinct to work for me on db.version '4.4.3'. I'm following the example at https://docs.mongodb.com/manual/reference/command/distinct/
{ "_id": 1, "dept": "A", "item": { "sku": "111", "color": "red" }, "sizes": [ "S", "M" ] }
{ "_id": 2, "dept": "A", "item": { "sku": "111", "color": "blue" }, "sizes": [ "M", "L" ] }
{ "_id": 3, "dept": "B", "item": { "sku": "222", "color": "blue" }, "sizes": "S" }
{ "_id": 4, "dept": "A", "item": { "sku": "333", "color": "black" }, "sizes": [ "S" ] }
Then run
db.runCommand ( { distinct: "inventory", key: "dept" } )
I get
{ values: [], ok: 1 }
This doesn't work either:
db.inventory.distinct( "dept" )
[]
I can't get it to work on a local server or a remote server. What gives?

mongoDB read performance difference between one to many models and normalized models

Here are 2 possibilities for a note-taking database that will keep track of multiple notes for multiple users:
1.
[
{
"_id": "abcd",
"userInfo": {
"userID": "1",
"notes": [
{
"noteID": "1",
"text": "123"
},
{
"noteID": "2",
"text": "456"
},
{
"noteID": "3",
"text": "789"
}
]
}
},
{
"_id": "efgh",
"userInfo": {
"userID": "2",
"notes": [
{
"noteID": "1",
"text": "123"
},
{
"noteID": "2",
"text": "456"
}
]
}
}
]
And the 2nd option:
[
{
"_id": "abcd",
"userID": "1",
"noteID": "1",
"text": "123"
},
{
"_id": "efgh",
"userID": "1",
"noteID": "2",
"text": "456"
},
{
"_id": "ijkl",
"userID": "1",
"noteID": "3",
"text": "789"
},
{
"_id": "mnop",
"userID": "2",
"noteID": "1",
"text": "123"
},
{
"_id": "wxyz",
"userID": "2",
"noteID": "2",
"text": "456"
}
]
I'd expect option 1 to have much better performance when loading the notes for a single user (if the user has a ton of notes). However, the 2nd option is much better when modifying and adding individual notes.

Retrieve only matched object from nested array in mongodb

In this JSON, I need a find query that returns every field where "status" is "Y". If a parent field has "status": "N", its child fields should be ignored; otherwise, return the child fields where "status" is "Y" along with their parent field.
Note: the sub-fields are in an array.
[
{
"type": "Type 1",
"status": "Y",
"code": "1",
"category": [
{
"type": "Cat 1",
"status": "Y",
"code": "1000",
"subcategories": [
{
"type": "Sub 1",
"status": "N",
"code": "1001"
},
{
"type": "Sub 2",
"status": "N",
"code": "1002"
},
{
"type": "Sub 3",
"status": "Y",
"code": "1003"
}
]
},
{
"type": "Cat 2",
"status": "N",
"code": "2000",
"subcategories": [
{
"type": "Sub 4",
"status": "Y",
"code": "2001"
},
{
"type": "Sub 5",
"status": "Y",
"code": "2002"
}
]
}
]
}
]
My output should look like this:
[
{
"type": "Type 1",
"status": "Y",
"code": "1",
"category": [
{
"type": "Cat 1",
"status": "Y",
"code": "1000",
"subcategories": [
{
"type": "Sub 3",
"status": "Y",
"code": "1003"
}
]
} ]
}
]
Thanks in Advance:)
You can try the aggregation below:
// Return only the status "Y" documents, keeping just their status "Y"
// categories and, inside those, just the status "Y" subcategories.
db.collection.aggregate([
  // Keep top-level documents whose own status is "Y".
  { $match: { status: "Y" } },

  // Emit one document per category element so each can be tested on its own.
  { $unwind: "$category" },

  // Discard categories whose status is not "Y" (their subcategories go too).
  { $match: { "category.status": "Y" } },

  // Copy the category fields through and trim its subcategories to status "Y".
  {
    $project: {
      type: 1,
      status: 1,
      code: 1,
      "category.type": "$category.type",
      "category.status": "$category.status",
      "category.code": "$category.code",
      "category.subcategories": {
        $filter: {
          input: "$category.subcategories",
          as: "subcategory",
          cond: { $eq: ["$$subcategory.status", "Y"] }
        }
      }
    }
  },

  // Reassemble one document per original _id, collecting the kept categories.
  {
    $group: {
      _id: "$_id",
      type: { $first: "$type" },
      status: { $first: "$status" },
      code: { $first: "$code" },
      category: { $push: "$category" }
    }
  }
]).then((docs) => {
  res.send(docs)
})
This gives you the following output (check here):
[
{
"_id": ObjectId("5a934e000102030405000000"),
"category": [
{
"code": "1000",
"status": "Y",
"subcategories": [
{
"code": "1003",
"status": "Y",
"type": "Sub 3"
}
],
"type": "Cat 1"
}
],
"code": "1",
"status": "Y",
"type": "Type 1"
}
]

How to check $exists in one to many relationship mongodb sails js?

Is there any way to find records whose associated collection is empty?
For example, given the array below, I want only the records whose "companydata" array is empty. Also, how can I get only the records whose "companydata" is not empty?
Thanks in advance.
[
{
"company_id": {
"company_name": "C2",
"slug": "c2",
"is_organized": 1,
"status": "1",
"id": "5adf158f547f7f0314ca8b56",
"companydata": []
},
"user_id": "5ab889aee74a151b50d04ec1",
"status": "0",
"id": "5ae014e7432e85298081be0b"
},
{
"company_id": {
"company_name": "My Compnay",
"slug": "my-compnay",
"is_organized": 1,
"status": "1",
"id": "5ad442d98a0e0c1358ca93df",
"companydata": [
{
"name": "Bhavesh Amin",
"company_id": "5ad442d98a0e0c1358ca93df",
"status": "0",
"id": "5ad442da8a0e0c1358ca93e0"
}
]
},
"user_id": "5ab889aee74a151b50d04ec1",
"status": "0",
"id": "5ae01388432e85298081bdf8"
},
{
"company_id": {
"company_name": "Organization Name",
"slug": "organization-name",
"is_organized": 1,
"status": "1",
"id": "5ad08f9b938d1131eceea624",
"companydata": [
{
"name": "Helen H. Langley",
"company_id": "5ad08f9b938d1131eceea624",
"status": "1",
"id": "5ad08f9b938d1131eceea625"
}
]
},
"user_id": "5ab889aee74a151b50d04ec1",
"status": "0",
"id": "5ad42a5f52851a2b1449db2d"
}
]

Nested grouping of array

There are 3 master collections: category, subcategory and criteria. I will be building frameworks from any possible combination of category, subcategory and criteria, which will be stored as shown below.
A framework document is shown below. It holds a list of criteriaConfigs as embedded objects, each of which contains a single category, subCategory and criteria object. You can think of criteriaConfigs as the equivalent of a link table in MySQL.
[
{
"id": "592bc3059f3ad715002b2331",
"name": "Framework1",
"description": "framework 1 for testing",
"criteriaConfigs": [
{
"id": "592bc3059f3ad715002b232f",
"category": {
"id": "591c2f5faa187956b2d0fb39",
"name": "category1",
"description": "category1",
"deleted": false,
"createdDate": 1495019359558
},
"subCategory": {
"id": "591c2f5faa187956b2d0fb83",
"name": "subCat1",
"description": "subCat1"
},
"criteria": {
"id": "591c2f5faa187956b2d0fbad",
"name": "criteria1",
"measure": "Action"
}
},
{
"id": "592bc3059f3ad715002b232e",
"category": {
"id": "591c2f5faa187956b2d0fb37",
"name": "Process",
"description": "Enagagement"
},
"subCategory": {
"id": "591c2f5faa187956b2d0fb81",
"name": "COMM / BRANDING",
"description": "COMM / BRANDING"
},
"criteria": {
"id": "591c2f5faa187956b2d0fba9",
"name": "Company representative forgets about customer on hold",
"measure": ""
}
} ]
},
{
"id": "592bc3059f3ad715002b2332",
"name": "Framework2",
"description": "framework 2 for testing",
"criteriaConfigs": [
{
"id": "592bc3059f3ad715002b232f",
"category": {
"id": "591c2f5faa187956b2d0fb39",
"name": "category1",
"description": "category1"
},
"subCategory": {
"id": "591c2f5faa187956b2d0fb83",
"name": "subCat1",
"description": "subCat1"
},
"criteria": {
"id": "591c2f5faa187956b2d0fbad",
"name": "criteria1",
"measure": "Action"
}
}
]
}
]
I need a view in which each framework contains a list of its categories, each category contains a list of its subcategories, and each subcategory contains a list of its criteria, all scoped to that single framework.
Expected result:
[
{
"id": "f1",
"name": "Framework1",
"description": "framework 1 for testing",
"categories": [
{
"id": "c2",
"name": "category2",
"description": "category2",
"subCategories": [
{
"id": "sb1",
"name": "subCat1",
"description": "subCat1",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr2",
"name": "criteria2",
"measure": "Action"
},
{
"id": "cr3",
"name": "criteria3",
"measure": "Action"
}]
},
{
"id": "sb2",
"name": "subCat2",
"description": "subCat2",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr4",
"name": "criteria4",
"measure": "Action"
}]
}]
},
{
"id": "c1",
"name": "category1",
"description": "category1",
"subCategories": [
{
"id": "sb3",
"name": "subCat3",
"description": "subCat3",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr2",
"name": "criteria2",
"measure": "Action"
}
]},
{
"id": "sb2",
"name": "subCat2",
"description": "subCat2",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr4",
"name": "criteria4",
"measure": "Action"
}]
}
]
}]
},
{
"id": "f2",
"name": "Framework2",
"description": "framework 2 for testing",
"categories": [
{
"id": "c2",
"name": "category2",
"description": "category2",
"subCategories": [
{
"id": "sb4",
"name": "subCat5",
"description": "subCat5",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr3",
"name": "criteria3",
"measure": "Action"
}]
},
{
"id": "sb2",
"name": "subCat2",
"description": "subCat2",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr4",
"name": "criteria4",
"measure": "Action"
}]
}]
},
{
"id": "c1",
"name": "category1",
"description": "category1",
"subCategories": [
{
"id": "sb3",
"name": "subCat3",
"description": "subCat3",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr2",
"name": "criteria2",
"measure": "Action"
}
]},
{
"id": "sb2",
"name": "subCat2",
"description": "subCat2",
"criterias": [
{
"id": "cr1",
"name": "criteria1",
"measure": "Action"
},
{
"id": "cr4",
"name": "criteria4",
"measure": "Action"
}]
}
]
}]
}
]
Note: a category document currently has no reference to subcategories, and likewise a subcategory has no reference to criteria objects, because they are generic master data; a framework is created dynamically from a combination of them.
If you want to try to do all the work in the aggregation, you could group first by subcategory, then by category like:
// Nest criteria under sub-categories, and sub-categories under categories.
db.collection.aggregate([
  // One document per criteriaConfigs element.
  { "$unwind": "$criteriaConfigs" },

  // Lift the three embedded objects to the top level and drop _id.
  {
    "$project": {
      "_id": 0,
      "category": "$criteriaConfigs.category",
      "subCategory": "$criteriaConfigs.subCategory",
      "criteria": "$criteriaConfigs.criteria"
    }
  },

  // First level: gather the distinct criteria of each (category, subCategory) pair.
  {
    "$group": {
      "_id": { "category": "$category", "subCategory": "$subCategory" },
      "criteria": { "$addToSet": "$criteria" }
    }
  },

  // Second level: gather each category's sub-categories with their criteria.
  {
    "$group": {
      "_id": { "category": "$_id.category" },
      "subCategories": {
        "$addToSet": {
          "subCategory": "$_id.subCategory",
          "criteria": "$criteria"
        }
      }
    }
  },

  // Expose the grouping key as a plain "category" field.
  {
    "$project": {
      "_id": 0,
      "category": "$_id.category",
      "subCategories": "$subCategories"
    }
  }
])
Depending on how you plan to use the returned data, it may be more efficient to return each unique combination:
// List every distinct (category, subCategory, criteria) name combination.
db.collection.aggregate([
  // One document per criteriaConfigs element.
  { "$unwind": "$criteriaConfigs" },

  // Grouping on the three names collapses duplicate combinations.
  {
    "$group": {
      "_id": {
        "category": "$criteriaConfigs.category.name",
        "subCategory": "$criteriaConfigs.subCategory.name",
        "criteria": "$criteriaConfigs.criteria.name"
      }
    }
  },

  // Flatten the compound key into three top-level fields.
  {
    "$project": {
      "_id": 0,
      "category": "$_id.category",
      "subCategory": "$_id.subCategory",
      "criteria": "$_id.criteria"
    }
  }
])
I'm not sure from your question what shape you are expecting the return data to have, so you may need to adjust for that.