MongoDB query to get information about circular dependency - mongodb

I have a collection like this in MongoDB:
[
{
"_id": {"$oid": "616309f71b021c754992bfca"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["I07WOS4YJ0N7YRFE7309"]
}
],
"fromTaskId": 20,
"nodeName": "5493000U0YGG4VEQOX65"
},
{
"_id": {"$oid": "616309f71b021c754992bfcb"},
"correlatedNodes": [
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19", "8945007IZBKFQUQLIP85"]
},
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19"]
}
],
"fromTaskId": 20,
"nodeName": "89450012XZ2GPWGIGH37"
},
{
"_id": {"$oid": "616309f71b021c754992bfcc"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
},
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
}
],
"fromTaskId": 20,
"nodeName": "815600228127946DFF05"
},
{
"_id": {"$oid": "616309f71b021c754992bfcd"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["549300LI58A0MHGHTZ98"]
}
],
"fromTaskId": 20,
"nodeName": "549300NV4OCF16TAS048"
}
]
Is it possible to get information about circular dependencies using a query?
For example, nodeName 5493000U0YGG4VEQOX65 has a list that contains the node name 89450012XZ2GPWGIGH37, and that node in turn has a list that contains 5493000U0YGG4VEQOX65.
I want to find this scenario and list the nodeNames where this situation occurs.

You may want to use $graphLookup:
$unwind correlatedNodes and then correlatedNodes.nodes for further processing.
Perform $graphLookup, with restrictSearchWithMatch intended to stop the recursive search when it reaches the starting node itself:
{
"$graphLookup": {
"from": "collection",
"startWith": "$correlatedNodes.nodes",
"connectFromField": "correlatedNodes.nodes",
"connectToField": "nodeName",
"as": "nodeChain",
"depthField": "depth",
"restrictSearchWithMatch": {
"nodeName": { $ne: "$nodeName" }
}
}
}
Use $reduce to determine whether the nodeChain is a circular one; you can achieve this by checking whether any entry in nodeChain.correlatedNodes.nodes equals the outermost nodeName.
$match on the result of step 3 (the $reduce step) to find the circular cases.
Here is the Mongo playground for your reference.

Related

Join multiple collections in MongoDB

Greetings amigo, I have a question related to joining multiple collections in MongoDB.
I have a collection schema something like the one below:
Posts Collection
{
"type": "POST_TYPE",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"post_id": "63241dffb0f6770c23663230",
"likes": 50
}
Post Types: 1. Event
{
"date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"venue": "Some Place",
"lat": "null",
"long": "null"
}
Post Types: 2. Poll
{
"created_date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"question": "Question??????",
"poll_opt1": "Yes",
"poll_opt2": "No",
"poll_opt1_count": "5",
"poll_opt2_count": "2"
}
Now I have to join the Posts collection with the respective collection, e.g.
"post_id" to Event::_id or Poll::_id, depending on Post::type.
I have tried aggregation, but it did not give the expected output.
I am trying to get output something like the one below:
[
{
"type": "event",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"post_id": {
"date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"venue": "Some Place",
"lat": "null",
"long": "null"
},
"likes": 50
},
{
"type": "poll",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"post_id": {
"created_date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"question": "Question??????",
"poll_opt1": "Yes",
"poll_opt2": "No",
"poll_opt1_count": "5",
"poll_opt2_count": "2"
},
"likes": 50
}
]
Is there an efficient way to achieve this, or a better MongoDB schema to manage these types of records?
You can try something like this, using $facet:
// Join every post with the document it points to in its type-specific
// collection, and embed that document in place of post_id.
db.posts.aggregate([
  // One sub-pipeline per post type; each one swaps post_id for the array of
  // matching documents from the corresponding collection.
  {
    $facet: {
      eventPosts: [
        { $match: { type: "event" } },
        {
          $lookup: {
            from: "events",
            localField: "post_id",
            foreignField: "_id",
            as: "post_id"
          }
        }
      ],
      pollPosts: [
        { $match: { type: "poll" } },
        {
          $lookup: {
            from: "poll",
            localField: "post_id",
            foreignField: "_id",
            as: "post_id"
          }
        }
      ]
    }
  },
  // Merge the facet outputs into a single array (polls first, then events).
  { $addFields: { doc: { $concatArrays: ["$pollPosts", "$eventPosts"] } } },
  // Emit one document per merged post and promote it to the root.
  { $unwind: { path: "$doc" } },
  { $replaceRoot: { newRoot: "$doc" } },
  // $lookup always yields an array: use {} when nothing matched, otherwise
  // the first (and expected only) match.
  {
    $addFields: {
      post_id: {
        $cond: [
          { $eq: [{ $size: "$post_id" }, 0] },
          {},
          { $arrayElemAt: ["$post_id", 0] }
        ]
      }
    }
  }
])
We do the following, in the query:
Perform two $lookups, one per post type, within $facet. Unfortunately, the number of $lookups grows with the number of distinct values of type.
Then we combine all the arrays obtained from $facet, using $concatArrays.
Then we unwind the concatenated array, and bring the nested document to the root using $replaceRoot.
Finally, for post_id we pick the first array element if it exists, to match the desired output.
Playground link.

MongoDB count,min,max,avg for aggregate using field with List of objects

I have a problem getting statistics from a MongoDB collection.
Collection
[
{
"_id": {"$oid": "616309f71b021c754992bfca"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["I07WOS4YJ0N7YRFE7309"]
}
],
"fromTaskId": 20,
"nodeName": "5493000U0YGG4VEQOX65"
},
{
"_id": {"$oid": "616309f71b021c754992bfcb"},
"correlatedNodes": [
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19", "8945007IZBKFQUQLIP85"]
},
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19"]
}
],
"fromTaskId": 20,
"nodeName": "89450012XZ2GPWGIGH37"
},
{
"_id": {"$oid": "616309f71b021c754992bfcc"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
},
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
}
],
"fromTaskId": 20,
"nodeName": "815600228127946DFF05"
},
{
"_id": {"$oid": "616309f71b021c754992bfcd"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["549300LI58A0MHGHTZ98"]
}
],
"fromTaskId": 20,
"nodeName": "549300NV4OCF16TAS048"
}
]
I want to get basic statistic information for correlatedNodes field.
Something like:
{
"IS_ULTIMATELY_CONSOLIDATED_BY": {
"count": 567,
"avg": 22,
"min": 3,
"max": 50
}
}
By count I mean the size of the nodes list for each connection type.
I tried an aggregate query like this:
// Attempted pipeline from the question; it is rejected by the server.
db.node_correlations.aggregate([
// This single stage object holds two operators ($project and $unwind), which
// is what triggers "A pipeline stage specification object must contain
// exactly one field." Each operator needs its own { ... } stage object.
{
$project: {"correlatedNodes": "$correlatedNodes" },
$unwind: "$correlatedNodes"
}
]);
But I received error:
[2021-10-13 12:14:10] com.mongodb.MongoCommandException: Command failed with error 40323 (Location40323): 'A pipeline stage specification object must contain exactly one field.' on server localhost:27017. The full response is {"ok": 0.0, "errmsg": "A pipeline stage specification object must contain exactly one field.", "code": 40323, "codeName": "Location40323"}
Thank you, everyone, for your help.
The working solution is:
// Per connection type: total, average, smallest and largest size of the
// "nodes" list across all correlatedNodes entries.
db.node_correlations.aggregate([
  // One document per correlatedNodes entry.
  { $unwind: { path: "$correlatedNodes" } },
  // Compute the list size once so every accumulator below reads the same value.
  {
    $project: {
      _id: 0,
      relationType: "$correlatedNodes.type",
      nodeCount: { $size: "$correlatedNodes.nodes" }
    }
  },
  // Aggregate the sizes per connection type.
  {
    $group: {
      _id: "$relationType",
      count: { $sum: "$nodeCount" },
      avg: { $avg: "$nodeCount" },
      min: { $min: "$nodeCount" },
      max: { $max: "$nodeCount" }
    }
  }
])

How to get result using stable sort using mongodb

I am working on a query that sorts the result after grouping by key in MongoDB.
The following is example data in the DB:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"code": "code",
"groupId": "L0LV7ENT",
"version": {
"id": "1.0.0.0"
},
"status": "Done",
"type": "main"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"code": "code",
"groupId": "L0LV7ENT",
"version": {
"id": "2.0.0.0"
},
"status": "Done",
"type": "main"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"code": "code",
"groupId": "F6WJ9QP7",
"version": {
"id": "1.1.0.0"
},
"status": "Done",
"type": "main"
}
]
Here, I would like to sort the result in ascending order by version.id and group the result by groupId.
Hence, I used the following query:
// Pipeline from the question: filter the documents, sort them, then group
// them by groupId while collecting the full documents.
db.collection.aggregate([
{
// Keep only documents matching all four conditions (type, code, status, groupId).
"$match": {
"$and": [
{
"type": "main",
"code": {
"$in": [
"code"
]
},
"status": {
"$in": [
"Done",
"Completed"
]
},
"groupId": {
"$in": [
"L0LV7ENT",
"F6WJ9QP7"
]
}
}
]
}
},
{
// Sort keys apply in the order listed: _id is the leading key, so version.id
// only orders documents whose _id values tie.
"$sort": {
"_id": 1,
"version.id": 1
}
},
{
// One output document per groupId; "services" collects each whole input
// document ($$ROOT). The order in which the groups themselves are emitted is
// not set by this stage.
"$group": {
"_id": {
"groupId": "$groupId"
},
"services": {
"$push": "$$ROOT"
}
}
}
])
But the result I am getting is not stable. Sometimes I see the data with "_id": ObjectId("5a934e000102030405000002") coming first, then ObjectId("5a934e000102030405000000") and ObjectId("5a934e000102030405000001").
It seems intermittent. Is there any way to get a stable result?
EDIT
You can try it here
From the documentation:
$group does not order its output documents.
So you will need to sort after the group stage to have a deterministic output order.

Mongo Aggregate Combine Two Documents

Once I've unwound a sub-document array, how do I put it back together with all the original root fields?
Consider the following Tasks data set:
[
{
"_id": "5e95bb1cf36c0ab3247036bd",
"name": "Task A",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87"
},
{
"_id": "5e95bb30f36c0ab3247036be",
"name": "Task B1",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"parent": "5e95bb1cf36c0ab3247036bd"
},
{
"_id": "5e95bb35f36c0ab3247036bf",
"name": "Task B2",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"parent": "5e95bb1cf36c0ab3247036bd"
}
]
So, then I run $graphLookup to get the parent task and populate its children, and then $unwind it and populate the creator field:
// Pipeline from the question: load root tasks, attach their descendants, then
// resolve each descendant's creator.
[
{
// Root tasks only: documents that have no "parent" field.
"$match": {
"parent": {
"$exists": false
}
}
},
{
// Recursively collect tasks whose "parent" points at this task (or at one
// of its already-found descendants) into "children".
"$graphLookup": {
"from": "tasks",
"startWith": "$_id",
"connectFromField": "_id",
"connectToField": "parent",
"as": "children"
}
},
{
// One output document per child; the root fields are repeated on each.
"$unwind": {
"path": "$children"
}
},
{
// Replace children.creator with the matching documents from "users" (an array).
"$lookup": {
"from": "users",
"localField": "children.creator",
"foreignField": "_id",
"as": "children.creator"
}
},
{
// Flatten the lookup array into a single embedded creator document.
"$unwind": {
"path": "$children.creator"
}
}
]
Which returns the following documents:
[
{
"_id": "5e95bb1cf36c0ab3247036bd",
"name": "Task A",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"children": [
{
"_id": "5e95bb30f36c0ab3247036be",
"name": "Task B1",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": {
"name": "Jack Frost"
},
"parent": "5e95bb1cf36c0ab3247036bd"
}
]
},
{
"_id": "5e95bb1cf36c0ab3247036bd",
"name": "Task A",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"children": [
{
"_id": "5e95bb35f36c0ab3247036bf",
"name": "Task B2",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": {
"name": "Bill Nye"
},
"parent": "5e95bb1cf36c0ab3247036bd"
}
]
}
]
Lastly, I need to merge all of these duplicate documents back together and join the $children. This is the part I can't figure out. Below is some junk I'm trying but it seems messy to have to specifically list every property.
Is there a better way to combine multiple (mostly) matching docs?
// Regrouping attempt from the question; "..." stands for the earlier stages.
// NOTE(review): properties, watchers and assignees do not appear in the sample
// task documents shown earlier — presumably they exist on the real data.
[
...
{
// Collapse the unwound documents back into one document per original _id.
"$group": {
"_id": "$_id",
"name": {
"$mergeObjects": "$properties"
},
"watchers": {
"$addToSet": "$watchers"
},
"assignees": {
"$addToSet": "$assignees"
},
"org": {
"$addToSet": "$$ROOT.org"
},
// Gather every unwound child back into a single array.
"children": {
"$push": "$children"
}
}
}
]
Answering my own question here, the best solution I can find is to specify each property but pass it the $first operator. This will ensure that the original value will be passed through.
{
$group: {
_id: '$_id',
name: {$first: '$name'},
org: {$first: '$org'},
creator: {$first: '$creator'},
children: {$push: '$children'}
}
}

Combining unique elements of arrays without $unwind

I would like to get the unique elements of all arrays in a collection. Consider the following collection
[
{
"collection": "collection",
"myArray": [
{
"name": "ABC",
"code": "AB"
},
{
"name": "DEF",
"code": "DE"
}
]
},
{
"collection": "collection",
"myArray": [
{
"name": "GHI",
"code": "GH"
},
{
"name": "DEF",
"code": "DE"
}
]
}
]
I can achieve this by using $unwind and $group like this:
// Distinct elements of myArray across the whole collection, via $unwind.
db.collection.aggregate([
  // One document per array element.
  { "$unwind": { "path": "$myArray" } },
  // A single group for the whole collection; $addToSet drops duplicates.
  {
    "$group": {
      "_id": null,
      "data": { "$addToSet": "$myArray" }
    }
  }
])
And get the output:
[
{
"_id": null,
"data": [
{
"code": "GH",
"name": "GHI"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "AB",
"name": "ABC"
}
]
}
]
However, the array "myArray" will have several elements (about 6), and the number of documents passed into this stage of the pipeline will be about 600. So unwinding the array would result in a total of about 3600 documents being processed. I would like to know if there's a way for me to achieve the same result without unwinding.
You can use the aggregation below:
// Distinct elements of myArray across the whole collection, without $unwind.
db.collection.aggregate([
  // Collect every document's myArray into one array of arrays.
  {
    $group: {
      _id: null,
      data: { $push: "$myArray" }
    }
  },
  // Fold the arrays together; $setUnion drops duplicate elements at each step.
  {
    $project: {
      data: {
        $reduce: {
          input: "$data",
          initialValue: [],
          "in": { $setUnion: ["$$this", "$$value"] }
        }
      }
    }
  }
])
Output
[
{
"_id": null,
"data": [
{
"code": "AB",
"name": "ABC"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "GH",
"name": "GHI"
}
]
}
]