MongoDB count,min,max,avg for aggregate using field with List of objects - mongodb

I have a problem getting statistics data from a MongoDB collection.
Collection
[
{
"_id": {"$oid": "616309f71b021c754992bfca"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["I07WOS4YJ0N7YRFE7309"]
}
],
"fromTaskId": 20,
"nodeName": "5493000U0YGG4VEQOX65"
},
{
"_id": {"$oid": "616309f71b021c754992bfcb"},
"correlatedNodes": [
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19", "8945007IZBKFQUQLIP85"]
},
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19"]
}
],
"fromTaskId": 20,
"nodeName": "89450012XZ2GPWGIGH37"
},
{
"_id": {"$oid": "616309f71b021c754992bfcc"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
},
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
}
],
"fromTaskId": 20,
"nodeName": "815600228127946DFF05"
},
{
"_id": {"$oid": "616309f71b021c754992bfcd"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["549300LI58A0MHGHTZ98"]
}
],
"fromTaskId": 20,
"nodeName": "549300NV4OCF16TAS048"
}
]
I want to get basic statistical information for the correlatedNodes field.
Something like:
{
"IS_ULTIMATELY_CONSOLIDATED_BY": {
"count": 567,
"avg": 22,
"min": 3,
"max": 50
}
}
By count I mean the total size of the nodes lists for each connection type.
I tried aggregate query like this:
db.node_correlations.aggregate([
{
$project: {"correlatedNodes": "$correlatedNodes" },
$unwind: "$correlatedNodes"
}
]);
But I received this error:
[2021-10-13 12:14:10] com.mongodb.MongoCommandException: Command failed with error 40323 (Location40323): 'A pipeline stage specification object must contain exactly one field.' on server localhost:27017. The full response is {"ok": 0.0, "errmsg": "A pipeline stage specification object must contain exactly one field.", "code": 40323, "codeName": "Location40323"}

Thank you, everyone, for the help.
The working solution is:
db.node_correlations.aggregate([
{
"$unwind": "$correlatedNodes"
},
{
"$group": {
"_id": "$correlatedNodes.type",
"count": {
"$sum": {"$size": "$correlatedNodes.nodes"}
},
"avg": {
"$avg": {"$size": "$correlatedNodes.nodes"}
},
"min": {
"$min": {"$size": "$correlatedNodes.nodes"}
},
"max": {
"$max": {"$size": "$correlatedNodes.nodes"}
}
}
}
])

Related

MongoDB Unwind Error: cannot encode object of type: <class 'set'>

hope you're fine.
I cannot seem to find a way to aggregate the following document by 'equity id'.
{
"_id": {
"$oid": "6001dc246192c700013e8252"
},
"user": "blablabla",
"_type": "User::Individual",
"created_at": {
"$date": "2021-01-15T18:17:11.130Z"
},
"integrations": [{
"_id": {
"$oid": "6001dc62e7a0970001258da8"
},
"status": "completed",
"authentication_failed_msg": null
}],
"portfolios": [{
"_id": {
"$oid": "6001dc62e7a0970001258da9"
},
"_type": "SimplePortfolio",
"transactions": [{
"_id": {
"$oid": "6001dc62e7a0970001258daa"
},
"settlement_period": 2,
"expenses": 0,
"source": "integration",
"_type": "Transaction::Equity::Buy",
"date": {
"$date": "2020-03-02T00:00:00.000Z"
},
"shares": 100,
"price": 13.04,
"equity_id": "abcd"
}, {
"_id": {
"$oid": "6001dc62e7a0970001258dab"
},
"settlement_period": 2,
"expenses": 0,
"source": "integration",
"_type": "Transaction::Equity::Buy",
"date": {
"$date": "2020-03-02T00:00:00.000Z"
},
"shares": 1000,
"price": 1.03,
"equity_id": "efgh"
I tried something like
db.collection.aggregate([{"$unwind": {'$portfolios.transactions'}},
{"$group" : {"_id": "$equity_id"}}])
Got error InvalidDocument: cannot encode object: {'$portfolios.transactions'}, of type: <class 'set'>
Ideally, what I want is a list grouped by user and equity_id, with the sum of its shares. Does anyone know if the error is caused by my aggregation or the document structure?
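The error is caused by the aggregation: {'$portfolios.transactions'} is a Python set literal, which cannot be encoded as BSON, so pass the field path as a plain string instead. Also, because transactions is an array nested inside the portfolios array, you should $unwind twice.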
db.collection.aggregate([
{
"$unwind": "$portfolios"
},
{
"$unwind": "$portfolios.transactions"
},
{
"$group": {
"_id": "$portfolios.transactions.equity_id"
}
}
])
mongoplayground

MongoDB query to get information about circular dependency

I have collection like this in MongoDB:
[
{
"_id": {"$oid": "616309f71b021c754992bfca"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["I07WOS4YJ0N7YRFE7309"]
}
],
"fromTaskId": 20,
"nodeName": "5493000U0YGG4VEQOX65"
},
{
"_id": {"$oid": "616309f71b021c754992bfcb"},
"correlatedNodes": [
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19", "8945007IZBKFQUQLIP85"]
},
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["529900P6OUCFPVWCFE19"]
}
],
"fromTaskId": 20,
"nodeName": "89450012XZ2GPWGIGH37"
},
{
"_id": {"$oid": "616309f71b021c754992bfcc"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
},
{
"type": "IS_DIRECTLY_CONSOLIDATED_BY",
"nodes": ["815600C9588783AB0210"]
}
],
"fromTaskId": 20,
"nodeName": "815600228127946DFF05"
},
{
"_id": {"$oid": "616309f71b021c754992bfcd"},
"correlatedNodes": [
{
"type": "IS_ULTIMATELY_CONSOLIDATED_BY",
"nodes": ["549300LI58A0MHGHTZ98"]
}
],
"fromTaskId": 20,
"nodeName": "549300NV4OCF16TAS048"
}
]
Is it possible to get information about circular dependencies using a query?
For example, the node with nodeName 5493000U0YGG4VEQOX65 has a list that contains node name 89450012XZ2GPWGIGH37, and that node in turn has a list that contains 5493000U0YGG4VEQOX65.
I want to detect this scenario and list the nodeNames where this situation occurs.
You may want to use $graphLookup
$unwind the correlatedNodes and correlatedNodes.nodes for further processing
Perform $graphLookup with restrictSearchWithMatch to stop the recursive search when it reaches the starting node itself:
{
"$graphLookup": {
"from": "collection",
"startWith": "$correlatedNodes.nodes",
"connectFromField": "correlatedNodes.nodes",
"connectToField": "nodeName",
"as": "nodeChain",
"depthField": "depth",
"restrictSearchWithMatch": {
"nodeName": { $ne: "$nodeName" }
}
}
}
Use $reduce to determine whether the nodeChain is circular; you can achieve this by checking whether nodeChain.correlatedNodes.nodes equals the outermost nodeName.
$match the result in step 3 to find the circular cases.
Here is the Mongo playground for your reference.

How to get result using stable sort using mongodb

I am working on making a query which can sort the result after grouping keys in MongoDB.
Following is the example data in DB
[
{
"_id": ObjectId("5a934e000102030405000000"),
"code": "code",
"groupId": "L0LV7ENT",
"version": {
"id": "1.0.0.0"
},
"status": "Done",
"type": "main"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"code": "code",
"groupId": "L0LV7ENT",
"version": {
"id": "2.0.0.0"
},
"status": "Done",
"type": "main"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"code": "code",
"groupId": "F6WJ9QP7",
"version": {
"id": "1.1.0.0"
},
"status": "Done",
"type": "main"
}
]
Here, I would like to sort the result in ascending order according to the version.id and to group the result according to the groupId.
Hence, I used the following query
db.collection.aggregate([
{
"$match": {
"$and": [
{
"type": "main",
"code": {
"$in": [
"code"
]
},
"status": {
"$in": [
"Done",
"Completed"
]
},
"groupId": {
"$in": [
"L0LV7ENT",
"F6WJ9QP7"
]
}
}
]
}
},
{
"$sort": {
"_id": 1,
"version.id": 1
}
},
{
"$group": {
"_id": {
"groupId": "$groupId"
},
"services": {
"$push": "$$ROOT"
}
}
}
])
But the result I am getting is not stable. Sometimes I see, the data with "_id": ObjectId("5a934e000102030405000002") coming first then ObjectId("5a934e000102030405000000") and ObjectId("5a934e000102030405000001").
It seems intermittent. Is there any way to get a stable result?
EDIT
You can try it here
From the documentation:
$group does not order its output documents.
So you will need to sort after the group stage to have a deterministic output order.

Combining unique elements of arrays without $unwind

I would like to get the unique elements of all arrays in a collection. Consider the following collection
[
{
"collection": "collection",
"myArray": [
{
"name": "ABC",
"code": "AB"
},
{
"name": "DEF",
"code": "DE"
}
]
},
{
"collection": "collection",
"myArray": [
{
"name": "GHI",
"code": "GH"
},
{
"name": "DEF",
"code": "DE"
}
]
}
]
I can achieve this by using $unwind and $group like this:
db.collection.aggregate([
{
$unwind: "$myArray"
},
{
$group: {
_id: null,
data: {
$addToSet: "$myArray"
}
}
}
])
And get the output:
[
{
"_id": null,
"data": [
{
"code": "GH",
"name": "GHI"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "AB",
"name": "ABC"
}
]
}
]
However, the array "myArray" will have a lot of elements (about 6) and the number of documents passed into this stage of the pipeline will be about 600. So unwinding the array would give me a total of 3600 documents being processed. I would like to know if there's a way for me to achieve the same result without unwinding
You can use the aggregation below:
db.collection.aggregate([
{ "$group": {
"_id": null,
"data": { "$push": "$myArray" }
}},
{ "$project": {
"data": {
"$reduce": {
"input": "$data",
"initialValue": [],
"in": { "$setUnion": ["$$this", "$$value"] }
}
}
}}
])
Output
[
{
"_id": null,
"data": [
{
"code": "AB",
"name": "ABC"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "GH",
"name": "GHI"
}
]
}
]

MongoDB projection. Operator $add field|expression array awareness or after $slice

I've got a collection that looks like:
[{
"org": "A",
"type": "simple",
"payFor": 3,
"price": 100
},
{
"org": "A",
"type": "custom",
"payFor": 2,
"price": 115
},
{
"org": "B",
"type": "simple",
"payFor": 1,
"price": 110
},
{
"org": "B",
"type": "custom",
"payFor": 2,
"price": 200
},
{
"org": "B",
"type": "custom",
"payFor": 4,
"price": 220
}]
I need to produce a result with a query that groups by "org", where the payments for each "type" cover only the first "payFor" prices.
I'm trying to use the result of a $slice expression in $add, but this does not work.
pipeline:
[{
"$group": {
"_id": {
"org": "$org",
"type": "$type"
},
"payFor": {
"$max": "$payFor"
},
"count": {
"$sum": 1
},
"prices": {
"$push": "$price"
}
}
},
{
"$group": {
"_id": "$_id.org",
"payments": {
"$push": {
"type": "$_id.type",
"forFirst": "$payFor",
"sum": {
"$cond": [
{
"$gte": [
"$payFor",
"$count"
]
},
{
"$add": {
"$prices": {
"$slice": "$count"
}
}
},
{
"$add": "$prices"
}
]
}
}
}
}
}]
I know that it is possible to traverse the unwound prices and pick only the first "payFor" of them, but the real collections are richer than the example above and this operation would produce unnecessary overhead.
I need some advice from the community, please. Thanks.