MongoDb : Find and filter values from nested map - mongodb

I have some data in MongoDB:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"orgId": "606abce197dc265ac41ae82c",
"registrations": {
"id1": {
"status": "status",
"topStage": {
"id": "stage1",
"name": "stage1"
}
},
"id2": {
"status": "status",
"topStage": {
"id": "stage1",
"name": "stage1"
}
},
"id3": {
"status": "status",
"topStage": {
"id": "stage2",
"name": "stage2"
}
}
}
}
]
I am expecting to pass a stage id (at path registrations-> topStage -> id) and return all matching key values.
I have written the following query:
db.collection.aggregate([
{
$project: {
teams: {
$objectToArray: "$registrations"
},
original: "$$ROOT"
}
},
{
"$project": {
"teams": {
"$filter": {
"input": "$teams",
"as": "team",
"cond": {
"$eq": [
"$$team.v.topStage.id",
"stage1"
]
}
}
}
}
},
{
"$project": {
"registrations": {
"$arrayToObject": "$teams"
}
}
}
])
It does return me right values
for stage1 as stage id
[
{
"_id": ObjectId("5a934e000102030405000000"),
"registrations": {
"id1": {
"status": "status",
"topStage": {
"id": "stage1",
"name": "stage1"
}
},
"id2": {
"status": "status",
"topStage": {
"id": "stage1",
"name": "stage1"
}
}
}
}
]
and for stage2 as stage id, it returns
[
{
"_id": ObjectId("5a934e000102030405000000"),
"registrations": {
"id3": {
"status": "status",
"topStage": {
"id": "stage2",
"name": "stage2"
}
}
}
}
]
Can someone let me know if this is the best way to write this query or this can be simplified ??

It's the correct way to do it, but there will be a performance impact in the following cases:
If you don't have any other match condition that can use an index.
If you have a match condition, but it matches a few documents whose registrations map contains many entries.
The other option is to alter the schema.
For example, you could store registrations.id1 as registrations: { id: 1, status_id: 2 },
or restructure the data in some other way so that $objectToArray does not need to run on a large set.
If your data is huge, I would recommend adding an index on the nested status id field.
The MongoDB documentation itself suggests evaluating multiple schemas for any data set to get the best out of it.

Related

How to use an existing field value with $push in MongoDb?

I have the following Mongo collection
{
"_id": ObjectId("5524d12d2702a21830bdb8e5"),
"code": "Apple",
"name": "iPhone",
"parameters": [
{
"code": "xxx",
"name": "Andrew",
"value": "9",
},
{
"code": "yyy",
"name": "Joy",
"value": "7",
},
]
}
I am using the following query to push into the parameters array object
db.coll.update({
"parameters.name": "Andrew"
},
{
$push: {
"parameters": {
"code": "$code",
"name": "bar",
"value": "10",
}
}
},
{
multi: true
})
However, for the value of code, I want to use the value of the object that matched (i.e. the object with parameters.name == "Andrew", which here is xxx).
Here's a playground link to the problem https://mongoplayground.net/p/v-j1tCCjiWq
Also, I am using a really old version (3.2) of MongoDb. It would be preferable if the solution worked with that.
With MongoDB v4.4+, you can first $match with your criteria. Chain up $first with $filter to extract the array element you want. Use $concatArrays to append a new element(i.e. same as $push) to the array and $merge to update back into the collection.
// Append a new element to `parameters` that copies the fields of the element
// named "Andrew" (including its `code`) and overrides `name` and `value`.
db.coll.aggregate([
// Keep only documents that contain a parameters element named "Andrew".
{
$match: {
"parameters.name": "Andrew"
}
},
{
$set: {
parameters: {
// Concatenate the existing array with a one-element array holding the new object.
"$concatArrays": [
"$parameters",
[
{
"$mergeObjects": [
// Take the first parameters element whose name is "Andrew" as the base object.
{
"$first": {
"$filter": {
"input": "$parameters",
"as": "p",
"cond": {
$eq: [
"Andrew",
"$$p.name"
]
}
}
}
},
// Overlay constant values for `name` and `value` on top of the matched object.
{
"name": "bar",
"value": "10"
}
]
}
]
]
}
}
},
// NOTE(review): the pipeline reads from `coll` but merges into "coll1"; if the goal is
// to update the source collection in place, the target would need to be "coll" — confirm.
{
"$merge": {
"into": "coll1",
"on": "_id"
}
}
])
Mongo Playground

Select and filter MongoDB subdocument array list

I am trying to select a document while filtering its userInteractions array. I need to remove the item where userId = "633eb753c8e3d3fd71d1c254" and return the document as the result. Is there an easy way to do this with a MongoDB query?
https://mongoplayground.net/p/2UHw52QJkYu
Example of JSON document:
{
"_id": {
"$oid": "633c25965034208db76cfb1e"
},
"email": "test#test.com",
"users": [
{
"type": "person",
"isActive": true,
"userInteractions": [
{
"userId": {
"$oid": "633eb753c8e3d3fd71d1c254"
},
"firstName": "Tom",
"lastName": "Hawkins",
},
{
"userId": {
"$oid": "633eb753c8e3d3fd71d1c222"
},
"firstName": "Melan",
"lastName": "Key",
},
{
"userId": {
"$oid": "633eb753c8e3d3fd71d1c259"
},
"firstName": "Ken",
"lastName": "Olibar",
},
]
}
]
}
Expecting output:
{
"_id": {
"$oid": "633c25965034208db76cfb1e"
},
"email": "test#test.com",
"users": [
{
"type": "person",
"isActive": true,
"userInteractions": [
{
"userId": {
"$oid": "633eb753c8e3d3fd71d1c222"
},
"firstName": "Melan",
"lastName": "Key",
},
{
"userId": {
"$oid": "633eb753c8e3d3fd71d1c259"
},
"firstName": "Ken",
"lastName": "Olibar",
},
]
}
]
}
$set - Set users field.
1.1. $map - Iterate the users array and return a new array.
1.1.1. $mergeObjects - Merge the current iterated document and the document from the result 1.1.1.1.
1.1.1.1. $filter - Filter the document(s) from the userInteractions array from the current iterated document.
db.collection.aggregate([
{
$set: {
users: {
$map: {
input: "$users",
as: "user",
in: {
$mergeObjects: [
"$$user",
{
userInteractions: {
$filter: {
input: "$$user.userInteractions",
cond: {
$ne: [
"$$this.userId",
{
$toObjectId: "633eb753c8e3d3fd71d1c254"
}
]
}
}
}
}
]
}
}
}
}
}
])
Demo @ Mongo Playground

Nested arrays $unwind and $group back together in mongoDB

We have three nested arrays:
principalCredits with 2 objects
credits with 2 objects each
awardNominations.edges with variable totals from 0 to 3
The task is to add a field to the third array of objects awardNominations.edges based on a lookup from eventsCollection.
Here's the data I have (simplified, can copy and paste into MongoDB Compass):
[{
"principalCredits": [
{
"category": {
"id": "director",
"text": "Directors"
},
"totalCredits": 2,
"credits": [
{
"name": {
"id": "nm11813828",
"nameText": {
"text": "Pippa Ehrlich"
},
"awardNominations": {
"total": 2,
"edges": [
{
"node": {
"id": "an1393007",
"isWinner": true,
"award": {
"id": "an1393007",
"year": 2020,
"text": "Green Warsaw Award",
"event": {
"id": "ev0003786",
"text": "Millennium Docs Against Gravity"
},
"category": {
"text": null
}
}
}
},
{
"node": {
"id": "an1428940",
"isWinner": false,
"award": {
"id": "an1428940",
"year": 2021,
"text": "IDA Award",
"event": {
"id": "ev0000351",
"text": "International Documentary Association"
},
"category": {
"text": "Best Writing"
}
}
}
},
]
}
},
"category": {
"id": "director",
"text": "Director"
}
},
{
"name": {
"id": "nm1624755",
"nameText": {
"text": "James Reed"
},
"awardNominations": {
"total": 3,
"edges": [
{
"node": {
"id": "an0694012",
"isWinner": true,
"award": {
"id": "an0694012",
"year": 2015,
"text": "Best of Festival",
"event": {
"id": "ev0001486",
"text": "Jackson Wild Media Awards"
},
"category": {
"text": "Best of Festival"
}
}
}
},
{
"node": {
"id": "an0975779",
"isWinner": true,
"award": {
"id": "an0975779",
"year": 2017,
"text": "RTS West Television Award",
"event": {
"id": "ev0000571",
"text": "Royal Television Society, UK"
},
"category": {
"text": "Documentary"
}
}
}
},
{
"node": {
"id": "an0975781",
"isWinner": true,
"award": {
"id": "an0975781",
"year": 2015,
"text": "Grand Teton Prize",
"event": {
"id": "ev0001356",
"text": "Jackson Hole Film Festival"
},
"category": {
"text": "Best in Festival"
}
}
}
}
]
}
},
"category": {
"id": "director",
"text": "Director"
}
}
]
},
{
"category": {
"id": "writer",
"text": "Writers"
},
"totalCredits": 2,
"credits": [
{
"name": {
"id": "nm11813828",
"nameText": {
"text": "Pippa Ehrlich"
},
"awardNominations": {
"total": 2,
"edges": [
{
"node": {
"id": "an1393007",
"isWinner": true,
"award": {
"id": "an1393007",
"year": 2020,
"text": "Green Warsaw Award",
"event": {
"id": "ev0003786",
"text": "Millennium Docs Against Gravity"
},
"category": {
"text": null
}
}
}
},
{
"node": {
"id": "an1428940",
"isWinner": false,
"award": {
"id": "an1428940",
"year": 2021,
"text": "IDA Award",
"event": {
"id": "ev0000351",
"text": "International Documentary Association"
},
"category": {
"text": "Best Writing"
}
}
}
}
]
}
},
"category": {
"id": "writer",
"text": "Writer"
},
},
{
"name": {
"id": "nm1624755",
"nameText": {
"text": "James Reed"
},
"awardNominations": {
"total": 0,
"edges": []
}
},
"category": {
"id": "writer",
"text": "Writer"
},
}
]
}
]
}]
An example scored award should look like this:
{
"id": "an0975781",
"isWinner": true,
"award": { ... },
"score": 1.5
}
Once all the manipulation is done, the data needs to be in exactly the same shape as it was initially and with no null values. So in the case of the last array, awardNominations.edges, it should be [] as it was, and not { node: { score: null }} or anything else.
To achieve this I have created an aggregation pipeline:
[
{
'$unwind': {
'path': '$principalCredits',
'preserveNullAndEmptyArrays': true
}
}, {
'$unwind': {
'path': '$principalCredits.credits',
'preserveNullAndEmptyArrays': true
}
}, {
'$unwind': {
'path': '$principalCredits.credits.name.awardNominations.edges',
'preserveNullAndEmptyArrays': true
}
}, {
'$lookup': {
'from': 'eventsCollection',
'localField': 'principalCredits.credits.name.awardNominations.edges.node.award.event.id',
'foreignField': 'id',
'as': 'matchingEvent'
}
}, {
'$unwind': {
'path': '$matchingEvent',
'preserveNullAndEmptyArrays': true
}
}, {
'$addFields': {
'principalCredits.credits.name.awardNominations.edges.node.score': {
'$multiply': [
'$matchingEvent.importance', {
'$cond': {
'if': '$principalCredits.credits.name.awardNominations.edges.node.isWinner',
'then': 1.5,
'else': 1.2
}
}
]
}
}
}
]
The above pipeline assigns the score to each award. However, the null values are still there and I have absolutely no idea how to group it back together. I have tried to group with:
{
'$group': {
'_id': '$id',
'titleDoc': {
'$first': '$$ROOT'
},
'allPrincipalCredits': {
'$push': '$principalCredits'
}
}
}
To keep the root and then somehow sort all the records back into shape, but I could not get back to the original object structure.
Any help in putting it all together will be much appreciated!
I'm fairly good with simple aggregations, but this seems to be too much for me currently and would love to learn how to $group things back properly.
I've tried and put together all the knowledge I have so far from different sources and similar answers but can't seem to get it to work.
Lookup collection eventsCollection contains objects like this:
{
"_id": { "$oid": "62c57125d6943d92f83f6fff" },
"id": "ev0030197",
"text": "#AmLatino Film Festival",
"importance": 1
}
So the "rule" in restoring to original structure is that for each $unwind you did to "deconstruct" the document you now have to do a $group to restore it.
As you can imagine in such a pipeline this could be VERY cumbersome. but definitely doable.
However let me propose a different approach that is still very messy but much easier compared to the alternative, additionally it is more efficient from a performance perspective.
(Just a minor side note: the reason your score is still null is that you have a syntax error in your $multiply function.)
Anyway, the idea is to first gather all the unique event ids that exist in the nested documents.
Then execute one lookup to fetch all the relevant events.
And finally add the score field using $map and $mergeObjects instead of $unwinding and $grouping, like so:
Mongo Playground
db.collection.aggregate([
{
$addFields: {
allEvents: {
$reduce: {
input: {
$map: {
input: "$principalCredits",
in: {
$map: {
input: "$$this.credits",
as: "credit",
in: {
$map: {
input: "$$credit.name.awardNominations.edges",
as: "edge",
in: "$$edge.node.award.event.id"
}
}
}
}
}
},
initialValue: [],
in: {
"$concatArrays": [
{
"$reduce": {
input: "$$this",
initialValue: [],
in: {
"$concatArrays": [
"$$this",
"$$value"
]
}
}
},
"$$value"
]
}
}
}
}
},
{
"$lookup": {
"from": "eventsCollection",
"localField": "allEvents",
"foreignField": "id",
"as": "matchingEvents"
}
},
{
$addFields: {
principalCredits: {
$map: {
input: "$principalCredits",
in: {
$mergeObjects: [
"$$this",
{
credits: {
$map: {
input: "$$this.credits",
as: "credit",
in: {
$mergeObjects: [
"$$credit",
{
name: {
"$mergeObjects": [
"$$credit.name",
{
"awardNominations": {
"$mergeObjects": [
"$$credit.name.awardNominations",
{
edges: {
$map: {
input: "$$credit.name.awardNominations.edges",
as: "edge",
in: {
node: {
$mergeObjects: [
"$$edge.node",
{
score: {
"$multiply": [
{
$cond: [
"$$edge.node.isWinner",
1.5,
1.2
]
},
{
$first: {
$map: {
input: {
$filter: {
input: "$matchingEvents",
as: "matchedEvent",
cond: {
$eq: [
"$$matchedEvent.id",
"$$edge.node.award.event.id"
]
}
}
},
as: "matched",
in: "$$matched.importance"
}
}
}
]
}
}
]
}
}
}
}
}
]
}
}
]
}
}
]
}
}
}
}
]
}
}
}
}
},
{
$unset: [
"allEvents",
"matchingEvents"
]
}
])
Mongo Playground
I will just mention that you can make this much, much cleaner by involving some code while keeping the same approach suggested: first get the unique event ids with distinct, then fetch the matching importance for each event, and finally execute a single query using arrayFilters that you construct from this information.
A final side note: the provided pipeline does not deal with null or missing values, so if an array is missing an error will be thrown, as $map expects its input to be a valid array.
This can easily be solved by wrapping each of these expressions with $ifNull, like so:
{
$map: {
input: {$ifNull: ["$$this.credits",[]]}
}
}
This will also replace null values with an empty []
The deeply buried keys (...award.event.id) in arrays confound an easy approach without 1) messing up the structure, as the OP has noted, or 2) incurring potentially very expensive multiple $unwind calls.
Recommendation: Two pass approach. Get the necessary importance values for the principalCredits objects in question, then go back and manually iterate over the collection, diving into the structure and applying the logic score = importance * (isWinner ? 1.5 : 1.2)
PASS 1: Get the ev data
c=db.foo.aggregate([
{$project: {
XX: {$reduce: {
// Rapidly get to things we need to lookup:
input: '$principalCredits.credits.name.awardNominations.edges.node.award.event.id',
// We end up with a mess incl. empty arrays...
// [ [[ev1,ev2], [ev3,ev4]], [], [[ev1,...], [] ... ] ]
// Need to collapse all those arrays of arrays of arrays into
// a single list of ev values, hence a reduce within a reduce:
initialValue: [],
in: {$concatArrays: [
'$$value',
{$reduce: {
input: '$$this',
initialValue: [],
in: {$concatArrays: [ '$$value', '$$this' ] }
}} ]}
}}
}}
// XX is now [ ev1,ev2,ev3,ev4,ev1 ... ]
// The empty arrays are ignored. Don't worry about dupes.
,{$lookup: {
from: "Xev",
let: { evids: "$XX" },
pipeline: [
{$match: {$expr: {$in: ["$id","$$evids"]} } }
],
as: 'XX' // overwrite XX...
}}
]);
evdict = {}
c.forEach(function(d) {
d['XX'].forEach(function(ww) {
evdict[ww['id']] = ww;
});
});
{
"ev0003786" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb17f"),
"id" : "ev0003786",
"text" : "Millennium Docs Against Gravity",
"importance" : 1
},
"ev0000351" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb180"),
"id" : "ev0000351",
"text" : "International Documentary Association",
"importance" : 2
},
"ev0000571" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb181"),
"id" : "ev0000571",
"text" : "Royal Television Society, UK",
"importance" : 3
}
}
PASS 2: Iterate main collection
Left as exercise to reader.
Note that if
The number of events is small.
There is no need or value in performing $match on the initial principalCredits collection (i.e. before the fancy $project/$reduce) to significantly reduce the lookup set into events
then this whole thing is unnecessary. Simply slurp all events into evdict with a quick find and proceed to pass 2.
There is potentially a very cool solution that can do this in one pass
UPDATED
See Tom's answer below.
Note to MongoDB 5.0 users: The new $getField function allows you to pluck out fields by name instead of having to use the standard trick of using dot notation in the `in` clause of $map to access the field. This might be clearer to some:
{$getField: {
"field": "importance",
"input": {
$first: {
$filter: {
input: "$matchingEvents",
as: "matchedEvent",
cond: {
$eq: [
"$$matchedEvent.id",
"$$edge.node.award.event.id"
]
}
}
}
}
}
}

Using $match to query from different arrays with the same key value

Suppose I have this simple JSON data of two documents both with two different arrays namely carPolicies and paPolicies. Within these arrays are objects named as policy where it contains a key 'agent' where the value is '47'.
{
"_id": {
"$oid": "some_id"
},
"name": "qwe",
"password": "pw",
"carPolicies": [
{
"policy": {
"agent": "47"
}
},
{
"policy": {
"agent": "47"
}
}
],
"paPolicies": [
{
"policy": {
"agent": "47"
}
},
{
"policy": {
"agent": "47"
}
}
]
}
{
"_id": {
"$oid": "some_id"
},
"name": "rty",
"password": "wp",
"carPolicies": [
{
"policy": {
"agent": "47"
}
},
{
"policy": {
"agent": "47"
}
}
],
"paPolicies": [
{
"policy": {
"agent": "47"
}
},
{
"policy": {
"agent": "47"
}
}
]
}
Using MongoDB's $match operator, how do I come up with a query that returns the document's name if the agent value is 47 in either array?
This is what I currently have:
db.collection('users').aggregate([
// Get just the docs that contain an agent element where agent is === req.params.name
{$match: {$or: [{'paPolicies.policy.agent': req.params.name}, {'carPolicies.policy.agent': req.params.name}]} },
{
$project: {
policy: {
$filter: {
// how to do an 'or' operator at 'input' so it can be input: '$paPolicies.policy || $carPolicies.policy'
input: '$paPolicies.policy',
as: 'police',
cond: { $eq: ['$$police.agent', req.params.name]}
}
},
_id: 1, name: 1
}
}
])
I know that the above code is wrong but I feel like it's the closest I can currently get to a solution and hopefully gives an idea of what I'm trying to achieve.
If I understand the requirement correctly, how about just using dot (.) notation in a .find() query, with a projection as the second parameter?
db.collection.find({
$or: [
{
"carPolicies.policy.agent": "47"
},
{
"paPolicies.policy.agent": "47"
}
]
},
{
"_id": 1,
"name": 1
})

Get Distinct Document by Max Value of a Field

I have a requirement where I should query on two fields, one of which must be unique and the other of which must be the maximum.
Here is my sample collection
{
"_id": ObjectId('59537b7fe08062b9ee8dfdf6'),
"admin": {
"model": "abc",
"version": "00",
"name":"john",
"age":"30"
}
}
{
"_id": ObjectId('59537b7fe08062b9ee8dfdf7'),
"admin": {
"model": "abc",
"version": "01" ,
"name":"john",
"age":"30"
}
}
{
"_id": ObjectId('59537b7fe08062b9ee8dfdf8'),
"admin": {
"model": "def",
"version": "00" ,
"name":"cena",
"age":"30"
}
}
I have two documents for the same model with different versions. I want to query for each model with its maximum version. I tried simply sorting by version, but that does not work for me.
I am expecting output like this
{
"_id": ObjectId('59537b7fe08062b9ee8dfdf7'),
"admin": {
"model": "abc",
"version": "01" ,
"name":"john",
"age":"30"
}
}
{
"_id": ObjectId('59537b7fe08062b9ee8dfdf8'),
"admin": {
"model": "def",
"version": "00" ,
"name":"cena",
"age":"30"
}
}
Any suggestions will be really helpful.
As Neil said, it is $sort, $group, and $replaceRoot, but with correct values in the query:
// For each distinct admin.model, return the document with the highest admin.version.
db.collection.aggregate([
  // Highest version first. In the sample documents version is a string ("00", "01"),
  // so this is a string comparison rather than a numeric one.
  { $sort: { "admin.version": -1 } },
  // Group by model; after the sort, the first document in each group is the one
  // with the highest version. The whole document is kept under `admin`.
  {
    $group: {
      _id: "$admin.model",
      admin: { $first: "$$ROOT" }
    }
  },
  // Promote the saved document back to the top level so the output has the original shape.
  { $replaceRoot: { newRoot: "$admin" } }
])