Remove multiple objects from nested array 3 - mongodb

I try to clean my collection with single update query , need to remove some deeply nested objects , but without breaking other objects , here is a good solution provided by #rickhg12hs:
Remove multiple objects from deeply nested array 2
but it has small drawback , it is breaking the content of _a._p object when there is no _a._p.s object inside...
and original solution provided by #nimrod serok:
Remove multiple elements from deep nested array with single update query
but it has other issue , when there is missing "_a._p.s.c" , "_a._p.s.d" or "_a._p.s.a" object it add objects with null values instead which afcourse is not expected ...
Playground test
This are 2x example original documents:
[
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
did: "x",
dst: "y",
den: "z"
},
{
id: 2,
"dateP": "20200-09-20"
}
]
},
"c": {
"t": [
{
id: 3,
"dateP": "20300-09-22"
},
{
id: 4,
"dateP": "20300-09-23",
did: "x",
dst: "y",
den: "z"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
}
}
}
]
},
{
"_id": ObjectId("5c05984246a0201286d4b57b"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
_t: "Some field",
_x: "Some other field"
}
}
]
}
]
Expected result after update:
[
{
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
"dateP": "20200-09-20",
"den": "z",
"did": "x",
"dst": "y",
"id": 1
}
]
},
"c": {
"t": [
{
"dateP": "20300-09-23",
"den": "z",
"did": "x",
"dst": "y",
"id": 4
}
]
}
}
}
}
],
"_id": ObjectId("5c05984246a0201286d4b57a"),
"f": "x"
},
{
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
_t: "Some field",
_x: "Some other field"
}
}
],
"_id": ObjectId("5c05984246a0201286d4b57b"),
"f": "x"
}
]
The goal is with single update query to remove any objects under _a._p.s.[a|c|d].t where the fields did,dst and den are missing but without breaking other objects _a._p where _a._p.s do not exists ...

Looks like a small change to #rickhg12hs's answer can solve this:
db.collection.update({},
[
{$set: {
_a: {$map: {
input: "$_a",
as: "elem",
in: {$cond: [
{$or: [
{$eq: [{$type: "$$elem._p"}, "missing"]},
{$eq: [{$type: "$$elem._p.s"}, "missing"]}
]},
"$$elem",
{
_p: {s: {
$arrayToObject: {$map: {
input: {$objectToArray: "$$elem._p.s"},
as: "anyKey",
in: {
k: "$$anyKey.k",
v: {
t: {$filter: {
input: "$$anyKey.v.t",
as: "t",
cond: {$setIsSubset: [
["did", "dst", "den"],
{$map: {
input: {$objectToArray: "$$t"},
in: "$$this.k"
}}
]}
}}
}
}
}}
}
}}
]}
}}
}}
],
{
"multi": true
})
See how it works on the playground example

Related

mongoDB count deeply nested array elements filtered by and condition

I am attempting to count elements in 3x nested array , I have some progress , but struggling to filter based on lang:"EN" condition inside the 2x $reduced 1x filter :
Here is example document:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": [
{
"pid": 1,
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
lang: "EN"
},
{
id: 2,
"dateP": "20200-09-20",
lang: "En"
}
]
},
"c": {
"t": [
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
},
h: "Some data"
}
]
}]
}
And here is my attempt ( just need to filter only the elements with lang:"EN"
db.collection.aggregate([
{
$project: {
res: {
$reduce: {
input: "$_a",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$cond": {
"if": {
"$eq": [
{
"$type": "$$this._p"
},
"array"
]
},
"then": {
$reduce: {
input: "$$this._p",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$filter": {
"input": {
"$objectToArray": "$$this.s"
},
"as": "f",
"cond": {
"$eq": [
"$$f.k",
"c"
]
}
}
}
]
}
}
},
"else": []
}
}
]
}
}
}
}
},
{
$unwind: "$res"
},
{
$unwind: "$res.v.t"
},
{
$count: "Total"
}
])
I need to count all _a[]._p[].s.c.t[] where lang:"EN","en","En" , note at object s there is multiple nested elements c , a , d , etc , only the c need to be counted where lang:"EN" , I managed to filter only the "c" but struggling to add the lang:"EN","en","En" inside the $filter.cond , can anybody help here?
Expected playground output is:
{count:7}
I can add final $match condition and clear the lang:EN , but I am wondering if there is better option to be done inside the reduce/reduce/objectToArray/cond and avoid the $unwind's?
Playgorund
One option to avoid $unwind is:
db.collection.aggregate([
{$match: {"_a._p.s.c.t": {$elemMatch: {lang: {$in: ["EN", "En", "en"]}}}}},
{$project: {
res: {$reduce: {
input: "$_a._p.s.c.t",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}}
}},
{$project: {
res: {$reduce: {
input: "$res",
initialValue: 0,
in: {$sum: [
"$$value",
{$size: {$filter: {
input: "$$this",
as: "inner",
cond: {$in: ["$$inner.lang", ["EN", "En", "en"]]}
}}}
]}
}}
}},
{$group: {_id: 0, count: {$sum: "$res"}}}
])
See how it works on the playground example

mongoDB count array elements in deep nested array objects

I am attempting to prepare aggregation query for faster deep nested elements count , collection is pretty big(100M docs / 1TB / mongodb 4.4) so any $unwind's make the task very slow , please, advice if there is any option to use $reduce / $filter or other faster option:
Example document:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": [
{
"pid": 1,
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
lang: "EN"
},
{
id: 2,
"dateP": "20200-09-20",
lang: "En"
}
]
},
"c": {
"t": [
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
},
h: "Some data"
}
]
}
]
}
I need to count number of "_a[]._p[]._s.c.t[]" array elements where lang: $in:["En","en" ,"EN","En","eN"]
Note elements under "_a._p._s.a.t" or "_a._p._s.d.t" shall not be included in the count ...
Expected result 1:
{ count:2}
Expected result 2:
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
}
Please, advice?
Thanks
1.Extended example that need to be fixed playground (count expected to be 8)
Here is my unwind version , but for big collection it looks pretty expensive:
2. Playground unwind version ( expensive )
db.myCollection.aggregate([
{
$project: {
count: {
$size: {
$filter: {
input: "$_a._p.s.t",
as: "t",
cond: { $ne: ["$$t", null] }
}
}
}
}
}
])

Remove multiple objects from nested array 4

please, help in my attempt to clean my documents from corrupted sub-objects , few solutions proposed in previous questions work for most of the cases , but there is specific cases where there is more objects at same nested level that shall not be cleaned:
Example document:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
did: "x",
dst: "y",
den: "z"
},
{
id: 2,
"dateP": "20200-09-20"
}
]
},
"c": {
"t": [
{
id: 3,
"dateP": "20300-09-22"
},
{
id: 4,
"dateP": "20300-09-23",
did: "x",
dst: "y",
den: "z"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
},
h: "This is cleaned but it shauld not"
}
}
]
}
All objects where did,dst,den are missing from _a._p.s.[a|c|d].t need to be removed ,
expected result:
[
{
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
"dateP": "20200-09-20",
"den": "z",
"did": "x",
"dst": "y",
"id": 1
}
]
},
"c": {
"t": [
{
"dateP": "20300-09-23",
"den": "z",
"did": "x",
"dst": "y",
"id": 4
}
]
}
},
h: "This is cleaned but it shauld not"
}
}
],
"_id": ObjectId("5c05984246a0201286d4b57a"),
"f": "x"
}
]
Very good solutions provided by #nimrod serok & #rickhg12hs here: , but unfortunatelly not working for all cases , for example for cases where there is more key/values at the level "_a._p" beside "s" the other key/values beside "s" are cleaned like _a._p.h:"..." in the example , please, advice if there is any easy option to be solved with mongo update query?
Playground example
One option is to add $mergeObjects to the party:
db.collection.update({},
[
{
"$set": {
"_a": {
"$map": {
"input": "$_a",
"as": "elem",
"in": {
"$cond": [
{
$or: [
{
"$eq": [
{
"$type": "$$elem._p"
},
"missing"
]
},
{
"$eq": [
{
"$type": "$$elem._p.s"
},
"missing"
]
}
]
},
"$$elem",
{
$mergeObjects: [
"$$elem._p",
{
"s": {
"$arrayToObject": {
"$map": {
"input": {
"$objectToArray": "$$elem._p.s"
},
"as": "anyKey",
"in": {
"k": "$$anyKey.k",
"v": {
"t": {
"$filter": {
"input": "$$anyKey.v.t",
"as": "t",
"cond": {
"$setIsSubset": [
[
"did",
"dst",
"den"
],
{
"$map": {
"input": {
"$objectToArray": "$$t"
},
"in": "$$this.k"
}
}
]
}
}
}
}
}
}
}
}
}
]
}
]
}
}
}
}
}
],
{
"multi": true
})
See how it works on the playground example

Remove multiple objects from deeply nested array 2

I need to remove some inconsistent objects not having did,dst and den from deeply nested array , please, advice if this can be done with single update query for all documents in the collection ?
This is example of my original document:
[
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
did: "x",
dst: "y",
den: "z"
},
{
id: 2,
"dateP": "20200-09-20"
}
]
},
"c": {
"t": [
{
id: 3,
"dateP": "20300-09-22",
},
{
id: 4,
"dateP": "20300-09-23",
did: "x",
dst: "y",
den: "z"
},
{
id: 5,
"dateP": "20300-09-23",
}
]
}
}
}
}
]
}
]
After the update , the document need to look as follow:
[
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
did: "x",
dst: "y",
den: "z"
}
]
},
"c": {
"t": [
{
id: 4,
"dateP": "20300-09-23",
did: "x",
dst: "y",
den: "z"
}
]
}
}
}
}
]
}
]
Please, note a.t , c.t and d.t are all possible objects inside s object , but they are not compulsory in all documents so in some documents they can be missing , in other documents there can be only a.t and c.t ,but not d.t ...
#nimrod serok helped with a partial solution here:
Remove multiple elements from deep nested array with single update query
, but there is a small drawback , missing a,c, or d objects in original document do not need to appear in the resulting document as null since they do not exist and not expected:
playground
( d.t:null and c.t:null shall not appear after the update )
Here's one way you could do it where the field name after _p.s could be anything. It feels a bit fragile though since all the other field names and depths need to be constant.
db.collection.update({},
[
{
"$set": {
"_a": {
"$map": {
"input": "$_a",
"as": "elem",
"in": {
"$cond": [
{"$eq": [{"$type": "$$elem._p"}, "missing"]},
"$$elem",
{
"_p": {
"s": {
"$arrayToObject": {
"$map": {
"input": {"$objectToArray": "$$elem._p.s"},
"as": "anyKey",
"in": {
"k": "$$anyKey.k",
"v": {
"t": {
"$filter": {
"input": "$$anyKey.v.t",
"as": "t",
"cond": {
"$setIsSubset": [
["did", "dst", "den"],
{
"$map": {
"input": {"$objectToArray": "$$t"},
"in": "$$this.k"
}
}
]
}
}
}
}
}
}
}
}
}
}
]
}
}
}
}
}
],
{"multi": true}
)
Try it on mongoplayground.net.

How to get argmax/argmin of multiple fields simultaneously in mongodb?

Here's the data example I'm working with.
[
{
"uid": "111",
"a": 1,
"b": 3,
"c": 1,
},
{
"uid": "222",
"a": 2,
"b": 2,
"c": 2
},
{
"uid": "333",
"a": 3,
"b": 1,
"c": 3
}
]
Then I want to perform argmax on fields "a" and "b", and argmin on field "c" and return the "uid" as the result.
For example:
For "a", it's maximum value is 3, the corresponding "uid" is "333", so argmax of "a" should be "uid" : "333".
The question is what query should be executed so that I can get the result as below?
[
{
"argmax_of_a": "333",
"argmax_of_b": "111",
"argmin_of_c": "111",
}
]
Here's the code snipped I'm playing with https://mongoplayground.net/p/gEDuHd-aCiZ
I can find someway to get argmax/argmin of one specific field, but I have no idea how to work on multiple fields simultaneously.
Thanks in advance!
give this aggreation pipeline a try:
db.collection.aggregate(
[
{
$group: {
_id: null,
a: { $push: { uid: '$uid', val: '$a' } },
b: { $push: { uid: '$uid', val: '$b' } },
c: { $push: { uid: '$uid', val: '$c' } }
}
},
{
$project: {
_id: 0,
max_of_a: { $arrayElemAt: ["$a", { $indexOfArray: ["$a.val", { $max: '$a.val' }] }] },
max_of_b: { $arrayElemAt: ["$b", { $indexOfArray: ["$b.val", { $max: '$b.val' }] }] },
max_of_c: { $arrayElemAt: ["$c", { $indexOfArray: ["$c.val", { $max: '$c.val' }] }] }
}
},
{
$project: {
arg_max_of_a: '$max_of_a.uid',
arg_max_of_b: '$max_of_b.uid',
arg_max_of_c: '$max_of_c.uid'
}
}
])