mongoDB count array elements in deep nested array objects - mongodb

I am trying to write an aggregation query that counts deeply nested array elements quickly. The collection is pretty big (100M docs / 1 TB / MongoDB 4.4), so any $unwind makes the task very slow. Please advise whether $reduce / $filter or some other faster option can be used:
Example document:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": [
{
"pid": 1,
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
lang: "EN"
},
{
id: 2,
"dateP": "20200-09-20",
lang: "En"
}
]
},
"c": {
"t": [
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
},
h: "Some data"
}
]
}
]
}
I need to count the number of "_a[]._p[].s.c.t[]" array elements where lang: $in: ["En", "en", "EN", "eN"]
Note: elements under "_a._p.s.a.t" or "_a._p.s.d.t" shall not be included in the count ...
Expected result 1:
{ count:2}
Expected result 2:
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
}
Please advise.
Thanks
1. Extended example that needs to be fixed: playground (the count is expected to be 8)
Here is my $unwind version, but for a big collection it looks pretty expensive:
2. Playground $unwind version (expensive)

// Counts, per document, the elements of _a[]._p[].s.c.t[] whose `lang` is any
// casing of "en". Only $map / $filter / $size / $sum are used, so no $unwind
// is needed. Arrays under s.a.t, s.d.t, etc. are never visited and therefore
// never counted.
db.myCollection.aggregate([
  {
    $project: {
      count: {
        // $sum applied to an array-valued expression adds up its numeric elements.
        $sum: {
          $map: {
            // A missing or non-array _a contributes nothing.
            input: { $cond: [{ $isArray: "$_a" }, "$_a", []] },
            as: "a",
            in: {
              $sum: {
                $map: {
                  // _a elements without a _p array (e.g. { _onlineStore: {} }) are skipped.
                  input: { $cond: [{ $isArray: "$$a._p" }, "$$a._p", []] },
                  as: "p",
                  in: {
                    $size: {
                      $filter: {
                        // Only the s.c.t branch is inspected; a missing or
                        // non-array value is treated as empty.
                        input: { $cond: [{ $isArray: "$$p.s.c.t" }, "$$p.s.c.t", []] },
                        as: "t",
                        cond: { $in: ["$$t.lang", ["en", "En", "eN", "EN"]] }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
])

Related

Remove multiple objects from nested array 3

I am trying to clean my collection with a single update query. I need to remove some deeply nested objects without breaking other objects. Here is a good solution provided by #rickhg12hs:
Remove multiple objects from deeply nested array 2
but it has a small drawback: it breaks the content of the _a._p object when there is no _a._p.s object inside...
and the original solution provided by #nimrod serok:
Remove multiple elements from deep nested array with single update query
but it has another issue: when the "_a._p.s.c", "_a._p.s.d" or "_a._p.s.a" object is missing, it adds objects with null values instead, which of course is not expected ...
Playground test
These are two example original documents:
[
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
did: "x",
dst: "y",
den: "z"
},
{
id: 2,
"dateP": "20200-09-20"
}
]
},
"c": {
"t": [
{
id: 3,
"dateP": "20300-09-22"
},
{
id: 4,
"dateP": "20300-09-23",
did: "x",
dst: "y",
den: "z"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
}
}
}
]
},
{
"_id": ObjectId("5c05984246a0201286d4b57b"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
_t: "Some field",
_x: "Some other field"
}
}
]
}
]
Expected result after update:
[
{
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
"dateP": "20200-09-20",
"den": "z",
"did": "x",
"dst": "y",
"id": 1
}
]
},
"c": {
"t": [
{
"dateP": "20300-09-23",
"den": "z",
"did": "x",
"dst": "y",
"id": 4
}
]
}
}
}
}
],
"_id": ObjectId("5c05984246a0201286d4b57a"),
"f": "x"
},
{
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
_t: "Some field",
_x: "Some other field"
}
}
],
"_id": ObjectId("5c05984246a0201286d4b57b"),
"f": "x"
}
]
The goal is to remove, with a single update query, any objects under _a._p.s.[a|c|d].t where the fields did, dst and den are missing, but without breaking other _a._p objects where _a._p.s does not exist ...
Looks like a small change to #rickhg12hs's answer can solve this:
// Pipeline update: for every _a element that has _p.s, keep only those
// _p.s.<key>.t[] entries that carry all of the fields did, dst and den.
// Everything else in the element, in _p and in each _p.s.<key> is preserved
// via $mergeObjects, and a <key> without a `t` array is left untouched
// (no `t: null` is introduced).
db.collection.update({},
[
  {$set: {
    _a: {$map: {
      input: "$_a",
      as: "elem",
      in: {$cond: [
        // Elements without _p or without _p.s are passed through unchanged.
        {$or: [
          {$eq: [{$type: "$$elem._p"}, "missing"]},
          {$eq: [{$type: "$$elem._p.s"}, "missing"]}
        ]},
        "$$elem",
        // Overlay the rebuilt _p.s on top of the original element and
        // original _p so that sibling fields survive the update.
        {$mergeObjects: [
          "$$elem",
          {_p: {$mergeObjects: [
            "$$elem._p",
            {s: {$arrayToObject: {$map: {
              input: {$objectToArray: "$$elem._p.s"},
              as: "anyKey",
              in: {
                k: "$$anyKey.k",
                v: {$cond: [
                  // Only rewrite `t` when it really is an array.
                  {$isArray: "$$anyKey.v.t"},
                  {$mergeObjects: [
                    "$$anyKey.v",
                    {t: {$filter: {
                      input: "$$anyKey.v.t",
                      as: "t",
                      // Keep the entry only if its key set contains did, dst and den.
                      cond: {$setIsSubset: [
                        ["did", "dst", "den"],
                        {$map: {
                          input: {$objectToArray: "$$t"},
                          in: "$$this.k"
                        }}
                      ]}
                    }}}
                  ]},
                  "$$anyKey.v"
                ]}
              }
            }}}}
          ]}}
        ]}
      ]}
    }}
  }}
],
{
  "multi": true
})
See how it works on the playground example

MongoDB - Update nested array with many nested objects

I need to update the dateP in the following structure with "2022-01-02", but it seems not an easy task:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
"_a": [
{
"_p": {
"s": {
"a": {}
}
}
},
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
c: 4
},
{
"dateP": "20200-09-20",
"l": "English",
"size": "XXL"
},
{
c: 1
}
]
},
c: {
t: 2
}
}
}
}
]
}
playground
I attempted this with arrayFilters, but without success, as not all elements exist in all documents and some documents are pretty empty. Please advise.
MongoDB 4.2 community
I believe you need the filtered positional operator on the _a array so that only elements that actually contain the nested _p path are updated.
// Sets dateP to "2022-01-02" on every _a[]._p.s.a.t[] entry whose dateP is
// currently "20200-09-20".
db.collection.update({},
{
  $set: {
    "_a.$[a]._p.s.a.t.$[x].dateP": "2022-01-02"
  }
},
{
  arrayFilters: [
    {
      // Only descend into _a elements where _p.s.a.t really is an array.
      // Checking just `a._p` would also select elements such as
      // { _p: { s: { a: {} } } }, for which the $[x] path cannot be applied.
      "a._p.s.a.t": {
        $type: "array"
      }
    },
    {
      "x.dateP": "20200-09-20"
    }
  ]
})
Demo # Mongo Playground

How to get argmax/argmin of multiple fields simultaneously in mongodb?

Here's the data example I'm working with.
[
{
"uid": "111",
"a": 1,
"b": 3,
"c": 1,
},
{
"uid": "222",
"a": 2,
"b": 2,
"c": 2
},
{
"uid": "333",
"a": 3,
"b": 1,
"c": 3
}
]
Then I want to perform argmax on fields "a" and "b", and argmin on field "c" and return the "uid" as the result.
For example:
For "a", its maximum value is 3 and the corresponding "uid" is "333", so the argmax of "a" should be "uid": "333".
The question is what query should be executed so that I can get the result as below?
[
{
"argmax_of_a": "333",
"argmax_of_b": "111",
"argmin_of_c": "111",
}
]
Here's the code snippet I'm playing with: https://mongoplayground.net/p/gEDuHd-aCiZ
I can find a way to get the argmax/argmin of one specific field, but I have no idea how to work on multiple fields simultaneously.
Thanks in advance!
Give this aggregation pipeline a try:
// Returns the uid holding the maximum of `a`, the maximum of `b` and the
// minimum of `c` across the whole collection, in a single result document.
db.collection.aggregate(
[
  {
    // Collect every (uid, value) pair per field into one array each.
    $group: {
      _id: null,
      a: { $push: { uid: '$uid', val: '$a' } },
      b: { $push: { uid: '$uid', val: '$b' } },
      c: { $push: { uid: '$uid', val: '$c' } }
    }
  },
  {
    // Pick the pair whose value equals the extreme of its array.
    // $indexOfArray returns the first match, so ties resolve to the
    // earliest pushed document.
    $project: {
      _id: 0,
      max_of_a: { $arrayElemAt: ["$a", { $indexOfArray: ["$a.val", { $max: '$a.val' }] }] },
      max_of_b: { $arrayElemAt: ["$b", { $indexOfArray: ["$b.val", { $max: '$b.val' }] }] },
      // argmin is wanted for `c`, hence $min rather than $max.
      min_of_c: { $arrayElemAt: ["$c", { $indexOfArray: ["$c.val", { $min: '$c.val' }] }] }
    }
  },
  {
    // Keep only the uid of each selected pair, using the requested field names.
    $project: {
      argmax_of_a: '$max_of_a.uid',
      argmax_of_b: '$max_of_b.uid',
      argmin_of_c: '$min_of_c.uid'
    }
  }
])

Add field to $map entry in MongoDB

I have MongoDB collection items with following document:
{
"values": [
{ "number1": 5, "number2": 6, "anotherProp": "...", "anotherProp2": "..." },
{ "number1": 8, "number2": 1, "anotherProp": "...", "anotherProp2": "..." }
]
}
Is there any way to add sum property to each item of values (sum = number1 + number2)? I would like to avoid naming all other properties (number1, number2, anotherProp, anotherProp2, ...), only add new one (sum). My current solution is:
// Asker's current approach: rebuilds every element of `values` from scratch,
// adding `sum` (number1 + number2) and re-listing each existing property by name.
db.items.aggregate([{
$project: {
values: {
$map: {
input: "$values",
as: "v",
in: {
// New computed property.
sum: {$add: ["$$v.number1", "$$v.number2"]},
number1: "$$v.number1", // This and next 3 lines I would like to omit.
number2: "$$v.number2",
anotherProp: "$$v.anotherProp",
anotherProp2: "$$v.anotherProp2"
}
}
}
}
}])
Desired result is:
{
"values": [
{ "number1": 5, "number2": 6, "anotherProp": "...", "anotherProp2": "...", "sum": 11 },
{ "number1": 8, "number2": 1, "anotherProp": "...", "anotherProp2": "...", "sum": 9 }
]
}
Is there any way to do this? I tried using $addFields instead of $project, but the result is the same.
Yes, you can use $mergeObjects
// Adds `sum` (number1 + number2) to every element of `values` without having
// to enumerate the element's other properties.
db.collection.aggregate([
  {
    $project: {
      values: {
        $map: {
          input: "$values",
          as: "v",
          in: {
            // Later operands of $mergeObjects win, so the original element
            // goes first: existing fields keep their order, `sum` is appended
            // last, and a stale `sum` already stored in the element cannot
            // override the freshly computed one.
            "$mergeObjects": [
              "$$v",
              {
                sum: {
                  $add: [
                    "$$v.number1",
                    "$$v.number2"
                  ]
                }
              }
            ]
          }
        }
      }
    }
  }
])
MongoPlayground

Mongodb Query for an array with more than word structure

If you have an array in mongodb as follows:
"tokens": [
{
"index": 1,
"word": "I",
"originalText": "I",
"lemma": "I",
"characterOffsetBegin": 0,
"characterOffsetEnd": 5,
"pos": "NNP",
"ner": "PERSON",
"before": "",
"after": " "
},
{
"index": 2,
"word": "played",
"originalText": "played",
"lemma": "play",
"characterOffsetBegin": 6,
"characterOffsetEnd": 11,
"pos": "VBZ",
"ner": "O",
"before": " ",
"after": " "
},
{
"index": 3,
"word": "football",
"originalText": "football",
"lemma": "football",
"characterOffsetBegin": 22,
"characterOffsetEnd": 24,
"pos": "IN",
"ner": "O",
"before": " ",
"after": " "
}
]
and I want to query this array as follows:
I need to check whether both (word: I) and (a word matching the regex p.* with pos: VBZ) are in this array. If yes, I need to return that array.
$elemMatch didn't help, as I am searching for two conditions in that array, {"word": "I"} and {"word": /p.*/, "pos": "VBZ"}, together and in order.
Can anyone help me with this issue?
OK, I think I get what you want, and it's a little tricky because:
If you didn't have the index field, you would have to rely on the order of the array elements, which is bad practice
The solution below is not generic and it will be hard to modify it if you want additional parameters (like more than 2 elements, more complicated regex).
The idea of this solution is to $filter the matching elements and then check that the difference ($subtract) between their index values is 1, which means they are adjacent and in order:
// Finds documents whose `tokens` contain the word "I" immediately followed
// (by `index`) by a VBZ token whose word starts with "p".
db.test.aggregate([
  {
    // Keep only the tokens that satisfy either of the two conditions.
    $addFields: {
      filtered_tokens: {
        $filter: {
          input: '$tokens',
          as: 'token',
          cond: {
            $or: [
              {
                $eq: ['$$token.word', 'I']
              },
              {
                $and: [
                  {
                    // $substrCP works on code points, so a multi-byte first
                    // character cannot be cut in half.
                    $eq: [{$substrCP: ['$$token.word', 0, 1]}, 'p']
                  },
                  {
                    $eq: ['$$token.pos', 'VBZ']
                  }
                ]
              }
            ]
          }
        }
      }
    }
  },
  {
    // Exactly two matches are required, and they must appear as
    // "I" first, then the p-word. $filter preserves array order, so without
    // the positional checks "I I" or "<p-word> I" would also pass.
    $match: {
      filtered_tokens: {$size: 2},
      'filtered_tokens.0.word': 'I',
      'filtered_tokens.1.word': {$ne: 'I'}
    }
  },
  {
    // Replace the helper array with the distance between the two indexes.
    $addFields: {
      filtered_tokens: {
        $subtract: [
          {
            $arrayElemAt: ['$filtered_tokens.index', 1]
          },
          {
            $arrayElemAt: ['$filtered_tokens.index', 0]
          }
        ]
      }
    }
  },
  {
    // A distance of 1 means the two tokens are adjacent.
    $match: {
      filtered_tokens: 1
    }
  }
])
I'm not sure I understood what you need, but I think this is what you are looking for:
// Matches documents whose `tokens` array contains some element with word "I"
// and some element (possibly a different one) whose word matches /p.*/ and
// whose pos is "VBZ". The two conditions use distinct keys, so listing them
// in one filter document ANDs them implicitly.
db.test.find({
  'tokens.word': 'I',
  tokens: {
    $elemMatch: { word: /p.*/, pos: 'VBZ' }
  }
})