"iterate" through all document fields in mongodb - mongodb

I have a collection with documents in this form:
{
"fields_names": ["field1", "field2", "field3"]
"field1": 1,
"field2": [1, 2, 3]
"field3": "12345"
}
where field1, field2, field3 are "dynamic" for each document (I have for each document the fields names in the "fields_names" array)
I would like to test whether 2 documents are equal using the aggregation framework.
I used $lookup stage for getting another documents.
My issue is: how can I "iterate" through the whole fields for my collection?
db.collection.aggregate([
{
{$match: "my_id": "test_id"},
{$lookup:
from: "collection"
let: my_id: "$my_id", prev_id: "$_id"
pipeline: [
{$match: "my_id": "$$my_id", "_id": {$ne: "$$prev_id"}}
]
as: "lookup_test"
}
}])
and in the pipeline of the lookup, I would like to iterate the "fields_names" array for getting the names of the fields, and then access their value and compare between the "orig document" (not the $lookup) and the other documents ($lookup documents).
OR: just to iterate all fields (not include the "fields_names" array)
I would like to fill the "lookup_test" array with all documents which have the same field values.

You will have to compare the two "partial" parts of the documents, meaning you'll have to do this comparison (for each document) inside the $lookup; needless to say, this is going to be a very expensive pipeline. With that said, here's how I would do it:
// For every document with my_id "test_id", collect into "x" the other documents
// of the same collection that contain all of its "fields_names" key/value pairs.
db.collection.aggregate([
{
$match: {
"my_id": "test_id"
}
},
{
"$lookup": {
"from": "collection",
"let": {
id: "$_id",
// Key/value pairs ({k, v}) of the outer document, restricted to the keys
// listed in the outer document's own "fields_names" array.
partialRoot: {
$filter: {
input: {
"$objectToArray": "$$ROOT"
},
as: "fieldObj",
cond: {
"$setIsSubset": [
[
"$$fieldObj.k"
],
"$fields_names"
]
}
}
}
},
pipeline: [
{
$match: {
$expr: {
$and: [
// Exclude the outer document itself.
{
$ne: [
"$$id",
"$_id"
]
},
// Keep the candidate only when the intersection of the outer pairs with the
// candidate's own filtered pairs is as large as the outer pair list, i.e.
// every outer {k, v} pair also appears in the candidate.
// NOTE(review): a candidate that lists extra fields in its own
// "fields_names" still passes this check - confirm that is acceptable.
{
$eq: [
{
$size: "$$partialRoot"
},
{
$size: {
"$setIntersection": [
"$$partialRoot",
{
// Same filtering as partialRoot, but evaluated on the candidate document
// ("$$ROOT" and "$fields_names" refer to the joined document here).
$filter: {
input: {
"$objectToArray": "$$ROOT"
},
as: "fieldObj",
cond: {
"$setIsSubset": [
[
"$$fieldObj.k"
],
"$fields_names"
]
}
}
}
]
}
}
]
}
]
}
}
},
],
"as": "x"
}
}
])
Mongo Playground
If you could dynamically build the query through code you could make this much more efficient by using the same match query in the $lookup stage like so:
const query = { my_id: "test_id" };
db.collection.aggregate([
{
$match: query
},
{
$lookup: {
...
pipeline: [
{ $match: query },
... rest of pipeline ...
]
}
}
])
This way you're only matching documents that at least match the initial query; this should drastically improve query performance (obviously dependent on field x value entropy).
One more caveat to note is that if x documents match you will get the same result x times, meaning you probably want to add a { $limit: 1 } stage to your pipeline.

Related

Find MongoDB documents that are not contained across arrays

MongoDB Collection A contains documents with an array with some document ids of collection B:
Collection A:
{
some_ids_of_b: ["id1", ...]
}
Collection B:
{
_id: "id1"
},
{
_id: "id2"
},
...
How do I query all documents from B whose _ids are NOT contained in the some_ids_of_b arrays of documents of A?
Simple lookup from collection B to A and filter to keep only those documents where you don't find any matches.
// Return the documents of collection B whose _id is not referenced by any
// "some_ids_of_b" array in collection A.
db.collb.aggregate([
  {
    // Join every B document with the A documents whose array contains its _id.
    $lookup: {
      from: "colla",
      localField: "_id",
      // Must be the array field name actually stored in the A documents.
      foreignField: "some_ids_of_b",
      as: "a"
    }
  },
  {
    // Keep only the B documents for which the join found no A document.
    $match: {
      $expr: {
        $eq: [{ $size: "$a" }, 0]
      }
    }
  }
])
Demo
One option is:
// Return the documents of collectionB whose _id is not listed in the
// "some_ids_of_b" array of the selected collectionA document.
// NOTE(review): collADocId is not defined in this snippet; presumably it holds
// the _id of the collectionA document to check against - confirm before use.
db.collectionB.aggregate([
{$lookup: {
from: "collectionA",
let: {my_id: "$_id"},
pipeline: [
{$match: {$and: [
{_id: collADocId},
{$expr: {$in: ["$$my_id", "$some_ids_of_b"]}}
]}},
// Only the _id is needed to know whether a match exists.
{$project: {_id: 1}}
],
as: "some_ids_of_b"
}},
// An absent first element means the lookup result is empty (no match).
{$match: {"some_ids_of_b.0": {$exists: false}}},
// Drop the helper array from the output.
{$unset: "some_ids_of_b"}
])
See how it works on the playground example
You can do it with Aggregation Framework:
$group and $addToSet - To get all $some_ids_of_b from all the documents in A collection.
$set with $reduce - To create an array with all unique values of the IDs from the B collection.
$lookup - To fetch the documents from the B collection, where the _id of the document is not present in the $b_ids array.
$project - To project data as expected output.
// Collect every id referenced from collection A, then fetch the documents of
// collection B that are not among them.
db.A.aggregate([
  // Gather the distinct some_ids_of_b arrays of all A documents into one array.
  { $group: { _id: null, b_ids: { $addToSet: "$some_ids_of_b" } } },

  // Merge that array of arrays into a single set of unique ids.
  {
    $set: {
      b_ids: {
        $reduce: {
          input: "$b_ids",
          initialValue: [],
          in: { $setUnion: ["$$value", "$$this"] }
        }
      }
    }
  },

  // Fetch the B documents whose _id is not part of the merged id set.
  {
    $lookup: {
      from: "B",
      let: { b_ids: "$b_ids" },
      pipeline: [
        { $match: { $expr: { $ne: [{ $in: ["$_id", "$$b_ids"] }, true] } } }
      ],
      as: "data"
    }
  },

  // Expose only the fetched documents.
  { $project: { data: 1, _id: 0 } }
])
Working Example

How to aggregate two collections and match field with array

I need to group the results of two collections candidatos and ofertas, and then "merge" those groups to return an array with matched values.
I've created this example with the aggregate and similar data to make this easier to test:
https://mongoplayground.net/p/m0PUfdjEye4
This is the explanation of the problem that I'm facing.
I can get both groups with the desired results independently:
ofertas collection:
// Count the documents of the ofertas collection per ubicacion_puesto.provincia value.
db.getCollection('ofertas').aggregate([
{"$group" : {_id:"$ubicacion_puesto.provincia", countProvinciaOferta:{$sum:1}}}
]);
This is the result...
candidatos collection:
// Count the documents of the candidatos collection per
// que_busco.ubicacion_puesto_trabajo.provincia value.
db.getCollection('candidatos').aggregate([
{"$group" : {_id:"$que_busco.ubicacion_puesto_trabajo.provincia", countProvinciaCandidato:{$sum:1}}}
]);
This is the result...
What I need to do, is to aggregate those groups to merge their results based on their _id coincidence. I think I'm going in the right way with the next aggregate, but the field countOfertas always returns 0.0. I think that there is something wrong in my project $cond, but I don't know what is it. This is the aggregate:
// Asker's attempt: group candidatos per province, attach the per-province
// groups of ofertas, then try to pick the matching ofertas count.
db.getCollection('candidatos').aggregate([
{"$group" : {_id:"$que_busco.ubicacion_puesto_trabajo.provincia", countProvinciaCandidato:{$sum:1}}},
{
// Uncorrelated sub-pipeline: every candidatos group receives the full list
// of ofertas groups in the "ofertas" array.
$lookup: {
from: 'ofertas',
let: {},
pipeline: [
{"$group" : {_id:"$ubicacion_puesto.provincia", countProvinciaOferta:{$sum:1}}}
],
as: 'ofertas'
}
},
{
$project: {
_id: 1,
countProvinciaCandidato: 1,
countOfertas: {
$cond: {
if: {
// '$ofertas._id' resolves to an array of all the _id values of the looked-up
// groups, so comparing it with the single "$_id" value never succeeds.
$eq: ['$ofertas._id', "$_id"]
},
then: '$ofertas.countProvinciaOferta',
else: 0,
}
}
}
},
{ $sort: { "countProvinciaCandidato": -1}},
{ $limit: 20 }
]);
And this is the result, but as you can see, field countOfertas is always 0
Any kind of help will be welcome
What you have tried is so much appreciated. But in $project you need to use $reduce which helps to loop through the array and satisfy the condition
Here is the code
// Count candidatos per province and attach, for each province, the number of
// ofertas located in that same province.
db.candidatos.aggregate([
  {
    $group: {
      _id: "$que_busco.ubicacion_puesto_trabajo.provincia",
      countProvinciaCandidato: { $sum: 1 }
    }
  },
  {
    // Uncorrelated sub-pipeline: every group receives the full list of
    // per-province ofertas counts in the "ofertas" array.
    $lookup: {
      from: "ofertas",
      let: {},
      pipeline: [
        {
          $group: {
            _id: "$ubicacion_puesto.provincia",
            countProvinciaOferta: { $sum: 1 }
          }
        }
      ],
      as: "ofertas"
    }
  },
  {
    $project: {
      _id: 1,
      countProvinciaCandidato: 1,
      // Walk the ofertas groups and accumulate the count of the group whose
      // province equals this document's province; stays 0 when none matches.
      countOfertas: {
        $reduce: {
          input: "$ofertas",
          initialValue: 0,
          in: {
            $cond: [
              { $eq: ["$$this._id", "$_id"] },
              // Add the group's own count, not a constant 1, otherwise the
              // result could only ever be 0 or 1.
              { $add: ["$$value", "$$this.countProvinciaOferta"] },
              "$$value"
            ]
          }
        }
      }
    }
  },
  { $sort: { countProvinciaCandidato: -1 } },
  { $limit: 20 }
])
Working Mongo playground
Note: If you need to do this with aggregations only, this is fine. But I personally feel this approach is not good. My suggestion is to run the two group aggregations concurrently as separate queries and merge the results programmatically, because $lookup is expensive: when you have a massive amount of data, performance will degrade.
The $eq in the $cond is comparing an array to an ObjectId, so it never matches.
The $lookup stage results will be in the ofertas field as an array of documents, so '$ofertas._id' will be an array of all the _id values.
You will probably need to use $unwind or $reduce after the $lookup.

Array is reordered when using $lookup

I have this aggregation:
// Asker's pipeline: select one user and replace its "locations" id array with
// the matching documents from the user-locations collection.
db.getCollection("users").aggregate([
{
"$match": {
"_id": "5a708a38e6a4078bd49f01d5"
}
},
{
// Joins on users.locations == user-locations._id and overwrites "locations"
// with the joined documents.
"$lookup": {
"from": "user-locations",
"localField": "locations",
"as": "locations",
"foreignField": "_id"
}
}
])
It works well, but there is one small thing that I don't understand and I can't fix.
In the query output, the locations array is reordered by ObjectId and I really need to keep the original order of data.
Here is how the locations array from the users collection looks like
'locations' : [
ObjectId("5b55e9820b720a1a7cd19633"),
ObjectId("5a708a38e6a4078bd49ef13f")
],
And here is the result after the aggregation:
'locations' : [
{
'_id' : ObjectId("5a708a38e6a4078bd49ef13f"),
'name': 'Location 2'
},
{
'_id' : ObjectId("5b55e9820b720a1a7cd19633"),
'name': 'Location 1'
}
],
What am I missing here? I really have no idea how to proceed with this issue.
Could you give me a push?
$lookup does not guarantee the order of the result documents; you can try this approach to preserve the original order of the array:
$unwind deconstruct locations array and add auto index number will start from 0,
$lookup with locations
$set to select first element from locations
$sort by index field in ascending order
$group by _id and reconstruct locations array
// Resolve each location id separately so the original array order can be
// restored after the join.
db.users.aggregate([
  {
    $match: {
      _id: "5a708a38e6a4078bd49f01d5"
    }
  },

  // One document per location id; "index" records its position in the array.
  {
    $unwind: {
      path: "$locations",
      includeArrayIndex: "index"
    }
  },

  // Replace the single id with the array of matching user-locations documents.
  {
    $lookup: {
      from: "user-locations",
      localField: "locations",
      foreignField: "_id",
      as: "locations"
    }
  },

  // Keep only the first joined document.
  {
    $set: {
      locations: {
        $arrayElemAt: ["$locations", 0]
      }
    }
  },

  // Put the documents back into their original array order.
  {
    $sort: {
      index: 1
    }
  },

  // Rebuild one document per user with the ordered locations array.
  {
    $group: {
      _id: "$_id",
      locations: {
        $push: "$locations"
      }
    }
  }
])
Playground
From this closed bug report:
When using $lookup, the order of the documents returned is not guaranteed. The documents are returned in "natural order" - as they are encountered in the database. The only way to get a guaranteed consistent order is to add a $sort stage to the query.
Basically, the way any Mongo query/pipeline works is that it returns documents in the order they were matched, meaning the "right" order is not guaranteed, especially if there's index usage involved.
What you should do is add a $sort stage as suggested, like so:
// Join the user's locations and return them in the same order as the ids
// appear in the user's original "locations" array.
db.collection.aggregate([
  {
    $match: {
      _id: "5a708a38e6a4078bd49f01d5"
    }
  },
  {
    $lookup: {
      from: "user-locations",
      let: {
        locations: "$locations"
      },
      pipeline: [
        // Keep the location documents whose _id is listed in the user's array.
        {
          $match: {
            $expr: {
              $setIsSubset: [["$_id"], "$$locations"]
            }
          }
        },
        // Record each location's position inside the user's original array.
        // Sorting by _id would only reproduce the ObjectId ordering.
        {
          $addFields: {
            __locationOrder: { $indexOfArray: ["$$locations", "$_id"] }
          }
        },
        {
          $sort: {
            __locationOrder: 1 // or any other sort field you want.
          }
        },
        // Drop the helper field so the joined documents keep their shape.
        {
          $project: {
            __locationOrder: 0
          }
        }
      ],
      as: "locations"
    }
  }
])
You can also keep the original $lookup syntax you're using and just $unwind, $sort and then $group to restore the structure.

Accessing a random field using other field value

I have a document like this:
{
value: "field2",
field1: [ ... ],
field2: [ ... ],
...
}
Where value will be the value of one of the fields in the document. and many different fields are possible for one document.
I want to match a document, fetch the relevant field only and then do some calculations on it.
For example I want to do:
{
$unwind: "$value"
}
And get the results of field2 unwound.
How can I do this?
It's a little bit "hacky" but you can achieve this using operators like $objectToArray and $filter like so:
// Pick the field whose name is stored in "value", unwind its array and promote
// each element to the root of the output.
db.collection.aggregate([
  {
    // "values" becomes the {k, v} pair of the document whose key equals the
    // content of the "value" field.
    $addFields: {
      values: {
        $arrayElemAt: [
          {
            $filter: {
              input: { $objectToArray: "$$ROOT" },
              as: "entry",
              cond: { $eq: ["$$entry.k", "$value"] }
            }
          },
          0
        ]
      }
    }
  },
  // One output document per element of the selected field.
  { $unwind: "$values.v" },
  // NOTE(review): $replaceRoot needs each element to be a document - confirm
  // the selected arrays contain sub-documents.
  { $replaceRoot: { newRoot: "$values.v" } }
])
MongoPlayground

MongoDb Aggregate nested documents with $add

I need to get sum value from nested documents.
DB document:
{
"_id": 123,
"products": [
{
"productId": 1,
"charges": [
{
"type": "che",
"amount": 100
}
]
}
]
}
i wanted to get sum value.
sumValue = products.charges.amount+20; where "products.productId" is 1 and "products.charges.type" is "che"
i tried below query but no hope:
db.getCollection('test').aggregate(
[
{"$match":{$and:[{"products.productId": 14117426}, {"products.charges.type":"che"}]},
{ $project: { "_id":0, total: { $add: [ "$products.charges.price", 20 ] } }}
]
)
please help me to solve this.
You have to take a look at $unwind operator which deconstructs an array to output a document for each element of array. Also take a look at add and project operators.
I assume your db query should look like this:
// Sum (amount + 20) over the "che" charges of product 1.
db.test.aggregate([
  {$unwind: '$products'}, // Unwind products array
  {$match: {'products.productId' : 1}}, // Matching product id (1, as in the sample document)
  {$unwind: '$products.charges'}, // Unwind charges
  {$match: {'products.charges.type' : 'che'}}, // Matching charge type of che
  {$project: {'with20': {$add: ["$products.charges.amount", 20]}}}, // project total field which is value + 20
  {$group: {_id : null, amount: { $sum: '$with20' }}} // total sum
])
You can run $reduce twice to convert your arrays into scalar value. The outer condition could be applied as $filter, the inner one can be run as $cond:
// Compute total = 20 + sum of the "che" charge amounts of product 1.
db.collection.aggregate([
  {
    $project: {
      _id: 0,
      total: {
        // Outer reduce: iterate over the matching products, starting from 20.
        $reduce: {
          input: {
            $filter: {
              input: "$products",
              // The condition must be a boolean expression; a bare array is
              // always truthy and would keep every product.
              cond: { $eq: ["$$this.productId", 1] }
            }
          },
          initialValue: 20,
          in: {
            $add: [
              "$$value",
              {
                // Inner reduce: sum the amounts of this product's "che" charges.
                $reduce: {
                  input: "$$this.charges",
                  initialValue: 0,
                  in: {
                    // Accumulate onto $$value so that every matching charge
                    // counts, not only the last one.
                    $add: [
                      "$$value",
                      { $cond: [{ $eq: ["$$this.type", "che"] }, "$$this.amount", 0] }
                    ]
                  }
                }
              }
            ]
          }
        }
      }
    }
  }
])
Mongo Playground