Use $lookup on a double nested array - mongodb

I'm trying to use an _id saved in a double nested array to find and attach data from a different document. It essentially looks like this:
doc1 = {
_id: ObjectId,
...other stuff...,
firstArray: [
...other stuff...,
secondArray: [
other_id: ObjectId
]
]
}
doc2 = {
_id: ObjectId,
...the stuff I want...
}
Not every entry in secondArray is going to contain an other_id field, only some. This has gotten me close to the result I want, except that the firstArray field ends up containing one entry for every entry in the secondArray field. I think I'm just missing one step to essentially undo one of the two $unwinds.
doc1.aggregate([
{ $match: { _id: req.params._id }},
{ $unwind: "$firstArray" },
{ $unwind: "$firstArray.secondArray" },
{ $lookup: {
from: "doc2",
localField: "firstArray.secondArray.other_id",
foreignField: "_id",
as: "firstArray.secondArray.other",
}},
{ $addFields: {
"firstArray.secondArray.other": {
$arrayElemAt: ["$firstArray.secondArray.other", 0]
}
}},
{ $group: {
_id: "$_id",
...other stuff...,
firstArray: { $push: "$firstArray" },
}},
]);

You need to add another $group stage to reconstruct the "secondArray" structure, in general it's easy to remember that for each $unwind action you do, you'll need to counter that with another $group in order to restore the initial structure.
In order to do that you'll need some kind of a unique identifier for firstArray so you can group by it, if it doesn't exist you can add an _id that is the index of the element in the array like so:
{
"$addFields": {
"firstArray": {
$map: {
input: {
$zip: {
inputs: [
"$firstArray",
{
$range: [
0,
{
$size: "$firstArray"
}
]
}
]
}
},
as: "item",
in: {
"$mergeObjects": [
{
"$arrayElemAt": [
"$$item",
0
]
},
{
_id: {
"$arrayElemAt": [
"$$item",
1
]
}
}
]
}
}
}
}
}
Now firstArray has a unique field _id that we can later use to group on like so:
... the pipeline ...
{
$group: {
_id: {
id: "$_id",
first: "$firstArray._id"
},
secondArray: {
$push: "$firstArray.secondArray"
}
}
},
{
$group: {
_id: "$id._id",
firstArray: {
$push: {
secondArray: "$secondArray"
}
},
}
}
You will need to add support for all the "other stuff" you defined in your own pipeline, but this pipeline flow is the way to go.
Mongo Playground

Related

MongoDB document merge without a-priori knowledge of fields

I would like to merge several documents. Most of the fields have the same values but there might be one or two fields that have different values. These fields are unknown beforehand. Ideally I would like to merge all the documents keeping the fields that are the same as is but creating an array of values only for those fields that have some variation.
For my first approach I grouped by a common field to my documents and kept the first document, this however discards some information that varies in other fields.
# $group stage: one result per distinct "0020000E.Value", keeping only the
# first complete document ($$ROOT) seen for each group under "doc".
group_documents = {
"$group": {
"_id": "$0020000E.Value",
"doc": {
"$first": "$$ROOT"
}
}
}
# $replaceRoot stage: promote the stored document back to the top level.
merge_documents = {
"$replaceRoot": {
"newRoot": "$doc"
}
}
# $out stage: write the results to collection "records_nd" in database "database".
write_collection = { "$out": { "db": "database", "coll": "records_nd" } }
# NOTE(review): `pipeline` is not defined in this snippet; presumably it is
# [group_documents, merge_documents, write_collection] -- confirm.
objects = coll.aggregate(pipeline)
If the fields that have different values were known, I would have done something like this,
merge_sol1
or
merge_sol2
or
merge_sol3
The third solution is actually very close to my desired output and I could tweak it a bit. But these answers assume a-priori knowledge of the fields to be merged.
You can first convert $$ROOT to an array of k-v tuples with $objectToArray. Then, $group each field with $addToSet to collect all of its distinct values into an array. Then, check the size of the resulting array and conditionally pick the first item if the array size is 1 (i.e. the value of that field is the same in every document); otherwise, keep the resulting array. Finally, convert back to the original document form with $arrayToObject.
db.collection.aggregate([
  // Break every document into { k, v } pairs, carrying the grouping field
  // ("key") along as _id.
  {
    $project: {
      _id: "$key",
      arr: { $objectToArray: "$$ROOT" }
    }
  },
  { $unwind: "$arr" },
  // The grouping field and the original _id are not merged.
  {
    $match: {
      "arr.k": { $nin: ["key", "_id"] }
    }
  },
  // Collect the distinct values of each field within each group.
  {
    $group: {
      _id: { id: "$_id", k: "$arr.k" },
      v: { $addToSet: "$arr.v" }
    }
  },
  // Regroup by the grouping key so that all fields of one merged document
  // end up in a single k-v array. Without this regrouping the pipeline
  // emits one output document per field instead of one per group.
  {
    $group: {
      _id: "$_id.id",
      arr: {
        $push: {
          k: "$_id.k",
          // A single distinct value stays a scalar; several become an array.
          v: {
            $cond: {
              if: { $gt: [{ $size: "$v" }, 1] },
              then: "$v",
              else: { $first: "$v" }
            }
          }
        }
      }
    }
  },
  // Turn the k-v array back into a document.
  {
    $project: {
      doc: { $arrayToObject: "$arr" }
    }
  },
  {
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [{ _id: "$_id" }, "$doc"]
      }
    }
  }
])
Mongo Playground

How to query an array and retrieve it from MongoDB

Updated:
I have a document on the database that looks like this:
My question is the following:
How can I retrieve the first 10 elements from the friendsArray from database and sort it descending or ascending based on the lastTimestamp value.
I don't want to download all values to my API and then sort them in Python because that is wasting my resources.
I have tried it using this code (Python):
listOfUsers = db.user_relations.find_one({'userId': '123'}, {'friendsArray' : {'$orderBy': {'lastTimestamp': 1}}}).limit(10)
but it just gives me this error pymongo.errors.OperationFailure: Unknown expression $orderBy
Any answer at this point would be really helpful! Thank You!
Use aggregate:
first $unwind the friendsArray,
then $sort by the timestamp,
$group by _id to rebuild the sorted array,
and use $addFields to trim the array down to its first 10 items.
db.collection.aggregate([
  { $match: { userId: "123" } },
  { $unwind: "$friendsArray" },
  // Ascending by timestamp; use -1 for descending. The field name must
  // match the stored documents exactly (it is case-sensitive).
  { $sort: { "friendsArray.lastTimeStamp": 1 } },
  // Rebuild the array in sorted order.
  {
    $group: {
      _id: "$_id",
      friendsArray: { $push: "$friendsArray" }
    }
  },
  // Keep the first 10 elements. $slice selects by position, so it stays
  // correct when the array contains equal elements (an $indexOfArray-based
  // filter always reports the index of the first equal element) and avoids
  // rescanning the array for every element.
  {
    $addFields: {
      friendsArray: { $slice: ["$friendsArray", 10] }
    }
  }
])
https://mongoplayground.net/p/2Usk5sRY2L2
and for pagination use this
db.collection.aggregate([
  { $match: { userId: "123" } },
  { $unwind: "$friendsArray" },
  // Ascending by timestamp; use -1 for descending.
  { $sort: { "friendsArray.lastTimeStamp": 1 } },
  // Rebuild the array in sorted order.
  {
    $group: {
      _id: "$_id",
      friendsArray: { $push: "$friendsArray" }
    }
  },
  // Second page: skip 10 elements, then take 10 (indexes 10 through 19).
  // A strict "index > 10" comparison would drop the element at index 10.
  {
    $addFields: {
      friendsArray: { $slice: ["$friendsArray", 10, 10] }
    }
  }
])
The translation of your find into an aggregation (we need $unwind, which is why an aggregation is used) would look like the query below.
Test code here
Query (for descending replace 1 with -1)
db.collection.aggregate([
  // Select the user whose friends list is wanted.
  { $match: { userId: "123" } },
  // One document per friendsArray element.
  { $unwind: "$friendsArray" },
  // 1 sorts ascending, -1 descending.
  { $sort: { "friendsArray.lastTimeStamp": 1 } },
  // Only the first 10 elements in sort order.
  { $limit: 10 },
  // Return the bare array elements instead of the wrapping documents.
  { $replaceRoot: { newRoot: "$friendsArray" } }
])
If you want to skip some elements first, also add a $skip stage before the $limit stage:
// Pagination stage: discards the first 10 documents that reach it.
{
"$skip" : 10
}
To take the 10-20 messages for example.

MongoDB $lookup and $map array of objects

I'm trying to do this for days, but can't find any success
I'm using MongoDB, and I tried to do it with many pipeline steps but I couldn't find a way.
I have a players collection, each player contains an items array
{
"_id": ObjectId("5fba17c1c4566e57fafdcd7e"),
"username": "moshe",
"items": [
{
"_id": ObjectId("5fbb5ac178045a985690b5fd"),
"equipped": false,
"itemId": "5fbb5ab778045a985690b5fc"
}
]
}
I have an items collection where there is more information about each item
in the player items array.
{
"_id": ObjectId("5fbb5ab778045a985690b5fc"),
"name": "Axe",
"damage": 4,
"defense": 6
}
My goal is to have a player document with all the information about the item inside his items array, so it will look like that:
{
"_id": ObjectId("5fba17c1c4566e57fafdcd7e"),
"username": "moshe",
"items": [
{
"_id": ObjectId("5fbb5ac178045a985690b5fd"),
"equipped": false,
"itemId": "5fbb5ab778045a985690b5fc",
"name": "Axe",
"damage": 4,
"defense": 6
}
]
}
$unwind deconstruct items array
$lookup to join the items collection: pass itemId into let after converting it to an ObjectId using $toObjectId, and also pass the items object,
$match itemId condition
$mergeObjects to merge the items object with the $$ROOT object, then replace the root using $replaceRoot
$group reconstruct items array again, group by _id and get first username and construct items array
db.players.aggregate([
  // One document per entry of the player's items array.
  { $unwind: "$items" },
  {
    $lookup: {
      from: "items",
      let: {
        // itemId is stored as a string; convert it so it can be compared
        // with the ObjectId _id of the items collection.
        lookupId: { $toObjectId: "$items.itemId" },
        playerItem: "$items"
      },
      pipeline: [
        {
          $match: {
            $expr: { $eq: ["$_id", "$$lookupId"] }
          }
        },
        // Merge the player's entry with the matched item document.
        {
          $replaceRoot: {
            newRoot: { $mergeObjects: ["$$playerItem", "$$ROOT"] }
          }
        }
      ],
      as: "items"
    }
  },
  // Rebuild the items array; each lookup result is an array, so its first
  // element is taken.
  {
    $group: {
      _id: "$_id",
      username: { $first: "$username" },
      items: { $push: { $arrayElemAt: ["$items", 0] } }
    }
  }
])
Playground
Second option using $map, and without $unwind,
$addFields for items convert itemId string to object type id using $toObjectId and $map
$lookup to join items collection
$project to show the required fields and merge the items array with itemsCollection: $map iterates over the items array, $filter finds the entries with the matching itemId, $first takes the first object from that result, and $mergeObjects merges the current object with the object returned by $first
db.players.aggregate([
  // Convert every items.itemId from string to ObjectId so that $lookup can
  // match it against the _id of the items collection.
  {
    $addFields: {
      items: {
        $map: {
          input: "$items",
          as: "entry",
          in: {
            $mergeObjects: [
              "$$entry",
              { itemId: { $toObjectId: "$$entry.itemId" } }
            ]
          }
        }
      }
    }
  },
  // Fetch all referenced item documents in one go.
  {
    $lookup: {
      from: "items",
      localField: "items.itemId",
      foreignField: "_id",
      as: "itemsCollection"
    }
  },
  // Merge each player entry with the item document that has the same id.
  {
    $project: {
      username: 1,
      items: {
        $map: {
          input: "$items",
          as: "playerItem",
          in: {
            $mergeObjects: [
              "$$playerItem",
              {
                $arrayElemAt: [
                  {
                    $filter: {
                      input: "$itemsCollection",
                      as: "candidate",
                      cond: { $eq: ["$$candidate._id", "$$playerItem.itemId"] }
                    }
                  },
                  0
                ]
              }
            ]
          }
        }
      }
    }
  }
])
Playground
First I'd strongly suggest that you should store the items.itemId as ObjectId, not strings.
Then another simple solution can be:
db.players.aggregate([
  // Fetch every item document referenced by the player's items array
  // (requires items.itemId to be stored as ObjectId).
  {
    $lookup: {
      from: "items",
      localField: "items.itemId",
      foreignField: "_id",
      as: "itemsDocuments",
    },
  },
  // Merge each entry with its item document, matched by id. Pairing the two
  // arrays by position is not safe: $lookup does not promise to return the
  // documents in the order of the local array, and it returns fewer
  // documents when ids repeat or have no match.
  {
    $addFields: {
      items: {
        $map: {
          input: "$items",
          as: "item",
          in: {
            $mergeObjects: [
              "$$item",
              {
                $arrayElemAt: [
                  {
                    $filter: {
                      input: "$itemsDocuments",
                      as: "doc",
                      cond: { $eq: ["$$doc._id", "$$item.itemId"] },
                    },
                  },
                  0,
                ],
              },
            ],
          },
        },
      },
    },
  },
  // Drop the temporary lookup result.
  { $unset: "itemsDocuments" },
])

Accessing a random field using other field value

I have a document like this:
{
value: "field2",
field1: [ ... ],
field2: [ ... ],
...
}
Where value will be the value of one of the fields in the document. and many different fields are possible for one document.
I want to match a document, fetch only the relevant field, and then do some calculations on it.
For example I want to do:
{
$unwind: "$value"
}
And get the results of field2 unwound.
How can I do this?
It's a little bit "hacky" but you can achieve this using operators like $objectToArray and $filter like so:
db.collection.aggregate([
  // Look up the field whose name is stored in "value": turn the document
  // into { k, v } pairs and keep the pair whose key equals that name.
  {
    $addFields: {
      values: {
        $arrayElemAt: [
          {
            $filter: {
              input: { $objectToArray: "$$ROOT" },
              as: "entry",
              cond: { $eq: ["$$entry.k", "$value"] }
            }
          },
          0
        ]
      }
    }
  },
  // Unwind the selected field's array ...
  { $unwind: "$values.v" },
  // ... and make each of its elements the root document.
  { $replaceRoot: { newRoot: "$values.v" } }
])
MongoPlayground

How do you convert an array of ObjectIds into an array of embedded documents with a field containing the original array element value

I have a collection of documents where one of the fields is currently an array of ObjectId items.
{
_id: ObjectId(...),
user: "jdoe",
docs: [
ObjectId(1),
ObjectId(2),
...
]
}
{
_id: ObjectId(...),
user: "jsmith",
docs: [
ObjectId(3),
ObjectId(4),
...
]
}
How can I update all of the documents in my collection to convert the docs field into an array of objects that contain a "docID" field equal to the original element value?
For example, I'd want my documents to end up looking like:
{
_id: ObjectId(...),
user: "jdoe",
docs: [
{ docID: ObjectId(1) },
{ docID: ObjectId(2) },
...
]
}
{
_id: ObjectId(...),
user: "jsmith",
docs: [
{ docID: ObjectId(3)},
{ docID: ObjectId(4)},
...
]
}
I'm hoping there is a command that I can run from the shell such as:
// Sketch of the desired command. '$$VALUE' is only a placeholder for
// "the element's current value"; it is not an existing MongoDB variable.
db.getCollection('myCollection').update(
  {},
  {
    $set: {
      // The key must be a properly closed string literal.
      'docs.$[]': { docID: '$$VALUE' }
    }
  },
  { multi: true }
);
But I can't figure out how to reference the original value of the element.
Update:
I'm marking @mickl's answer as the correct one since it got me on the right track. Below is the final aggregate that I ended up with, which only changes the docs field if it is an array of ObjectIds; otherwise the existing value is left as-is, including documents that don't have a docs field.
db.getCollection('myCollection').aggregate([
  {
    $addFields: {
      docs: {
        $cond: {
          // Only convert when the first element is an ObjectId; a missing
          // docs field or any other content is passed through unchanged.
          if: { $eq: [{ $type: { $arrayElemAt: ['$docs', 0] } }, "objectId"] },
          then: {
            $map: {
              input: '$docs',
              // Wrap each id in an embedded document under "docID".
              in: { docID: '$$this' }
            }
          },
          else: '$docs'
        }
      }
    }
  },
  // Write the results back over the source collection.
  { $out: "myCollection" }
])
You can use $map to reshape your data and $out to replace existing collection with aggregation result:
// Reshape every element of docs into { docID: <element> } and write the
// result back over the "col" collection.
db.col.aggregate([
{
$addFields: {
docs: {
$map: {
input: "$docs",
// $$this is the current array element.
in: { docID: "$$this" }
}
}
}
},
// $out replaces the existing collection with the aggregation result.
{ $out: "col" }
])