MongoDB Aggregation pipeline inner array mapping - mongodb

I am trying to perform a lookup operation between two collections like shown below,
first collection records:
{
field1: "FIELD",
title: "sometitle",
secondIds: [
{
value: "nestedval1",
secondId: "234"
},
{
value: "nestedval2",
secondId: "342"
}
]
}
Second collection record
{
id: "234",
secvalue: "somevalue"
},
{
id: "342",
secvalue: "anothervalue"
}
I am trying to get the output in the below format for matching field1 name "FIELD" inside the first collection.
{
field1: "FIELD",
title: "sometitle",
secondIds: [
{
value: "nestedval1",
secondId: "234",
second: {
id: "234",
secvalue: "somevalue"
}
},
{
value: "nestedval2",
secondId: "342",
second: {
id: "342",
secvalue: "anothervalue"
}
}
]
}
In the aggregation pipeline, after the $match stage, I am stuck on how to write the $lookup that retrieves the second-collection entry for each element of secondIds in the first collection. Is this possible, or is there another way to achieve it?

// Joins every element of `secondIds` with its document from `secondCollection`
// and rebuilds each source document with the match embedded as `second`.
firstCollection.aggregate([
  // Keep only the documents the question asks about.
  { $match: { field1: 'FIELD' } },
  // One document per `secondIds` element, so each element can be joined on its own.
  { $unwind: '$secondIds' },
  {
    $lookup: {
      from: 'secondCollection',          // second collection name
      localField: 'secondIds.secondId',  // field in the first collection after the unwind
      foreignField: 'id',                // field in the second collection
      as: 'secondIds.second'             // where the matched documents are attached
    }
  },
  // $lookup always produces an array; unwind it into a single object.
  // preserveNullAndEmptyArrays keeps elements that have no match instead of
  // silently dropping them from the result.
  { $unwind: { path: '$secondIds.second', preserveNullAndEmptyArrays: true } },
  // Hide the joined document's own _id.
  { $project: { 'secondIds.second._id': 0 } },
  {
    // Regroup on the source document's _id: grouping on (field1, title) would
    // merge two different documents that happen to share those values.
    $group: {
      _id: '$_id',
      field1: { $first: '$field1' },
      title: { $first: '$title' },
      secondIds: { $push: '$secondIds' }  // rebuild the array
    }
  },
  // Final shape: same fields as the source document, without _id.
  { $project: { _id: 0, field1: 1, title: 1, secondIds: 1 } }
]).pretty()
Explanations are in the comments

Related

Incorrect data sampling using two collections MongoDB

ChatsUsers - this collection controls message filtering per user. For example, when a user clears a chat's history, it is cleared only for that user. It works simply: start_message_id is set to the current date, and from then on only messages from the message collection whose create_date is greater than or equal to that start_message_id are returned to the user.
// ChatsUsers links one user to one chat and stores the message bounds used to
// filter which of that chat's messages are returned to the user.
type ChatsUsers struct {
ID string `json:"id" bson:"id"`
// ChatID is the value the chat's id is joined against (chat_id in the collection).
ChatID string `json:"chat_id" bson:"chat_id"`
UserID string `json:"user_id" bson:"user_id"`
// StartMessageID is the lower bound for visible messages: only messages whose
// create_date is greater than or equal to it are returned. It is set to the
// current date when the user clears the history. Only the json tag carries
// omitempty; the bson tag does not.
StartMessageID int64 `json:"start_message_id,omitempty" bson:"start_message_id"`
// NOTE(review): presumably the matching upper bound; it is not used by the
// visible queries — confirm against the callers.
EndMessageID int64 `json:"end_message_id,omitempty" bson:"end_message_id"`
}
// Message is a single chat message as stored in the message collection.
type Message struct {
ID string `json:"id" bson:"id"`
// ChatID identifies the chat the message belongs to; the chat's id is joined
// against it.
ChatID string `json:"chat_id" bson:"chat_id"`
FromID string `json:"from_id" bson:"from_id"`
// CreateDate is compared against ChatsUsers.StartMessageID to decide whether
// the message is visible to a user.
// NOTE(review): the unit (seconds vs. milliseconds) is not visible here — confirm.
CreateDate int64 `json:"create_date" bson:"create_date"`
Body string `json:"body" bson:"body"`
UpdateAt int64 `json:"update_at" bson:"update_at"`
// Remaining fields are elided in the original listing.
...
}
// Chat is a conversation between several participants.
type Chat struct {
ID string `json:"id" bson:"id"`
// Participants holds string identifiers; the query below selects chats by
// matching one such value against this field.
Participants []string `json:"participants" bson:"participants"`
// LastMessage is a pointer, so a nil value is dropped from the JSON output by
// omitempty; the bson tag carries no omitempty.
LastMessage *Message `json:"last_message,omitempty" bson:"last_message"`
// Remaining fields are elided in the original listing.
...
}
Which is what I'm trying to do now in the example below. I need to find all last messages $last_message (see aggregation I wrote) for each chat which creation time $last_message.create_date is greater than or equal to chatsusers.start_message_id.
My problem: my attempt doesn't work the way I want it to - it doesn't select a single message, and $gte doesn't work. I may not have written the query quite correctly, but I am still working on it.
My question: how do I get ONE last message for each chat that has a creation time $last_message.create_date greater than or equal to chatsusers.start_message_id?
My try
You are actually very close. Just remember that a sub-pipeline in $lookup cannot read fields of the input (outer) documents directly: any such value must first be bound to a variable in the let clause and then referenced with $$ (i.e. $chat_user.start_message_id becomes $$smid in your case).
Fields of the joined ("from") collection, on the other hand, are referenced directly inside the sub-pipeline, without the name that the $lookup result is given in as (i.e. $last_message.create_date should be simply $create_date).
// For every chat the user participates in, fetch the single most recent message
// that is still visible to that user, i.e. whose create_date is greater than or
// equal to the user's own start_message_id.
db.chat.aggregate([
  {
    $match: {
      participants: "63ce54460aeee5e72c778d90"
    }
  },
  {
    // Join only the requesting user's chatsusers row. Without the user_id
    // filter every participant's row is joined, and another user's
    // start_message_id would decide which message is returned.
    // (localField/foreignField combined with pipeline needs MongoDB 5.0+,
    // which the message lookup below relies on as well.)
    $lookup: {
      from: "chatsusers",
      localField: "id",
      foreignField: "chat_id",
      pipeline: [
        {
          $match: {
            user_id: "63ce54460aeee5e72c778d90"
          }
        }
      ],
      as: "chat_user"
    }
  },
  {
    $unwind: {
      path: "$chat_user"
    }
  },
  {
    $lookup: {
      from: "message",
      localField: "id",
      foreignField: "chat_id",
      // Outer-document values must be passed into the sub-pipeline through let.
      let: {
        smid: "$chat_user.start_message_id"
      },
      pipeline: [
        {
          // Inside the sub-pipeline, message fields are referenced directly.
          $match: {
            $expr: {
              $gte: [
                "$create_date",
                "$$smid"
              ]
            }
          }
        },
        // Newest first, then keep exactly one message.
        {
          $sort: {
            create_date: -1
          }
        },
        {
          $limit: 1
        }
      ],
      as: "last_message"
    }
  },
  {
    // Keep chats that have no visible message; last_message is then absent.
    $unwind: {
      path: "$last_message",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    // Collapse back to one document per chat. With a single chatsusers row per
    // (chat, user) there is one input row per group, so $first is sufficient;
    // $max on a sub-document compares whole objects, not create_date.
    $group: {
      _id: "$_id",
      create_date: {
        $first: "$create_date"
      },
      // The chat's own string id, not the ObjectId in _id.
      id: {
        $first: "$id"
      },
      last_message: {
        $first: "$last_message"
      },
      owner_id: {
        $first: "$owner_id"
      },
      participants: {
        $first: "$participants"
      },
      title: {
        $first: "$title"
      },
      type: {
        $first: "$type"
      },
      unread: {
        $first: "$unread"
      }
    }
  },
  {
    $project: {
      id: 1,
      title: 1,
      create_date: 1,
      type: 1,
      participants: 1,
      owner_id: 1,
      last_message: "$last_message",
      unread: 1
    }
  }
])
Mongo Playground

How to match an array from one collection with a value in another?

I have two collections, and I use an aggregation to compare them and return the matching values (my attempt at the match is commented out). I am trying to convert both name values to upper case and then return a field from the other collection (last_name) when First_name and db_first_name match.
// For every Log document, look up the db2 document whose db_first_name equals
// the log's First_name (compared case-insensitively via upper-casing) and return
// that document's last_name.
Log.aggregate([
  {
    $addFields: {
      First_name: { $toUpper: '$First_name' }
    }
  },
  {
    $lookup: {
      from: 'db2',
      // The sub-pipeline cannot see outer fields directly; pass the value in.
      let: { first_name: '$First_name' },
      pipeline: [
        { $addFields: { db_first_name: { $toUpper: '$db_first_name' } } },
        // Field paths need the `$` prefix and the outer variable needs `$$`;
        // without them the operands are plain string literals. $eq (not $ne)
        // keeps only the db2 documents whose name matches.
        { $match: { $expr: { $eq: ['$db_first_name', '$$first_name'] } } }
      ],
      as: 'Without_array'
    }
  },
  {
    $project: {
      // last_name of the first matching db2 document; absent when nothing matched.
      lastname: { $arrayElemAt: ['$Without_array.last_name', 0] },
      name: 1
    }
  }
])

Add number field in $project mongodb

I need to add an index (row) number to each document when fetching data. For example, I have this data:
[
{
_id : 616efd7e56c9530018e318ac
student : {
name: "Alpha"
email: null
nisn: "0408210001"
gender : "female"
}
},
{
_id : 616efd7e56c9530018e318af
student : {
name: "Beta"
email: null
nisn: "0408210002"
gender : "male"
}
}
]
and then i need the output like this one:
[
{
no:1,
id:616efd7e56c9530018e318ac,
name: "Alpha",
nisn: "0408210001"
},
{
no:2,
id:616efd7e56c9530018e318af,
name: "Beta",
nisn: "0408210002"
}
]
I have tried this code, and it gets almost what I expect:
{
'$project': {
'_id': 0,
'id': '$_id',
'name': '$student.name',
'nisn': '$student.nisn'
}
}
But I am still confused about how to add the index number. Can it be done in $project, or do I have to do it another way? Thank you for taking the time to answer.
You can use $unwind with the includeArrayIndex option, which returns the index of each array element, like this:
// Numbers the documents 1..n by collecting them into one array and unwinding
// that array with includeArrayIndex.
db.collection.aggregate([
  // Fix the order first: without a sort, the order in which $group receives the
  // documents, and therefore the numbering, is not defined.
  { $sort: { _id: 1 } },
  {
    // A constant _id puts every document into a single group.
    $group: {
      _id: null,
      data: {
        $push: {
          _id: "$_id",
          student: "$student"
        }
      }
    }
  },
  {
    // One document per array element again; `no` receives the 0-based index.
    $unwind: { path: "$data", includeArrayIndex: "no" }
  },
  {
    $project: {
      _id: 0,
      id: "$data._id",
      name: "$data.student.name",
      nisn: "$data.student.nisn",
      no: { $add: ["$no", 1] }  // convert the 0-based index to 1-based
    }
  }
])
You can see it works here .
I strongly suggest adding a $match stage before these stages; otherwise the $group stage will collect your entire collection into a single document.
Alternatively (MongoDB 5.0 and later), you can run a pipeline with a $setWindowFields stage, which allows you to add a new field that returns the position of a document (known as the document number) within a partition. The position number is produced by the $documentNumber operator, which is only available in the $setWindowFields stage.
The partition could be an extra field (which is constant) that can act as the window partition.
The final stage in the pipeline is the $replaceWith step which will promote the student embedded document to the top-level as well as replacing all input documents with the specified document.
Running the following aggregation will yield the desired results:
// Numbers the documents with $documentNumber and promotes the embedded
// `student` document to the top level.
db.collection.aggregate([
  // Constant field so that every document falls into the same window partition.
  { $addFields: { _partition: 'students' } },
  {
    $setWindowFields: {
      partitionBy: '$_partition',
      // Ascending _id, so the first document in the sample gets no: 1 as in the
      // expected output; a descending sort would number them in reverse.
      sortBy: { _id: 1 },
      output: { no: { $documentNumber: {} } }
    }
  },
  {
    // Replace each document with { id, no } merged with the student's fields.
    $replaceWith: {
      $mergeObjects: [
        { id: '$_id', no: '$no' },
        '$student'
      ]
    }
  }
])

Mongodb lookup with array

I have two collections first one is
user_profile collection
// Schema for the user_profile collection: an optional phone number plus a list
// of skills, each referencing a Skill document together with years of experience.
const userProfileSchema = mongoose.Schema({
  phone_number: { type: String, required: false },
  primary_skills: [
    {
      // Reference to a document of the 'Skill' model.
      skill_id: { type: mongoose.Schema.Types.ObjectId, ref: 'Skill' },
      years: Number
    }
  ]
});
sample data
{
"phone_number":"222",
"primary_skills":[{skill_id:1,years:12},{skill_id:2,years:13}]
}
in the primary_skills the key skill_id is mapped with another collection named skills
skills collection
// Schema for the skills collection: every skill has a mandatory, unique name.
const skillSchema = mongoose.Schema({
  name: { type: String, required: true, unique: true }
});
sample data
[
{
id:1,
name:'php'
},
{
id:2,
name:'java'
}
]
I want to fetch all values in the user_profile collection along with the respective skills name
expected output:
{
"phone_number":"222",
"primary_skills":[{
name:"php",skill_id:1,years:12
},{
name:"java",skill_id:2,years:13}
]
}
I found a similar thread to my question MongoDB lookup when foreign field is an array of objects but it's doing the opposite of what I want
This is the query I tried
// The asker's attempt: joins skills onto the profile by the skill ids found in
// primary_skills.
profile.aggregate([{
$lookup:{
from:'skills',
localField:'primary_skills.skill_id',
// NOTE(review): the sample skills documents expose `id`, not `_id` — confirm
// which field actually holds the referenced value.
foreignField:'_id',
// `as` reuses the name of the existing primary_skills field, so the joined
// skill documents overwrite the original array; this is why `years` is missing
// from the result.
'as':'primary_skills'
}
}])
This works fine but it didn't contain the years key
You need to do it with $unwind and $group,
$unwind primary_skills because its an array and we need to lookup sub document wise
// Adds the skill name to every primary_skills element while keeping the
// element's own fields (skill_id, years).
db.user_profile.aggregate([
  // $unwind primary_skills: it is an array and the lookup has to run per
  // sub-document.
  {
    $unwind: "$primary_skills"
  },
  // $lookup the skill for the current element; the result is stored inside the
  // element itself so the element's other fields are not overwritten.
  {
    $lookup: {
      from: "skills",
      localField: "primary_skills.skill_id",
      foreignField: "id",
      as: "primary_skills.name"
    }
  },
  // $unwind primary_skills.name: the join result is an array, unwinding turns
  // it into a single object.
  {
    $unwind: {
      path: "$primary_skills.name"
    }
  },
  // $addFields: replace the joined object with just its name.
  {
    $addFields: {
      "primary_skills.name": "$primary_skills.name.name"
    }
  },
  // $group by _id: the unwind split each profile into several documents, so
  // they are combined again into one document per profile.
  {
    $group: {
      _id: "$_id",
      phone_number: {
        $first: "$phone_number"
      },
      primary_skills: {
        $push: "$primary_skills"
      }
    }
  }
])
Playground: https://mongoplayground.net/p/bDmrOwmASn5

MongoDB Exclude docs from aggregate if field/reference exists in other collection

// Seller documents: a display name and the seller's URL. The URL is the value
// that product.sellerUrl is compared against.
const sellerSchema = Schema(
  {
    name: String,
    url: String
  }
);
// Product documents: a title and the URL of the seller offering the product.
const productSchema = Schema(
  {
    title: String,
    sellerUrl: String
  }
);
Below query will return unique sellerUrl from all products:
// Produces one output document per distinct sellerUrl found in the products.
context.Product.aggregate([
  { $group: { _id: "$sellerUrl" } }
]);
But I also want to exclude sellers that I have already saved: if a seller's url equals a product's sellerUrl, that sellerUrl must be excluded from the aggregation result.
Please help me
You can try below query :
// Returns the distinct product sellerUrls that do not yet exist as `url` in the
// seller collection, as a single document { uniqueAndNotInSellerColl: [...] }.
db.product.aggregate([
  {
    $group: {
      _id: "", /** group on no condition & push all unique `sellerUrl` to sellerUrls array */
      sellerUrls: { $addToSet: "$sellerUrl" }
    }
  },
  {
    $lookup: {
      from: "seller",
      let: { sellerUrls: "$sellerUrls" }, // make the outer array visible inside the sub-pipeline
      pipeline: [
        { $group: { _id: "", urls: { $addToSet: "$url" } } }, /** group on no condition & push all unique `url` to urls array */
        { $project: { _id: 0, uniqueAndNotInSellerColl: { $setDifference: [ "$$sellerUrls", "$urls" ] } } } // get difference between two arrays
      ],
      as: "data" // at most one element, because the sub-pipeline groups everything
    }
  },
  /**
   * Promote the single element of `data` to the root document.
   * When the seller collection is empty the sub-pipeline yields nothing, `data`
   * is [] and $arrayElemAt returns no value, which would make $replaceRoot fail.
   * In that case no seller is saved yet, so every sellerUrl qualifies.
   */
  {
    $replaceRoot: {
      newRoot: {
        $ifNull: [
          { $arrayElemAt: [ "$data", 0 ] },
          { uniqueAndNotInSellerColl: "$sellerUrls" }
        ]
      }
    }
  }
])
Test : mongoplayground
Update :
As you need a few other fields from the product collection and not just the sellerUrl field, try the query below:
// Anti-join: groups products by sellerUrl and keeps only the groups whose
// sellerUrl has no matching `url` in the seller collection.
db.product.aggregate([
  {
    $group: {
      _id: "$sellerUrl",
      // Only `title` is collected from each product; push `$$ROOT` instead if
      // every field is needed.
      docs: { $push: { title: "$title" } }
    }
  },
  /**
   * Plain localField/foreignField lookup. This is fine when only a few seller
   * documents match each `_id` (sellerUrl). If many match, prefer a lookup with
   * a sub-pipeline that returns just the sellers' `_id`s, as in the previous
   * query, so that whole seller documents are not fetched needlessly.
   */
  {
    $lookup: {
      from: "seller",
      localField: "_id",
      foreignField: "url",
      as: "sellerDocs"
    }
  },
  // Retain only the groups for which the lookup found no seller document.
  { $match: { sellerDocs: { $eq: [] } } },
  // The (empty) helper array is no longer needed in the output.
  { $project: { sellerDocs: 0 } }
])
Test : mongoplayground