Incorrect data sampling using two collections MongoDB - mongodb

ChatsUsers - this collection controls per-user message filtering. For example, when a user clears the chat history, it is cleared only for that user. It works simply: start_message_id is set to the current date, and from then on only messages from the message collection whose create_date is greater than or equal to start_message_id are returned to that user.
// ChatsUsers stores per-user state for a chat. It is used to filter which
// messages of the chat are returned to that user (for example after the user
// clears the history, which affects only that user).
type ChatsUsers struct {
ID string `json:"id" bson:"id"`
// ChatID is joined against the chat's "id" field in the aggregation.
ChatID string `json:"chat_id" bson:"chat_id"`
UserID string `json:"user_id" bson:"user_id"`
// StartMessageID is set to the current date when the user clears the history;
// only messages with create_date >= StartMessageID are returned afterwards.
StartMessageID int64 `json:"start_message_id,omitempty" bson:"start_message_id"`
// NOTE(review): presumably the upper-bound counterpart of StartMessageID;
// its use is not shown here - confirm.
EndMessageID int64 `json:"end_message_id,omitempty" bson:"end_message_id"`
}
// Message is a single document of the message collection.
type Message struct {
ID string `json:"id" bson:"id"`
// ChatID is joined against the chat's "id" field in the aggregation.
ChatID string `json:"chat_id" bson:"chat_id"`
// NOTE(review): presumably the sender's user ID - confirm.
FromID string `json:"from_id" bson:"from_id"`
// CreateDate is the value compared against ChatsUsers.StartMessageID.
CreateDate int64 `json:"create_date" bson:"create_date"`
Body string `json:"body" bson:"body"`
UpdateAt int64 `json:"update_at" bson:"update_at"`
...
}
// Chat is a single document of the chat collection.
type Chat struct {
ID string `json:"id" bson:"id"`
// Participants is matched against a user ID string to find a user's chats.
Participants []string `json:"participants" bson:"participants"`
// LastMessage is optional; it is omitted from the JSON output when nil.
LastMessage *Message `json:"last_message,omitempty" bson:"last_message"`
...
}
Which is what I'm trying to do now in the example below. I need to find all last messages $last_message (see aggregation I wrote) for each chat which creation time $last_message.create_date is greater than or equal to chatsusers.start_message_id.
My problem: my attempt doesn't work the way I want — it doesn't select a single message, and $gte doesn't seem to work. I may not have written the query quite correctly, but I'm still working on it.
My question: how do I get ONE last message for each chat that has a creation time $last_message.create_date greater than or equal to chatsusers.start_message_id?
My try

You are actually very close. Just remember that when you use a sub-pipeline in $lookup, any field of the input (outer) document that the sub-pipeline needs must first be bound to a variable in the let clause and then referenced as $$variable (i.e. $chat_user.start_message_id in your case).
Fields of the joined ("from") collection are referenced directly inside the sub-pipeline, without the name you give to the $lookup result (i.e. $last_message.create_date should simply be $create_date).
// For every chat the given user participates in, return the chat together
// with the single newest message that this user is still allowed to see,
// i.e. the newest message whose create_date >= the user's start_message_id.
db.chat.aggregate([
  {
    $match: {
      participants: "63ce54460aeee5e72c778d90"
    }
  },
  {
    $lookup: {
      from: "chatsusers",
      localField: "id",
      foreignField: "chat_id",
      as: "chat_user"
    }
  },
  {
    $unwind: {
      path: "$chat_user"
    }
  },
  // chatsusers holds one row per (chat, user). Keep only the requesting
  // user's row; otherwise another participant's start_message_id is applied
  // and a message this user has already cleared can be returned.
  {
    $match: {
      "chat_user.user_id": "63ce54460aeee5e72c778d90"
    }
  },
  {
    // localField/foreignField combined with let/pipeline requires MongoDB 5.0+.
    $lookup: {
      from: "message",
      localField: "id",
      foreignField: "chat_id",
      let: {
        // Fields of the outer document must be passed in through `let`.
        smid: "$chat_user.start_message_id"
      },
      pipeline: [
        {
          $match: {
            $expr: {
              // Fields of the joined message are referenced directly.
              $gte: ["$create_date", "$$smid"]
            }
          }
        },
        // Newest first, then keep one: the last visible message.
        {
          $sort: {
            create_date: -1
          }
        },
        {
          $limit: 1
        }
      ],
      as: "last_message"
    }
  },
  {
    // Turn the 0-or-1 element array into an object; chats without a visible
    // message are kept (last_message stays absent).
    $unwind: {
      path: "$last_message",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    $group: {
      _id: "$_id",
      create_date: {
        $first: "$create_date"
      },
      // Keep the chat's own `id` (the key used for both joins above) rather
      // than overwriting it with the document's _id.
      id: {
        $first: "$id"
      },
      last_message: {
        $max: "$last_message"
      },
      owner_id: {
        $first: "$owner_id"
      },
      participants: {
        $first: "$participants"
      },
      title: {
        $first: "$title"
      },
      type: {
        $first: "$type"
      },
      unread: {
        $first: "$unread"
      }
    }
  },
  {
    $project: {
      id: 1,
      title: 1,
      create_date: 1,
      type: 1,
      participants: 1,
      owner_id: 1,
      last_message: "$last_message",
      unread: 1
    }
  }
])
Mongo Playground

Related

Add number field in $project mongodb

I have an issue: I need to add an index number when fetching data. First, I have this data, for example:
[
{
_id : 616efd7e56c9530018e318ac
student : {
name: "Alpha"
email: null
nisn: "0408210001"
gender : "female"
}
},
{
_id : 616efd7e56c9530018e318af
student : {
name: "Beta"
email: null
nisn: "0408210001"
gender : "male"
}
}
]
and then i need the output like this one:
[
{
no:1,
id:616efd7e56c9530018e318ac,
name: "Alpha",
nisn: "0408210001"
},
{
no:2,
id:616efd7e56c9530018e318af,
name: "Beta",
nisn: "0408210002"
}
]
I have tried this code, and it almost gives what I expect.
// Projection stage from the question: hides _id, exposes it again as `id`,
// and lifts name and nisn out of the embedded `student` document.
// It does not add the running number (`no`) the question asks for.
{
'$project': {
'_id': 0,
'id': '$_id',
'name': '$student.name',
'nisn': '$student.nisn'
}
}
But I am still confused about how to add the index number. Is it possible to do it in $project, or do I have to do it another way? Thank you for the effort to answer.
You can use $unwind which can return an index, like this:
// Number the documents by packing them into one array and unwinding it again,
// letting $unwind report each element's array position.

// Gather every input document into a single `data` array.
const packIntoOneArray = {
  $group: {
    _id: 0,
    data: { $push: { _id: "$_id", student: "$student" } }
  }
};

// One document per array element; `no` receives the zero-based position.
const unpackWithIndex = {
  $unwind: { path: "$data", includeArrayIndex: "no" }
};

// Flatten to the requested shape and make the number one-based.
const shapeNumberedRow = {
  $project: {
    _id: 0,
    id: "$data._id",
    name: "$data.student.name",
    nisn: "$data.student.nisn",
    no: { $add: ["$no", 1] }
  }
};

db.collection.aggregate([packIntoOneArray, unpackWithIndex, shapeNumberedRow])
You can see it works here .
I strongly suggest to use a $match step before these steps, otherwise you will group your entire collection into one document.
You need to run a pipeline with a $setWindowFields stage that allows you to add a new field which returns the position of a document (known as the document number) within a partition. The position number creation is made possible by the $documentNumber operator only available in the $setWindowFields stage.
The partition could be an extra field (which is constant) that can act as the window partition.
The final stage in the pipeline is the $replaceWith step which will promote the student embedded document to the top-level as well as replacing all input documents with the specified document.
Running the following aggregation will yield the desired results:
// Adds a one-based running number `no` to every document with
// $setWindowFields / $documentNumber (MongoDB 5.0+), then promotes the
// embedded `student` fields to the top level.
db.collection.aggregate([
  // Constant field so that all documents fall into one window partition.
  { $addFields: { _partition: 'students' } },
  {
    $setWindowFields: {
      partitionBy: '$_partition',
      // Ascending _id so numbering follows the expected output (the document
      // with the smaller _id gets no: 1); descending would reverse it.
      sortBy: { _id: 1 },
      output: { no: { $documentNumber: {} } }
    }
  },
  {
    // Replace each document by { id, no } merged with the student's fields.
    $replaceWith: {
      $mergeObjects: [
        { id: '$_id', no: '$no' },
        '$student'
      ]
    }
  }
])

MongoDB Exclude docs from aggregate if field/reference exists in other collection

// Sellers that have already been saved; `url` identifies the seller.
const sellerSchema = Schema(
  {
    name: String,
    url: String
  }
);

// Products; `sellerUrl` is compared against a seller's `url`.
const productSchema = Schema(
  {
    title: String,
    sellerUrl: String
  }
);
Below query will return unique sellerUrl from all products:
// One output document per distinct sellerUrl found among the products.
const distinctSellerUrlStage = {
  $group: { _id: "$sellerUrl" }
};

context.Product.aggregate([distinctSellerUrlStage]);
But I also want to exclude from aggregation, sellers that I already saved. So if url == sellerUrl aggregation must exclude that seller.
Please help me
You can try below query :
// Returns one document { uniqueAndNotInSellerColl: [...] } listing every
// product sellerUrl that has no seller document with the same url.
db.product.aggregate([
  {
    $group: {
      _id: "", /** group on no condition & push all unique `sellerUrl` to sellerUrls array */
      sellerUrls: { $addToSet: "$sellerUrl" }
    }
  },
  {
    $lookup: {
      from: "seller",
      let: { sellerUrls: "$sellerUrls" }, // expose the outer array to the sub-pipeline
      pipeline: [
        /** group on no condition & push all unique `url` to urls array */
        { $group: { _id: "", urls: { $addToSet: "$url" } } },
        // difference between the product urls and the saved seller urls
        { $project: { _id: 0, uniqueAndNotInSellerColl: { $setDifference: ["$$sellerUrls", "$urls"] } } }
      ],
      as: "data" // at most one element, because the sub-pipeline groups everything
    }
  },
  /**
   * Promote the single element of `data` to the root.
   * When the seller collection is empty the sub-pipeline yields nothing and
   * `data` is []; $replaceRoot would then fail on a missing newRoot, so fall
   * back to "every product sellerUrl is unsaved".
   */
  {
    $replaceRoot: {
      newRoot: {
        $ifNull: [
          { $arrayElemAt: ["$data", 0] },
          { uniqueAndNotInSellerColl: "$sellerUrls" }
        ]
      }
    }
  }
])
Test : mongoplayground
Update :
As you need few other fields from product collection but not just the sellerUrl field then try below query :
// Lists products, bucketed by sellerUrl, whose seller has not been saved yet.

// Bucket products by sellerUrl, keeping only each product's title
// (push `$$ROOT` instead when every product field is needed).
const bucketBySellerUrl = {
  $group: {
    _id: "$sellerUrl",
    docs: { $push: { title: "$title" } }
  }
};

// Plain equality lookup: fine while only a few seller docs match each url.
// With many matches per url, prefer a pipeline-style $lookup that fetches
// just the sellers' _id values, since the whole seller doc is not needed.
const attachSavedSellers = {
  $lookup: {
    from: "seller",
    localField: "_id",
    foreignField: "url",
    as: "sellerDocs"
  }
};

// Keep only the buckets for which no seller document matched.
const keepUnsavedSellers = {
  $match: { sellerDocs: [] }
};

// The (empty) lookup result is no longer needed in the output.
const dropSellerDocs = {
  $project: { sellerDocs: 0 }
};

db.product.aggregate([
  bucketBySellerUrl,
  attachSavedSellers,
  keepUnsavedSellers,
  dropSellerDocs
])
Test : mongoplayground

MongoDB Aggregation pipeline inner array mapping

I am trying to perform a lookup operation between two collections like shown below,
first collection records:
{
field1: "FIELD",
title: "sometitle",
secondIds: [
{
value: "nestedval1",
secondId: "234"
},
{
value: "nestedval2",
secondId: "342"
}
]
}
Second collection record
{
id: "234",
secvalue: "somevalue"
},
{
id: "342",
secvalue: "anothervalue"
}
I am trying to get the output in the below format for matching field1 name "FIELD" inside the first collection.
{
field1: "FIELD",
title: "sometitle",
secondIds: [
{
value: "nestedval1",
secondId: "234",
second: {
id: "234",
secvalue: "somevalue"
}
},
{
value: "nestedval2",
secondId: "342",
second: {
id: "342",
secvalue: "anothervalue"
}
}
]
}
After the matching stage of the aggregation pipeline, I am still stuck on how to create a lookup operation that retrieves the second-collection entry mapped to each element of the first. Is it possible to do this, or is there another way to achieve it?
// Embeds the matching secondCollection document into every element of
// `secondIds`, then rebuilds one document per (field1, title) pair.

// One document per secondIds element, so each can be joined on its own.
const explodeSecondIds = { $unwind: '$secondIds' };

// Attach the secondCollection docs whose `id` equals the element's secondId.
const joinSecondCollection = {
  $lookup: {
    from: 'secondCollection',
    localField: 'secondIds.secondId',
    foreignField: 'id',
    as: 'secondIds.second'
  }
};

// $lookup always yields an array; unwind it into a single embedded object.
const flattenJoinedSecond = { $unwind: '$secondIds.second' };

// Hide the joined document's own _id.
const hideJoinedObjectId = { $project: { 'secondIds.second._id': 0 } };

// Collect the enriched elements back into an array per (field1, title).
const regroupByFieldAndTitle = {
  $group: {
    _id: {
      field1: "$field1",
      title: "$title"
    },
    secondIds: { $push: '$secondIds' }
  }
};

// Move the grouping keys back to the top level and drop the compound _id.
const restoreTopLevelFields = {
  $project: {
    _id: 0,
    field1: '$_id.field1',
    title: "$_id.title",
    secondIds: 1
  }
};

firstCollection.aggregate([
  explodeSecondIds,
  joinSecondCollection,
  flattenJoinedSecond,
  hideJoinedObjectId,
  regroupByFieldAndTitle,
  restoreTopLevelFields
]).pretty()
Explanations are in the comments

$lookup using multiple criteria mongodb java aggregation

Have 2 following collections:
user collection
{
userId:user1,
creationTimeStamp:2019-11-05T08:15:30
status:active
},
{
userId:user2,
creationTimeStamp:2019-10-05T08:15:30
status:active
}
document collection
{
userId:user1,
category:Development
published:true
},
{
userId:user2,
category:Development
published:false
}
I want to join these two collections and filter so that I get documents that are in the Development category and are not published, belonging to active users whose creationTimeStamp falls within a given range.
How can I write a mongodb java aggregation in order to get a result like this:
{
userId: user2,
status:active,
category:Development,
published:false
}
You could run below aggregation query on the document collection to get the expected result
// Pipeline to run on the `document` collection: unpublished Development
// documents joined with the status of their user.
[
  {
    $match: {
      // String matching is case-sensitive; the stored value is "Development".
      category: 'Development',
      published: false
    }
  },
  {
    // Join the owning user on userId.
    $lookup: {
      from: 'user',
      localField: 'userId',
      foreignField: 'userId',
      as: 'JoinedTable'
    }
  },
  {
    // Turn the joined array into a single embedded object.
    $unwind: {
      path: '$JoinedTable'
    }
  },
  {
    // Pick the output fields, one result per original document.
    $group: {
      _id: '$_id',
      userId: {
        $first: '$userId'
      },
      status: {
        $first: '$JoinedTable.status'
      },
      category: {
        $first: '$category'
      },
      published: {
        $first: '$published'
      }
    }
  }
]
Explanation:
1. filter documents using match for criteria category: 'development' & published: false
2. join document collection with user collection with key userId
3. unwind the joined collection field to convert array to object
4. project the fields needed using groups.
Hope this helps!
You haven't mentioned about the duplicate of userId in User collection.
So the script is
// Pipeline for the `document` collection: unpublished Development documents
// that belong to active users, with the user's status copied onto each result.

// Unpublished documents of the Development category.
const selectUnpublishedDevelopment = {
  $match: {
    category: "Development",
    published: false
  }
};

// Attach the owning user(s) by userId.
const attachOwningUser = {
  $lookup: {
    from: 'user',
    localField: 'userId',
    foreignField: 'userId',
    as: 'joinUser'
  }
};

// One row per joined user; documents without a user are kept at this point.
const spreadOwningUser = {
  $unwind: {
    path: "$joinUser",
    preserveNullAndEmptyArrays: true
  }
};

// Only rows whose joined user is active survive.
const requireActiveUser = {
  $match: {
    "joinUser.status": "active"
  }
};

// Copy the user's status to the top level.
const copyUserStatus = {
  $addFields: {
    "status": "$joinUser.status"
  }
};

// Final shape: userId, category, published, status (no _id).
const selectOutputFields = {
  $project: {
    _id: 0,
    userId: 1,
    category: 1,
    published: 1,
    status: 1
  }
};

[
  selectUnpublishedDevelopment,
  attachOwningUser,
  spreadOwningUser,
  requireActiveUser,
  copyUserStatus,
  selectOutputFields
]
And the java code,
include these imports
import static org.springframework.data.mongodb.core.aggregation.Aggregation.match;
import static org.springframework.data.mongodb.core.aggregation.Aggregation.lookup;
import static org.springframework.data.mongodb.core.aggregation.Aggregation.unwind;
import static org.springframework.data.mongodb.core.aggregation.Aggregation.project;
method is,
public Object findAllwithVideos() {
Aggregation aggregation=Aggregation.newAggregation(
match(Criteria.where("category").is("Development").and("published").is(false)),
lookup("user","userId","userId","joinUser"),
unwind("joinUser",true),
new AggregationOperation(){
#Override
public Document toDocument(AggregationOperationContext aggregationOperationContext){
return new Document("$addFields",
new Document("status","$joinUser.status")
);
}
},
project("userId","category","published","status")
).withOptions(AggregationOptions.builder().allowDiskUse(Boolean.TRUE).build());
return mongoTemplate.aggregate(aggregation, mongoTemplate.getCollectionName(Document.class), Object.class);
}

MongoDB $lookup with nested object with nested array

I have 2 collections.
cases
_id: ObjectId.
name: string.
info: {
[here can be many different fields with diff types]
relatedEntities: [
{ role: string;
id: ObjectId;
} <--- here can be a lot of entities
]
}
entities
_id: ObjectId.
type: string,
name: string,
info: {
[here can be many different fields with diff types]
}
I need to retrieve all cases and for each case.info.entities object I need to have field data which will equal to entity document ( case.info.entities.id === entity_id)
Example what I need to have
_id: ObjectId.
name: string.
info: {
[here can be many different fields with diff types]
entities: [
{ role: string;
id: ObjectId;
data: {
_id: ObjectId.
type: string,
name: string,
info: {
[here can be many different fields with diff types]
}
}
} <--- here can be a lot of entities
]
}
How to do it in a proper way?
At the moment I implemented this is that way:
// Stages from the question's current implementation.
// One document per element of info.relatedEntities.
{ $unwind: "$info.relatedEntities" },
// Attach the entities whose _id equals the element's `entity` value; the
// result is an array stored under info.relatedEntities.entityObject.
{ $lookup: {
"from": "entities",
"localField": "info.relatedEntities.entity",
"foreignField": "_id",
"as": "info.relatedEntities.entityObject"
}},
// Rebuild one document per case. The enriched elements end up in a top-level
// `relatedEntities` array, while `info` is taken from the first unwound row.
{ $group: {
"_id": "$_id",
"templateType":{$first: "$templateType"},
"info":{$first: "$info"},
"relatedEntities": {
$push: "$info.relatedEntities"
}
}}
It works, but it requires additional parsing after the data is retrieved; I'd like to do it without workarounds.
You have done almost everything. Based on your query and model I have given below query (Field names might be different). Hope it helps.
// Rebuilds every case with its related entities resolved: each element of
// info.relatedEntities gets the matching entity document under `data`, and
// the result is returned as info.entities.

// One document per related entity of a case.
const explodeRelatedEntities = { $unwind: '$info.relatedEntities' };

// Resolve the referenced entity; $lookup stores the matches as an array.
const resolveEntity = {
  $lookup: {
    from: 'entities',
    localField: 'info.relatedEntities.entity',
    foreignField: '_id',
    as: 'info.relatedEntities.entityObject'
  }
};

// Group back per case. The case-level fields to keep travel inside the
// compound _id; each related entity is pushed with its first lookup match.
const regroupPerCase = {
  $group: {
    _id: {
      _id: '$_id',
      templateType: '$templateType',
      name: '$name',
      info: {
        address: "$info.address"
      }
    },
    relatedEntities: {
      $push: {
        role: '$info.relatedEntities.role',
        entity: '$info.relatedEntities.entity',
        data: { $arrayElemAt: ['$info.relatedEntities.entityObject', 0] }
      }
    }
  }
};

// Unpack the compound _id and nest the resolved entities under info.
const restoreCaseShape = {
  $project: {
    _id: '$_id._id',
    name: '$_id.name',
    templateType: '$_id.templateType',
    info: {
      address: '$_id.info.address',
      entities: '$relatedEntities'
    }
  }
};

db.cases.aggregate([
  explodeRelatedEntities,
  resolveEntity,
  regroupPerCase,
  restoreCaseShape
]).pretty()