How to find by id after lookup aggregation - mongodb

I have a collection of news articles in mongodb and another collection that maps a user's ID to an article's ID and has a "like" state, which can be either "like" or "dislike" or "none" if no entry with the user and article exists.
Here are both schemas:
// news collection
const articleSchema = new Schema({
title: String,
content: String,
})
// newslikes collection
const articleLikeSchema = new Schema({
user: { type: Schema.Types.ObjectId, ref: 'Client' },
article: { type: Schema.Types.ObjectId, ref: 'News' },
state: { type: String, enum: ['like', 'dislike'] }
})
I'm trying to write an aggregation query which joins these two collections using $lookup and then finds the state of a specific user's like on all articles. This is what I have so far:
const results = await News.aggregate([
{ $match: query },
{ $sort: { date: -1 } },
{ $skip: page * pageLength },
{ $limit: pageLength },
{ $lookup: {
from: 'newslikes',
localField: '_id',
foreignField: 'article',
as: 'likes'
} },
{ $project: {
title: 1,
likes: 1,
content: 1,
// numLikes: { $size: '$likes' }
userLikeStatus: {
$filter: {
input: '$likes',
as: 'like',
cond: {
$eq: ['$user._id', '5ccf13adcec5e6d84f940417']
}
}
}
} }
])
However this is not working. Is what I'm doing even the correct approach or is there a better way to do this rather than $filter?

You can use the aggregation below with MongoDB 3.6 and above:
// Pages through the matching articles and, for each one, joins only the given
// user's entry from `newslikes` (pipeline-style $lookup, MongoDB 3.6+). The
// joined entry's `state` is then flattened into `userLikeStatus`.
News.aggregate([
  { "$match": query },
  { "$sort": { "date": -1 } },
  { "$skip": page * pageLength },
  { "$limit": pageLength },
  { "$lookup": {
    "from": "newslikes",
    "let": { "articleId": "$_id" },
    "pipeline": [
      { "$match": {
        // $expr is required to compare against the outer article's $$articleId.
        "$expr": { "$eq": [ "$article", "$$articleId" ] },
        // `user` is stored as an ObjectId, so the hex string must be cast.
        // `new` is mandatory on current Mongoose/bson releases, where ObjectId
        // is a class, and is also accepted by older releases.
        "user": new mongoose.Types.ObjectId("5ccf13adcec5e6d84f940417")
      }}
    ],
    "as": "likes"
  }},
  { "$addFields": {
    // Take the state of the first joined entry; "none" when the user has no entry.
    "userLikeStatus": { "$ifNull": [{ "$arrayElemAt": ["$likes.state", 0] }, "none"] }
  }}
])
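Or the way you are trying:
Basically, you need to $filter the joined likes array down to the given user's entry (comparing $$like.user against the user's ObjectId rather than a string) and then take the state of the first match, falling back to "none" with $ifNull when the user has no entry for that article.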
// Pages through the matching articles, joins ALL likes of each article with a
// plain $lookup, and then picks the given user's entry out of the joined array.
News.aggregate([
  { "$match": query },
  { "$sort": { "date": -1 } },
  { "$skip": page * pageLength },
  { "$limit": pageLength },
  { "$lookup": {
    "from": "newslikes",
    "localField": "_id",
    "foreignField": "article",
    "as": "likes"
  }},
  { "$project": {
    "title": 1,
    "likes": 1,
    "content": 1,
    "userLikeStatus": {
      "$let": {
        "vars": {
          // Entries of `likes` that belong to the given user.
          "array": {
            "$filter": {
              "input": "$likes",
              "as": "like",
              // `$$like.user` is an ObjectId, so compare against an ObjectId,
              // not a string. `new` is mandatory on current Mongoose/bson
              // releases and is also accepted by older releases.
              "cond": { "$eq": ["$$like.user", new mongoose.Types.ObjectId("5ccf13adcec5e6d84f940417")] }
            }
          }
        },
        "in": {
          // State of the first matching entry; "none" when there is no match.
          "$ifNull": [{ "$arrayElemAt": ["$$array.state", 0] }, "none"]
        }
      }
    }
  }}
])

Related

MongoDB: how to aggregate from multiple collections with same aggregation pipeline

I'm trying to get aggregations with same aggregation pipeline including $match and $group operations from multiple collections.
For example,
with a users collection and collections of questions, answers and comments where every document has authorId and created_at field,
db = [
'users': [{ _id: 123 }, { _id: 456} ],
'questions': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T00:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T00:00:00Z') },
],
'answers': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-05T08:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-01T08:00:00Z') },
],
'comments': [
{ authorId: ObjectId('123'), createdAt: ISODate('2022-09-01T16:00:00Z') },
{ authorId: ObjectId('456'), createdAt: ISODate('2022-09-05T16:00:00Z') },
],
]
I want to get counts of documents from each collections with created_at between a given range and grouped by authorId.
A desired aggregation result may look like below. The _ids here are ObjectIds of documents in users collection.
// match: { createdAt: { $gt: ISODate('2022-09-03T00:00:00Z') } }
[
{ _id: ObjectId('123'), questionCount: 0, answerCount: 1, commentCount: 0 },
{ _id: ObjectId('456'), questionCount: 1, answerCount: 0, commentCount: 1 }
]
Currently, I am running aggregation below for each collection, combining the results in the backend service. (I am using Spring Data MongoDB Reactive.) This seems very inefficient.
db.collection.aggregate([
{ $match: {
created_at: { $gt: ISODate('2022-09-03T00:00:00Z') }
}},
{ $group : {
_id: '$authorId',
count: {$sum: 1}
}}
])
How can I get the desired result with one aggregation?
I thought $unionWith or $lookup may help but I'm stuck here.
You can try something like this, using $lookup, here we join users, with all the three collections one-by-one, and then calculate the count:
// For every user, joins the questions, answers and comments that the user
// authored after the cutoff date (pipeline-style $lookup), then reports the
// size of each joined array.
db.users.aggregate([
  {
    "$lookup": {
      "from": "questions",
      "let": { "id": "$_id" },
      "pipeline": [
        {
          "$match": {
            // $expr lets the sub-pipeline compare against the outer user's $$id.
            "$expr": {
              "$and": [
                { "$gt": ["$createdAt", ISODate("2022-09-03T00:00:00Z")] },
                { "$eq": ["$$id", "$authorId"] }
              ]
            }
          }
        }
      ],
      "as": "questions"
    }
  },
  {
    "$lookup": {
      "from": "answers",
      "let": { "id": "$_id" },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$and": [
                { "$gt": ["$createdAt", ISODate("2022-09-03T00:00:00Z")] },
                { "$eq": ["$$id", "$authorId"] }
              ]
            }
          }
        }
      ],
      "as": "answers"
    }
  },
  {
    "$lookup": {
      "from": "comments",
      "let": { "id": "$_id" },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$and": [
                { "$gt": ["$createdAt", ISODate("2022-09-03T00:00:00Z")] },
                { "$eq": ["$$id", "$authorId"] }
              ]
            }
          }
        }
      ],
      "as": "comments"
    }
  },
  {
    // Singular field names, consistent with each other:
    // questionCount / answerCount / commentCount.
    "$project": {
      "questionCount": { "$size": "$questions" },
      "answerCount": { "$size": "$answers" },
      "commentCount": { "$size": "$comments" }
    }
  }
])
Playground link. In the above query, we use pipelined form of $lookup, to perform join on some custom logic. Learn more about $lookup here.
Another way is this, perform normal lookup and then filter out the elements:
// For every user, joins all authored questions, answers and comments with
// plain $lookup stages, then counts only the joined documents created after
// the cutoff date.
db.users.aggregate([
  {
    "$lookup": {
      "from": "questions",
      "localField": "_id",
      "foreignField": "authorId",
      "as": "questions"
    }
  },
  {
    "$lookup": {
      "from": "answers",
      "localField": "_id",
      "foreignField": "authorId",
      "as": "answers"
    }
  },
  {
    "$lookup": {
      "from": "comments",
      "localField": "_id",
      "foreignField": "authorId",
      "as": "comments"
    }
  },
  {
    // Each count is the size of the joined array after filtering on createdAt.
    // Singular field names, consistent with each other:
    // questionCount / answerCount / commentCount.
    "$project": {
      "questionCount": {
        "$size": {
          "$filter": {
            "input": "$questions",
            "as": "item",
            "cond": { "$gt": ["$$item.createdAt", ISODate("2022-09-03T00:00:00Z")] }
          }
        }
      },
      "answerCount": {
        "$size": {
          "$filter": {
            "input": "$answers",
            "as": "item",
            "cond": { "$gt": ["$$item.createdAt", ISODate("2022-09-03T00:00:00Z")] }
          }
        }
      },
      "commentCount": {
        "$size": {
          "$filter": {
            "input": "$comments",
            "as": "item",
            "cond": { "$gt": ["$$item.createdAt", ISODate("2022-09-03T00:00:00Z")] }
          }
        }
      }
    }
  }
])
Playground link.

Find one user then get their ranking based on their total points using MongoDB

So I got the following data:
Users collection
{
_id: ObjectId("62a2a0422ec90fea68390aaa"),
name: 'Robert Yamashita',
username: 'robyama',
email: 'robert.yamashita#rocketmail.com',
},
{
_id: ObjectId("62a2a0452ec90fea68390aad"),
name: 'Charles X',
username: 'cvx',
email: 'charles.xxx#rocketmail.com',
}
Points collection
{
userId: ObjectId("62a2a0422ec90fea68390aaa"),
action: 'Liked a post',
points: 10,
}
{
userId: ObjectId("62a2a0422ec90fea68390aaa"),
action: 'Liked a post',
points: 10,
}
{
userId: ObjectId("62a2a0452ec90fea68390aad"),
action: 'Liked a comment',
points: 5,
}
I created a pipeline to get the total points of username robyama using the following query:
db.users.aggregate([
{ $match: { username: 'robyama' } },
{
$lookup: {
from: 'points',
localField: '_id',
foreignField: 'user',
as: 'userPoints'
}
},
{
$unwind: '$userPoints'
},
{
$group: {
_id: {
name: '$name',
email: '$email',
username: '$username',
},
count: { $sum: '$userPoints.points' }
}
}
]);
I got the following result:
{
"_id": {
"name": "Robert Yamashita",
"email": "robert.yamashita#rocketmail.com",
"username": "robyama",
},
"count": 20
}
This is exactly what I needed but I wanted to add a ranking field to the returned query since Robert has 20 points and Charles only has 5. So ideally I want the result to be this:
{
"_id": {
"name": "Robert Yamashita",
"email": "robert.yamashita#rocketmail.com",
"username": "robyama",
},
"count": 20
"rank": 1
}
What should I add to my pipeline to get the above output? Any help would be greatly appreciated!
Here's another way to do it. There's only one "$lookup" with one embedded "$group" so it should be fairly efficient. The "$project" seems a bit contrived, but it gives the output in the format you want.
// Finds one user and derives the rank from that user's position in a
// leaderboard built inside a single $lookup.
db.users.aggregate([
  { $match: { username: "robyama" } },
  {
    // No localField/let: the sub-pipeline is uncorrelated, so the matched user
    // receives the whole points leaderboard, sorted by total descending.
    $lookup: {
      from: "points",
      as: "sortedPoints",
      pipeline: [
        { $group: { _id: "$userId", count: { $sum: "$points" } } },
        { $sort: { count: -1 } }
      ]
    }
  },
  // One document per leaderboard row; `idx` is the zero-based row position.
  { $unwind: { path: "$sortedPoints", includeArrayIndex: "idx" } },
  // Keep only the row that belongs to the matched user.
  { $match: { $expr: { $eq: ["$_id", "$sortedPoints._id"] } } },
  {
    $project: {
      _id: { name: "$name", username: "$username", email: "$email" },
      count: "$sortedPoints.count",
      // Convert the zero-based position into a one-based rank.
      rank: { $add: ["$idx", 1] }
    }
  }
])
Try it on mongoplayground.net.
Well, this is one way of doing it.
Perform join using $lookup and calculate counts for each user.
Sort the elements by counts in desc order.
Group documents by _id as NULL and push them all in an array.
Unwind the array, along with getting row numbers.
Find your required document and calculate the rank using row number.
// Totals the points of every user, numbers the users by descending total, and
// returns the requested user together with the resulting rank.
db.users.aggregate([
  {
    "$lookup": {
      "from": "points",
      "localField": "_id",
      "foreignField": "userId",
      "as": "userPoints"
    }
  },
  { "$unwind": "$userPoints" },
  {
    // One document per user, carrying the summed points in `count`.
    "$group": {
      "_id": { "name": "$name", "email": "$email", "username": "$username" },
      "count": { "$sum": "$userPoints.points" }
    }
  },
  { "$sort": { "count": -1 } },
  // Gather all sorted documents into one array so they can be numbered.
  { "$group": { "_id": null, "docs": { "$push": "$$ROOT" } } },
  // `rownum` is the zero-based position of each document in the sorted array.
  { "$unwind": { "path": "$docs", "includeArrayIndex": "rownum" } },
  { "$match": { "docs._id.username": "robyama" } },
  { "$addFields": { "docs.rank": { "$add": ["$rownum", 1] } } },
  { "$replaceRoot": { "newRoot": "$docs" } }
])
This is the playground link.

mongodb - summations of array length with same ids

I am creating a platform where people can share their memes. On one page I want to show them who are the most popular members on the platform. so, there is a collection of 'meme' and 'user'
for example,
There are two memes that belong to the same user:
{
_id: 1,
username: "name",
bio: "bio",
image: "url",
};
memes
{
_id: 0,
user_id: 1,
image: "meme1.jpg",
likes: [
{
user_id: 4
}
]
},
{
_id: 1,
user_id: 1,
image: "meme2.jpg",
likes: [
{
user_id: 5
},
{
user_id: 6
}
]
}
and I want to output something like this way
{
user_id:1,
username:"name"
likes:3,
}
I wrote this query using aggregate functions, but I don't understand how to tell whether the user ids are the same so that their likes can be added together.
meme
.aggregate([
{
$lookup: {
from: "users",
localField: "user_id",
foreignField: "_id",
as: "userDetails",
},
},
{
$project: {
user_id: "$user_id",
username: "$userDetails.username",
likes: {
$size: "$likes",
},
},
},
{
$sort: { likes: 1 },
},
])
.exec()
.then((result) => {
console.log(result);
});
It will be easier to start the query from users.
You can use $sum, $map, $size aggregations to get the total likes, and add it using $addFields.
// Starts from users, joins each user's memes, and adds up the number of likes
// across those memes.
db.users.aggregate([
  {
    "$lookup": {
      "from": "memes",
      "localField": "_id",
      "foreignField": "user_id",
      "as": "userDetails"
    }
  },
  {
    "$addFields": {
      // $map yields the like count of every joined meme; $sum totals them.
      "likes": {
        "$sum": {
          "$map": { "input": "$userDetails", "in": { "$size": "$$this.likes" } }
        }
      }
    }
  },
  // Expose the user's _id as `user_id` and drop everything else.
  { "$project": { "_id": 0, "user_id": "$_id", "username": 1, "likes": 1 } }
])
Playground
Result:
[
{
"likes": 3,
"user_id": 1,
"username": "name"
}
]
You could project the length of the likes array, group the projections by user_id, and sum the results. Something like this should work:
// Sums the number of likes over all memes of each user.
// No $lookup on `users` is needed here: the $project below keeps only
// `user_id` and `likesSize`, so any joined user data would be discarded.
db.getCollection('memes').aggregate([{
    "$project": {
        "user_id": 1,
        // Number of likes on this single meme.
        "likesSize": {
            "$size": "$likes"
        }
    }
}, {
    // One result per user: _id is the user_id, count is the total likes.
    $group: {
        _id: "$user_id",
        "count": {
            "$sum": "$likesSize"
        }
    }
}
])
The above query should return:
{
"_id" : 1,
"count" : 3
}

mongodb aggregation pipeline not returning proper result and slow

I have three collections users, products and orders , orders type has two possible values "Cash" and "Online". One users can have single/multiple products and products have none/single/multiple orders. I want to text search on users collection on name. Now I want to write a query which will return all matching users on text search highest text score first, it might be possible one user's name is returning top score but don't have any products and orders.
I have written a query, but it does not return users who have the highest text score but no products/orders. It only returns users who have records in all three collections. The performance of this query is also poor: it takes a long time if a user has a lot of products, for example more than 3000. Any help appreciated.
db.users.aggregate(
[
{
"$match": {
"$text": {
"$search": "john"
}
}
},
{
"$addFields": {
"score": {
"$meta": "textScore"
}
}
},
{
"$sort": {
"Score": {
"$meta": "textScore"
}
}
},
{
"$skip": 0
},
{
"$limit": 6
},
{
"$lookup": {
"from": "products",
"localField": "userId",
"foreignField": "userId",
"as": "products"
}
},
{ $unwind: '$products' },
{
"$lookup": {
"from": "orders",
"let": {
"products": "$products"
},
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{
"$in": [
"$productId",
["$$products.productId"]
]
},
{
"$eq": [
"$orderType",
"Cash"
]
}
]
}
}
}
],
"as": "orders"
}
},
{ $unwind: 'orders' },
{
$group: {
_id: "$_id",
name: { $first: "$name" },
userId: { $first: "$userId" },
products: { $addToSet: "$products" },
orders: { $addToSet: "$orders" },
score: { $first: "$score" },
}
},
{ $sort: { "score": -1 } }
]
);
Issue:
Every lookup produces an array which holds the matched documents. When no documents are found, the array is empty. By default, $unwind drops any document whose array is empty or missing, so those users disappear from the pipeline at that stage. That's the reason we are not getting user records with no products/orders. We need to preserve such documents (preserveNullAndEmptyArrays) so that the pipeline can continue with them.
Improvements:
In the orders lookup, $eq can be used instead of $in, as we have already
unwound the products array and each document now contains only
a single productId
Create an index on userId in products collection to make the query more efficient
Following is the updated query:
// Text-searches users, pages them by relevance, then left-joins each user's
// products and the "Cash" orders of those products. Users without products or
// orders are kept in the result.
db.users.aggregate([
  { "$match": { "$text": { "$search": "john" } } },
  { "$addFields": { "score": { "$meta": "textScore" } } },
  // Order by relevance BEFORE paging; otherwise $skip/$limit would pick an
  // arbitrary page of matches instead of the highest-scoring ones.
  { "$sort": { "score": { "$meta": "textScore" } } },
  { "$skip": 0 },
  { "$limit": 6 },
  {
    "$lookup": {
      "from": "products",
      "localField": "userId",
      "foreignField": "userId",
      "as": "products"
    }
  },
  {
    // Keep users whose `products` array is empty instead of dropping them.
    "$unwind": {
      "path": "$products",
      "preserveNullAndEmptyArrays": true
    }
  },
  {
    "$lookup": {
      "from": "orders",
      "let": { "products": "$products" },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$and": [
                // After the $unwind each document carries a single product,
                // so a plain equality on productId is sufficient.
                { "$eq": ["$productId", "$$products.productId"] },
                { "$eq": ["$orderType", "Cash"] }
              ]
            }
          }
        }
      ],
      "as": "orders"
    }
  },
  {
    // Keep products (and users) that have no matching orders.
    "$unwind": {
      "path": "$orders",
      "preserveNullAndEmptyArrays": true
    }
  },
  {
    // Fold the unwound rows back into one document per user.
    "$group": {
      "_id": "$_id",
      "name": { "$first": "$name" },
      "userId": { "$first": "$userId" },
      "products": { "$addToSet": "$products" },
      "orders": { "$addToSet": "$orders" },
      "score": { "$first": "$score" }
    }
  },
  // $group does not guarantee output order, so sort by score again.
  { "$sort": { "score": -1 } }
]);
To get more information on unwind, please check https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

How to check if an element exists in a document and return true or false depending on it?

I have an aggregation query in which I use $lookup to get data from other collections, but I cannot figure out how to get a boolean value indicating whether a $match found anything.
Schema
const likesSchema = new mongoose.Schema({
user: {
id: {
type: String,
required: true,
},
name: {
type: String,
required: true,
},
},
storyID: {
type: String,
required: true,
}
}, {
timestamps: true
});
Complete Query
const user_id = req.authorizedUser.sub;
const stories = await Story.aggregate([
{
$lookup: {
from: "comments",
localField: "storyID",
foreignField: "storyID",
as: "comments"
},
},
{
$lookup: {
from: "likes",
let: {storyID: "$storyID"},
pipeline: [
{
$match: {
$expr: { $eq: ["$$storyID", "$storyID"] }
}
},
{
$facet: {
"total": [{ $count: "count" }],
"byMe": [{
$match: {
$expr: { $eq: ["$user.id", user_id] } // Need boolean value if found/ not found
}
}]
}
}
],
as: "likes"
}
},
Snippet of Response
"likes": [
{
"total": [
{
"count": 2
}
],
"byMe": [
{
"_id": "5d04fe8e982bb50bbcbd2b48",
"user": {
"id": "63p6PpPyOh",
"name": "Ayan Dey"
},
"storyID": "b0g5GA6ZJFKkJcnJlp6w8qGR",
"createdAt": "2019-06-15T14:19:58.531Z",
"updatedAt": "2019-06-15T14:19:58.531Z",
"__v": 0
}
]
}
]
Required Response
"likes": {
"total": 2,
"byMe": true
}
You can use the aggregation below:
{ "$lookup": {
"from": "likes",
"let": { "storyID": "$storyID" },
"pipeline": [
{ "$match": { "$expr": { "$eq": ["$$storyID", "$storyID"] }}}
],
"as": "likes1"
}},
{ "$addFields": {
"likes.total": { "$size": "$likes1" },
"likes.byMe": { "$ne": [{ "$indexOfArray": ["$likes1.user.id", user_id] }, -1] }
}},
{ "$project": { "likes1": 0 }}
Or
{ "$lookup": {
"from": "likes",
"let": { "storyID": "$storyID" },
"pipeline": [
{ "$match": { "$expr": { "$eq": ["$$storyID", "$storyID"] }}},
{ "$facet": {
"total": [{ "$count": "count" }],
"byMe": [{ "$match": { "$expr": { "$eq": ["$user.id", user_id] }}}]
}}
{ "$project": {
"total": {
"$ifNull": [{ "$arrayElemAt": ["$total.count", 0] }, 0 ]
},
"byMe": { "$ne": [{ "$size": "$byMe" }, 0] }
}}
],
"as": "likes"
}},
{ "$unwind": "$likes" }