I am new to MongoDB's aggregation framework, and I have the following schemas:
// One document per sale: which buyer bought which book.
var bookSoldSchema = new Schema({
    // Buyer, stored as a number (presumably a buyer id).
    buyer: {
        type: Number
    },
    // Reference to the sold book (a document of the 'Book' model).
    book: {
        type: Schema.Types.ObjectId, ref: 'Book'
    }
});
// A book: its name plus references to every category it belongs to.
var bookSchema = new Schema({
    bookName: {
        type: String
    },
    // Array of references to 'BookCategory' documents.
    categories: [
        {
            type: Schema.Types.ObjectId,
            ref: 'BookCategory'
        }
    ]
});
Each book has multiple categories. I would like to show the top 5 best-selling categories and, for each category, the top 3 books that sold the most. I also need to show the category name. The result I need is:
[
{
categoryId: xxx,
categoryName:xxx,
top3books:[
{bookId:xxx, bookName:xxx},
{bookId:xxx, bookName:xxx},
{bookId:xxx, bookName:xxx},
]
},
{
categoryId: xxx,
categoryName:xxx,
top3books:[
{bookId:xxx, bookName:xxx},
{bookId:xxx, bookName:xxx},
{bookId:xxx, bookName:xxx},
]
}
]
How can I go about this in MongoDB?
You can run the following aggregation pipeline which assumes you have a model BookSold that uses the bookSoldSchema above, books and bookcategories underlying collections:
// Top 5 best-selling categories, each with its 3 best-selling books.
// Every BookSold document counts as one sale. Assumes the underlying
// collections are named "books" and "bookcategories" and that a category
// document stores its display name in `name`.
BookSold.aggregate([
    // Attach the sold book to every sale.
    {
        "$lookup": {
            "from": "books",
            "localField": "book",
            "foreignField": "_id",
            "as": "book_join"
        }
    },
    { "$unwind": "$book_join" },
    // One document per (sale, category): a sale counts for every category
    // the book belongs to.
    { "$unwind": "$book_join.categories" },
    // Count the sales of each book inside each category.
    {
        "$group": {
            "_id": {
                "category": "$book_join.categories",
                "book": "$book_join._id"
            },
            "bookName": { "$first": "$book_join.bookName" },
            "sold": { "$sum": 1 }
        }
    },
    // Best-selling books first, so they are pushed first below.
    { "$sort": { "sold": -1 } },
    // Roll up to one document per category with its total sales.
    {
        "$group": {
            "_id": "$_id.category",
            "totalSold": { "$sum": "$sold" },
            "books": {
                "$push": {
                    "bookId": "$_id.book",
                    "bookName": "$bookName"
                }
            }
        }
    },
    { "$sort": { "totalSold": -1 } },
    { "$limit": 5 },
    // Resolve the category name only for the 5 surviving categories.
    {
        "$lookup": {
            "from": "bookcategories",
            "localField": "_id",
            "foreignField": "_id",
            "as": "category"
        }
    },
    { "$unwind": "$category" },
    {
        "$project": {
            "_id": 0,
            "categoryId": "$_id",
            "categoryName": "$category.name",
            "top3books": { "$slice": [ "$books", 3 ] }
        }
    }
], function(err, result) {
    // Stop here on failure; `result` is not meaningful when `err` is set.
    if (err) return handleError(err);
    console.log(JSON.stringify(result, null, 4));
})
Related
So I got the following data:
Users collection
{
_id: ObjectId("62a2a0422ec90fea68390aaa"),
name: 'Robert Yamashita',
username: 'robyama',
email: 'robert.yamashita#rocketmail.com',
},
{
_id: ObjectId("62a2a0452ec90fea68390aad"),
name: 'Charles X',
username: 'cvx',
email: 'charles.xxx#rocketmail.com',
}
Points collection
{
userId: ObjectId("62a2a0422ec90fea68390aaa"),
action: 'Liked a post',
points: 10,
}
{
userId: ObjectId("62a2a0422ec90fea68390aaa"),
action: 'Liked a post',
points: 10,
}
{
userId: ObjectId("62a2a0452ec90fea68390aad"),
action: 'Liked a comment',
points: 5,
}
I created a pipeline to get the total points of username robyama using the following query:
// Total points of the user "robyama".
db.users.aggregate([
    { $match: { username: 'robyama' } },
    // Join the user's point entries; the points documents reference the
    // user through their `userId` field.
    {
        $lookup: {
            from: 'points',
            localField: '_id',
            foreignField: 'userId',
            as: 'userPoints'
        }
    },
    {
        $unwind: '$userPoints'
    },
    // Sum the points of all entries belonging to the same user.
    {
        $group: {
            _id: {
                name: '$name',
                email: '$email',
                username: '$username',
            },
            count: { $sum: '$userPoints.points' }
        }
    }
]);
I got the following result:
{
"_id": {
"name": "Robert Yamashita",
"email": "robert.yamashita#rocketmail.com",
"username": "robyama",
},
"count": 20
}
This is exactly what I needed but I wanted to add a ranking field to the returned query since Robert has 20 points and Charles only has 5. So ideally I want the result to be this:
{
"_id": {
"name": "Robert Yamashita",
"email": "robert.yamashita#rocketmail.com",
"username": "robyama",
},
"count": 20,
"rank": 1
}
What should I add to my pipeline to get the above output? Any help would be greatly appreciated!
Here's another way to do it. There's only one "$lookup" with one embedded "$group" so it should be fairly efficient. The "$project" seems a bit contrived, but it gives the output in the format you want.
// Total points and rank of the user "robyama".
db.users.aggregate([
    { $match: { username: "robyama" } },
    // Uncorrelated sub-pipeline: totals per userId over the whole points
    // collection, highest total first.
    {
        $lookup: {
            from: "points",
            as: "sortedPoints",
            pipeline: [
                { $group: { _id: "$userId", count: { $sum: "$points" } } },
                { $sort: { count: -1 } }
            ]
        }
    },
    // One document per total; `idx` is the zero-based position in the
    // sorted list.
    { $unwind: { path: "$sortedPoints", includeArrayIndex: "idx" } },
    // Keep only the total that belongs to the matched user.
    { $match: { $expr: { $eq: ["$_id", "$sortedPoints._id"] } } },
    {
        $project: {
            _id: { name: "$name", username: "$username", email: "$email" },
            count: "$sortedPoints.count",
            // Positions start at 0, ranks at 1.
            rank: { $add: ["$idx", 1] }
        }
    }
])
Try it on mongoplayground.net.
Well, this is one way of doing it.
Perform join using $lookup and calculate counts for each user.
Sort the elements by counts in desc order.
Group documents by _id as NULL and push them all in an array.
Unwind the array, along with getting row numbers.
Find your required document and calculate the rank using row number.
// Total points and rank of the user "robyama", computed over all users.
db.users.aggregate([
    // Join every user with their point entries.
    {
        $lookup: {
            from: "points",
            localField: "_id",
            foreignField: "userId",
            as: "userPoints"
        }
    },
    { $unwind: "$userPoints" },
    // Sum the points per user.
    {
        $group: {
            _id: { name: "$name", email: "$email", username: "$username" },
            count: { $sum: "$userPoints.points" }
        }
    },
    // Highest total first.
    { $sort: { count: -1 } },
    // Collect all totals into one array so that positions can be numbered.
    { $group: { _id: null, docs: { $push: "$$ROOT" } } },
    // `rownum` is the zero-based position in the sorted array.
    { $unwind: { path: "$docs", includeArrayIndex: "rownum" } },
    { $match: { "docs._id.username": "robyama" } },
    // Positions start at 0, ranks at 1.
    { $addFields: { "docs.rank": { $add: ["$rownum", 1] } } },
    { $replaceRoot: { newRoot: "$docs" } }
])
This is the playground link.
I have two collections.
Collection 1 is like an account.
Collection 2 creates a unique association between a user and an account
I am trying to return the accounts for which the user has no association
Collection1 schema
// Account-like document that carries only a mandatory name.
const Collection1Schema = new Schema({
    name: {
        type: String,
        required: true
    }
});
Collection1 data
[
{
"_id": "61cf8452fca008360872c9cd",
"name": "Aff 2"
},
{
"_id": "61cf845ffca008360872c9d0",
"name": "AFF 1"
},
{
"_id": "61cf8468fca008360872c9d3",
"name": "Aff 3"
}
]
Collection2 schema
// Association between a user and a Collection1 document.
const Collection2Schema = new Schema({
    // The associated user.
    userID: {
        type: Schema.Types.ObjectId,
        required: true
    },
    // The associated Collection1 document.
    col_1_ID: {
        type: Schema.Types.ObjectId,
        required: true
    }
});
Collection2 data
[
{
"_id": "61e05bb5fe1d8327d4c73663",
"userID": "61cf82dac828bd519cfd38ca",
"col_1_ID": "61cf845ffca008360872c9d0"
},
{
"_id": "61e05c14fe1d8327d4c7367d",
"userID": "61cf82dac828bd519cfd38ca",
"col_1_ID": "61cf8468fca008360872c9d3"
},
{
"_id": "61e05ca0fe1d8327d4c73695",
"userID": "61e05906246ccc41d4ebd30f",
"col_1_ID": "61cf8452fca008360872c9cd"
}
]
This is what I have so far, but it does not return the accounts the user is NOT a part of.
I am using Collection2 as the basis in the pipeline
// Starts from Collection2: takes the given user's associations and joins
// the matching Collection1 document onto each of them.
[
    {
        $match: {
            userID: new ObjectId('61cf82dac828bd519cfd38ca')
        }
    },
    {
        $lookup: {
            from: 'Collection1',
            localField: 'col_1_ID',
            foreignField: '_id',
            as: 'aa'
        }
    },
    // Keep associations even when no Collection1 document was joined.
    {
        $unwind: {
            path: '$aa',
            preserveNullAndEmptyArrays: true
        }
    }
]
What I would like to return is all the Collection1 documents that the user (userID = '61cf82dac828bd519cfd38ca') is NOT associated with in Collection2, like this:
[
{
"_id": "61cf8452fca008360872c9cd",
"name": "Aff 2"
}
]
UPDATE 1
Here is a playground where another user has joined another account, so the pipeline does not return "Aff 2" like expected
https://mongoplayground.net/p/W6W88_2MaI3
UPDATE 2
Here is a playground that almost does what I want, but it returns duplicate "Aff 2" entries.
https://mongoplayground.net/p/nTI3MKNPEmD
try the inversing lookup
https://mongoplayground.net/p/hXAYyv8X461
// Collection1 documents the given user is NOT associated with.
db.Collection1.aggregate([
    // Attach every association that points at this account.
    {
        "$lookup": {
            "from": "Collection2",
            "localField": "_id",
            "foreignField": "col_1_ID",
            "as": "joined_docs"
        }
    },
    // Test the joined array as a whole, without unwinding it first: `$ne` on
    // an array path matches only when NO element has this userID, so accounts
    // shared with other users are not returned by mistake, nothing is
    // duplicated, and accounts without any association are kept.
    // The id is a string because the playground data stores ids as strings;
    // use an ObjectId when the collection stores ObjectIds.
    {
        $match: {
            "joined_docs.userID": {
                $ne: "61cf82dac828bd519cfd38ca"
            }
        }
    },
    // Hide the helper array from the result.
    {
        $project: {
            "joined_docs": 0
        }
    }
])
ANSWER:
after messing around with several mongo playgrounds and digging into a few different pipeline attributes... here is what works:
https://mongoplayground.net/p/xbZeRfVcrZq
Data:
db={
"Collection1": [
{
"_id": "61cf8452fca008360872c9cd",
"name": "Aff 2"
},
{
"_id": "61cf845ffca008360872c9d0",
"name": "AFF 1"
},
{
"_id": "61cf8468fca008360872c9d3",
"name": "Aff 3"
}
],
"Collection2": [
{
"_id": "61e05bb5fe1d8327d4c73663",
"userID": "61cf82dac828bd519cfd38ca",
"col_1_ID": "61cf845ffca008360872c9d0"
},
{
"_id": "61e05c14fe1d8327d4c7367d",
"userID": "61cf82dac828bd519cfd38ca",
"col_1_ID": "61cf8468fca008360872c9d3"
},
{
"_id": "61e05ca0fe1d8327d4c73695",
"userID": "61e05906246ccc41d4ebd30f",
"col_1_ID": "61cf8452fca008360872c9cd"
},
{
"_id": "61e05c14fe1d8327d4c73600",
"userID": "61cf82dac828bd519cfd3111",
"col_1_ID": "61cf8468fca008360872c9d3"
},
{
"_id": "61e05c14fe1d8327d4c73601",
"userID": "61cf82dac828bd519cfd3112",
"col_1_ID": "61cf8452fca008360872c9cd"
},
]
}
Pipeline:
// Collection1 documents the given user is NOT associated with.
db.Collection1.aggregate([
    // Attach every association that points at this account.
    {
        "$lookup": {
            "from": "Collection2",
            "localField": "_id",
            "foreignField": "col_1_ID",
            "as": "joined_docs"
        }
    },
    // `$ne` on an array path matches only when NO element has this userID;
    // an empty array (account without associations) matches as well.
    // The id is a string because the sample data stores ids as strings;
    // use an ObjectId when the collection stores ObjectIds.
    {
        $match: {
            "joined_docs.userID": {
                $ne: "61cf82dac828bd519cfd38ca"
            }
        }
    },
    // Each account is still a single document here, so no $unwind/$group
    // round trip is needed (it would also drop accounts whose joined array
    // is empty). Return only _id and name.
    {
        $project: {
            "name": 1
        }
    }
])
result:
[
{
"_id": "61cf8452fca008360872c9cd",
"name": "Aff 2"
}
]
try this instead:
https://mongoplayground.net/p/HDm2sbdvH88
// Accounts (Collection1) the given user is NOT associated with, returned as
// { _id: { account_id, account_name } }.
db.Collection1.aggregate([
    // Attach every association that points at this account.
    {
        "$lookup": {
            "from": "Collection2",
            "localField": "_id",
            "foreignField": "col_1_ID",
            "as": "joined_docs"
        }
    },
    // Keep accounts that have no association at all: nobody is associated
    // with them, so the given user is not either.
    {
        $unwind: {
            "path": "$joined_docs",
            "preserveNullAndEmptyArrays": true
        }
    },
    // Collect the ids of all users associated with each account.
    {
        $group: {
            _id: {
                account_id: "$_id",
                account_name: "$name",
            },
            user_ids: {
                $push: {
                    "userID": "$joined_docs.userID"
                }
            }
        }
    },
    // Keep the accounts whose user list does not contain the given user.
    {
        $match: {
            "user_ids.userID": {
                $nin: [
                    "61cf82dac828bd519cfd38ca"
                ]
            }
        }
    },
    // Hide the helper array from the result.
    {
        $project: {
            user_ids: 0
        }
    }
])
I have three collections: users, products, and orders. The order type has two possible values, "Cash" and "Online". One user can have one or multiple products, and a product can have zero, one, or multiple orders. I want to run a text search on the name field of the users collection. I want to write a query that returns all users matching the text search, highest text score first; it is possible that the user whose name gets the top score doesn't have any products or orders.
I have written a query, but it does not return users who have the highest text score but don't have any products/orders. It only returns users who have records present in all three collections. The performance of this query is also poor: it takes a long time if a user has a lot of products, for example more than 3000. Any help is appreciated.
// Text search on users, joined with their products and the "Cash" orders of
// those products, best text score first.
db.users.aggregate(
    [
        {
            "$match": {
                "$text": {
                    "$search": "john"
                }
            }
        },
        // Expose the text score as a regular field.
        {
            "$addFields": {
                "score": {
                    "$meta": "textScore"
                }
            }
        },
        // Best matches first, then take the first page of 6 users.
        {
            "$sort": {
                "Score": {
                    "$meta": "textScore"
                }
            }
        },
        {
            "$skip": 0
        },
        {
            "$limit": 6
        },
        {
            "$lookup": {
                "from": "products",
                "localField": "userId",
                "foreignField": "userId",
                "as": "products"
            }
        },
        // Plain $unwind: a user without products leaves the pipeline here.
        { $unwind: '$products' },
        // "Cash" orders of the current product.
        {
            "$lookup": {
                "from": "orders",
                "let": {
                    "products": "$products"
                },
                "pipeline": [
                    {
                        "$match": {
                            "$expr": {
                                "$and": [
                                    {
                                        "$in": [
                                            "$productId",
                                            ["$$products.productId"]
                                        ]
                                    },
                                    {
                                        "$eq": [
                                            "$orderType",
                                            "Cash"
                                        ]
                                    }
                                ]
                            }
                        }
                    }
                ],
                "as": "orders"
            }
        },
        // The path must carry the `$` prefix, otherwise the stage is
        // rejected. Plain $unwind: a product without orders leaves the
        // pipeline here.
        { $unwind: '$orders' },
        // Back to one document per user.
        {
            $group: {
                _id: "$_id",
                name: { $first: "$name" },
                userId: { $first: "$userId" },
                products: { $addToSet: "$products" },
                orders: { $addToSet: "$orders" },
                score: { $first: "$score" },
            }
        },
        { $sort: { "score": -1 } }
    ]
);
Issue:
Every lookup produces an array that holds the matched documents. When no documents are found, the array is empty. By default, unwinding an empty array removes the whole document from the pipeline. That is the reason we are not getting user records with no products/orders. We need to preserve such documents so that they continue through the pipeline.
Improvements:
In the orders lookup, $eq can be used instead of $in, as we have already
unwound the products array and each document now contains only
a single productId
Create an index on userId in products collection to make the query more efficient
Following is the updated query:
// Text search on users, joined with their products and the "Cash" orders of
// those products, best text score first. Users without products or orders
// are kept.
db.users.aggregate([
    {
        "$match": {
            "$text": {
                "$search": "john"
            }
        }
    },
    // Expose the text score as a regular field.
    {
        "$addFields": {
            "score": {
                "$meta": "textScore"
            }
        }
    },
    // Order by relevance BEFORE paging, so that the page contains the
    // best-scoring users rather than an arbitrary 6 matches.
    {
        "$sort": {
            "score": {
                "$meta": "textScore"
            }
        }
    },
    {
        "$skip": 0
    },
    {
        "$limit": 6
    },
    {
        "$lookup": {
            "from": "products",
            "localField": "userId",
            "foreignField": "userId",
            "as": "products"
        }
    },
    // Keep users that have no products.
    {
        $unwind: {
            "path": "$products",
            "preserveNullAndEmptyArrays": true
        }
    },
    // "Cash" orders of the current product.
    {
        "$lookup": {
            "from": "orders",
            "let": {
                "products": "$products"
            },
            "pipeline": [
                {
                    "$match": {
                        "$expr": {
                            "$and": [
                                {
                                    "$eq": [
                                        "$productId",
                                        "$$products.productId"
                                    ]
                                },
                                {
                                    "$eq": [
                                        "$orderType",
                                        "Cash"
                                    ]
                                }
                            ]
                        }
                    }
                }
            ],
            "as": "orders"
        }
    },
    // Keep products that have no orders.
    {
        $unwind: {
            "path": "$orders",
            "preserveNullAndEmptyArrays": true
        }
    },
    // Back to one document per user.
    {
        $group: {
            _id: "$_id",
            name: {
                $first: "$name"
            },
            userId: {
                $first: "$userId"
            },
            products: {
                $addToSet: "$products"
            },
            orders: {
                $addToSet: "$orders"
            },
            score: {
                $first: "$score"
            }
        }
    },
    // $group does not preserve order, so sort the final page again.
    {
        $sort: {
            "score": -1
        }
    }
]);
To get more information on unwind, please check https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/
I have a collection of news articles in mongodb and another collection that maps a user's ID to an article's ID and has a "like" state, which can be either "like" or "dislike" or "none" if no entry with the user and article exists.
Here are both schemas:
// news collection: an article is just a title and its content
const articleSchema = new Schema({
    title: String,
    content: String
});
// newslikes collection: one entry per (user, article) pair holding the
// user's reaction to that article
const articleLikeSchema = new Schema({
    // Reference to a 'Client' document.
    user: {
        type: Schema.Types.ObjectId,
        ref: 'Client'
    },
    // Reference to a 'News' document.
    article: {
        type: Schema.Types.ObjectId,
        ref: 'News'
    },
    // Only these two values are allowed.
    state: {
        type: String,
        enum: ['like', 'dislike']
    }
});
I'm trying to write an aggregation query which joins these two collection using $lookup and then finds the state of a specific user's like on all articles. This is what I have so far:
// One page of news articles with all of their likes joined in, plus an
// attempt to isolate the like of one specific user.
const results = await News.aggregate([
    { $match: query },
    { $sort: { date: -1 } },
    { $skip: page * pageLength },
    { $limit: pageLength },
    // Every like entry of the article, regardless of user.
    {
        $lookup: {
            from: 'newslikes',
            localField: '_id',
            foreignField: 'article',
            as: 'likes'
        }
    },
    {
        $project: {
            title: 1,
            likes: 1,
            content: 1,
            // numLikes: { $size: '$likes' }
            // NOTE(review): the condition reads '$user._id' from the
            // enclosing document instead of the '$$like' element, and it
            // compares against a plain string although `user` is declared
            // as an ObjectId in the like schema.
            userLikeStatus: {
                $filter: {
                    input: '$likes',
                    as: 'like',
                    cond: { $eq: ['$user._id', '5ccf13adcec5e6d84f940417'] }
                }
            }
        }
    }
])
However this is not working. Is what I'm doing even the correct approach or is there a better way to do this rather than $filter?
You can use below aggregation with mongodb 3.6 and above
// One page of news articles, each with the like state of one specific user
// ("none" when the user has no entry for the article).
News.aggregate([
    { $match: query },
    { $sort: { date: -1 } },
    { $skip: page * pageLength },
    { $limit: pageLength },
    // Join only the like entry of this article that belongs to the user.
    {
        $lookup: {
            from: "newslikes",
            let: { articleId: "$_id" },
            pipeline: [
                {
                    $match: {
                        $expr: { $eq: ["$article", "$$articleId"] },
                        user: mongoose.Types.ObjectId("5ccf13adcec5e6d84f940417")
                    }
                }
            ],
            as: "likes"
        }
    },
    // State of the first joined entry, or "none" when nothing was joined.
    {
        $addFields: {
            userLikeStatus: {
                $ifNull: [{ $arrayElemAt: ["$likes.state", 0] }, "none"]
            }
        }
    }
])
Or the way you are trying
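Basically, you need to fix the $filter condition and then derive userLikeStatus from the filtered array: take the state of its first element and fall back to "none" when the array is empty (the code below does this with $arrayElemAt and $ifNull rather than a $cond on the array's $size).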
// One page of news articles with all likes joined in, plus the like state of
// one specific user ("none" when the user has no entry for the article).
News.aggregate([
    { $match: query },
    { $sort: { date: -1 } },
    { $skip: page * pageLength },
    { $limit: pageLength },
    // Every like entry of the article, regardless of user.
    {
        $lookup: {
            from: "newslikes",
            localField: "_id",
            foreignField: "article",
            as: "likes"
        }
    },
    {
        $project: {
            title: 1,
            likes: 1,
            content: 1,
            // numLikes: { $size: '$likes' }
            userLikeStatus: {
                $let: {
                    // `array` holds only the like entries of the given user.
                    vars: {
                        array: {
                            $filter: {
                                input: "$likes",
                                as: "like",
                                cond: {
                                    $eq: [
                                        "$$like.user",
                                        mongoose.Types.ObjectId("5ccf13adcec5e6d84f940417")
                                    ]
                                }
                            }
                        }
                    },
                    // State of the first such entry, or "none" if there is none.
                    in: {
                        $ifNull: [{ $arrayElemAt: ["$$array.state", 0] }, "none"]
                    }
                }
            }
        }
    }
])
Overview
A few devices are collecting data and sending it to a Node/MongoDb endpoint. Then, the user would use an endpoint to get all that data into a json.
Models
Device Model
// A device that sends messages; `deviceId` is the unique key that messages
// refer to.
const deviceSchema = new Schema({
    // Reference to a 'Group' document.
    group: {
        type: Schema.Types.ObjectId,
        ref: 'Group'
    },
    deviceId: {
        type: String,
        unique: true
    },
    name: String,
    notes: String,
    pac: String,
    endCertificate: String,
    lat: Number,
    lng: Number
});
Message Model
// A raw message received from a device, linked to it through `deviceId`.
const messageSchema = new Schema({
    deviceId: { type: String, required: true },
    raw: { type: String, required: true },
    // Pass the function itself, not its result: `Date.now()` would be
    // evaluated once when the schema is defined and every message would get
    // that same timestamp; a function default is called per document.
    receivedAt: { type: Date, default: Date.now }
});
One device can have N messages
Problem to solve
I want to get a json that has all the devices and have an array containing
all the messages that belongs to that device.
[
{
"id":"5b86c323e95759603ad7ea54",
"deviceId":"Device 01",
"name":"Device bla",
"notes":"...",
"pac":"pac",
"lat":-20.817396,
"endCertificate":"cert",
"lng":-27.031321,
"messages":[
{
"id":"5b869a42e0b94041b5f21eed",
"deviceId":"Device 01",
"raw":"1111111",
"receivedAt":"2018-08-29T13:04:43.641Z",
"__v":0
},
{
"id":"5b8c782fef4f8e98783f6f35",
"deviceId":"Device 01",
"raw":"2222222",
"receivedAt":"2018-09-01T09:04:43.641Z",
"__v":0
},
{
"id":"5b8c7840ef4f8e98783f6f3e",
"deviceId":"Device 01",
"raw":"3333333",
"receivedAt":"2018-09-02T09:04:43.641Z",
"__v":0
}
]
},
{
"id":"5b8c28ec38c51813cd159bac",
"deviceId":"Device 02",
"name":"Device ...",
"notes":"...",
"lat":-27.812296,
"lng":-27.073314,
"__v":0,
"messages":[
{
"id":"5b8c784cef4f8e98783f6f43",
"deviceId":"Device 02",
"raw":"1111111",
"receivedAt":"2018-09-01T09:04:43.641Z",
"__v":0
}
]
}
]
My solution
To get a json as the one above I have:
// Fetch one page of devices (each with its messages joined in) and the total
// device count in parallel, then send the page with the total in a header.
const [results, itemCount] = await Promise.all([
    Device.aggregate([
        { $match: {} },
        // Attach all messages that share the device's deviceId.
        {
            $lookup: {
                from: 'messageschemas',
                localField: 'deviceId',
                foreignField: 'deviceId',
                as: 'messages'
            }
        }
    ])
        // The helpers append stages in call order, so skip must come before
        // limit; limiting first and skipping afterwards would return an
        // empty result for every page after the first.
        .skip(req.skip)
        .limit(req.query.limit)
        .exec(),
    // NOTE(review): `match` is not defined in this snippet - presumably the
    // same filter as the $match stage above; confirm against the caller.
    Device.countDocuments(match)
]);
res.setHeader('X-Total-Count', itemCount);
res.send(results);
My question
How can I order the messages I get from the $lookup into messages[] by
'receivedAt'?
You need to $unwind the messages array, apply $sort on the receivedAt field, and finally $group to roll the messages back up into an array.
// All devices, each with its messages sorted by receivedAt (oldest first).
// Uses $addFields/$replaceRoot, which need MongoDB 3.4 or newer.
Device.aggregate([
    { "$match": { }},
    // Attach all messages that share the device's deviceId.
    { "$lookup": {
        "from": "messageschemas",
        "localField": "deviceId",
        "foreignField": "deviceId",
        "as": "messages"
    }},
    // One document per message; keep devices that have no messages.
    { "$unwind": {
        "path": "$messages",
        "preserveNullAndEmptyArrays": true
    }},
    { "$sort": { "messages.receivedAt": 1 }},
    // Rebuild the array in sorted order and remember the device document,
    // so that its own fields (name, notes, lat, ...) are not lost.
    { "$group": {
        "_id": "$_id",
        "device": { "$first": "$$ROOT" },
        "messages": { "$push": "$messages" }
    }},
    // Replace the single unwound message kept in `device` with the full
    // sorted array, then return the device document itself.
    { "$addFields": { "device.messages": "$messages" }},
    { "$replaceRoot": { "newRoot": "$device" }}
])
This can be done more simply with the $lookup pipeline syntax available since MongoDB 3.6:
// All devices, each with its messages already sorted by receivedAt (oldest
// first) inside the join itself.
Device.aggregate([
    { $match: {} },
    {
        $lookup: {
            from: "messageschemas",
            // Make the device's deviceId available inside the sub-pipeline.
            let: { deviceId: "$deviceId" },
            pipeline: [
                // Messages of this device only ...
                {
                    $match: {
                        $expr: { $eq: ["$deviceId", "$$deviceId"] }
                    }
                },
                // ... in ascending receivedAt order.
                { $sort: { receivedAt: 1 } }
            ],
            as: "messages"
        }
    }
])