MongoDb: Getting $avg in aggregate for complex data - mongodb

I'm trying to get an average rating in my Mongo aggregate and am having trouble accessing the nested array. I've gotten my aggregation to give the following array. I'm trying to have city_reviews return an array of averages.
[
{
"_id": "Dallas",
"city_reviews": [
//arrays of restaurant objects that include the rating
//I would like to get an average of the rating in each review, so these arrays will be numbers (averages)
[ {
"_id": "5b7ead6d106f0553d8807276",
"created": "2018-08-23T12:41:29.791Z",
"text": "Crackin good place. ",
"rating": 4,
"store": "5b7d67d5356114089909e58d",
"author": "5b7d675e356114089909e58b",
"__v": 0
}, {review2}, {review3}]
[{review1}, {review2}, {review3}],
[{review1}. {review2}],
[{review1}, {review2}, {review3}, {review4}],
[]
]
},
{
"_id": "Houston",
"city_reviews": [
// arrays of restaurants
[{review1}, {review2}, {review3}],
[{review1}, {review2}, {review3}],
[{review1}, {review2}, {review3}, {review4}],
[],
[]
]
}
]
I would like to do an aggregation on this that returns an array of averages within the city_reviews, like this:
{
"_id": "Dallas",
"city_reviews": [
// arrays of rating averages
[4.7],
[4.3],
[3.4],
[],
[]
]
}
Here's what I've tried. It's giving me back an averageRating of null, because $city_reviews is an array of arrays of review objects and I'm not telling it to go deep enough to reach the rating key.
return this.aggregate([
{ $lookup: { from: 'reviews', localField: '_id', foreignField: 'store', as:
'reviews' }},
{$group: {_id: '$city', city_reviews: { $push : '$reviews'}}},
{ $project: {
averageRating: { $avg: '$city_reviews'}
}}
])
Is there a way to work with this line so I can return arrays of averages instead of the full review objects?
averageRating: { $avg: '$city_reviews'}
EDIT: Was asked for entire pipeline.
return this.aggregate([
{ $lookup: { from: 'reviews', localField: '_id', foreignField: 'store', as: 'reviews' }},
{$group: {
_id: '$city',
city_reviews: { $push : '$reviews'}}
},
{ $project: {
photo: '$$ROOT.photo',
name: '$$ROOT.name',
reviews: '$$ROOT.reviews',
slug: '$$ROOT.slug',
city: '$$ROOT.city',
"averageRatingIndex":{
"$map":{
"input":"$city_reviews",
"in":[{"$avg":"$$this.rating"}]
}
},
}
},
{ $sort: { averageRating: -1 }},
{ $limit: 5 }
])
My first query was to connect two models together:
{ $lookup: { from: 'reviews', localField: '_id', foreignField: 'store', as: 'reviews' }},
Which resulted in this:
[ {
"_id": "5b7d67d5356114089909e58d",
"location": {},
"tags": [],
"created": "2018-08-22T13:23:23.224Z",
"name": "Lucia",
"description": "Great name",
"city": "Dallas",
"photo": "ab64b3e7-6207-41d8-a670-94315e4b23af.jpeg",
"author": "5b7d675e356114089909e58b",
"slug": "lucia",
"__v": 0,
"reviews": []
},
{..more object like above}
]
Then, I grouped them like this:
{$group: {
_id: '$city',
city_reviews: { $push : '$reviews'}}
}
This returned what my original question is about. Essentially, I just want to have a total average rating for each city. My accepted answer does answer my original question. I'm getting back this:
{
"_id": "Dallas",
"averageRatingIndex": [
[ 4.2 ],
[ 3.6666666666666665 ],
[ null ],
[ 3.2 ],
[ 5 ],
[ null ]
]
}
I've tried to use the $avg operator on this to return one, final average that I can display for each city, but I'm having trouble.

You can use $map with $avg to output the average of each inner array.
{"$project":{
"averageRating":{
"$map":{
"input":"$city_reviews",
"in":[{"$avg":"$$this.rating"}]
}
}
}}

With respect to your optimization request, I don't think there's a lot of room for improvement beyond the version that you already have. However, the following pipeline might be faster than your current solution because of the initial $group stage which should result in way less $lookups. I am not sure how MongoDB will optimize all of that internally so you might want to profile the two versions against a real data set.
db.getCollection('something').aggregate([{
$group: {
_id: '$city', // group by city
"averageRating": { $push: "$_id" } // create array of all encountered "_id"s per "city" bucket - we use the target field name to avoid creation of superfluous fields which would need to be removed from the output later on
}
}, {
$lookup: {
from: 'reviews',
let: { "averageRating": "$averageRating" }, // create a variable called "$$averageRating" which will hold the previously created array of "_id"s
pipeline: [{
$match: { $expr: { $in: [ "$store", "$$averageRating" ] } } // do the usual "joining"
}, {
$group: {
"_id": null, // group all found items into the same single bucket
"rating": { $avg: "$rating" }, // calculate the avg on a per "store" basis
}
}],
as: 'averageRating'
}
}, {
$sort: { "averageRating.rating": -1 }
}, {
$limit: 5
}, {
$addFields: { // beautification of the output only, technically not needed - we do this as the last stage in order to only do it for the max. of 5 documents that we're interested in
"averageRating": { // this is where we reuse the field we created in the first stage
$arrayElemAt: [ "$averageRating.rating", 0 ] // pull the first element inside the array outside of the array
}
}
}])
In fact, the "initial $group stage" approach could also be used in conjunction with @Veeram's solution like this:
db.collection.aggregate([{
$group: {
_id: '$city', // group by city
"averageRating": { $push: "$_id" } // create array of all encountered "_id"s per "city" bucket - we use the target field name to avoid creation of superfluous fields which would need to be removed from the output later on
}
}, {
$lookup: {
from: 'reviews',
localField: 'averageRating',
foreignField: 'store',
as: 'averageRating'
},
}, {
$project: {
"averageRating": {
$avg: {
$map: {
input: "$averageRating",
in: { $avg: "$$this.rating" }
}
}
}
}
}, {
$sort: { averageRating: -1 }
}, {
$limit: 5
}])

Related

How to Group By and Count values inside multi layered array in mongoDB?

I have a very complicated MongoDB document structure.
For example: Order Document >>>
{
"_id": "62cdbae0421b250009acc329",
"cartitems": "62cdbaaf74c9c80009f5a4b2",
},
{
"_id": "62d27e192b254600099ae680",
"cartitems": "62d27d9d91568c0009866d23",
}
and cart Document >>>
{
"_id": "62cdbaaf74c9c80009f5a4b2",
"cartItems": [
{
"productCode": [
"NCK-1"
],
"Price": "56",
},
{
"productCode": [
"NCK-2"
],
"Price": "56",
}
],
},
{
"_id": "62d27d9d91568c0009866d23",
"cartItems": [
{
"productCode": [
"NCK-3"
],
"Price": "56",
},
{
"productCode": [
"NCK-1"
],
"Price": "56",
}
],
},
I want to join the Order documents to the Cart documents (Order.cartitems with Cart._id), group by productCode, count each product code, and sum its Price; i.e., in total the result must look like:
NCK-1 112
NCK-2 56
NCK-3 56
I tried the following code >>>
Order.aggregate([
{
$lookup: {
from: Cart.collection.name,
localField: 'cartitems',
foreignField: '_id',
as: 'cartitems',
},
},
{ $unwind: '$cartitems' },
{
$group: {
_id: '$cartitems.cartItems.productCode',
count: { $sum: '$cartitems.cartItems.Price' },
},
},
]);
I could not find the solution; please guide me in solving this.
$lookup
$unwind
$unwind - Deconstruct the cartitems.cartItems array into multiple documents.
$group - Since there is only one item in cartitems.cartItems.productCode, you can use $first to take the first value of the array as the group key. You also need to cast cartitems.cartItems.Price to a number type before summing.
db.order.aggregate([
{
$lookup: {
from: "cart",
localField: "cartitems",
foreignField: "_id",
as: "cartitems"
}
},
{
$unwind: "$cartitems"
},
{
$unwind: "$cartitems.cartItems"
},
{
$group: {
_id: {
$first: "$cartitems.cartItems.productCode"
},
count: {
$sum: {
$toInt: "$cartitems.cartItems.Price"
}
}
}
}
])
Sample Mongo Playground

mongodb - get top items from a collection based on its usage count as a field in another collection

How to get a list of top ranked items based on their usage as a field in items of another collection?
Here is a mongodb playground explaining the scenario - https://mongoplayground.net/p/gTMm1JVv9uV
In the example below, category 245 is used twice and 276 is used once in the posts collection. The output will rank the categories based on their count of usage in posts
Note that the post collection only has the category id so looking up categories collection is necessary.
Based on this, the expected output is an array of category's text.
{
topCategories: ["category 245", "category 276"]
}
A sample data in the two collections is below:
db={
categories: [
{
"_id": 231,
"text": "category 231",
},
{
"_id": 245,
"text": "category 245",
},
{
"_id": 276,
"text": "category 276",
}
],
posts: [
{
"_id": 71,
category: "245"
},
{
"_id": 72,
category: "276"
},
{
"_id": 74,
category: "245"
}
]
}
I have used addToSet in earlier query but found out that it does not maintain the order. I have replaced it with push operator.
db.posts.aggregate([
{
$addFields: {
category: {
$toInt: "$category"
},
}
},
{
$lookup: {
from: "categories",
as: "category",
localField: "category",
foreignField: "_id"
}
},
{
"$unwind": "$category"
},
{
"$group": {
"_id": "$category._id",
"count": {
"$sum": 1
},
"category": {
"$first": "$category"
}
}
},
{
"$sort": {
"count": -1
}
},
{
"$project": {
categoriesText: "$category.text"
}
},
{
"$group": {
"_id": null,
"categoriesText": {
"$push": "$categoriesText"
}
}
},
{
"$project": {
_id: 0,
topCategories: "$categoriesText"
}
}
])
New Playground
You can try,
$group by category, converting it to an integer with $toInt, and count the occurrences of each category in count using $sum
$lookup with the categories collection
$sort by the count field in descending order
$group by null to combine the categories into one array field; get the first element of the category array using $arrayElemAt and push its text into the topCategories field
db.posts.aggregate([
{
$group: {
_id: { $toInt: "$category" },
count: { $sum: 1 }
}
},
{
$lookup: {
from: "categories",
as: "category",
localField: "_id",
foreignField: "_id"
}
},
{ $sort: { count: -1 } },
{
$group: {
_id: null,
topCategories: {
$push: { $arrayElemAt: ["$category.text", 0] }
}
}
}
])
Playground

Adding remaining fields in $project after adding lookups in mongoose

I'm doing a $lookup from an _id in the Order schema, and it's working as expected. But how do I add the remaining keys in $project? I have added my code below.
Product Collection:
{
"_id": "54759eb3c090d83494e2d804",
"product_name": "sample product",
"image": "default.png",
"price": 55,
"discount": 5,
}
Order list Collection
{
"user_name": "sample1",
"product_list":[
{
"product_id": "54759eb3c090d83494e2d804"
"quantity": 5
}
]
}
lookups
[
{
from: 'product',
localField: 'product_list.product_id',
foreignField: '_id',
as: 'product_list.product_id',
model: 'ProductModel',
},
],
$Project
{
user_name: true,
product_list: {
$map: {
input: '$product_list.product_id',
as: 'product',
in: {
product_name: '$$product.product_name',
},
},
},
}
Current Result:
{
"user_name": "sample1",
"product_list":[
"product_id":{
"product_name": "sample product"
}
]
}
In this current result, the quantity field is missing. How do I add it in $project? The expected result is shown below.
Expected Result:
{
"user_name": "sample1",
"product_list":[
{
"product_id": {
"product_name": "sample product"
}
"quantity": 5
}
]
}
You need to do $unwind before $lookup, because it will not work directly in array fields, and here you don't need $map inside $project,
$unwind product_list deconstruct array
db.order.aggregate([
{ $unwind: "$product_list" },
$lookup with pipeline, this will allow to use pipeline inside lookup, here $project to required fields
{
$lookup: {
from: "product",
as: "product_list.product_id",
let: { product_id: "$product_list.product_id" },
pipeline: [
{
$match: {
$expr: { $eq: ["$$product_id", "$_id"] }
}
},
{
$project: {
_id: 0,
product_name: 1
}
}
]
}
},
$unwind with path product_list.product_id because you need it as object
{ $unwind: { path: "$product_list.product_id" } },
$group by _id re-construct your product_list array
{
$group: {
_id: "$_id",
user_name: { $first: "$user_name" },
product_list: { $push: "$product_list" }
}
}
])
Playground

perform lookup on array from another collection in MongoDB

I have a collection of Orders. each order has a list of Items, and each Item has catalog_id, which is an ObjectId pointing to the Catalogs collection.
I need an aggregate query that will retrieve certain orders - each order with its Items in extended fashion including the Catalog name and SKU. i.e:
Original data structure:
Orders: [{
_id : ObjectId('ord1'),
items : [{
catalog_id: ObjectId('xyz1'),
qty: 5
},
{
catalog_id: ObjectId('xyz2'),
qty: 3
}]
Catalogs: [{
_id : ObjectId('xyz1')
name: 'my catalog name',
SKU: 'XxYxZx1'
},{
_id : ObjectId('xyz2')
name: 'my other catalog name',
SKU: 'XxYxZx2'
}
]
ideal outcome would be:
Orders: [{
_id : ObjectId('ord1'),
items : [{
catalog_id: ObjectId('xyz1'),
catalog_name: 'my catalog name',
catalog_SKU: 'XxYxZx1' ,
qty: 5
},
{
catalog_id: ObjectId('xyz2'),
catalog_name: 'my other catalog name',
catalog_SKU: 'XxYxZx2' ,
qty: 3
}
]
What I did so far was:
db.orders.aggregate(
[
{
$match: {merchant_order_id: 'NIM333'}
},
{
$lookup: {
from: "catalogs",
//localField: 'items.catalog_id',
//foreignField: '_id',
let: { 'catalogId' : 'items.catalog_id' },
pipeline: [
{
$match : {$expr:{$eq:["$catalogs._id", "$$catalogId"]}}
},
{
$project: {"name": 1, "merchant_SKU": 1 }
}
],
as: "items_ex"
},
},
])
but items_ex comes out empty for some reason I cannot understand.
You need to first $unwind the items and reconstruct the array back using $group to match the exact position of qty with the catalogs_id inside the items array
db.orders.aggregate([
{ "$match": { "merchant_order_id": "NIM333" }},
{ "$unwind": "$items" },
{ "$lookup": {
"from": "catalogs",
"let": { "catalogId": "$items.catalog_id", "qty": "$items.qty" },
"pipeline": [
{ "$match": { "$expr": { "$eq": ["$_id", "$$catalogId"] } }},
{ "$project": { "name": 1, "merchant_SKU": 1, "qty": "$$qty" }}
],
"as": "items"
}},
{ "$unwind": "$items" },
{ "$group": {
"_id": "$_id",
"items": { "$push": "$items" },
"data": { "$first": "$$ROOT" }
}},
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": ["$data", { "items": "$items" }]
}
}}
])
MongoPlayground
You're missing a dollar sign when you define your pipeline variable. There should be:
let: { 'catalogId' : '$items.catalog_id' },
and also this expression returns an array, so you need $in instead of $eq:
{
$lookup: {
from: "catalogs",
let: { 'catalogId' : '$items.catalog_id' },
pipeline: [
{
$match : {$expr:{$in:["$_id", "$$catalogId"]}}
},
{
$project: {"name": 1, "merchant_SKU": 1 }
}
],
as: "items_ex"
}
}
Mongo Playground

How to resolve the many-to-many relation keeping the order of ID array in mongoDB

I have two collections posts and tags on mongoDB.
There is a many-to-many relationship between these collections.
A post can belong to some tags, and a tag can contain some posts.
I am looking for an efficient query method to join posts to tags keeping the order of postIds.
If the data schema is inappropriate, I can change it.
The mongoDB version is 3.6.5
Sample data
db.posts.insertMany([
{ _id: 'post001', title: 'this is post001' },
{ _id: 'post002', title: 'this is post002' },
{ _id: 'post003', title: 'this is post003' }
])
db.tags.insertMany([
{ _id: 'tag001', postIds: ['post003', 'post001', 'post002'] }
])
Desired result
{
"_id": "tag001",
"postIds": [ "post003", "post001", "post002" ],
"posts": [
{ "_id": "post003", "title": "this is post003" },
{ "_id": "post001", "title": "this is post001" },
{ "_id": "post002", "title": "this is post002" }
]
}
What I tried
I tried a query which use $lookup.
db.tags.aggregate([
{ $lookup: {
from: 'posts',
localField: 'postIds',
foreignField: '_id',
as: 'posts'
}}
])
However, I got a result which is different from what I want.
{
"_id": "tag001",
"postIds": [ "post003", "post001", "post002" ],
"posts": [
{ "_id": "post001", "title": "this is post001" },
{ "_id": "post002", "title": "this is post002" },
{ "_id": "post003", "title": "this is post003" }
]
}
In MongoDB you would attempt to model your data such that you avoid joins (as in $lookups) altogether, e.g. by storing the tags alongside the posts.
db.posts.insertMany([
{ _id: 'post001', title: 'this is post001', tags: [ "tag001", "tag002" ] },
{ _id: 'post002', title: 'this is post002', tags: [ "tag001" ] },
{ _id: 'post003', title: 'this is post003', tags: [ "tag002" ] }
])
With this structure in place you could get the desired result like this:
db.posts.aggregate([{
$unwind: "$tags"
}, {
$group: {
_id: "$tags",
postsIds: {
$push: "$_id"
},
posts: {
$push: "$$ROOT"
}
}
}])
In this case, I would doubt that you even need the postIds field in the result as it would be contained in the posts array anyway.
You can use a combination of $map and $filter to re-order elements in the posts array in a projection stage:
db.tags.aggregate([
{ $lookup: {
from: 'posts',
localField: 'postIds',
foreignField: '_id',
as: 'posts'
} },
{ $project: {
_id: 1,
postIds: 1,
posts: { $map: {
input: "$postIds",
as: "postId",
in: {
$arrayElemAt: [ { $filter: {
input: "$posts",
as: "post",
cond: { $eq: ["$$post._id", "$$postId"] }
} }, 0 ]
}
} }
} }
])
The missing posts will be filled with null to keep index consistent with postIds.