MongoDB data aggregation assistance - mongodb

I have 'customers' collection with the following document:
{
id: 1,
name: 'Customer Name',
projects: [
{
id: 1000,
name: 'Project 1',
description: 'Project description',
instances: [10, 20],
},
{
id: 2000,
name: 'Project 2',
description: 'Project description',
instances: [30, 40, 10],
}
]
}
I have another collection 'instances' which looks like the following:
[
{
id: 10,
operatingSystem: 'Microsoft Windows 2012R2',
version: '3.1.5',
product: {
id: 100,
name: 'Product 1',
vendor: 'Vendor A',
},
},
{
id: 20,
operatingSystem: 'Microsoft Windows 2016',
version: '4.1.0',
product: {
id: 200,
name: 'Product 5',
vendor: 'Vendor B',
},
},
{
id: 30,
operatingSystem: 'Microsoft Windows 2019',
version: '3.0',
product: {
id: 300,
name: 'Product 2',
vendor: 'Vendor A',
},
},
{
id: 40,
operatingSystem: 'Linux',
version: '1.0',
product: {
id: 100,
name: 'Product 1',
vendor: 'Vendor A',
}
}
]
I'm trying to use the aggregation framework to make the results look like the following:
{
id: 1,
name: 'Customer Name',
projects: [
{
id: 1000,
name: 'Project 1',
description: 'Project description',
products: [
{
id: 100,
name: 'Product 1',
vendor: 'Vendor A',
instances: [
{
id: 10,
operatingSystem: 'Microsoft Windows 2012R2',
version: '3.1.5',
},
],
},
{
id: 200,
name: 'Product 5',
vendor: 'Vendor B',
instances: [
{
id: 20,
operatingSystem: 'Microsoft Windows 2016',
version: '4.1.0',
},
],
},
],
},
{
id: 2000,
name: 'Project 2',
description: 'Project description',
products: [
{
id: 300,
name: 'Product 2',
vendor: 'Vendor A',
instances: [
{
id: 30,
operatingSystem: 'Microsoft Windows 2019',
version: '3.0',
},
],
},
{
id: 100,
name: 'Product 1',
vendor: 'Vendor A',
instances: [
{
id: 40,
operatingSystem: 'Linux',
version: '1.0',
},
{
id: 10,
operatingSystem: 'Microsoft Windows 2012R2',
version: '3.1.5',
}
]
}
]
}
]
}
The current pipeline I managed to build is:
// Asker's attempt at nesting instances under products under projects.
// It yields one `projects` entry per (project, product) pair, which is why the
// same project shows up more than once in the final array.
[{$match: {
// Keep only the customer whose _id is 1.
_id: 1
}}, {$unwind: {
// One document per project.
path: "$projects"
}
}, {$lookup: {
// Swap the array of instance ids for the matching documents from `instances`.
// NOTE(review): the sample documents show an `id` field while this joins on
// `_id` — confirm which field actually holds the instance id.
from: 'instances',
localField: 'projects.instances',
foreignField: '_id',
as: 'projects.instances'
}}, {$group: {
// `projects.instances` is an array here, so this key is the array of product
// sub-documents of a project's instances, not a single product.
_id: "$projects.instances.product",
test: { "$push": "$$ROOT" }
}}, {$unwind: {
// Split the array-valued key into one document per product.
path: "$_id"
}}, {$unwind: {
// One document per original (customer, project) document collected in `test`.
path: "$test"
}}, {$project: {
// Reshape back to customer -> project -> product.
_id: "$test._id",
name: "$test.name",
description: "$test.description",
projects: {
_id: "$test.projects._id",
name: "$test.projects.name",
description: "$test.projects.description",
// `products` is a single object per document at this point, and `instances`
// carries every instance of the project rather than only this product's.
products: {
_id: "$_id._id",
name: "$_id.name",
vendor: "$_id.vendor",
instances: "$test.projects.instances",
}
}
}}, {$group: {
// Regroup by customer. Each incoming document is one (project, product) pair
// and is pushed as-is, so nothing merges the products of the same project.
_id: "$_id",
name: {"$first": "$name"},
projects: {
"$push": "$projects"
}
}}]
But I'm getting duplicates in the 'projects' array: if the same project has more than one product, the project shows up twice instead of once with 2 products in its products array.
I would appreciate your help with finding the right pipeline stages to shape my results as expected.

I didn't follow your pipeline as it was easier for me to just rewrite it from scratch but here's how I would do it, the obvious data structure manipulation concepts remain the same:
// Reshapes a customer into customer -> projects -> products -> instances.
// The pipeline flattens to one document per (project, instance) and then
// regroups three times, from the innermost level outwards:
//   1. (customer, project, product) -> instances[]
//   2. (customer, project)          -> products[]
//   3. customer                     -> projects[]
db.customers.aggregate([
  // Select the customer to reshape.
  {
    $match: {
      _id: 1
    }
  },
  // One document per project.
  {
    $unwind: "$projects"
  },
  // Replace the project's array of instance ids with the instance documents
  // whose `id` is contained in that array.
  {
    $lookup: {
      from: "instances",
      let: {
        instances: "$projects.instances"
      },
      pipeline: [
        {
          $match: {
            $expr: {
              // True when the single-element set [id] is a subset of the
              // project's instance ids, i.e. the id is in the list.
              $setIsSubset: [
                [
                  "$id"
                ],
                "$$instances"
              ]
            }
          }
        }
      ],
      as: "projects.instances"
    }
  },
  // One document per (project, instance).
  {
    $unwind: "$projects.instances"
  },
  // Level 1: collect the instances of each product within each project.
  {
    $group: {
      _id: {
        id: "$_id",
        project: "$projects.name",
        product: "$projects.instances.product.id"
      },
      // Customer name, carried along for the final stage.
      name: {
        $first: "$name"
      },
      description: {
        $first: "$projects.description"
      },
      product: {
        $first: "$projects.instances.product"
      },
      instances: {
        $push: {
          id: "$projects.instances.id",
          operatingSystem: "$projects.instances.operatingSystem",
          version: "$projects.instances.version",
        }
      }
    }
  },
  // Level 2: collect the products of each project, attaching the instance
  // list built above to each product document.
  {
    $group: {
      _id: {
        id: "$_id.id",
        project: "$_id.project"
      },
      name: {
        $first: "$name"
      },
      description: {
        $first: "$description"
      },
      products: {
        $push: {
          $mergeObjects: [
            "$product",
            {
              instances: "$instances"
            }
          ]
        }
      }
    }
  },
  // Level 3: collect the projects of each customer.
  {
    $group: {
      _id: "$_id.id",
      name: {
        $first: "$name"
      },
      projects: {
        $push: {
          // The project name only exists inside the grouping key at this
          // point, so it has to be read from `_id.project`.
          name: "$_id.project",
          description: "$description",
          products: "$products"
        }
      }
    }
  }
])
MongoPlayground

Related

$sortArray Issue (MongoDB)

I have a section of my aggregate query that I cannot get to work for the life of me. I am running 6.0. The section of the query with the issue looks like this:
$project: {
_id: 0,
games: {
$sortArray: {
input: '$games',
sortBy: { date: -1 }
}
},
total: { $size: '$games' }
}
For some reason the $sortArray function is not working for me in that I am getting no output from the query at all. The query, however, will work if I remove this sort like this:
$project: {
_id: 0,
games: 1,
total: { $size: '$games' }
}
After studying the $sortArray documentation, I believe that I am implementing this into the pipeline correctly. Can anyone identify what the issue is? Here is the full pipeline for context:
// Collects, for one user, the games found through the two lookups below,
// merges them into a single `games` array, sorts that array by `date`
// descending and reports its size as `total`.
// NOTE: $sortArray was added in MongoDB 5.2 and combining
// localField/foreignField with `pipeline` in $lookup needs 5.0+; verify the
// version of the server that is actually running.
const pipeline = [
{
// Start from the user document.
$match: { _id: ObjectID( user_id ) }
},
{
// Documents whose `player_id` equals the user's _id, further narrowed by
// pipelineFilters.
// NOTE(review): this reads from 'game' while the nested lookup below reads
// from 'games' — confirm which collection name is intended.
$lookup: {
from: 'game',
localField: '_id',
foreignField: 'player_id',
pipeline: pipelineFilters,
as: 'owned_games'
}
},
{
// Uncorrelated lookup: viewer entries for user_email, each resolved to a
// game document.
$lookup: {
from: 'viewers',
pipeline: [
{ $match: { email: user_email } },
{
$lookup: {
from: 'games',
localField: 'game_id',
foreignField: '_id',
pipeline: pipelineFilters,
as: 'games'
}
},
{
// Keep only the first matched game of each viewer entry.
$project: {
game: { $arrayElemAt: [ '$games', 0 ] }
}
},
{
// Promote that game to be the whole document.
$replaceRoot: {
newRoot: '$game'
}
}
],
as: 'viewing_games'
}
},
{
// Merge both lists; viewed games come first.
$project: {
games: {
$concatArrays: [ '$viewing_games', '$owned_games' ]
}
}
},
{
// Final shape: games sorted newest first plus their count.
$project: {
_id: 0,
games: {
$sortArray: {
input: '$games',
sortBy: { date: -1 }
}
},
total: { $size: '$games' }
}
}
];
and example of the document structure right before the final $project:
{
_id: new ObjectId("6359ac2149c98388770fb2b3"),
games: [
{
_id: new ObjectId("63595544435af1b923d1bda1"),
name: 'game 1',
owner_id: new ObjectId("63595544435af1b923d1bd98"),
date: 2022-10-26T15:41:56.584Z,
status: 'draft',
createdAt: 2022-10-26T15:41:56.599Z,
updatedAt: 2022-10-26T15:41:56.599Z,
__v: 0
},
{
_id: new ObjectId("63595544435af1b923d1bd99"),
name: 'game 2',
owner_id: new ObjectId("63595544435af1b923d1bd8b"),
date: 2011-10-05T14:48:00.000Z,
status: 'draft',
createdAt: 2022-10-26T15:41:56.585Z,
updatedAt: 2022-10-26T15:41:56.585Z,
__v: 0
},
{
_id: new ObjectId("63595544435af1b923d1bd9b"),
name: 'game 3',
owner_id: new ObjectId("63595544435af1b923d1bd8b"),
date: 1990-01-01T01:22:00.000Z,
status: 'draft',
createdAt: 2022-10-26T15:41:56.588Z,
updatedAt: 2022-10-26T15:41:56.588Z,
__v: 0
},
{
_id: new ObjectId("63595544435af1b923d1bd9d"),
name: 'game 4',
owner_id: new ObjectId("63595544435af1b923d1bd8b"),
date: 2500-10-05T14:48:00.000Z,
status: 'draft',
createdAt: 2022-10-26T15:41:56.592Z,
updatedAt: 2022-10-26T15:41:56.592Z,
__v: 0
},
{
_id: new ObjectId("63595544435af1b923d1bd9f"),
name: 'game 5',
owner_id: new ObjectId("63595544435af1b923d1bd8b"),
date: 1995-12-25T01:22:00.000Z,
status: 'draft',
createdAt: 2022-10-26T15:41:56.595Z,
updatedAt: 2022-10-26T15:41:56.595Z,
__v: 0
}
]
}
We were using mongo:latest tag when launching our docker container which doesn't actually pull the latest mongo image...
Conclusion: assumptions are bad

MongoDB query to find top store from list of orders

I'm pretty new to Mongo. I have two collections that look as follows.
Order collection
[
{
id: 1,
price: 249,
store: 1,
status: true
},
{
id: 2,
price: 230,
store: 1,
status: true
},
{
id: 3,
price: 240,
store: 1,
status: true
},
{
id: 4,
price: 100,
store: 2,
status: true
},
{
id: 5,
price: 150,
store: 2,
status: true
},
{
id: 6,
price: 500,
store: 3,
status: true
},
{
id: 7,
price: 70,
store: 4,
status: true
},
]
Store Collection
[
{
id: 1,
name: "Store A",
status: true
},
{
id: 2,
name: "Store B",
status: true
},
{
id: 3,
name: "Store C",
status: true
},
{
id: 4,
name: "Store D",
status: false
}
]
How to find the top store from the list of orders, which should be based on the total sales in each store.
I have tried the following
// Total sales per store over orders with `status: true`, highest total first.
// Each result is { _id: <store id>, totalSale: <sum of price> }.
db.order.aggregate([
  {
    "$match": {
      status: true
    }
  },
  {
    "$group": {
      "_id": "$store",
      "totalSale": {
        "$sum": "$price"
      }
    }
  },
  {
    // The sort key must be the exact field name produced by $group above;
    // sorting on a field that does not exist leaves the order unspecified.
    $sort: {
      totalSale: -1
    }
  }
])
I got the sorted list of stores from the above snippets. But I want to add store details along with total sales.
For more: https://mongoplayground.net/p/V3UH1r6YRnS
Expected Output
[
{
id: 1,
name: "Store A",
status: true,
totalSale: 719
},
{
id: 3,
name: "Store C",
status: true,
totalSale: 500
},
{
id: 2,
name: "Store B",
status: true,
totalSale: 250
},
{
id: 4,
name: "Store D",
status: false,
totalSale: 70
}
]
$lookup - store collection joins order collection and generate new field store_orders.
$set - Filter order with status: true from store_orders.
$set - totalSale field sum for store_orders.price.
$sort - Sort totalSale by descending.
$unset - Remove store_orders field.
// Lists every store with a `totalSale` field (sum of the prices of its orders
// that have `status: true`), sorted by that total in descending order.
db.store.aggregate([
{
// Attach all orders whose `store` equals this store's `id`.
$lookup: {
from: "order",
localField: "id",
foreignField: "store",
as: "store_orders"
}
},
{
// Keep only the orders with status true.
$set: {
"store_orders": {
$filter: {
input: "$store_orders",
as: "order",
cond: {
$eq: [
"$$order.status",
true
]
}
}
}
}
},
{
// Sum the prices of the remaining orders.
$set: {
"totalSale": {
"$sum": "$store_orders.price"
}
}
},
{
$sort: {
totalSale: -1
}
},
{
// Drop the helper array from the output.
$unset: "store_orders"
}
])
Sample Mongo Playground
You can start from store collection, $lookup the order collection, $sum the totalSales, then wrangle to your expected form
// Lists stores with a numeric `totalSale` (sum of the prices of their orders),
// sorted by that total in descending order.
// Note: the sub-pipeline applies no filter on the order's `status`, and the
// $unwind below removes stores for which the lookup returned nothing.
db.store.aggregate([
{
"$lookup": {
"from": "order",
let: {
id: "$id"
},
pipeline: [
{
// Orders whose `store` equals the current store's `id`.
$match: {
$expr: {
$eq: [
"$$id",
"$store"
]
}
}
},
{
// Collapse the matched orders into a single document holding the sum.
$group: {
_id: null,
totalSale: {
$sum: "$price"
}
}
}
],
"as": "totalSale"
}
},
{
// The lookup result is an array with at most one element; unwrap it.
$unwind: "$totalSale"
},
{
// Replace the { _id, totalSale } wrapper with the bare number.
$addFields: {
totalSale: "$totalSale.totalSale"
}
},
{
$sort: {
totalSale: -1
}
}
])
Here is the Mongo playground for your reference.

Mongodb - Aggregation over multiple collections specific result format

I need to create an aggregation using information from multiple collections and arrange them in a specific result format. Here we have three collections (parent, task, slots; the model of these cannot be changed anymore). Each parent can have many tasks, which consist of different slots (see the simplified sample data below)
Slot collection
[
{
_id: ObjectId('6015720974cff84db3499cad'),
name: 'Test 1',
start: ISO-Date('2021-07-13T09:00:00.000+00:00'),
end: ISO-Date('2021-07-13T09:15:00.000+00:00'),
parentId: 1,
taskId: 1
},
{
_id: ObjectId('6015720974cff84db3478cad'),
name: 'Test 2',
start: ISO-Date('2021-07-13T09:15:00.000+00:00'),
end: ISO-Date('2021-07-13T09:30:00.000+00:00'),
parentId: 1,
taskId: 1
},
{
_id: ObjectId('6015720974cff84db3499bcd'),
name: 'Test 3',
start: ISO-Date('2021-07-13T11:00:00.000+00:00'),
end: ISO-Date('2021-07-13T11:15:00.000+00:00'),
parentId: 1,
taskId: 2
},
{
_id: ObjectId('6015720974cff84db3499efg'),
name: 'Test 4',
start: ISO-Date('2021-07-13T13:00:00.000+00:00'),
end: ISO-Date('2021-07-13T13:15:00.000+00:00'),
parentId: 2,
taskId: 3
},
{
_id: ObjectId('6015720974cff84db349967e'),
name: 'Test 5',
start: ISO-Date('2021-07-13T13:15:00.000+00:00'),
end: ISO-Date('2021-07-13T13:30:00.000+00:00'),
parentId: 2,
taskId: 3
},
]
Task collection
[
{
_id: ObjectId('6015720974cff84db3499c87'),
name: 'Task 1',
taskId: 1,
},
{
_id: ObjectId('6015720974cff84db3499b6b'),
name: 'Task 2',
taskId: 2,
},
{
_id: ObjectId('6015720974cff84db3499b19'),
name: 'Task 3',
taskId: 3,
}
]
Parent collection
[
{
_id: ObjectId('6015720974cff84db34995a6'),
name: 'Parent 1',
parentId: 1
},
{
_id: ObjectId('6015720974cff84db349962f'),
name: 'Parent 2',
parentId: 2
}
]
The result should have the following format:
[
{
parentName: 'Parent 1',
tasks: [
{
taskName: 'Task 1',
slots: [
{
name: 'Test 1',
start: ISO-Date('2021-07-13T09:00:00.000+00:00'),
end: ISO-Date('2021-07-13T09:15:00.000+00:00'),
},
{
name: 'Test 2',
start: ISO-Date('2021-07-13T09:15:00.000+00:00'),
end: ISO-Date('2021-07-13T09:30:00.000+00:00'),
},
]
},
{
taskName: 'Task 2',
slots: [
{
name: 'Test 3',
start: ISO-Date('2021-07-13T11:00:00.000+00:00'),
end: ISO-Date('2021-07-13T11:15:00.000+00:00'),
}
]
}
]
},
{
parentName: 'Parent 2',
tasks: [
{
taskName: 'Task 3',
slots: [
{
name: 'Test 4',
start: ISO-Date('2021-07-13T13:00:00.000+00:00'),
end: ISO-Date('2021-07-13T13:15:00.000+00:00'),
},
{
name: 'Test 5',
start: ISO-Date('2021-07-13T13:15:00.000+00:00'),
end: ISO-Date('2021-07-13T13:30:00.000+00:00'),
}
]
}
]
}
]
How can I achieve this result structure by given data? The lookup part is not a big deal, but I currently struggle to achieve this structure, as I'm very new to MongoDB aggregation framework. Thank you very much for any hints.
You can use lookup to join collections
$lookup to join collections
$unwind to deconstruct the array
$group to reconstruct the array
Here is the code,
// Builds parent -> tasks -> slots by joining Task with Slot and Parent,
// flattening to one document per (task, slot, parent) and regrouping twice.
// Note: the slots are pushed as whole Slot documents, so each entry carries
// every field of the slot, not only name/start/end.
db.Task.aggregate([
{
// All slots that belong to this task.
"$lookup": {
"from": "Slot",
"localField": "taskId",
"foreignField": "taskId",
"as": "slots"
}
},
// One document per (task, slot).
{ "$unwind": "$slots" },
{
// The parent referenced by the slot.
"$lookup": {
"from": "Parent",
"localField": "slots.parentId",
"foreignField": "parentId",
"as": "parents"
}
},
{ "$unwind": "$parents" },
{
// Collect the slots of each task within each parent.
$group: {
_id: { pId: "$parents._id", tId: "$taskId" },
parentName: { $first: "$parents.name" },
taskName: { "$first": "$name" },
slots: { $push: "$slots" }
}
},
{
// Collect the tasks of each parent.
$group: {
_id: "$_id.pId",
parentName: { $first: "$parentName" },
tasks: {
$push: {
taskName: "$taskName",
slots: "$slots"
}
}
}
}
])
Working Mongo playground

MongoDB replace sub doc Id, with the document

I am sure it was answered before but I can't find any proper explanation, maybe the aggregate methods I assumed are not relevant here.
I have a mongo collection containing an array which points to sub-docs
{
_id: '123',
name: 'my shop',
items: [
{
itemId: '234',
},
{
itemId: '345',
},
]
}
This is the collection of sub docs:
{
_id: '234',
name: 'apple',
amount: 13
},
{
_id: '345',
name: 'orange',
amount: 25
},
How can I replace in the upper document the id-reference to the actual document content.
Desired final result:
{
_id: '123',
name: 'my shop',
items: [
{
itemId: '234',
name: 'apple',
amount: 13
},
{
itemId: '345',
name: 'orange',
amount: 25
},
]
}
Let's say sample data for shop collection is:
{
_id: '123',
name: 'my shop',
items: [
{
itemId: '234',
},
{
itemId: '345',
},
]
}
And the sample data for item collection is:
{
_id: '234',
name: 'apple',
amount: 13
},
{
_id: '345',
name: 'orange',
amount: 25
}
Then the aggregation will be like this one:
// Resolves the item references of each shop into the referenced documents.
// `as` reuses the name of the existing `items` field, so the array of
// { itemId } references is overwritten by the matched `item` documents
// (which, per the sample data, carry `_id` rather than `itemId`).
db.shop.aggregate([{
$lookup: {
from: 'item',
// `items.itemId` resolves to the array of all referenced ids.
localField: 'items.itemId',
foreignField: '_id',
as: 'items'
}
}])

Get a deep-nested array from a mongodb aggregate and include in result

Let's say I have these two collections
book: {
_id: 'aaa'
name: 'Book 1',
chapters: [
0: {
_id: 'chapter0',
name: 'Chapter 1',
pages: [
0: {
_id: 'page0',
name: 'Page 1',
paragraphs: [
0: {
_id: 'paragraph0',
name: 'Paragraph 1',
bookmarks: [
0: {sentence: 3, reader: 'Foo'},
1: {sentence: 8, reader: 'Bar'},
2: {sentence: 14, reader: 'John'}
]
}
]
}
]
}
]
}
book: {
_id: 'bbb'
name: 'Book 2',
chapters: [
0: {
_id: 'chapter0',
name: 'Chapter 1',
pages: [
0: {
_id: 'page0',
name: 'Page 1',
paragraphs: [
0: {
_id: 'paragraph0',
name: 'Paragraph 1',
bookmarks: []
},
1: {
_id: 'paragraph1',
name: 'Paragraph 2',
bookmarks: [
0: {sentence: 2, reader: 'George'},
1: {sentence: 1, reader: 'Paul'},
2: {sentence: 76, reader: 'John'},
3: {sentence: 54, reader: 'Ringo'}
]
}
]
}
]
}
]
}
I want to be able to extract the array bookmarks and attach them to the book collection when getting the result. Something like this would be good:
{
id: 'aaa'
name: 'Book 1'
bookmarks: [{...}, {...}, {...}] //since the first book has 3 bookmarks
},
{
id: 'bbb'
name: 'Book 2'
bookmarks: [{...}, {...}, {...}, {...}] //since the second book has 4 bookmarks
},
And if there are no bookmarks, it should look like:
{
id: 'aaa'
name: 'Book 1'
bookmarks: [{...}, {...}, {...}] //since the first book has 3 bookmarks
},
{
id: 'bbb'
name: 'Book 2'
bookmarks: [{...}, {...}, {...}, {...}] //since the second book has 4 bookmarks
},
{
id: 'ccc'
name: 'Book 3'
bookmarks: [] //third book does not have bookmarks for example
},
I've tried aggregation with this code, but it just separates each bookmark per book and pushes it into the object.
// Asker's attempt: flattens every nesting level down to single bookmarks and
// then groups by book. Because the $push below emits one { _id, name,
// bookmarks } object per unwound bookmark, each book ends up with a `books`
// array of single-bookmark entries instead of one `bookmarks` array.
// Note: $unwind drops documents whose array is empty or missing by default,
// so a book without bookmarks produces no output here.
return yield Books.aggregate()
.unwind('chapters')
.unwind('chapters.pages')
.unwind('chapters.pages.paragraphs')
.unwind('chapters.pages.paragraphs.bookmarks')
.group({
_id: '$_id',
books: {
$push: {
_id: '$_id',
name: '$name',
// A single bookmark object at this point, not an array.
bookmarks: '$chapters.pages.paragraphs.bookmarks'
}
}
}).exec()
Can someone point me to the right direction? Thanks!
Try below aggregate pipeline:
// Flattens each book down to its individual bookmarks and regroups them into
// one `bookmarks` array per book. The book name is part of the compound
// `_id` key ({ _id, book }) rather than a top-level field.
// NOTE(review): every path is prefixed with `book.`, which assumes the
// documents nest their content under a `book` field; the asker's own pipeline
// unwinds `chapters` directly — confirm against the real schema.
Books.aggregate([
{
$unwind: "$book"
},
{
$unwind: "$book.chapters"
},
{
$unwind: "$book.chapters.pages"
},
{
$unwind: "$book.chapters.pages.paragraphs"
},
{
// Keep paragraphs that have no bookmarks instead of dropping them.
// Only this level is preserved; the unwinds above still drop documents
// whose array is empty or missing.
$unwind: {
path: "$book.chapters.pages.paragraphs.bookmarks",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id: {
_id: "$_id",
book: "$book.name"
},
bookmarks: {
$push: "$book.chapters.pages.paragraphs.bookmarks"
}
}
}
])