Optimise MongoDB aggregate query performance - mongodb

I have next DB structure:
Workspaces:
Key
Index
PK
id
id
content
Projects:
Key
Index
PK
id
id
FK
workspace
workspace_1
deleted
deleted_1
content
Items:
Key
Index
PK
id
id
FK
project
project_1
type
_type_1
deleted
deleted_1
content
I need to calculate a number of items of each type for each project in workspace, e.g. expected output:
[
{ _id: 'projectId1', itemType1Count: 100, itemType2Count: 50, itemType3Count: 200 },
{ _id: 'projectId2', itemType1Count: 40, itemType2Count: 100, itemType3Count: 300 },
....
]
After few attempts and some debugging I've created a query which provides output I needed:
const pipeline = [
{ $match: { workspace: 'workspaceId1' } },
{
$lookup: {
from: 'items',
let: { id: '$_id' },
pipeline: [
{
$match: {
$expr: {
$eq: ['$project', '$$id'],
},
},
},
// project only fields necessary for later pipelines to not overload
// memory and to not get `exceeded memory limit for $group` error
{ $project: { _id: 1, type: 1, deleted: 1 } },
],
as: 'items',
},
},
// Use $unwind here to optimize aggregation pipeline, see:
// https://stackoverflow.com/questions/45724785/aggregate-lookup-total-size-of-documents-in-matching-pipeline-exceeds-maximum-d
// Without $unwind we may get an `matching pipeline exceeds maximum document size` error.
// Error appears not in all requests and it's really strange and hard to debug.
{ $unwind: '$items' },
{ $match: { 'items.deleted': { $eq: false } } },
{
$group: {
_id: '$_id',
items: { $push: '$items' },
},
},
{
$project: {
_id: 1,
// Note: I have only 3 possible item types, so it's OK that it's names hardcoded.
itemType1Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type1'] },
},
},
},
itemType2Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type2'] },
},
},
},
itemType3Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type3'] },
},
},
},
},
},
]
const counts = await Project.aggregate(pipeline)
Query works like expected, but very slow... If I have some about 1000 items in one workspace it takes about 8 seconds to complete. Any ideas how to make it faster are appreciated.
Thanks.

Assuming your indexs are properly indexed that they contain the "correct" fields, we can still have some tweaks on the query itself.
Approach 1: keeping existing collection schema
db.projects.aggregate([
{
$match: {
workspace: "workspaceId1"
}
},
{
$lookup: {
from: "items",
let: {id: "$_id"},
pipeline: [
{
$match: {
$expr: {
$and: [
{$eq: ["$project","$$id"]},
{$eq: ["$deleted",false]}
]
}
}
},
// project only fields necessary for later pipelines to not overload
// memory and to not get `exceeded memory limit for $group` error
{
$project: {
_id: 1,
type: 1,
deleted: 1
}
}
],
as: "items"
}
},
// Use $unwind here to optimize aggregation pipeline, see:
// https://stackoverflow.com/questions/45724785/aggregate-lookup-total-size-of-documents-in-matching-pipeline-exceeds-maximum-d
// Without $unwind we may get an `matching pipeline exceeds maximum document size` error.
// Error appears not in all requests and it's really strange and hard to debug.
{
$unwind: "$items"
},
{
$group: {
_id: "$_id",
itemType1Count: {
$sum: {
"$cond": {
"if": {$eq: ["$items.type","type1"]},
"then": 1,
"else": 0
}
}
},
itemType2Count: {
$sum: {
"$cond": {
"if": {$eq: ["$items.type","type2"]},
"then": 1,
"else": 0
}
}
},
itemType3Count: {
$sum: {
"$cond": {
"if": {$eq: ["$items.type","type1"]},
"then": 1,
"else": 0
}
}
}
}
}
])
There are 2 major changes:
moving the items.deleted : false condition into the $lookup subpipeline to lookup less items documents
skipped items: { $push: '$items' }. Instead, do a conditional sum in later $group stage
Here is the Mongo playground for your reference. (at least for the correctness of the new query)
Approach 2: If the collection schema can be modified. We can denormalize projects.workspace into the items collection like this:
{
"_id": "i1",
"project": "p1",
"workspace": "workspaceId1",
"type": "type1",
"deleted": false
}
In this way, you can skip the $lookup. A simple $match and $group will suffice.
db.items.aggregate([
{
$match: {
"deleted": false,
"workspace": "workspaceId1"
}
},
{
$group: {
_id: "$project",
itemType1Count: {
$sum: {
"$cond": {
"if": {$eq: ["$type","type1"]},
"then": 1,
"else": 0
}
}
},
...
Here is the Mongo playground with denormalized schema for your reference.

Related

Mongo DB Join on Primary/Foreign Key

I have two collections, viz: clib and mp.
The schema for clib is : {name: String, type: Number} and that for mp is: {clibId: String}.
Sample Document for clib:
{_id: ObjectId("6178008397be0747443a2a92"), name: "c1", type: 1}
{_id: ObjectId("6178008397be0747443a2a91"), name: "c2", type: 0}
Sample Document for mp:
{clibId: "6178008397be0747443a2a92"}
{clibId:"6178008397be0747443a2a91"}
While Querying mp, I want those clibId's that have type = 0 in clib collection.
Any ideas how this can be achieved?
One approach that I can think of was to use $lookUp, but that doesnt seem to be working. Also, I m not sure if this is anti-pattern for mongodb, another approach is to copy the type from clib to mp while saving mp document.
If I've understood correctly you can use a pipeline like this:
This query get the values from clib where its _id is the same as clibId and also has type = 0. Also I've added a $match stage to not output values where there is not any coincidence.
db.mp.aggregate([
{
"$lookup": {
"from": "clib",
"let": {
"id": "$clibId"
},
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{
"$eq": [
{
"$toObjectId": "$$id"
},
"$_id"
]
},
{
"$eq": [
"$type",
0
]
}
]
}
}
}
],
"as": "result"
}
},
{
"$match": {
"result": {
"$ne": []
}
}
}
])
Example here
db.mp.aggregate([
{
$lookup: {
from: "clib",
let: {
clibId: "$clibId"
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [ "$_id", "$$clibId" ],
}
]
}
}
},
{
$project: { type: 1, _id: 0 }
}
],
as: "clib"
}
},
{
"$unwind": "$clib"
},
{
"$match": {
"clib.type": 0
}
}
])
Test Here

$match in lookup stage without removing local documents in mongoDB

Right now I have a lookup stage that matches a collection of answers with their respective questions. The problem I'm having is that on the $match if the question does not having a matching answer document in the lookup collection it is removed from the results of the aggregation. How can I avoid this?
{
$lookup: {
from: 'groupansphotos',
let: { question_id: '$questions._id' },
pipeline: [
{
$match: {
$expr: { $eq: ['$_id', '$$question_id'] },
},
},
{ $unwind: '$answers' },
{ $match: { 'answers.reported': { $lt: 1 } } },
{ $sort: { 'answers.helpful': -1 } },
{ $limit: 2 },
{
$project: {
_id: 0,
k: { $toString: '$answers._id' }, // k
v: '$$ROOT.answers', // v
},
},
],
as: 'answers',
},
},

Referencing root _id in aggregate lookup match expression not working

This is my first experience using aggregate pipeline. I'm not able to get a "$match" expression to work inside the pipeline. If I remove the "_id" match, I get every document in the collection past the start date, but once I add the $eq expression, it returns empty.
I read a lot of other examples and tried many different ways, and this seems like it is correct. But the result is empty.
Any suggestions?
let now = new Date()
let doc = await Team.aggregate([
{ $match: { created_by: mongoose.Types.ObjectId(req.params.user_oid)} },
{ $sort: { create_date: 1 } },
{ $lookup: {
from: 'events',
let: { "team_oid": "$team_oid" },
pipeline: [
{ $addFields: { "team_oid" : { "$toObjectId": "$team_oid" }}},
{ $match: {
$expr: {
$and: [
{ $gt: [ "$start", now ] },
{ $eq: [ "$_id", "$$team_oid" ] }
]
},
}
},
{
$sort: { start: 1 }
},
{
$limit: 1
}
],
as: 'events',
}},
{
$group: {
_id: "$_id",
team_name: { $first: "$team_name" },
status: { $first: "$status" },
invited: { $first: "$invited" },
uninvited: { $first: "$uninvited" },
events: { $first: "$events.action" },
dates: { $first: "$events.start" } ,
team_oid: { $first: "$events.team_oid" }
}
}])
Example Docs (added by request)
Events:
_id:ObjectId("60350837c57b3a15a414d265")
invitees:null
accepted:null
sequence:7
team_oid:ObjectId("60350837c57b3a15a414d263")
type:"Calendar Invite"
action:"Huddle"
status:"Questions Issued"
title:"Huddle"
body:"This is a Huddle; you should receive new questions 5 days befor..."
creator_oid:ObjectId("5ff9e50a206b1924dccd691e")
start:2021-02-26T07:00:59.999+00:00
end:2021-02-26T07:30:59.999+00:00
__v:0
Team:
_id:ObjectId("60350837c57b3a15a414d263")
weekly_schedule:1
status:"Live"
huddle_number:2
reminders:2
active:true
created_by:ObjectId("5ff9e50a206b1924dccd691e")
team_name:"tESTI"
create_date:2021-02-23T13:50:47.172+00:00
__v:0
This is just a guess since you don't have schema in your question. But it looks like your have some of your _ids mixed up. Where you are currently trying to $match events whose _id is equal to a team_oid. Rather than the event's team_oid field being equal to the current 'team' _id.
I'm pretty confident this will produce the correct output. If you post any schema or sample docs I will edit it.
https://mongoplayground.net/p/5i1w2Ii7KCR
let now = new Date()
let doc = await Team.aggregate([
{ $match: { created_by: mongoose.Types.ObjectId(req.params.user_oid)} },
{ $sort: { create_date: 1 } },
{ $lookup: {
from: 'events',
// Set tea_oid as the current team _id
let: { "team_oid": "$_id" },
pipeline: [
{ $match: {
$expr: {
$and: [
{ $gt: [ "$start", now ] },
// Match events whose 'team_oid' field matches the 'team' _id set above
{ $eq: [ "$team_oid", "$$team_oid" ] }
]
},
}
},
{
$sort: { start: 1 }
},
{
$limit: 1
}
],
as: 'events',
}},
{
$group: {
_id: "$_id",
team_name: { $first: "$team_name" },
status: { $first: "$status" },
invited: { $first: "$invited" },
uninvited: { $first: "$uninvited" },
events: { $first: "$events.action" },
dates: { $first: "$events.start" } ,
team_oid: { $first: "$events.team_oid" }
}
}])

mongodb aggregate apply a function to a field

As part of an aggregate I need to run this transformation:
let inheritances = await db.collection('inheritance').aggregate([
{ $match: { status: 1 }}, // inheritance active
{ $project: { "_id":1, "name": 1, "time_trigger": 1, "signers": 1, "tree": 1, "creatorId": 1, "redeem": 1, "p2sh": 1 } },
{ $lookup:
{
from: "user",
let: { creatorId: { $concat: [ "secretkey", { $toString: "$creatorId" } ] }, time_trigger: "$time_trigger"},
pipeline: [
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$_id", sha256( { $toString: "$$creatorId" } ) ] },
{ $gt: [ new Date(), { $add: [ { $multiply: [ "$$time_trigger", 24*60*60*1000 ] }, "$last_access" ] } ] },
]
}
}
},
],
as: "user"
},
},
{ $unwind: "$user" }
]).toArray()
creatorId comes from a lookup, and in order to compare it to _id I first need to do a sha256.
How can I do it?
Thanks.
External functions will not work with the aggregation framework. Everything is parsed to BSON by default. It is all basically processed from BSON operators to native C++ code implementation, This is by design for performance.
Basically in short, you can't do this. I recommend just storing the hashed value on every document as a new field, otherwise you'll have to do it in code just before the pipeline.

$match in $lookup result

I have next mongo code:
db.users.aggregate([
{
$match: {
$and: [
{ UserName: { $eq: 'administrator' } },
{ 'Company.CompanyName': { $eq: 'test' } }
]
}
},
{
$lookup: {
from: "companies",
localField: "CompanyID",
foreignField: "CompanyID",
as: "Company"
}
},
])
The $lookup part of the code working great. I got next result:
But if I add $match to the code, it brings nothing.
I found that the problem is in the second match: { 'Company.CompanyName': { $eq: 'test' } }, but I can not realize what is wrong with it.
Any ideas?
UPDATE:
I had also tried $unwind on the $lookup result, but no luck:
db.users.aggregate([
{
$match: {
$and: [
{ UserName: { $eq: 'administrator' } },
{ 'Company.CompanyName': { $eq: 'edt5' } }
]
}
},
{ unwind: '$Company' },
{
$lookup: {
from: 'companies',
localField: 'CompanyID',
foreignField: 'CompanyID',
as: 'Company'
}
},
])
With MongoDB 3.4, you can run an aggregation pipeline that uses the $addFields pipeline and a $filter operator to only return the Company array with elements that match the given condition. You can then wrap the $filter expression with the $arrayElemAt operator to return a single document which in essence incorporates the $unwind functionality by flattening the array.
Follow this example to understand the above concept:
db.users.aggregate([
{ "$match": { "UserName": "administrator" } },
{
"$lookup": {
"from": 'companies',
"localField": 'CompanyID',
"foreignField": 'CompanyID',
"as": 'Company'
}
},
{
"$addFields": {
"Company": {
"$arrayElemAt": [
{
"$filter": {
"input": "$Company",
"as": "comp",
"cond": {
"$eq": [ "$$comp.CompanyName", "edt5" ]
}
}
}, 0
]
}
}
}
])
Below answer is for mongoDB 3.6 or later.
Given that:
You have a collection users with a field CompanyID and a collection of companies with a field CompanyID
you want to lookup Companies on Users by matching CompanyID, where additionally:
each User must match condition: User.UserName equals administrator
each Company on User must match condition: CompanyName equals edt5
The following query will work for you:
db.users.aggregate([
{ $match: { UserName: 'administrator' } },
{
$lookup: {
from: 'companies',
as: 'Company',
let: { CompanyID: '$CompanyID' },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$CompanyID', '$$CompanyID'] },
{ $eq: ['$CompanyName', 'edt5'] },
]
}
}
}
]
}
},
])
Explanation:
This is the way to perform left join queries with conditions more complex than simple foreign / local field equality match.
Instead of using localField and foreignField, you use:
let option where you can map local fields to variables,
pipeline option where you can specify aggregation Array.
In pipeline you can use $match filter, with $expr, where you can reuse variables defined earlier in let.
More info on $lookup
Nice tutorial
here is code for fitering array inside lookup.
const userId = req.userData.userId;
const limit = parseInt(req.params.limit);
const page = parseInt(req.params.page);
Collection.aggregate([
{ $match: {} },
{ $sort: { count: -1 } },
{ $skip: limit * page },
{ $limit: limit },
{
$lookup: {
from: Preference.collection.name,
let: { keywordId: "$_id" },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ["$keyword", "$$keywordId"] },
{
$eq: ["$user", mongoose.Types.ObjectId(userId)],
},
],
},
},
},
],
as: "keywordData",
},
},
{
$project: {
_id: 0,
id: "$_id",
count: 1,
for: 1,
against: 1,
created_at: 1,
updated_at: 1,
keyword: 1,
selected: {
$cond: {
if: {
$eq: [{ $size: "$keywordData" }, 0],
},
then: false,
else: true,
},
},
},
}])