Aggregate function on mongo running very slow when running query - mongodb

I tried running an aggregation query on MongoDB; it currently takes 16 seconds, whereas I was hoping for a result in under a second.
{
"$lookup": {
"from": "session_attendances",
"let": { "id": "$_id" },
"pipeline": [
{
"$match": {
"$expr": {
"$eq": ["$student", "$$id"]
}
}
},
{
"$project": {
"attendance_code": "$attendance_code"
}
}
],
"as": "attendance"
}
},
{
// keep only matched students, can skip this and modifiy the next phase incase no such documents exists.
"$unwind": "$attendance"
},
{
"$lookup": {
"from": "attendance_codes",
"let": { "attendance_code": "$attendance.attendance_code" },
"pipeline": [
{
"$project": {
"type": 1
}
},
{
"$match": {
"$expr": {
"$eq": ["$_id", "$$attendance_code"]
}
}
}
],
"as": "attendance_code"
}
},
{
//again assuming we want to keep matched docs otherwise why lookup?
"$unwind": "$attendance_code"
},
{
"$group": {
"_id": { "a": "$attendance.attendance_code", "id": "$_id" },
"total": { "$sum": 1 },
"data": { "$first": "$$ROOT" } // if u want to keep document data
}
}
I am hoping that someone can tell me which part of my code is making the run time so slow.

It's not clear what your end goal is; if you clarify that, it would help me suggest an alternative to your current aggregation.
With that said, the second lookup stage is "useless", as you group right after it without using any of the data gained by it; removing it will still get you the exact same result and save some time.
Assuming the second lookup is needed for some reason, I recommend not nesting it but rather using it after the first one, like so:
{
$lookup: {
from: 'session_attendances',
let: { 'id': '$_id' },
pipeline: [
{
"$match": {
"$expr": {
"$eq": ["$student", "$$id"]
}
}
}
,{
$project: {
attendance_code: '$attendance_code'
}
}
],
as: 'attendance'
}
},
{// keep only matched students, can skip this and modifiy the next phase incase no such documents exists.
$unwind: "$attendance"
},
{
$lookup: {
from: 'attendance_codes',
let: { 'attendance_code': '$attendance.attendance_code' },
pipeline: [
{
$project: {
type: 1
}
},
{
"$match": {
"$expr": {
"$eq": ["$_id", "$$attendance_code"]
}
}],
as: 'attendance_code'
}
}
},
{ //again assuming we want to keep matched docs otherwise why lookup?
$unwind: "$attendance_code"
},
{
$group: {
_id: {a: "$attendance.attendance_code", id: "$_id"}
total: { $sum: 1 },
data: {$first: "$$ROOT"} // if u want to keep document data
}
}
This should give you better performance. I also recommend dropping the $project stages: unless the documents are very large, projecting usually does not end up helping performance but actually hurts it.

Related

Mongodb lookup for not equal fields

I want to join two collections and find the documents that have one equal field and one unequal field.
This is what I tried, but it does not work:
// Text-search collectionOne, expose the _id as a string (pID) together with the
// text score, try to join collectionsTwo, and return the best-scoring document.
db.collectionOne.aggregate([
  {
    "$match": {
      "$and": [
        { "$text": { "$search": "this is my query" } },
        { "b": { "$eq": "60e849054d2f0d409041b6a2" } }
      ]
    }
  },
  { "$addFields": { "pID": { "$toString": "$_id" }, "score": { "$meta": "textScore" } } },
  {
    "$lookup": {
      "from": "collectionsTwo",
      "as": "collectionsTwoName",
      "pipeline": [{
        "$match": {
          "$expr": {
            "$and": [
              { "$ne": ["$fieldOne", "60dd0f98d10f072e2a225502"] }, // this is the unequal field
              { "$eq": ["$pID", "$fieldTwo"] }                       // this is the equal field
            ]
          }
        }
      }]
    }
  },
  { "$sort": { "score": -1 } },
  { "$limit": 1 }
])
Fields in the source document, i.e. $pID are not available inside the lookup pipeline.
In order to reference those values, you would need to define a variable using let, such as:
{
"$lookup": {
"from": "collectionsTwo",
"as": "collectionsTwoName",
"let": { "srcpID":"$pID" },
"pipeline": [{
"$match": {
"$expr": {
"$and": [{
"$ne": ["$fieldOne", "60dd0f98d10f072e2a225502"] // This one is unqual field
}, { "$eq": ["$$srcpID", "$fieldTwo"] }] // This one is equal field
}
}
}]
}
},
See https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/#join-conditions-and-uncorrelated-sub-queries

How to remove one document at the project aggregation

I am using MongoDB Atlas, and I have built one value. This is a sample document from the users collection:
{"_id":{"$oid":"5f7de1f7e0044c8262f6adbc"},"bio":"Hi, I am a professional software developer, I have strong knowledge on coding.","email":"ats4#gmail.com","displayName":"ats demo","following":[],"name":"ats4demo6880904256665157633","image":" ","status":"active","isPrivate":false}
I tried to display all documents except one.
"{\"collection\":\"users\",\"stages\":[{\"$match\":{\"$and\":[{\"_id\":{\"$oid\":\"%%args.user\"}},{\"following\":{\"$size\":0}}]}},{\"$lookup\":{\"from\":\"users\",\"localField\":\"status\",\"foreignField\":\"status\",\"as\":\"test\"}},{\"$unwind\":\"$test\"},{\"$project\":{\"test._id\":1,\"test.name\":1,\"test.displayName\":1,\"test.image\":1}}]}"
I want to display all values except the one with test._id: ObjectId("5f7f193585d5f70c177f6d27") in the $project stage of the aggregation, but I can't.
Can you please help me out with a solution? Thanks in advance.
Took me a while to understand the question. I think this is what you are looking for.
// Start from one user with an empty `following` list, join every document that
// shares the same `status`, drop one specific joined _id, and project a few fields.
db.collection.aggregate([
  {
    "$match": {
      "$and": [
        { "_id": { "$oid": "5f7de1f7e0044c8262f6adbc" } },
        { "following": { "$size": 0 } }
      ]
    }
  },
  {
    "$lookup": {
      "from": "collection",
      "localField": "status",
      "foreignField": "status",
      "as": "test"
    }
  },
  // One output document per joined user.
  { "$unwind": "$test" },
  // Filter out the joined document that must not be displayed.
  {
    "$match": {
      "test._id": { $not: { $eq: ObjectId("5f7de1f7e0044c8262f6adcc") } }
    }
  },
  {
    "$project": {
      "test._id": 1,
      "test.name": 1,
      "test.displayName": 1,
      "test.image": 1
    }
  }
])
Play around with the query here

GROUP BY tags in tags array inside foreign collection

I am looking for a way to group collection1 by tags that reside in collection2.
The two collections need to be joined (lookup) by 2 fields (field1, field2).
So far I came up with the following query:
// Join collection2 on (field1, field2), keep only its `tags`, then count per tags value.
db.collection1.aggregate([
  {
    "$lookup": {
      "from": "collection2",
      "let": { _field1: '$field1', _field2: '$field2' },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$and": [
                { "$eq": ["$field1", "$$_field1"] },
                { "$eq": ["$field2", "$$_field2"] }
              ]
            }
          }
        },
        { "$project": { _id: 0, tags: 1 } },
      ],
      "as": "col2"
    }
  },
  { "$unwind": "$col2" },
  { $group: { _id: "$col2.tags", count: { $sum: 1 } } }
]);
I got no result at all.
field1 and field2 are together unique in collection2 (having unique index)
Your syntax is correct apart from the name of your variables in:
{ _field1: '$field1', _field2: '$field2' },
When you define such variables, they are called user variables and mongo has certain naming limitations on them that are different from "real" variables convention.
from the docs:
User variable names must begin with a lowercase ascii letter [a-z] or a non-ascii character.
Meaning in your case the underscore is causing an error.
OK, I have managed to solve it myself.
I added a unique index on collection2 (field1, field2)
and added an extra $unwind to flatten the tags array.
My final query is as follows:
// Join collection2 on (field1, field2), flatten the joined tags arrays, and
// count how often each individual tag occurs, most frequent first.
db.collection1.aggregate([
  {
    "$lookup": {
      "from": "collection2",
      "let": { field1: '$field1', field2: '$field2' },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$and": [
                { "$eq": ["$field1", "$$field1"] },
                { "$eq": ["$field2", "$$field2"] }
              ]
            }
          }
        },
        { "$project": { _id: 0, tags: 1 } },
      ],
      "as": "col2"
    }
  },
  { "$unwind": "$col2" },
  // Second unwind: emit one document per tag so that grouping is per tag, not per array.
  { "$unwind": "$col2.tags" },
  { $group: { _id: "$col2.tags", count: { $sum: 1 } } },
  { $sort: { count: -1 } },
]);

Merge the original array of objects into the "as" field after a $lookup

I have a hero collection where each hero document looks like the following:
{
_id:'the-name-of-the-hero',
name: 'Name of Hero',
(...), //other properties to this hero
relations: [
{
hero: 'the-id-of-another-hero',
relationType: 'trust'
},
{
hero: 'yet-another-id-of-another-hero',
relationType: 'hate'
}
]
}
The relations.hero field points to the _id of another hero. I needed to grab some more information about the related heroes, so I used an aggregate $lookup to match each one against the "hero" collection and grab its name (and other data, but the projection is simplified for this question). Here is the currently working query, documented:
// Pipeline: pick one hero, then replace its `relations` with the _id and name
// of every hero it refers to.
const heroPipeline = [
  // Select a specific hero.
  { $match: { _id } },
  // Populate the relations.
  {
    $lookup: {
      from: 'hero',
      // Expose the array of related hero ids to the sub-pipeline as $$letId.
      // (localField: "relations.hero" / foreignField: "_id" would bring back the
      // entire hero documents, which is unnecessary.)
      let: { letId: '$relations.hero' },
      pipeline: [
        // Keep the heroes whose _id appears in $$letId.
        { $match: { $expr: { $in: ['$_id', '$$letId'] } } },
        // Return only the _id and name of the matched heroes.
        { $project: { name: 1, _id: 1 } },
        // Order them by name.
        { $sort: { name: 1 } }
      ],
      // Overwrite the current relations with the looked-up ones.
      as: 'relations',
    },
  }
];
let aggregate = db.collection('hero').aggregate(heroPipeline).toArray(someCallbackHere);
In short: a $lookup on the hero collection, using a pipeline that matches each of relations.hero, brings back only the _id and name (the real name to be printed in the UI), and replaces the current relations with these new relations, generating the document as:
{
_id:'the-name-of-the-hero',
name: 'Name of Hero',
(...), //other properties to this hero
relations: [
{
_id: 'the-id-of-another-hero',
name: 'The Real Name of Another Hero',
},
{
_id: 'yet-another-id-of-another-hero',
name: 'Yet Another Real Name of Another Hero',
}
]
}
The question:
What can I add on the pipeline to make it merge the matched heroes with the original relations, in order to not only have the projected _id and name, but also the original relationType? That is, have the following result:
{
_id:'the-name-of-the-hero',
name: 'Name of Hero',
(...), //other properties to this hero
relations: [
{
_id: 'the-id-of-another-hero',
name: 'The Real Name of Another Hero',
relationType: 'trust' //<= kept from the original relations
},
{
_id: 'yet-another-id-of-another-hero',
name: 'Yet Another Real Name of Another Hero',
relationType: 'hate' //<= kept from the original relations
}
]
}
I tried exporting with as: 'relationsFull' and then using $push with $mergeObjects in a subsequent aggregation step, but had no luck. I tried to do the same as a pipeline step (instead of a new aggregation step), but I always end up with relations as an empty array.
How would I write a new aggregation step to merge old relations objects with the new looked-up relations?
Note: Consider MongoDB 3.6 or later (that is, $unwind array is not needed, at least for the $lookup). I'm querying using Node.js driver, if that info matters.
You can use the aggregation below:
// Unwind the relations, look up each related hero's name, copy it into the
// relation, and regroup the relations back into one document per hero.
db.collection("hero").aggregate([
  { "$match": { _id } },
  // One document per relation.
  { "$unwind": "$relations" },
  {
    "$lookup": {
      "from": "hero",
      "let": { "letId": "$relations.hero" },
      "pipeline": [
        { "$match": { "$expr": { "$eq": ["$_id", "$$letId"] } } },
        { "$project": { "name": 1 } }
      ],
      "as": "relation"
    }
  },
  { "$unwind": "$relation" },
  // Attach the looked-up name to the original relation (relationType is kept).
  { "$addFields": { "relations.name": "$relation.name" } },
  // Rebuild the relations array; other hero fields are carried over with $first.
  {
    "$group": {
      "_id": "$_id",
      "relations": { "$push": "$relations" },
      "name": { "$first": "$name" },
      "rarity": { "$first": "$rarity" },
      "classType": { "$first": "$classType" }
    }
  }
])
Or, alternatively, you can use this as well:
// Look up the related heroes into a separate field, then rebuild `relations`
// by merging each original relation with the name of the hero it points to.
db.collection("hero").aggregate([
  { "$match": { _id } },
  {
    "$lookup": {
      "from": "hero",
      "let": { "letId": "$relations.hero" },
      "pipeline": [
        { "$match": { "$expr": { "$in": ["$_id", "$$letId"] } } },
        { "$project": { "name": 1 } }
      ],
      "as": "lookupRelations"
    }
  },
  {
    "$addFields": {
      "relations": {
        "$map": {
          "input": "$relations",
          "as": "rel",
          "in": {
            "$mergeObjects": [
              "$$rel",
              {
                "name": {
                  "$arrayElemAt": [
                    "$lookupRelations.name",
                    // The original relation stores the related hero's id in `hero`
                    // (not `_id`), so that is the value to search for among the
                    // looked-up _ids.
                    { "$indexOfArray": ["$lookupRelations._id", "$$rel.hero"] }
                  ]
                }
              }
            ]
          }
        }
      }
    }
  }
])
Well, I think we should use a different name for the as field. From there, we can use the following expression in the $addFields stage.
{
"$addFields": {
"relations": {
"$reduce": {
"input": {
"$reduce": {
"input": {
"$zip": {
"inputs": [
"$relations",
"$relheros"
]
}
},
"initialValue": [
],
"in": {
"$concatArrays": [
"$$value",
"$$this"
]
}
}
},
"initialValue": {
},
"in": {
"$mergeObjects": [
"$$value",
"$$this"
]
}
}
}
}
}
Note that the relheros here is the as field.
We really should not $unwind and $group here, because $unwind is cheap but $group is expensive.

How to find match in documents in Mongo and Mongo aggregation?

I have following json structure in mongo collection-
{
"students":[
{
"name":"ABC",
"fee":1233
},
{
"name":"PQR",
"fee":345
}
],
"studentDept":[
{
"name":"ABC",
"dept":"A"
},
{
"name":"XYZ",
"dept":"X"
}
]
},
{
"students":[
{
"name":"XYZ",
"fee":133
},
{
"name":"LMN",
"fee":56
}
],
"studentDept":[
{
"name":"XYZ",
"dept":"X"
},
{
"name":"LMN",
"dept":"Y"
},
{
"name":"ABC",
"dept":"P"
}
]
}
Now I want to calculate following output.
if students.name = studentDept.name
so my result should be as below
{
"name":"ABC",
"fee":1233,
"dept":"A",
},
{
"name":"XYZ",
"fee":133,
"dept":"X"
}
{
"name":"LMN",
"fee":56,
"dept":"Y"
}
Do I need to use mongo aggregation or is it possible to get above given output without using aggregation???
What you are really asking here is how to make MongoDB return something that is actually quite different from the form in which you store it in your collection. The standard query operations do allow a "limited" form of "projection", but even as the title on the page shared in that link suggests, this is really only about "limiting" the fields to display in results based on what is present in your document already.
So any form of "alteration" requires some form of aggregation; both the aggregate and mapReduce operations allow you to "re-shape" the document results into a form that is different from the input. Perhaps the main thing people miss about the aggregation framework in particular is that it is not all about "aggregating"; in fact, the "re-shaping" concept is core to its implementation.
So in order to get results how you want, you can take an approach like this, which should be suitable for most cases:
// Cross every student with every department entry of the same document, then
// keep, per student name, the department of the entry whose name matches.
db.collection.aggregate([
  { "$unwind": "$students" },
  { "$unwind": "$studentDept" },
  {
    "$group": {
      "_id": "$students.name",
      "tfee": { "$first": "$students.fee" },
      // The dept when the two names match, otherwise `false`; $min keeps the
      // smallest of these values per student name.
      "tdept": {
        "$min": {
          "$cond": [
            { "$eq": ["$students.name", "$studentDept.name"] },
            "$studentDept.dept",
            false
          ]
        }
      }
    }
  },
  // Discard students for which no department entry matched.
  { "$match": { "tdept": { "$ne": false } } },
  { "$sort": { "_id": 1 } },
  // Rename the working fields to the requested output shape.
  {
    "$project": {
      "_id": 0,
      "name": "$_id",
      "fee": "$tfee",
      "dept": "$tdept"
    }
  }
])
Or alternately just "filter out" the cases where the two "name" fields do not match and then just project the content with the fields you want, if crossing content between documents is not important to you:
// Cross every student with every department entry of the same document and
// keep only the combinations whose names are equal.
db.collection.aggregate([
  { "$unwind": "$students" },
  { "$unwind": "$studentDept" },
  // Build the output fields plus a temporary flag telling whether the names match.
  {
    "$project": {
      "_id": 0,
      "name": "$students.name",
      "fee": "$students.fee",
      "dept": "$studentDept.dept",
      "same": { "$eq": ["$students.name", "$studentDept.name"] }
    }
  },
  { "$match": { "same": true } },
  // Drop the temporary flag.
  {
    "$project": {
      "name": 1,
      "fee": 1,
      "dept": 1
    }
  }
])
From MongoDB 2.6 and upwards you can even do the same thing "inline" within the document, between the two arrays. You still need to reshape that array content in your final output, but it is possibly done a little faster:
// Match students against department entries inside each document with nested
// $map calls, then flatten the result into one output document per match.
db.collection.aggregate([
  // Compare the entries of the two arrays within the document.
  {
    "$project": {
      "students": {
        "$map": {
          "input": "$students",
          "as": "stu",
          "in": {
            // The inner $map yields a merged object for a name match and `false`
            // otherwise; $setDifference then strips the `false` entries.
            "$setDifference": [
              {
                "$map": {
                  "input": "$studentDept",
                  "as": "dept",
                  "in": {
                    "$cond": [
                      { "$eq": ["$$stu.name", "$$dept.name"] },
                      {
                        "name": "$$stu.name",
                        "fee": "$$stu.fee",
                        "dept": "$$dept.dept"
                      },
                      false
                    ]
                  }
                }
              },
              [false]
            ]
          }
        }
      }
    }
  },
  // `students` is now an array of arrays, so unwind it twice.
  { "$unwind": "$students" },
  { "$unwind": "$students" },
  // Rename the fields and exclude _id.
  {
    "$project": {
      "_id": 0,
      "name": "$students.name",
      "fee": "$students.fee",
      "dept": "$students.dept"
    }
  }
])
So whenever you want to essentially "alter" the structure of the output, you need to use one of the aggregation tools to do so. And you can, even if you are not really aggregating anything.