Lookup and group from two fields in one aggregation - mongodb

I have an aggregation that looks like this:
/**
 * Count, per user and per calendar day, the tickets referenced by one of the
 * user's ticket arrays.
 *
 * Only users in the requester's organization are considered, and only tickets
 * created between the start of the day four days ago and the end of today.
 *
 * @param {Object} req - request; `req.user.organization._id` selects the organization.
 * @param {string} type - prefix of the array field to join (`${type}Tickets`).
 * @returns the result of `this.aggregate(...)`; each entry has the shape
 *   `{ _id: <assignee_id>, data: [{ groupDate, ticketCount }] }`, newest date first.
 */
userSchema.statics.getCounts = function (req, type) {
  // Field names derived from the requested ticket kind.
  const joinedField = `${type}_tickets`;
  const createdAtKey = `${joinedField}.createdAt`;
  const createdAtRef = `$${createdAtKey}`;

  const matchOrganization = {
    $match: { organization: req.user.organization._id },
  };

  const joinTickets = {
    $lookup: {
      from: 'tickets',
      localField: `${type}Tickets`,
      foreignField: '_id',
      as: joinedField,
    },
  };

  const flattenTickets = { $unwind: `$${joinedField}` };

  // Window: start of the day four days ago up to the end of today.
  const windowStart = new Date(moment().subtract(4, 'd').startOf('day').utc());
  const windowEnd = new Date(moment().endOf('day').utc());
  const matchWindow = {
    $match: { [createdAtKey]: { $gte: windowStart, $lt: windowEnd } },
  };

  // Truncate createdAt to its date so tickets from the same day share a key.
  const countPerUserAndDay = {
    $group: {
      _id: {
        groupDate: {
          $dateFromParts: {
            year: { $year: createdAtRef },
            month: { $month: createdAtRef },
            day: { $dayOfMonth: createdAtRef },
          },
        },
        userId: `$${joinedField}.assignee_id`,
      },
      ticketCount: { $sum: 1 },
    },
  };

  const newestFirst = { $sort: { '_id.groupDate': -1 } };

  const collectPerUser = {
    $group: {
      _id: '$_id.userId',
      data: {
        $push: { groupDate: '$_id.groupDate', ticketCount: '$ticketCount' },
      },
    },
  };

  return this.aggregate([
    matchOrganization,
    joinTickets,
    flattenTickets,
    matchWindow,
    countPerUserAndDay,
    newestFirst,
    collectPerUser,
  ]);
};
Which outputs data like this:
[
{
_id: 5aeb6b71709f43359e0888bb,
data: [
{ "groupDate": 2018-05-07T00:00:000Z", ticketCount: 4 }
}
]
Ideally though, I would have data like this:
[
{
_id: 5aeb6b71709f43359e0888bb,
data: [
{ "groupDate": 2018-05-07T00:00:000Z", assignedCount: 4, resolvedCount: 8 }
}
]
The difference being that the object for the user would output both the total number of assigned tickets and the total number of resolved tickets for each date.
My userSchema is like this:
// Mongoose schema for a user: two name fields plus two arrays of ObjectId
// references to 'Ticket' documents.
const userSchema = new Schema({
firstName: String,
lastName: String,
// Array of ticket ids; each element is an indexed ObjectId referencing the 'Ticket' model.
assignedTickets: [
{
type: mongoose.Schema.ObjectId,
ref: 'Ticket',
index: true,
},
],
// Same element definition as assignedTickets, kept as a separate array.
resolvedTickets: [
{
type: mongoose.Schema.ObjectId,
ref: 'Ticket',
index: true,
},
],
}, {
// Schema option: let Mongoose maintain createdAt / updatedAt on each document.
timestamps: true,
});
An example user doc is like this:
{
"_id": "5aeb6b71709f43359e0888bb",
"assignedTickets": ["5aeb6ba7709f43359e0888bd", "5aeb6bf3709f43359e0888c2", "5aec7e0adcdd76b57af9e889"],
"resolvedTickets": ["5aeb6bc2709f43359e0888be", "5aeb6bc2709f43359e0888bf"],
"firstName": "Name",
"lastName": "Surname",
}
An example ticket doc is like this:
{
"_id": "5aeb6ba7709f43359e0888bd",
"ticket_id": 120292,
"type": "assigned",
"status": "Pending",
"assignee_email": "email@gmail.com",
"assignee_id": "5aeb6b71709f43359e0888bb",
"createdAt": "2018-05-02T20:05:59.147Z",
"updatedAt": "2018-05-03T20:05:59.147Z",
}
I've tried adding multiple lookups and group stages, but I keep getting an empty array. If I only do one lookup and one group, I get the correct counts for the searched on field, but I'd like to have both fields in one query. Is it possible to have the query group on two lookups?

In short you seem to be coming to terms with setting up your models in mongoose and have gone overboard with references. In reality you really should not keep the arrays within the "User" documents. This is actually an "anti-pattern" which was just something mongoose used initially as a convention for keeping "references" for population where it did not understand how to translate the references from being kept in the "child" to the "parent" instead.
You actually have that data in each "Ticket" and the natural form of $lookup is to use that "foreignField" in reference to the detail from the local collection. In this case the "assignee_id" on the tickets will suffice for looking at matching back to the "_id" of the "User". Though you don't state it, your "status" should be an indicator of whether the data is actually either "assigned" as when in "Pending" state or "resolved" when it is not.
For the sake of simplicity we are going to consider the state "resolved" if it is anything other than "Pending" in value, but extending on the logic from the example for actual needs is not the problem here.
Basically then we resolve to a single $lookup operation by actually using the natural "foreign key" as opposed to keeping separate arrays.
MongoDB 3.6 and greater
Ideally you would use features from MongoDB 3.6 with sub-pipeline processing here:
// Query window aligned to UTC day boundaries:
// `end` is the next UTC midnight, `start` is exactly four days before `end`.
const oneDay = 24 * 60 * 60 * 1000; // milliseconds in a day
const now = Date.now();
const end = new Date(now - (now % oneDay) + oneDay);
const start = new Date(end.getTime() - 4 * oneDay);
// Per-user, per-day ticket counts using the MongoDB 3.6 "sub-pipeline" form of
// $lookup. Expects `start`, `end`, `req`, `User` and `Ticket` to be in scope.
//
// Each user document comes back as:
//   { _id, data: [{ groupDate, assignedCount, resolvedCount }, ...] }
// with `data` sorted newest date first.
User.aggregate([
  { "$match": { "organization": req.user.organization._id } },
  { "$lookup": {
    "from": Ticket.collection.name,
    "let": { "id": "$_id" },
    "pipeline": [
      // Join on the natural foreign key and restrict to the date window
      // before anything is returned to the parent pipeline.
      { "$match": {
        "createdAt": { "$gte": start, "$lt": end },
        "$expr": {
          "$eq": [ "$$id", "$assignee_id" ]
        }
      }},
      // One group per calendar day. Counting with conditional sums (rather
      // than grouping by raw status and re-keying afterwards) means several
      // distinct non-"Pending" statuses on the same day are ADDED together
      // into resolvedCount instead of overwriting one another.
      { "$group": {
        "_id": {
          "$dateFromParts": {
            "year": { "$year": "$createdAt" },
            "month": { "$month": "$createdAt" },
            "day": { "$dayOfMonth": "$createdAt" }
          }
        },
        "assignedCount": {
          "$sum": { "$cond": [ { "$eq": [ "$status", "Pending" ] }, 1, 0 ] }
        },
        "resolvedCount": {
          "$sum": { "$cond": [ { "$eq": [ "$status", "Pending" ] }, 0, 1 ] }
        }
      }},
      { "$sort": { "_id": -1 } },
      // Reshape to the requested output document, keeping the field order
      // groupDate, assignedCount, resolvedCount.
      { "$replaceRoot": {
        "newRoot": {
          "groupDate": "$_id",
          "assignedCount": "$assignedCount",
          "resolvedCount": "$resolvedCount"
        }
      }}
    ],
    "as": "data"
  }},
  { "$project": { "data": 1 } }
])
From MongoDB 3.0 and upwards
Or where you lack those features we use a different pipeline process and a little data transformation after the results are returned from the server:
// Per-user, per-day ticket counts without the 3.6 sub-pipeline: a plain
// $lookup on the foreign key, then grouping, then a small reshaping step on
// the client. Expects `start`, `end`, `oneDay`, `req`, `User` and `Ticket`
// to be in scope.
User.aggregate([
  { "$match": { "organization": req.user.organization._id } },
  { "$lookup": {
    "from": Ticket.collection.name,
    "localField": "_id",
    "foreignField": "assignee_id",
    "as": "data"
  }},
  // $lookup + $unwind + $match in direct succession, so only tickets inside
  // the date window are kept from the join.
  { "$unwind": "$data" },
  { "$match": {
    "data.createdAt": { "$gte": start, "$lt": end }
  }},
  // Count per user, day and status. The date math rounds createdAt down to
  // a whole multiple of `oneDay` since the epoch.
  { "$group": {
    "_id": {
      "userId": "$_id",
      "date": {
        "$add": [
          { "$subtract": [
            { "$subtract": [ "$data.createdAt", new Date(0) ] },
            { "$mod": [
              { "$subtract": [ "$data.createdAt", new Date(0) ] },
              oneDay
            ]}
          ]},
          new Date(0)
        ]
      },
      "status": "$data.status"
    },
    "count": { "$sum": 1 }
  }},
  // Collapse the status dimension into k/v pairs: "Pending" counts as
  // assigned, every other status as resolved.
  { "$group": {
    "_id": {
      "userId": "$_id.userId",
      "date": "$_id.date"
    },
    "data": {
      "$push": {
        "k": {
          "$cond": [
            { "$eq": [ "$_id.status", "Pending" ] },
            "assignedCount",
            "resolvedCount"
          ]
        },
        "v": "$count"
      }
    }
  }},
  { "$sort": { "_id.userId": 1, "_id.date": -1 } },
  { "$group": {
    "_id": "$_id.userId",
    "data": {
      "$push": {
        "groupDate": "$_id.date",
        "data": "$data"
      }
    }
  }}
])
.then( results =>
  results.map( ({ data, ...d }) =>
    ({
      ...d,
      data: data.map(di =>
        ({
          groupDate: di.groupDate,
          // Sum the k/v pairs into the two counters. Several statuses can map
          // to the same key ("resolvedCount"), so values are added rather
          // than overwritten; the seed supplies 0 for a key that never occurs.
          ...di.data.reduce(
            (acc, { k, v }) => ({ ...acc, [k]: acc[k] + v }),
            { assignedCount: 0, resolvedCount: 0 }
          )
        })
      )
    })
  )
)
Which just really goes to show that even with the fancy features in modern releases, you really don't need them because there pretty much has always been ways to work around this. Even the JavaScript parts just had slightly longer winded versions before the current "object spread" syntax was available.
So that is really the direction you need to go in. What you certainly don't want is using "multiple" $lookup stages or even applying $filter conditions on what could potentially be large arrays. Also both forms here do their best to "filter down" the number of items "joined" from the foreign collection so as not to cause a breach of the BSON limit.
Particularly the "pre 3.6" version actually has a trick where $lookup + $unwind + $match occur in succession which you can see in the explain output. All stages actually combine into "one" stage there which solely returns only the items which match the conditions in the $match from the foreign collection. Keeping things "unwound" until we reduce further avoids BSON limit problems, as does the new form with MongoDB 3.6 where the "sub-pipeline" does all the document reduction and grouping before any results are returned.
Your one document sample would return like this:
{
"_id" : ObjectId("5aeb6b71709f43359e0888bb"),
"data" : [
{
"groupDate" : ISODate("2018-05-02T00:00:00Z"),
"assignedCount" : 1,
"resolvedCount" : 0
}
]
}
That is the result once I expand the date selection to include that date. The date selection itself can of course also be improved and corrected from your original form.
So it seems to make sense that your relationships are actually defined that way but it's just that you recorded them "twice". You don't need to and even if that's not the definition then you should actually instead record on the "child" rather than an array in the parent. We can juggle and merge the parent arrays, but that's counterproductive to actually establishing the data relations correctly and using them correctly as well.

How about something like this?
// Build a $lookup stage that resolves one of the user's ticket-id arrays
// against the 'tickets' collection.
function lookupTickets(localField, as) {
  return {
    $lookup: { from: 'tickets', localField: localField, foreignField: '_id', as: as }
  };
}

// Build a $sum accumulator that adds 1 for each ticket whose 'type' field
// equals the given value, and 0 otherwise.
function countTicketsOfType(ticketType) {
  return {
    $sum: {
      $cond: [ { $eq: [ '$tickets.type', ticketType ] }, 1, 0 ]
    }
  };
}

db.users.aggregate([
  lookupTickets('assignedTickets', 'assigned'),
  lookupTickets('resolvedTickets', 'resolved'),

  // Merge both joined arrays into a single 'tickets' array ...
  { $project: { "tickets": { $concatArrays: [ "$assigned", "$resolved" ] } } },

  // ... and flatten it into one document per ticket.
  { $unwind: '$tickets' },

  // Count per (creation day, assignee), split by ticket type.
  {
    $group: {
      _id: {
        groupDate: {
          $dateFromParts: {
            year: { $year: '$tickets.createdAt' },
            month: { $month: '$tickets.createdAt' },
            day: { $dayOfMonth: '$tickets.createdAt' }
          }
        },
        userId: '$tickets.assignee_id'
      },
      assignedCount: countTicketsOfType('assigned'),
      resolvedCount: countTicketsOfType('resolved')
    }
  },

  // Newest day first.
  { $sort: { '_id.groupDate': -1 } },

  // Regroup by user only, collecting the per-day counts into an array.
  {
    $group: {
      _id: '$_id.userId',
      data: {
        $push: {
          groupDate: '$_id.groupDate',
          assignedCount: { $sum: '$assignedCount' },
          resolvedCount: { $sum: '$resolvedCount' }
        }
      }
    }
  }
])

Related

mongoose populate pass found object reference key

I have a mongoose Group schema which contains invitee (array of sub document) and currentMove, invitee also contains currentMove and I want to get document with only sub document that have same currentMove.
// Load one group and populate the user behind each invitee, attempting to keep
// only users whose currentMove equals the group's own currentMove.
Group.findById("5a03fa29fafa645c8a399353")
.populate({
path: 'invitee.user_id',
select: 'currentMove',
model:"User",
// NOTE(review): this is the part reported as not working. populate's `match`
// is presumably a plain query filter, so "$currentMove" is sent as a literal
// value instead of being resolved as a reference to the group's field.
match: {
"currentMove":{
$eq: "$currentMove"
}
}
})
This generates unknown currentMove Object id for match query. I'm not sure if mongoose has this functionality. Can anyone help me, please?
In modern MongoDB releases it is far more efficient to use $lookup here instead of .populate(). Also the basic concept that you want to filter based on a comparison of fields is something that MongoDB does quite well with native operators, but it's not something you can easily transpose into .populate().
In fact the only way possible to actually use with .populate() would be to first retrieve all results, and then use Model.populate() with a $where clause on query all whilst processing the result array with Array.map() in order to apply the local value of each document to the conditions to "join" on.
It's all kind of messy, and involves pulling all results from the server and filtering locally. So $lookup is our best option here, where all of the "filtering" and "matching" actually takes place on the server without needing to pull unnecessary documents over the network just to obtain a result.
Sample Schema
You don't actually include a "schema" in your question, so we can only work with an approximation based on what parts you actually do include in the question. So my example here uses:
// Approximated schemas for the examples (the question did not include any).
// A user has a name and a numeric currentMove.
const userSchema = new Schema({
name: String,
currentMove: Number
})
// A group has its own currentMove plus an array of invitee sub-documents,
// each referencing a User by id and carrying a `confirmed` flag (default false).
const groupSchema = new Schema({
name: String,
topic: String,
currentMove: Number,
invitee: [{
user_id: { type: Schema.Types.ObjectId, ref: 'User' },
confirmed: { type: Boolean, default: false }
}]
});
Unwinding $lookup and $group
From here we have different approaches to the $lookup queries. The first basically involves applying $unwind both before and after the $lookup stage. This is partly since your "reference" is an embedded field within the array, and also partly because it's actually the most efficient query form to use here with a possible "join" result that could potentially exceed the BSON limit ( 16MB for the document ) being avoided:
// Return each group with `invitee` reduced to the entries whose joined user
// has the same currentMove as the group.
Group.aggregate([
// One document per invitee entry, so each can be joined and tested on its own.
{ "$unwind": "$invitee" },
// Join the referenced user; the result array is written over "invitee.user_id".
{ "$lookup": {
"from": User.collection.name,
"localField": "invitee.user_id",
"foreignField": "_id",
"as": "invitee.user_id"
}},
// Turn the joined array into a single embedded user document.
{ "$unwind": "$invitee.user_id" },
// Keep a document only when the group's currentMove equals the joined user's.
{ "$redact": {
"$cond": {
"if": { "$eq": ["$currentMove", "$invitee.user_id.currentMove"] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
// Rebuild one document per group: $first restores the original scalar fields,
// $push re-collects the surviving invitee entries into an array.
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"topic": { "$first": "$topic" },
"currentMove": { "$first": "$currentMove" },
"invitee": { "$push": "$invitee" }
}}
]);
The key expression here is the $redact which is processed after the $lookup result is returned. This allows a logical comparison of the "currentMove" values from both the parent document and the "joined" detail for the User objects.
Since we $unwind the array content, we use $group with $push to reconstruct the array ( if you must ) and select the other fields of the original document using $first.
There are ways to examine the schema and generate such a stage, but that's not really in the scope of the question. An example can be seen on Querying after populate in Mongoose. Point being that if you want the fields returned, then you would construct this pipeline stage around using those expressions to return a document of the original shape.
Filter $lookup result
An alternate approach where you are certain that the "unfiltered" result of the "join" will not cause the document to exceed the BSON limit is to instead make a separate target array, and then reconstruct your "joined" array content using $map and $filter, and other array operators:
// Same result as the $unwind/$redact form, but without unwinding: join into a
// temporary array, then rebuild `invitee` from the filtered join result.
Group.aggregate([
// Join all referenced users into the temporary "inviteeT" array.
{ "$lookup": {
"from": User.collection.name,
"localField": "invitee.user_id",
"foreignField": "_id",
"as": "inviteeT"
}},
// Overwrite `invitee`: keep only joined users with the group's currentMove,
// and map each one back into the shape of an invitee entry.
{ "$addFields": {
"invitee": {
"$map": {
"input": {
"$filter": {
"input": "$inviteeT",
"as": "i",
"cond": { "$eq": ["$$i.currentMove","$currentMove"] }
}
},
"as": "i",
"in": {
// Find the position of this user in the original invitee array and take
// the sub-document _id stored at the same index.
"_id": {
"$arrayElemAt": [
"$invitee._id",
{ "$indexOfArray": ["$invitee.user_id", "$$i._id"] }
]
},
"user_id": "$$i",
// Same index lookup for the `confirmed` flag.
"confirmed": {
"$arrayElemAt": [
"$invitee.confirmed",
{ "$indexOfArray": ["$invitee.user_id","$$i._id"] }
]
}
}
}
}
}},
// Drop the temporary join array.
{ "$project": { "inviteeT": 0 } },
// Discard groups left with no matching invitee (index 0 must exist).
{ "$match": { "invitee.0": { "$exists": true } } }
]);
Instead of the $redact which would be filtering "documents", we use $filter here with the expression to only return those members of the target array "inviteeT" which share the same "currentMove". Since this is just the "foreign" content, we "join" with the original array using $map and transposing the elements.
To do that "transposition" of values from the original array, we use the $arrayElemAt and $indexOfArray expressions. The $indexOfArray allows us to match up the target's "_id" values with the "user_id" values in the original array and get its "index" position. We always know this returns a real match because the $lookup did that part for us.
The "index" value is then supplied to $arrayElemAt which similarly applies a "mapping" of the values as an array like "$invitee.confirmed" and returns the value matched at the same index. This is basically a "lookup" between the arrays.
Differing from the first pipeline example, we now still have the "inviteeT" array as well as our re-written "invitee" array courtesy of $addFields. So one way to get rid of that is to add an additional $project and exclude the unwanted "temporary" array. And of course since we did not $unwind and "filter", there are still possible results with no matching array entries at all. So the $match expression uses $exists to test for the 0 index being present in the array result, which means there is "at least one" result, and discards any documents with empty arrays.
MongoDB 3.6 "sub-query"
MongoDB 3.6 makes this a bit cleaner as a new syntax for $lookup allows a more expressive "pipeline" to be given in argument to select the results returned, rather than the simplistic "localField" and "foreignField" matching.
// MongoDB 3.6 form: the filtering and reshaping happen inside the $lookup
// sub-pipeline, so `invitee` is written in its final shape directly.
Group.aggregate([
{ "$lookup": {
"from": User.collection.name,
// Values from the group document made available to the sub-pipeline as $$variables.
// The three invitee.* entries are parallel arrays taken from the invitee sub-documents.
"let": {
"ids": "$invitee._id",
"users": "$invitee.user_id",
"confirmed": "$invitee.confirmed",
"currentMove": "$currentMove"
},
"pipeline": [
// Select users that are referenced by the group ("foreign key" part) and
// whose currentMove equals the group's currentMove.
{ "$match": {
"$expr": {
"$and": [
{ "$in": ["$_id", "$$users"] },
{ "$eq": ["$currentMove", "$$currentMove"] }
]
}
}},
// Reshape each matched user into an invitee entry, picking the sub-document
// _id and confirmed flag stored at the user's index in the original array.
{ "$project": {
"_id": {
"$arrayElemAt": [
"$$ids",
{ "$indexOfArray": ["$$users", "$_id"] }
]
},
"user_id": "$$ROOT",
"confirmed": {
"$arrayElemAt": [
"$$confirmed",
{ "$indexOfArray": ["$$users", "$_id"] }
]
}
}}
],
"as": "invitee"
}},
// Discard groups whose sub-pipeline returned no results.
{ "$match": { "invitee.0": { "$exists": true } } }
])
So there are some slightly "glitchy" things in there with the usage of mapping arrays of specific values for input due to how these are currently passed into the sub-pipeline via the "let" declaration. This should probably work cleaner, but on the current release candidate this is how it's actually required to be expressed in order to work.
With this new syntax the "let" allows us to declare "variables" from the current document which can then be referenced in the "pipeline" expression which will be executed in order to determine which results to return to the target array.
The $expr here essentially replaces the $redact or $filter conditions used before, as well as combining the "local" to "foreign" key matching which also requires us to declare such a variable. Here we mapped the "$invitee.user_id" values from the source document into a variable which we refer to as "$$users" in the rest of the expressions.
The $in operator here is a variant for the aggregation framework which returns a boolean condition where the first argument "value" is found in the second argument "array". So this is the "foreign key" filter part.
Since this is a "pipeline", we can add a $project stage in addition to the $match which selected the items from the foreign collection. So again we use a similar "transposition" technique to what was described before. This then gives us control of the "shape" of the documents returned in the array, so we don't manipulate the returned array "after" the $lookup like we did previously.
The same case applies though, since no matter what you do here the "sub-pipeline" can of course return no results when the filter conditions do not match. So again the same $exists test is used to discard those documents.
So it's all pretty cool, and once you get used to the power available in the server side "join" functionality of $lookup you likely will never look back. Whilst the syntax is a lot more verbose than the "convenience" function that .populate() was introduced for, the reduced traffic load, far more advanced uses and general expressiveness basically make up for that.
As a complete example, I'm also including a self contained listing that demonstrates all of these. And if you run it with a MongoDB 3.6 compatible server attached, then you will even get that demonstration as well.
Needs a recent Node.js v8.x release to run with async/await ( or enable in other supported ), but since that's now the LTS release you really should be running that anyway. At least install one to test :)
// Setup for the self-contained listing: Mongoose with native promises and
// query debugging enabled, a local connection target, and the two models.
const mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.Promise = global.Promise;
// Print every operation Mongoose sends (the "Mongoose: ..." lines in the output).
mongoose.set('debug',true);
const uri = 'mongodb://localhost/rollgroup',
options = { useMongoClient: true };
// A user has a name and a numeric currentMove.
const userSchema = new Schema({
name: String,
currentMove: Number
})
// A group has its own currentMove plus invitee sub-documents, each referencing
// a User by id and carrying a `confirmed` flag (default false).
const groupSchema = new Schema({
name: String,
topic: String,
currentMove: Number,
invitee: [{
user_id: { type: Schema.Types.ObjectId, ref: 'User' },
confirmed: { type: Boolean, default: false }
}]
});
const User = mongoose.model('User', userSchema);
const Group = mongoose.model('Group', groupSchema);
/**
 * Print a value to the console as JSON indented by two spaces.
 *
 * @param {*} data - value to serialize and print.
 */
function log(data) {
  const pretty = JSON.stringify(data, undefined, 2);
  console.log(pretty);
}
// Self-contained demonstration. Connects, resets the collections, inserts five
// users and one group, then runs each approach in turn and logs its result:
//   1. $unwind + $lookup + $redact + $group
//   2. $lookup + $filter/$map
//   3. $lookup with a sub-pipeline (only when the server is 3.6 or newer)
//   4. populate() with a per-document $where, followed by client-side filtering
// Errors are logged; the connection is always closed at the end.
(async function() {

  try {

    const conn = await mongoose.connect(uri, options);
    const { version } = await conn.db.admin().command({ 'buildInfo': 1 });

    // Clean data
    await Promise.all(
      Object.values(conn.models).map(m => m.remove())
    );

    // Add some users
    const users = await User.insertMany([
      { name: 'Bill', currentMove: 1 },
      { name: 'Ted', currentMove: 2 },
      { name: 'Fred', currentMove: 3 },
      { name: 'Sally', currentMove: 4 },
      { name: 'Harry', currentMove: 5 }
    ]);

    // One group inviting every user; only the user on move 3 is confirmed.
    await Group.create({
      name: 'Group1',
      topic: 'This stuff',
      currentMove: 3,
      invitee: users.map( u =>
        ({ user_id: u._id, confirmed: (u.currentMove === 3) })
      )
    });

    await (async function() {
      console.log('Unwinding example');
      const result = await Group.aggregate([
        { "$unwind": "$invitee" },
        { "$lookup": {
          "from": User.collection.name,
          "localField": "invitee.user_id",
          "foreignField": "_id",
          "as": "invitee.user_id"
        }},
        { "$unwind": "$invitee.user_id" },
        { "$redact": {
          "$cond": {
            "if": { "$eq": ["$currentMove", "$invitee.user_id.currentMove"] },
            "then": "$$KEEP",
            "else": "$$PRUNE"
          }
        }},
        { "$group": {
          "_id": "$_id",
          "name": { "$first": "$name" },
          "topic": { "$first": "$topic" },
          "currentMove": { "$first": "$currentMove" },
          "invitee": { "$push": "$invitee" }
        }}
      ]);

      log(result);

    })();

    await (async function() {
      console.log('Using $filter example');
      const result = await Group.aggregate([
        { "$lookup": {
          "from": User.collection.name,
          "localField": "invitee.user_id",
          "foreignField": "_id",
          "as": "inviteeT"
        }},
        { "$addFields": {
          "invitee": {
            "$map": {
              "input": {
                "$filter": {
                  "input": "$inviteeT",
                  "as": "i",
                  "cond": { "$eq": ["$$i.currentMove","$currentMove"] }
                }
              },
              "as": "i",
              "in": {
                "_id": {
                  "$arrayElemAt": [
                    "$invitee._id",
                    { "$indexOfArray": ["$invitee.user_id", "$$i._id"] }
                  ]
                },
                "user_id": "$$i",
                "confirmed": {
                  "$arrayElemAt": [
                    "$invitee.confirmed",
                    { "$indexOfArray": ["$invitee.user_id","$$i._id"] }
                  ]
                }
              }
            }
          }
        }},
        { "$project": { "inviteeT": 0 } },
        { "$match": { "invitee.0": { "$exists": true } } }
      ]);

      log(result);

    })();

    await (async function() {
      // Compare major and minor as integers. Parsing the version as a float
      // misreads two-digit components: "10.0.1" would yield 0.0 and "3.10"
      // would yield 3.1. Suffixes such as "-rc3" only affect the patch part,
      // which is not needed here.
      const [major, minor] = version.split('.').map(part => parseInt(part, 10));
      if (major > 3 || (major === 3 && minor >= 6)) {
        console.log('New $lookup example. Yay!');
        const result = await Group.collection.aggregate([
          { "$lookup": {
            "from": User.collection.name,
            "let": {
              "ids": "$invitee._id",
              "users": "$invitee.user_id",
              "confirmed": "$invitee.confirmed",
              "currentMove": "$currentMove"
            },
            "pipeline": [
              { "$match": {
                "$expr": {
                  "$and": [
                    { "$in": ["$_id", "$$users"] },
                    { "$eq": ["$currentMove", "$$currentMove"] }
                  ]
                }
              }},
              { "$project": {
                "_id": {
                  "$arrayElemAt": [
                    "$$ids",
                    { "$indexOfArray": ["$$users", "$_id"] }
                  ]
                },
                "user_id": "$$ROOT",
                "confirmed": {
                  "$arrayElemAt": [
                    "$$confirmed",
                    { "$indexOfArray": ["$$users", "$_id"] }
                  ]
                }
              }}
            ],
            "as": "invitee"
          }},
          { "$match": { "invitee.0": { "$exists": true } } }
        ]).toArray();

        log(result);
      }
    })();

    await (async function() {
      console.log("Horrible populate example :(");

      // populate() cannot be chained here: each document's own currentMove is
      // needed to build its match condition, so the groups are fetched first.
      let results = await Group.find();

      results = await Promise.all(
        results.map( r =>
          User.populate(r,{
            path: 'invitee.user_id',
            // The interpolated value is the group's stored Number field.
            match: { "$where": `this.currentMove === ${r.currentMove}` }
          })
        )
      );

      console.log("All members still there");
      log(results);

      // Then we clean it for null values
      results = results.map( r =>
        Object.assign(r,{
          invitee: r.invitee.filter(i => i.user_id !== null)
        })
      );

      console.log("Now they are filtered");
      log(results);

    })();

  } catch(e) {
    console.error(e);
  } finally {
    // Wait for the connection to close so a failure to disconnect is not
    // left as a floating promise.
    await mongoose.disconnect();
  }

})()
Gives the output for each example as:
Mongoose: users.remove({}, {})
Mongoose: groups.remove({}, {})
Mongoose: users.insertMany([ { __v: 0, name: 'Bill', currentMove: 1, _id: 5a0afda01643cf41789e500a }, { __v: 0, name: 'Ted', currentMove: 2, _id: 5a0afda01643cf41789e500b }, { __v: 0, name: 'Fred', currentMove: 3, _id: 5a0afda01643cf41789e500c }, { __v: 0, name: 'Sally', currentMove: 4, _id: 5a0afda01643cf41789e500d }, { __v: 0, name: 'Harry', currentMove: 5, _id: 5a0afda01643cf41789e500e } ], {})
Mongoose: groups.insert({ name: 'Group1', topic: 'This stuff', currentMove: 3, _id: ObjectId("5a0afda01643cf41789e500f"), invitee: [ { user_id: ObjectId("5a0afda01643cf41789e500a"), _id: ObjectId("5a0afda01643cf41789e5014"), confirmed: false }, { user_id: ObjectId("5a0afda01643cf41789e500b"), _id: ObjectId("5a0afda01643cf41789e5013"), confirmed: false }, { user_id: ObjectId("5a0afda01643cf41789e500c"), _id: ObjectId("5a0afda01643cf41789e5012"), confirmed: true }, { user_id: ObjectId("5a0afda01643cf41789e500d"), _id: ObjectId("5a0afda01643cf41789e5011"), confirmed: false }, { user_id: ObjectId("5a0afda01643cf41789e500e"), _id: ObjectId("5a0afda01643cf41789e5010"), confirmed: false } ], __v: 0 })
Unwinding example
Mongoose: groups.aggregate([ { '$unwind': '$invitee' }, { '$lookup': { from: 'users', localField: 'invitee.user_id', foreignField: '_id', as: 'invitee.user_id' } }, { '$unwind': '$invitee.user_id' }, { '$redact': { '$cond': { if: { '$eq': [ '$currentMove', '$invitee.user_id.currentMove' ] }, then: '$$KEEP', else: '$$PRUNE' } } }, { '$group': { _id: '$_id', name: { '$first': '$name' }, topic: { '$first': '$topic' }, currentMove: { '$first': '$currentMove' }, invitee: { '$push': '$invitee' } } } ], {})
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"invitee": [
{
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afda01643cf41789e5012",
"confirmed": true
}
]
}
]
Using $filter example
Mongoose: groups.aggregate([ { '$lookup': { from: 'users', localField: 'invitee.user_id', foreignField: '_id', as: 'inviteeT' } }, { '$addFields': { invitee: { '$map': { input: { '$filter': { input: '$inviteeT', as: 'i', cond: { '$eq': [ '$$i.currentMove', '$currentMove' ] } } }, as: 'i', in: { _id: { '$arrayElemAt': [ '$invitee._id', { '$indexOfArray': [ '$invitee.user_id', '$$i._id' ] } ] }, user_id: '$$i', confirmed: { '$arrayElemAt': [ '$invitee.confirmed', { '$indexOfArray': [ '$invitee.user_id', '$$i._id' ] } ] } } } } } }, { '$project': { inviteeT: 0 } }, { '$match': { 'invitee.0': { '$exists': true } } } ], {})
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"invitee": [
{
"_id": "5a0afda01643cf41789e5012",
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"confirmed": true
}
],
"__v": 0
}
]
New $lookup example. Yay!
Mongoose: groups.aggregate([ { '$lookup': { from: 'users', let: { ids: '$invitee._id', users: '$invitee.user_id', confirmed: '$invitee.confirmed', currentMove: '$currentMove' }, pipeline: [ { '$match': { '$expr': { '$and': [ { '$in': [ '$_id', '$$users' ] }, { '$eq': [ '$currentMove', '$$currentMove' ] } ] } } }, { '$project': { _id: { '$arrayElemAt': [ '$$ids', { '$indexOfArray': [ '$$users', '$_id' ] } ] }, user_id: '$$ROOT', confirmed: { '$arrayElemAt': [ '$$confirmed', { '$indexOfArray': [ '$$users', '$_id' ] } ] } } } ], as: 'invitee' } }, { '$match': { 'invitee.0': { '$exists': true } } } ])
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"invitee": [
{
"_id": "5a0afda01643cf41789e5012",
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"confirmed": true
}
],
"__v": 0
}
]
Horrible populate example :(
Mongoose: groups.find({}, { fields: {} })
Mongoose: users.find({ _id: { '$in': [ ObjectId("5a0afda01643cf41789e500a"), ObjectId("5a0afda01643cf41789e500b"), ObjectId("5a0afda01643cf41789e500c"), ObjectId("5a0afda01643cf41789e500d"), ObjectId("5a0afda01643cf41789e500e") ] }, '$where': 'this.currentMove === 3' }, { fields: {} })
All members still there
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"__v": 0,
"invitee": [
{
"user_id": null,
"_id": "5a0afda01643cf41789e5014",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afda01643cf41789e5013",
"confirmed": false
},
{
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afda01643cf41789e5012",
"confirmed": true
},
{
"user_id": null,
"_id": "5a0afda01643cf41789e5011",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afda01643cf41789e5010",
"confirmed": false
}
]
}
]
Now they are filtered
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"__v": 0,
"invitee": [
{
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afda01643cf41789e5012",
"confirmed": true
}
]
}
]
Using populate()
So using .populate() here is actually pretty horrible. Sure it looks like less, but it's actually doing a lot of things that simply are not needed, and all because the "join" does not happen on the server:
// populate() cannot be chained onto this query: the match condition below
// needs the currentMove value of each returned document.
let results = await Group.find();
// Populate every group separately; inside Array.map() the current document `r`
// is in scope, so its currentMove can be written into the $where condition.
results = await Promise.all(
results.map( r =>
User.populate(r,{
path: 'invitee.user_id',
match: { "$where": `this.currentMove === ${r.currentMove}` }
})
)
);
console.log("All members still there");
log(results);
// Entries whose user did not match are still present with user_id set to null,
// so filter them out of each group's invitee array.
results = results.map( r =>
Object.assign(r,{
invitee: r.invitee.filter(i => i.user_id !== null)
})
);
console.log("Now they are filtered");
log(results);
So I also included that in the output above, as well as the whole code listing.
The problem becomes evident as you cannot "chain" the populate directly to the first query. You actually need to return the documents ( potentially ALL of them ) in order to use the current document value in a subsequent populate. And this MUST be processed for each document returned.
Not only that, but populate() is NOT going to "filter" the array to only those which match, even with the query condition. All it does is set the unmatched elements to null:
[
{
"_id": "5a0afa889f9f7e4064d8794d",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"__v": 0,
"invitee": [
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d87952",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d87951",
"confirmed": false
},
{
"user_id": {
"_id": "5a0afa889f9f7e4064d8794a",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afa889f9f7e4064d87950",
"confirmed": true
},
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d8794f",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d8794e",
"confirmed": false
}
]
}
]
This then needs an Array.filter() to be processed again for "each" document returned, which can finally remove the unwanted array items and give you the same result the other aggregation queries are doing.
So it's "really wasteful" and just not a good way to do things. There is little point in having a database when you're actually doing the majority of the processing in your application code. In fact, we may as well have simply returned the populated result and then run an Array.filter() in order to remove the unwanted entries.
This is just not how you write fast and effective code. So the example here is sometimes "what looks simple" is actually doing a lot more damage than good.

How to access the fields from arrays of a object in two different collections?

This is locations collection data.
{
_id: "1",
location: "loc1",
sublocations: [
{
_id: 2,
sublocation: "subloc1",
},
{
_id: 3,
sublocation: "subloc2",
}
]
},
{
_id: "4",
location: "loc2",
sublocations: [
{
_id: 5,
sublocation: "subloc1",
},
{
_id: 6,
sublocation: "subloc2",
}
]
}
This is products collection data
{
_id: "1",
product: "product1",
prices: [
{
_id: 2,
sublocationid: 2, //ObjectId of object in sublocations array
price: 500
},
{
_id: 3,
sublocationid: 5, //ObjectId of object in sublocations array
price: 200
}
]
}
Now I need to get the sublocation in product schema in the prices array. Expected result is as below.
{
_id: "1",
product: "product1",
prices: [
{
_id: 2,
sublocationid: 3,
sublocation: "subloc2",
price: 500
},
{
_id: 3,
sublocationid: 5,
sublocation: "subloc1"
price: 200
}
]
}
To achieve it, I did it like in the following way.
First, performing aggregation on locations collection - $unwind the sublocations array and store the $out in the new collection.
Second, perform aggregation on 'products' collection - $unwind the prices, $lookup the sublocationid from the new collection and $group them.
Third, after getting data delete the data of new collection.
Is there any other simplified way? Please let me know if there is any.
If you want to stick with 3.4 version, you can try this query:
// MongoDB 3.4: add the matching sublocation name to every price entry of each product.
db.products.aggregate([
// One document per price entry
{
$unwind: {
"path": "$prices"
}
},
// Pull in every location containing a sublocation with this price's sublocationid
{
$lookup: {
"from": "locations",
"localField": "prices.sublocationid",
"foreignField": "sublocations._id",
"as": "locations"
}
},
// Flatten the looked-up locations ...
{
$unwind: {
"path": "$locations"
}
},
// ... and then each location's sublocations
{
$unwind: {
"path": "$locations.sublocations"
}
},
// Flag the rows where the sublocation is the one the price refers to
{
$addFields: {
"keep": {
"$eq": [
"$prices.sublocationid",
"$locations.sublocations._id"
]
}
}
},
// Discard the non-matching sublocations
{
$match: {
"keep": true
}
},
// Build the reshaped price entry, now including the sublocation name
{
$addFields: {
"price": {
"_id": "$prices._id",
"sublocationid": "$prices.sublocationid",
"sublocation": "$locations.sublocations.sublocation",
"price": "$prices.price"
}
}
},
// Reassemble one document per product
{
$group: {
"_id": "$_id",
"product": { "$first": "$product" },
"prices": { "$addToSet": "$price" }
}
}
]);
It's not as nice as 3.6 version though, because of a higher memory consumption.
You can try below aggregation query in 3.6 version.
Since both local field and foreign field are array you have to $unwind both to do equality comparison.
For this you will have to use new $lookup syntax.
$match with $expr provides comparison between document fields to look up the location's sublocation document for each product's sublocation id.
$project to project the matching sublocation doc.
$addFields with $arrayElemAt to convert the looked up sublocation array into a document.
$group to push all prices with matching sublocation's document for each product.
// MongoDB 3.6+: join each product price entry to its matching sublocation
// using the "let"/"pipeline" form of $lookup.
db.products.aggregate([
// One document per price entry
{
"$unwind": "$prices"
},
{
"$lookup": {
"from": "locations",
// Expose the current price entry to the sub-pipeline as $$prices
"let": {
"prices": "$prices"
},
"pipeline": [
// One document per sublocation so each can be compared individually
{
"$unwind": "$sublocations"
},
// $expr takes an expression, so the equality test must be spelled out with $eq
{
"$match": {
"$expr": {
"$eq": [
"$$prices.sublocationid",
"$sublocations._id"
]
}
}
},
// Keep only the matched sublocation document
{
"$project": {
"sublocations": 1,
"_id": 0
}
}
],
"as": "prices.sublocations"
}
},
// Convert the looked-up array into a single document
{
"$addFields": {
"prices.sublocations": {
"$arrayElemAt": [
"$prices.sublocations",
0
]
}
}
},
// Reassemble one document per product
{
"$group": {
"_id": "$_id",
"product": {
"$first": "$product"
},
"prices": {
"$push": "$prices"
}
}
}
])

Mongo Group and sum with two fields

I have documents like:
{
"from":"abc#sss.ddd",
"to" :"ssd#dff.dff",
"email": "Hi hello"
}
How can we calculate count of sum "from and to" or "to and from"?
Like communication counts between two people?
I am able to calculate one way sum. I want to have sum both ways.
db.test.aggregate([
{ $group: {
"_id":{ "from": "$from", "to":"$to"},
"count":{$sum:1}
}
},
{
"$sort" :{"count":-1}
}
])
Since you need to calculate number of emails exchanged between 2 addresses, it would be fair to project a unified between field as following:
// Count the messages exchanged between each pair of addresses, regardless of direction.
db.a.aggregate([
// Only consider documents that have all three fields
{ $match: {
to: { $exists: true },
from: { $exists: true },
email: { $exists: true }
}},
// Build a lower-cased pair in a consistent (case-insensitive alphabetical) order,
// so that A -> B and B -> A produce the same "between" value
{ $project: {
between: { $cond: {
if: { $lte: [ { $strcasecmp: [ "$to", "$from" ] }, 0 ] },
then: [ { $toLower: "$to" }, { $toLower: "$from" } ],
else: [ { $toLower: "$from" }, { $toLower: "$to" } ] }
}
}},
// Count the documents per pair
{ $group: {
"_id": "$between",
"count": { $sum: 1 }
}},
// Most active pairs first
{ $sort :{ count: -1 } }
])
Unification logic should be quite clear from the example: it is an alphabetically sorted array of both emails. The $match and $toLower parts are optional if you trust your data.
Documentation for operators used in the example:
$match
$exists
$project
$cond
$lte
$strcasecmp
$toLower
$group
$sum
$sort
You basically need to consider the _id for grouping as an "array" of the possible "to" and "from" values, and then of course "sort" them, so that in every document the combination is always in the same order.
Just as a side note, I want to add that "typically" when I am dealing with messaging systems like this, the "to" and "from" sender/recipients are usually both arrays to begin with anyway, so it usually forms the base of where different variations on this statement come from.
First, the most optimal MongoDB 3.2 statement, for single addresses
db.collection.aggregate([
// Join in array
{ "$project": {
"people": [ "$to", "$from" ],
}},
// Unwind array
{ "$unwind": "$people" },
// Sort array
{ "$sort": { "_id": 1, "people": 1 } },
// Group document
{ "$group": {
"_id": "$_id",
"people": { "$push": "$people" }
}},
// Group people and count
{ "$group": {
"_id": "$people",
"count": { "$sum": 1 }
}}
]);
That's the basics, and now the only variations are in the construction of the "people" array ( stage 1 only above ).
MongoDB 3.x and 2.6.x - Arrays
{ "$project": {
"people": { "$setUnion": [ "$to", "$from" ] }
}}
MongoDB 3.x and 2.6.x - Fields to array
{ "$project": {
"people": {
"$map": {
"input": ["A","B"],
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "A", "$$el" ] },
"$to",
"$from"
]
}
}
}
}}
MongoDB 2.4.x and 2.2.x - from fields
{ "$project": {
"to": 1,
"from": 1,
"type": { "$const": [ "A", "B" ] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": "$_id",
"people": {
"$addToSet": {
"$cond": [
{ "$eq": [ "$type", "A" ] },
"$to",
"$from"
]
}
}
}}
But in all cases:
Get all recipients into a distinct array.
Order the array to a consistent order
Group on the "always in the same order" list of recipients.
Follow that and you cannot go wrong.

Filter subdocument by datetime

I've the following model
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
author: { type: Schema.Types.ObjectId }
});
var conversationSchema = new Schema({
title: { type: String },
author: { type : Schema.Types.ObjectId },
members: [ { type: Schema.Types.ObjectId } ],
creationDate: { type: Date, default: Date.now },
lastUpdate: { type: Date, default: Date.now },
comments: [ messageSchema ]
});
I want to create two methods to get the comments generated after a date by user or by conversationId.
By User
I tried with the following method
var query = {
members : { $all : [ userId, otherUserId ], "$size" : 2 }
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
When there are no comments after the specified date (at from) the method returns [] or null
By conversationId
The same happens as when I try to get by user id
var query = { _id : conversationId
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
Is there any way to make the method returns the conversation information with an empty comments?
Thank you!
Sounds like a couple of problems here, but stepping through them all
In order to get more than a single match "or" none from an array, you need the aggregation framework or mapReduce to do this. You could try "projecting" with $elemMatch but this can only return the "first" match. i.e:
{ "a": [1,2,3] }
// Projection-form $elemMatch must be keyed by the array field it applies to
db.collection.find({ },{ "a": { "$elemMatch": { "$gte": 2 } } })
{ "a": [2] }
So standard projection does not work for this. It can return an "empty" array but it can also only return the "first" element that is matched.
Moving along, you also have this in your code:
{ $all : [ userId, otherUserId ], "$site" : 2 }
Where $site is not a valid operator. I think you mean $size but there are actually "two" operators with that name and your intent may not be clear here.
If you mean that the array you are testing must have "only two" elements, then this is the operator for you. If you meant that the matched conversation between the two people had to be equal to both in the match, then $all does this anyway so the $size becomes redundant in either case unless you don't want anyone else in the conversation.
On to the aggregation problem. You need to "filter" the content of the array in a "non-destructive way" in order to get more than one match or an empty array.
The best approach for this is with modern MongoDB features available from 2.6, which allows the array content to be filtered without processing $unwind:
// MongoDB 2.6+: return the conversations between the two users, with "comments"
// filtered to those created on or after "from" (possibly an empty array).
Model.aggregate(
[
// Conversations that include both users
{ "$match": {
"members": { "$all": [userId,otherUserId] }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
// Map each non-matching comment to false, then strip the false values
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
],
// Placeholder callback: handle err / result here
function(err,result) {
}
);
That uses $map which can process an expression against each array element. In this case the values are tested under the $cond ternary to either return the array element where the condition is true or otherwise return false as the element.
These are then "filtered" by the $setDifference operator which essentially compares the resulting array of $map to the other array [false]. This removes any false values from the result array and only leaves matched elements or no elements at all.
An alternative may have been $redact, but since your document contains "creationDate" at multiple levels, this messes with the logic used with its $$DESCEND operator. This rules that action out.
In earlier versions "not destroying" the array needs to be treated with care. So you need to do much the same "filter" of results in order to get the "empty" array you want:
// Pre-2.6 approach: filter "comments" by date via $unwind/$group while still
// returning conversations in which no comment matched (with an empty array).
Model.aggregate(
[
// Conversations that include both users
{ "$match": {
"$and": [
{ "members": userId },
{ "members": otherUserId }
]
}},
// Expand the array so each comment can be tested
{ "$unwind": "$comments" },
// Regroup: matching comments are kept, the rest collapse to a single false
// ( $addToSet ), and the number of matches is counted
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": {
"$addToSet": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
"$comments",
false
]
}
},
"matchedSize": {
"$sum": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
1,
0
]
}
}
}},
// Expand again so the false placeholder can be removed
{ "$unwind": "$comments" },
// Drop the false placeholder, unless nothing matched at all
{ "$match": {
"$or": [
{ "comments": { "$ne": false } },
{ "matchedSize": 0 }
]
}},
// Rebuild the comments array
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": { "$push": "$comments" }
}},
// Replace a lone [false] ( no matches ) with an empty array
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$cond": [
{ "$eq": [ "$comments", [false] ] },
{ "$const": [] },
"$comments"
]
}
}}
],
// Placeholder callback: handle err / result here
function(err,result) {
}
)
This does much of the same things, but longer. In order to look at the array content you need to $unwind the content. When you $group back, you look at each element to see if it matches the condition to decide what to return, also keeping a count of the matches.
This is going to put some ( one with $addToSet ) false results in the array, or only an array with the entry false where there are no matches. So you filter these out with $match, while also testing the matched "count" to see if no matches were found. If no match was found then you don't throw away that item.
Instead you replace the [false] arrays with empty arrays in a final $project.
So depending on your MongoDB version this is either "fast/easy" or "slow/hard" to process. Compelling reasons to update a version already many years old.
Working example
// Self-contained demonstration: creates three members and a conversation with
// three comments, then aggregates with "comments" filtered by creationDate.
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/aggtest');
var memberSchema = new Schema({
name: { type: String }
});
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
});
var conversationSchema = new Schema({
members: [ { type: Schema.Types.ObjectId } ],
comments: [messageSchema]
});
var Member = mongoose.model( 'Member', memberSchema );
var Conversation = mongoose.model( 'Conversation', conversationSchema );
async.waterfall(
[
// Clean
function(callback) {
async.each([Member,Conversation],function(model,callback) {
model.remove({},callback);
},
function(err) {
callback(err);
});
},
// add some people
function(callback) {
async.map(["bill","ted","fred"],function(name,callback) {
Member.create({ "name": name },callback);
},callback);
},
// Create a conversation
function(names,callback) {
var conv = new Conversation();
names.forEach(function(el) {
conv.members.push(el._id);
});
conv.save(function(err,conv) {
callback(err,conv,names)
});
},
// add some comments
function(conv,names,callback) {
async.eachSeries(names,function(name,callback) {
Conversation.update(
{ "_id": conv._id },
{ "$push": { "comments": { "comment": name.name } } },
callback
);
},function(err) {
callback(err,names);
});
},
// Use the creationDate of the second comment as the "from" date
function(names,callback) {
Conversation.findOne({},function(err,conv) {
callback(err,names,conv.comments[1].creationDate);
});
},
// Aggregate, keeping only the comments created on or after "from"
function(names,from,callback) {
var ids = names.map(function(el) {
return el._id
});
var pipeline = [
// Conversations that include the first two members
{ "$match": {
"$and": [
{ "members": ids[0] },
{ "members": ids[1] }
]
}},
// Map each non-matching comment to false, then strip the false values
{ "$project": {
"members": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
];
//console.log(JSON.stringify(pipeline, undefined, 2 ));
Conversation.aggregate(
pipeline,
function(err,result) {
if(err) throw err;
console.log(JSON.stringify(result, undefined, 2 ));
callback(err);
}
)
}
],
// Final callback: rethrow any error, otherwise exit
function(err) {
if (err) throw err;
process.exit();
}
);
Which produces this output:
[
{
"_id": "55a63133dcbf671918b51a93",
"comments": [
{
"comment": "ted",
"_id": "55a63133dcbf671918b51a95",
"creationDate": "2015-07-15T10:08:51.217Z"
},
{
"comment": "fred",
"_id": "55a63133dcbf671918b51a96",
"creationDate": "2015-07-15T10:08:51.220Z"
}
],
"members": [
"55a63133dcbf671918b51a90",
"55a63133dcbf671918b51a91",
"55a63133dcbf671918b51a92"
]
}
]
Note the "comments" only contain the last two entries which are "greater than or equal" to the date which was used as input ( being the date from the second comment ).

How to find match in documents in Mongo and Mongo aggregation?

I have following json structure in mongo collection-
{
"students":[
{
"name":"ABC",
"fee":1233
},
{
"name":"PQR",
"fee":345
}
],
"studentDept":[
{
"name":"ABC",
"dept":"A"
},
{
"name":"XYZ",
"dept":"X"
}
]
},
{
"students":[
{
"name":"XYZ",
"fee":133
},
{
"name":"LMN",
"fee":56
}
],
"studentDept":[
{
"name":"XYZ",
"dept":"X"
},
{
"name":"LMN",
"dept":"Y"
},
{
"name":"ABC",
"dept":"P"
}
]
}
Now I want to calculate following output.
if students.name = studentDept.name
so my result should be as below
{
"name":"ABC",
"fee":1233,
"dept":"A",
},
{
"name":"XYZ",
"fee":133,
"dept":"X"
}
{
"name":"LMN",
"fee":56,
"dept":"Y"
}
Do I need to use mongo aggregation or is it possible to get above given output without using aggregation???
What you are really asking here is how to make MongoDB return something that is actually quite different from the form in which you store it in your collection. The standard query operations do allow a "limited" form of "projection", but even as the title on the page shared in that link suggests, this is really only about "limiting" the fields to display in results based on what is present in your document already.
So any form of "alteration" requires some form of aggregation; both the aggregate and mapReduce operations allow you to "re-shape" the document results into a form that is different from the input. Perhaps also the main thing people miss with the aggregation framework in particular is that it is not just all about "aggregating", and in fact the "re-shaping" concept is core to its implementation.
So in order to get results how you want, you can take an approach like this, which should be suitable for most cases:
// Pair each student with a department of the same name, grouping by student
// name across all documents.
db.collection.aggregate([
// Expand both arrays: every student is paired with every studentDept entry
// from the same document
{ "$unwind": "$students" },
{ "$unwind": "$studentDept" },
// One result per student name; "tdept" is the dept from a name-matching pair,
// or false when no pair matched
{ "$group": {
"_id": "$students.name",
"tfee": { "$first": "$students.fee" },
"tdept": {
"$min": {
"$cond": [
{ "$eq": [
"$students.name",
"$studentDept.name"
]},
"$studentDept.dept",
false
]
}
}
}},
// Drop the students that had no matching department
{ "$match": { "tdept": { "$ne": false } } },
{ "$sort": { "_id": 1 } },
// Rename the fields to the requested output shape
{ "$project": {
"_id": 0,
"name": "$_id",
"fee": "$tfee",
"dept": "$tdept"
}}
])
Or alternately just "filter out" the cases where the two "name" fields do not match and then just project the content with the fields you want, if crossing content between documents is not important to you:
// Pair students with departments within each document and keep only the
// pairs whose names match.
db.collection.aggregate([
// Expand both arrays: every student is paired with every studentDept entry
// from the same document
{ "$unwind": "$students" },
{ "$unwind": "$studentDept" },
// Shape the output fields and flag whether the two names are equal
{ "$project": {
"_id": 0,
"name": "$students.name",
"fee": "$students.fee",
"dept": "$studentDept.dept",
"same": { "$eq": [ "$students.name", "$studentDept.name" ] }
}},
// Keep only the matching pairs
{ "$match": { "same": true } },
// Drop the helper "same" flag
{ "$project": {
"name": 1,
"fee": 1,
"dept": 1
}}
])
From MongoDB 2.6 and upwards you can even do the same thing "inline" to the document between the two arrays. You still want to reshape that array content in your final output though, but it is possibly done a little faster:
db.collection.aggregate([
// Compares entries in each array within the document
{ "$project": {
"students": {
"$map": {
"input": "$students",
"as": "stu",
"in": {
"$setDifference": [
{ "$map": {
"input": "$studentDept",
"as": "dept",
"in": {
"$cond": [
{ "$eq": [ "$$stu.name", "$$dept.name" ] },
{
"name": "$$stu.name",
"fee": "$$stu.fee",
"dept": "$$dept.dept"
},
false
]
}
}},
[false]
]
}
}
}
}},
// Students is now an array of arrays. So unwind it twice
{ "$unwind": "$students" },
{ "$unwind": "$students" },
// Rename the fields and exclude
{ "$project": {
"_id": 0,
"name": "$students.name",
"fee": "$students.fee",
"dept": "$students.dept"
}},
])
So where you want to essentially "alter" the structure of the output then you need to use one of the aggregation tools to do. And you can, even if you are not really aggregating anything.