Mongoose populate single item in array - mongodb

I have a model that has an array of dynamic references.
var postSchema = new Schema({
name: String,
targets: [{
kind: String,
item: { type: ObjectId, refPath: 'targets.kind' }
}]
});
I am using the targets property to store references to multiple different models (users, threads, attachments, etc.).
Is it possible to populate only the references that I want?
Post.find({}).populate({
// Does not work
// match: { 'targets.kind': 'Thread' }, // I want to populate only the references that match. ex: Thread, User, Attachment
path: 'targets.item',
model: 'targets.kind',
select: '_id title',
});
Thanks

The one big lesson here should be that mongoose.set('debug', true) is your new "best friend". This will show the actual queries issued to MongoDB from the code you are writing, and it's very important because when you actually "see it", then it clears up any misconceptions you likely have.
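For reference, a minimal way to switch it on. Mongoose also accepts a logging function instead of true if you want to route the output somewhere other than the console; the callback form below is a sketch, so check your Mongoose version's documentation for the exact signature:
const mongoose = require('mongoose');

// Log every operation mongoose sends to MongoDB
mongoose.set('debug', true);

// Or supply your own logger instead of the default console output
mongoose.set('debug', function(collectionName, methodName, ...methodArgs) {
  console.log(`${collectionName}.${methodName}(${JSON.stringify(methodArgs)})`);
});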
The Logic Problem
Let's demonstrate why exactly what you are attempting fails:
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost:27017/polypop';
mongoose.set('debug', true);
mongoose.Promise = global.Promise;
const postSchema = new Schema({
name: String,
targets: [{
kind: String,
item: { type: Schema.Types.ObjectId, refPath: 'targets.kind' }
}]
});
const fooSchema = new Schema({
name: String
})
const barSchema = new Schema({
number: Number
});
const Post = mongoose.model('Post', postSchema);
const Foo = mongoose.model('Foo', fooSchema);
const Bar = mongoose.model('Bar', barSchema);
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
try {
const conn = await mongoose.connect(uri, { useNewUrlParser: true });
// Clean all data
await Promise.all(
Object.entries(conn.models).map(([k,m]) => m.deleteMany())
);
// Create some things
let [foo, bar] = await Promise.all(
[{ _t: 'Foo', name: 'Bill' }, { _t: 'Bar', number: 1 }]
.map(({ _t, ...d }) => mongoose.model(_t).create(d))
);
log([foo, bar]);
// Add a Post
let post = await Post.create({
name: 'My Post',
targets: [{ kind: 'Foo', item: foo }, { kind: 'Bar', item: bar }]
});
log(post);
let found = await Post.findOne();
log(found);
let result = await Post.findOne()
.populate({
match: { 'targets.kind': 'Foo' }, // here is the problem!
path: 'targets.item',
});
log(result);
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})()
So the comment there shows that the match is the problem with the logic, so let's look at the debug output and see why:
Mongoose: posts.deleteMany({}, {})
Mongoose: foos.deleteMany({}, {})
Mongoose: bars.deleteMany({}, {})
Mongoose: foos.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a0"), name: 'Bill', __v: 0 })
Mongoose: bars.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a1"), number: 1, __v: 0 })
[
{
"_id": "5bdbc70996ed8e3295b384a0",
"name": "Bill",
"__v": 0
},
{
"_id": "5bdbc70996ed8e3295b384a1",
"number": 1,
"__v": 0
}
]
Mongoose: posts.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a2"), name: 'My Post', targets: [ { _id: ObjectId("5bdbc70996ed8e3295b384a4"), kind: 'Foo', item: ObjectId("5bdbc70996ed8e3295b384a0") }, { _id: ObjectId("5bdbc70996ed8e3295b384a3"), kind: 'Bar', item: ObjectId("5bdbc70996ed8e3295b384a1") } ], __v: 0 })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": {
"_id": "5bdbc70996ed8e3295b384a0",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": {
"_id": "5bdbc70996ed8e3295b384a1",
"number": 1,
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": "5bdbc70996ed8e3295b384a0"
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": "5bdbc70996ed8e3295b384a1"
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: bars.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a1") ] } }, { projection: {} })
Mongoose: foos.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a0") ] } }, { projection: {} })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": null
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": null
}
],
"__v": 0
}
That's the full output to show that everything else is actually working, and in fact without the match you would get the populated data back for the items. But take a close look at the two queries being issued to the foo and bar collections:
Mongoose: bars.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a1") ] } }, { projection: {} })
Mongoose: foos.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a0") ] } }, { projection: {} })
So the 'targets.kind' that you include under match is actually being searched for on the foo and bar collections, and not in the posts collection as you seem to be expecting. Along with the rest of the output this should be giving you an idea of how populate() actually works, in that nothing in the process ever says to return only the "array entries" which are of kind: 'Foo', as the example intends.
This process of "filtering the array" isn't really a natural MongoDB query at all; with the exception of the "first and singular" match you would typically use .aggregate() and the $filter operator. You can get the "singular" match via the positional $ projection operator, but if you want "all foos" where there is more than one, then you need $filter instead.
So the real core issue here is that populate() is the wrong place and the wrong operation to "filter the array". Instead you really want to "smartly" return only the array entries you want before you do anything else to "populate" the items.
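To make that distinction concrete, here is a brief sketch against the original postSchema, using 'Thread' as the kind as in the question. Note this only trims the array, it does not populate anything:
// Positional $ projection: returns only the FIRST matching array entry
Post.find(
  { 'targets.kind': 'Thread' },
  { name: 1, 'targets.$': 1 }
);

// $filter in aggregation: keeps ALL matching array entries
Post.aggregate([
  { "$match": { "targets.kind": "Thread" } },
  { "$addFields": {
    "targets": {
      "$filter": {
        "input": "$targets",
        "cond": { "$eq": [ "$$this.kind", "Thread" ] }
      }
    }
  }}
]);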
Structural Problem
Noting from the listing above, which is an analog of what is hinted at in the question, there are "multiple models" being referred to in order to "join" and obtain the overall result. Whilst this may seem logical in "RDBMS land", it's certainly neither practical nor efficient to do so with MongoDB and the general "ilk" of "document databases".
The key thing to remember here is that "documents" in a "collection" need not all have the same "table structure" as you would with an RDBMS. The structure can vary, and whilst it's probably advisable not to "vary wildly", it's certainly very valid to store "polymorphic objects" within a single collection. After all, you actually want to reference all of these things back to the same parent, so why would they need to be in different collections? Simply put, they don't need to be at all:
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost:27017/polypop';
mongoose.set('debug', true);
mongoose.Promise = global.Promise;
const postSchema = new Schema({
name: String,
targets: [{
kind: String,
item: { type: Schema.Types.ObjectId, ref: 'Target' }
}]
});
const targetSchema = new Schema({});
const fooSchema = new Schema({
name: String
});
const barSchema = new Schema({
number: Number
});
const bazSchema = new Schema({
title: String
});
const log = data => console.log(JSON.stringify(data, undefined, 2));
const Post = mongoose.model('Post', postSchema);
const Target = mongoose.model('Target', targetSchema);
const Foo = Target.discriminator('Foo', fooSchema);
const Bar = Target.discriminator('Bar', barSchema);
const Baz = Target.discriminator('Baz', bazSchema);
(async function() {
try {
const conn = await mongoose.connect(uri,{ useNewUrlParser: true });
// Clean data - bit hacky but just a demo
await Promise.all(
Object.entries(conn.models).map(([k, m]) => m.deleteMany() )
);
// Insert some things
let [foo1, bar, baz, foo2] = await Promise.all(
[
{ _t: 'Foo', name: 'Bill' },
{ _t: 'Bar', number: 1 },
{ _t: 'Baz', title: 'Title' },
{ _t: 'Foo', name: 'Ted' }
].map(({ _t, ...d }) => mongoose.model(_t).create(d))
);
log([foo1, bar, baz, foo2]);
// Add a Post
let post = await Post.create({
name: 'My Post',
targets: [
{ kind: 'Foo', item: foo1 },
{ kind: 'Bar', item: bar },
{ kind: 'Baz', item: baz },
{ kind: 'Foo', item: foo2 }
]
});
log(post);
let found = await Post.findOne();
log(found);
let result1 = await Post.findOne()
.populate({
path: 'targets.item',
match: { __t: 'Foo' }
});
log(result1);
let result2 = await Post.aggregate([
// Only get documents with a matching entry
{ "$match": {
"targets.kind": "Foo"
}},
// Optionally filter the array
{ "$addFields": {
"targets": {
"$filter": {
"input": "$targets",
"cond": {
"$eq": [ "$$this.kind", "Foo" ]
}
}
}
}},
// Lookup from single source
{ "$lookup": {
"from": Target.collection.name,
"localField": "targets.item",
"foreignField": "_id",
"as": "matches"
}},
// Marry up arrays
{ "$project": {
"name": 1,
"targets": {
"$map": {
"input": "$targets",
"in": {
"kind": "$$this.kind",
"item": {
"$arrayElemAt": [
"$matches",
{ "$indexOfArray": [ "$matches._id", "$$this.item" ] }
]
}
}
}
}
}}
]);
log(result2);
let result3 = await Post.aggregate([
// Only get documents with a matching entry
{ "$match": {
"targets.kind": "Foo"
}},
// Optionally filter the array
{ "$addFields": {
"targets": {
"$filter": {
"input": "$targets",
"cond": {
"$eq": [ "$$this.kind", "Foo" ]
}
}
}
}},
// Lookup from single source with overkill of type check
{ "$lookup": {
"from": Target.collection.name,
"let": { "targets": "$targets" },
"pipeline": [
{ "$match": {
"$expr": {
"$in": [ "$_id", "$$targets.item" ]
},
"__t": "Foo"
}}
],
"as": "matches"
}},
// Marry up arrays
{ "$project": {
"name": 1,
"targets": {
"$map": {
"input": "$targets",
"in": {
"kind": "$$this.kind",
"item": {
"$arrayElemAt": [
"$matches",
{ "$indexOfArray": [ "$matches._id", "$$this.item" ] }
]
}
}
}
}
}}
]);
console.log(result3);
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})()
That's a bit longer and has a few more concepts to get around, but the basic principle is that instead of using "multiple collections" for the different types we're only going to use one. The "mongoose" method for this uses "discriminators" in the model setup which is all relevant to this part of the code:
const Post = mongoose.model('Post', postSchema);
const Target = mongoose.model('Target', targetSchema);
const Foo = Target.discriminator('Foo', fooSchema);
const Bar = Target.discriminator('Bar', barSchema);
const Baz = Target.discriminator('Baz', bazSchema);
Which is really simply calling .discriminator() from a "base model" for the "singular" collection rather than calling mongoose.model(). The really good thing about this is as far as the rest of your code is concerned, Baz and Bar etc are all just treated like a "model" transparently, but they are actually doing something really cool underneath.
So all of these "related things" ( they really are even if you don't think so yet ) are all actually kept in the same collection, but operations using the individual models take into account an "automatic" kind key. This is __t by default, but you can actually specify whatever you want in options.
The fact that these are all actually in the same collection though is really important, since you can readily query the same collection for the different types of data. Put simply:
Foo.find({})
Would actually call
targets.find({ __t: 'Foo' })
And does this automatically. But more importantly
Target.find({ __t: { "$in": [ 'Foo', 'Baz' ] } })
Would be returning all the expected results from a "single collection" with a "single request".
So taking a look at the revised populate() under this structure:
let result1 = await Post.findOne()
.populate({
path: 'targets.item',
match: { __t: 'Foo' }
});
log(result1);
This shows instead in the logs:
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: targets.find({ __t: 'Foo', _id: { '$in': [ ObjectId("5bdbe2895b1b843fba050569"), ObjectId("5bdbe2895b1b843fba05056a"), ObjectId("5bdbe2895b1b843fba05056b"), ObjectId("5bdbe2895b1b843fba05056c") ] } }, { projection: {} })
Note how even though all "four" of the related ObjectId values are sent with the request, the additional constraint of __t: 'Foo' also binds which documents are actually returned and married up. The result then becomes self evident as only the 'Foo' entries are populated. But also note the "catch":
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba050569",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba05056c",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Filtering after Populate
This is actually a longer topic and more fully answered elsewhere, but the basic point, as shown in the output above, is that populate() still does absolutely nothing about actually "filtering" the results in the array to only the desired matches.
The other thing is that populate() really isn't that great an idea from a "performance" perspective, since what is really happening is "another query" ( in our second form we optimized to just one more ) or possibly "many queries" depending on your structure are actually being issued to the database and the results are being reconstructed together on the client.
Overall, you end up returning a lot more data than you actually need and at best you are relying on manual client side filtering in order to discard those unwanted results. So the "ideal" position is to have the "server" do that sort of thing instead, and only return the data you actually need.
The populate() method was added as a "convenience" to the mongoose API a "very" long time ago. Since then MongoDB has moved on and now sports $lookup as a "native" way for performing a "join" on the server with a single request.
There are different ways to do this, but here we just touch on "two" that are closely related to the existing populate() functionality, but with improvements:
let result2 = await Post.aggregate([
// Only get documents with a matching entry
{ "$match": {
"targets.kind": "Foo"
}},
// Optionally filter the array
{ "$addFields": {
"targets": {
"$filter": {
"input": "$targets",
"cond": {
"$eq": [ "$$this.kind", "Foo" ]
}
}
}
}},
// Lookup from single source
{ "$lookup": {
"from": Target.collection.name,
"localField": "targets.item",
"foreignField": "_id",
"as": "matches"
}},
// Marry up arrays
{ "$project": {
"name": 1,
"targets": {
"$map": {
"input": "$targets",
"in": {
"kind": "$$this.kind",
"item": {
"$arrayElemAt": [
"$matches",
{ "$indexOfArray": [ "$matches._id", "$$this.item" ] }
]
}
}
}
}
}}
]);
log(result2);
The two basic "optimizations" there are using $filter in order to "pre-discard" items from the array which do not actually match the type we want. This can be totally optional as covered with a bit more detail later, but where possible then it's probably a good thing to do since we won't even be looking for matching _id values in the foreign collection for anything but 'Foo' things.
The other of course is the $lookup itself, which means instead of a separate round trip to the server we actually just make one and the "join" is done before any response is returned. Here we just look for the matching _id values in the foreign collection to the target.items array entry values. We already filtered those for 'Foo', so that is all that gets returned:
{
"_id": "5bdbe6aa2c4a2240c16802e2",
"name": "My Post",
"targets": [
{
"kind": "Foo",
"item": {
"_id": "5bdbe6aa2c4a2240c16802de",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"kind": "Foo",
"item": {
"_id": "5bdbe6aa2c4a2240c16802e1",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
]
}
For a "slight" variation on that we can actually even inspect the __t value within the $lookup expression using "sub-pipeline" processing with MongoDB 3.6 and greater. The main use case here would be if you choose to remove the kind from the parent Post altogether and simply rely on the "kind" information inherent to discriminator references used in storage:
let result3 = await Post.aggregate([
// Only get documents with a matching entry
{ "$match": {
"targets.kind": "Foo"
}},
// Optionally filter the array
{ "$addFields": {
"targets": {
"$filter": {
"input": "$targets",
"cond": {
"$eq": [ "$$this.kind", "Foo" ]
}
}
}
}},
// Lookup from single source with overkill of type check
{ "$lookup": {
"from": Target.collection.name,
"let": { "targets": "$targets" },
"pipeline": [
{ "$match": {
"$expr": {
"$in": [ "$_id", "$$targets.item" ]
},
"__t": "Foo"
}}
],
"as": "matches"
}},
// Marry up arrays
{ "$project": {
"name": 1,
"targets": {
"$map": {
"input": "$targets",
"in": {
"kind": "$$this.kind",
"item": {
"$arrayElemAt": [
"$matches",
{ "$indexOfArray": [ "$matches._id", "$$this.item" ] }
]
}
}
}
}
}}
]);
log(result3);
This has the same "filtered" results and is similarly a "single request" and "single response".
The whole topic gets a bit wider, and even though aggregation pipelines may appear considerably more unwieldy than a simple populate() call, it's fairly trivial to write a wrapper which can abstract from your models and pretty much generate most of the data structure code required. You can see an overview of this in action at "Querying after populate in Mongoose", which in essence is the same question you are basically asking here once we sort out the initial issue of "multiple collection joins" and why you really don't need them.
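As a rough illustration only, here is a sketch of such a wrapper built from the pipeline used above. lookupByKind is a hypothetical helper for this answer, not a mongoose API:
// Hypothetical helper: build the "filter + $lookup + re-map" pipeline for one kind
function lookupByKind(kind, from) {
  return [
    { "$match": { "targets.kind": kind } },
    { "$addFields": {
      "targets": {
        "$filter": { "input": "$targets", "cond": { "$eq": [ "$$this.kind", kind ] } }
      }
    }},
    { "$lookup": {
      "from": from,
      "localField": "targets.item",
      "foreignField": "_id",
      "as": "matches"
    }},
    { "$project": {
      "name": 1,
      "targets": {
        "$map": {
          "input": "$targets",
          "in": {
            "kind": "$$this.kind",
            "item": {
              "$arrayElemAt": [
                "$matches",
                { "$indexOfArray": [ "$matches._id", "$$this.item" ] }
              ]
            }
          }
        }
      }
    }}
  ];
}

// Usage
let foos = await Post.aggregate(lookupByKind('Foo', Target.collection.name));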
The one caveat here is that $lookup has no way to "dynamically" determine which collection to "join" to. You need to include that information statically, just as is done here, so this is another reason to favor "discriminators" over using multiple collections. It's not only "better performance"; it's also the only way the most performant options will support what you are trying to do.
For reference, the "complete" (truncated due to max post length) output of the second listing would be:
Mongoose: posts.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba050569"), __t: 'Foo', name: 'Bill', __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056a"), __t: 'Bar', number: 1, __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056b"), __t: 'Baz', title: 'Title', __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056c"), __t: 'Foo', name: 'Ted', __v: 0 })
[
{
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056a",
"__t": "Bar",
"number": 1,
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056b",
"__t": "Baz",
"title": "Title",
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
]
Mongoose: posts.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056d"), name: 'My Post', targets: [ { _id: ObjectId("5bdbe2895b1b843fba050571"), kind: 'Foo', item: ObjectId("5bdbe2895b1b843fba050569") }, { _id: ObjectId("5bdbe2895b1b843fba050570"), kind: 'Bar', item: ObjectId("5bdbe2895b1b843fba05056a") }, { _id: ObjectId("5bdbe2895b1b843fba05056f"), kind: 'Baz', item: ObjectId("5bdbe2895b1b843fba05056b") }, { _id: ObjectId("5bdbe2895b1b843fba05056e"), kind: 'Foo', item: ObjectId("5bdbe2895b1b843fba05056c") } ], __v: 0 })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": {
"_id": "5bdbe2895b1b843fba05056a",
"__t": "Bar",
"number": 1,
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": {
"_id": "5bdbe2895b1b843fba05056b",
"__t": "Baz",
"title": "Title",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": "5bdbe2895b1b843fba050569"
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": "5bdbe2895b1b843fba05056a"
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": "5bdbe2895b1b843fba05056b"
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": "5bdbe2895b1b843fba05056c"
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: targets.find({ __t: 'Foo', _id: { '$in': [ ObjectId("5bdbe2895b1b843fba050569"), ObjectId("5bdbe2895b1b843fba05056a"), ObjectId("5bdbe2895b1b843fba05056b"), ObjectId("5bdbe2895b1b843fba05056c") ] } }, { projection: {} })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba050569",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba05056c",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.aggregate([ { '$match': { 'targets.kind': 'Foo' } }, { '$addFields': { targets: { '$filter': { input: '$targets', cond: { '$eq': [ '$$this.kind', 'Foo' ] } } } } }, { '$lookup': { from: 'targets', localField: 'targets.item', foreignField: '_id', as: 'matches' } }, { '$project': { name: 1, targets: { '$map': { input: '$targets', in: { kind: '$$this.kind', item: { '$arrayElemAt': [ '$matches', { '$indexOfArray': [ '$matches._id', '$$this.item' ] } ] } } } } } } ], {})
[
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
]
}
]
Mongoose: posts.aggregate([ { '$match': { 'targets.kind': 'Foo' } }, { '$addFields': { targets: { '$filter': { input: '$targets', cond: { '$eq': [ '$$this.kind', 'Foo' ] } } } } }, { '$lookup': { from: 'targets', let: { targets: '$targets' }, pipeline: [ { '$match': { '$expr': { '$in': [ '$_id', '$$targets.item' ] }, __t: 'Foo' } } ], as: 'matches' } }, { '$project': { name: 1, targets: { '$map': { input: '$targets', in: { kind: '$$this.kind', item: { '$arrayElemAt': [ '$matches', { '$indexOfArray': [ '$matches._id', '$$this.item' ] } ] } } } } } } ], {})

Related

Unable to populate and structure document with aggregate

So I have this schema which has foreign keys to other collections in the database. The collection has around 60k posts, each post can have multiple categories, and there are around 200 categories. I'm trying to fetch and structure data based on the category's foreign key and populate the category details and count.
Here's what the main schema and category schema look like:
const postSchema = new mongoose.Schema( {
post: {
type: mongoose.Schema.Types.ObjectId,
ref: 'PostDetails'
},
categories: [ {
category: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Category'
},
subCategories: [ {
subCategory: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Subcategory'
}
} ]
} ]
} );
const categorySchema = new mongoose.Schema( {
category: {
type: String,
},
categorySlug: {
type: String,
}
} );
I was successful in making the count but the returned data is not what I expected. The returned data shows the id of the categories and the count but no name and slug. Here's how it looks:
[
{
"_id": [
"617acfd232c766589c23a90c"
],
"count": 876,
"category": []
}
]
I got the above output with the following query:
const aggregateStages = [
{
$group: {
_id: '$categories.category',
count: { $sum: 1 }
}
},
{
$lookup: {
from: "Category",
localField: "categories.category",
foreignField: "_id",
as: "category"
}
}
];
const categories = await Post.aggregate( aggregateStages ).exec();
I'm hoping to get the data as follows:
[
{
"_id": "617acfd232c766589c23a90c",
"count": 876,
"category": 'SomeCategory',
"categorySlug": 'some-category'
}
]
Where am I going wrong and how can I fix it?
SAMPLE DATA FROM DB AS REQUESTED BY MATT OESTREICH
POST DATA
{
"_id": "617adad39054bae2c983c34f",
"post": "617ad1c80597c78ed4cc151e",
"author": "617acc689b309fdbbbdfdfe0",
"categories": [{
"category": "617acfd232c766589c23a8d1",
"subCategories":[]
}]
}
CATEGORY DATA
{
"_id": "617acfd232c766589c23a8d1",
"category": "Lorem Ipsum",
"categorySlug": "lorem-ipsum"
}
Ok so, it looks like you can resolve this by using the $size operator. The $size operator will give you the length (or count) of elements in an array.
Live demo here
Database
db={
"post": [
{
"_id": ObjectId("617adad39054bae2c983c34f"),
"post": ObjectId("617ad1c80597c78ed4cc151e"),
"author": ObjectId("617acc689b309fdbbbdfdfe0"),
"categories": [
{
"category": ObjectId("617acfd232c766589c23a8d1"),
"subCategories": []
}
]
}
],
"categories": [
{
"_id": ObjectId("617acfd232c766589c23a8d1"),
"category": "Lorem Ipsum",
"categorySlug": "lorem-ipsum"
}
]
}
Query
db.post.aggregate([
{
"$lookup": {
"from": "categories",
"localField": "categories.category",
"foreignField": "_id",
"as": "found_categories"
}
},
{
"$project": {
_id: "$_id",
count: {
"$size": "$found_categories"
},
"category": {
"$first": "$found_categories.category"
},
"categorySlug": {
"$first": "$found_categories.categorySlug"
}
}
}
])
Result
[
{
"_id": ObjectId("617adad39054bae2c983c34f"),
"category": "Lorem Ipsum",
"categorySlug": "lorem-ipsum",
"count": 1
}
]
Although, I do not believe this will give you what you are looking for if more than one category is found. Please let me know if it doesn't work and I will try to help fix it.
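If more than one category does need to be handled, a sketch along these lines should count posts per category and still join the details. This assumes the category collection is actually named categories in lowercase (which is what mongoose would typically produce); adjust to your real collection name:
db.post.aggregate([
  // One document per category reference
  { "$unwind": "$categories" },
  // Count posts per category id
  { "$group": { "_id": "$categories.category", "count": { "$sum": 1 } } },
  // Join the category details
  { "$lookup": {
    "from": "categories",
    "localField": "_id",
    "foreignField": "_id",
    "as": "category"
  }},
  { "$unwind": "$category" },
  // Flatten to the shape asked for in the question
  { "$project": {
    "_id": 1,
    "count": 1,
    "category": "$category.category",
    "categorySlug": "$category.categorySlug"
  }}
])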

How to get subdocuments of multiple documents in mongoose

I have a model called Organisation with an array called users in it that contains UserSchema objects. Now I need a query to get all users from all organisation documents in one array.
As you can see I am a beginner in MongoDB and normally use SQL.
But without joins I don't know what to do.
OrganisationModule:
const OrganisationSchema = new Schema({
name: { type: String, required: true },
users: [UserSchema],
version: String,
});
module.exports.Organisation = mongoose.model('Organisation', OrganisationSchema);
UserSchema:
module.exports.UserSchema = new Schema({
name: String,
roles: [String]
})
My first try:
routes.get('/', (req, res, next) => {
Organisation.find().populate('users').exec((err, users) => {
if (err) res.json(err.message)
else { res.json(users) }
});
});
The result:
[
{
"users": [
{
"roles": [ "coordinator" ],
"_id": "5aafcf80dd248f7ef86e0512",
"name": "Peter"
"__v": 0
}
],
"_id": "5aafcf80dd248f7ef86e05cf",
"name": "DEFAULT",
"__v": 1
},
{
"users": [
{
"roles": [ "admin", "coordinator" ],
"_id": "5aafcf80dd248f7ef86e0500",
"name": "Max"
"__v": 0
}
],
"_id": "5aafcf80dd248f7ef86e05ce",
"name": "Organisation_01",
"__v": 1
}
]
What I need:
[
{
"roles": [ "coordinator" ],
"_id": "5aafcf80dd248f7ef86e0512",
"name": "Peter"
"__v": 0
},
{
"roles": [ "admin", "coordinator" ],
"_id": "5aafcf80dd248f7ef86e0500",
"name": "Max"
"__v": 0
}
]
This
Organization.find(
{},
{_id: 0, users: 1}
)
Will return
[
{
users: {
roles: ['coordinator'],
_id: '5aafcf80dd248f7ef86e0512',
name: 'Peter',
....
},
},
{
users: {
roles: ['admin', 'coordinator'],
_id: '5aafcf80dd248f7ef86e0500',
name: 'Max',
....
},
},
];
This is not precisely what you want, but it is what I have found that most closely matches your need.
You can find more information here :
https://stackoverflow.com/a/42558955/11120444
https://stackoverflow.com/a/9601614/11120444
Otherwise, there is another approach
// Note: you will need to await the query so .map() runs on the returned array
const users = (await Organization.find({})).map((item) => item.users);
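Another option is to let the server flatten the array for you with the aggregation framework. A sketch against the Organisation model from the question ($replaceRoot needs MongoDB 3.4 or later):
// One result document per embedded user, promoted to the top level
const users = await Organisation.aggregate([
  { "$unwind": "$users" },
  { "$replaceRoot": { "newRoot": "$users" } }
]);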
In mongodb you can use $lookup to perform that operation.
Please study $lookup here: https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/
In mongoose you can use populate()
For example:
Organization.find().populate('users')
Study Mongoose's Populate here: http://mongoosejs.com/docs/populate.html

mongoose populate pass found object reference key

I have a mongoose Group schema which contains invitee (an array of sub documents) and currentMove; each invitee also contains currentMove, and I want to get the document with only the sub documents that have the same currentMove.
Group.findById("5a03fa29fafa645c8a399353")
.populate({
path: 'invitee.user_id',
select: 'currentMove',
model:"User",
match: {
"currentMove":{
$eq: "$currentMove"
}
}
})
This generates an unknown currentMove ObjectId for the match query. I'm not sure if mongoose has this functionality. Can anyone help me, please?
In modern MongoDB releases it is far more efficient to use $lookup here instead of .populate(). Also the basic concept that you want to filter based on a comparison of fields is something that MongoDB does quite well with native operators, but it's not something you can easily transpose into .populate().
In fact the only possible way to do this with .populate() would be to first retrieve all results, and then use Model.populate() with a $where clause on the query, whilst processing the result array with Array.map() in order to apply the local value of each document to the conditions to "join" on.
It's all kind of messy, and involves pulling all results from the server and filtering locally. So $lookup is our best option here, where all of the "filtering" and "matching" actually takes place on the server without needing to pull unnecessary documents over the network just to obtain a result.
Sample Schema
You don't actually include a "schema" in your question, so we can only work with an approximation based on what parts you actually do include in the question. So my example here uses:
const userSchema = new Schema({
name: String,
currentMove: Number
})
const groupSchema = new Schema({
name: String,
topic: String,
currentMove: Number,
invitee: [{
user_id: { type: Schema.Types.ObjectId, ref: 'User' },
confirmed: { type: Boolean, default: false }
}]
});
Unwinding $lookup and $group
From here we have different approaches to the $lookup queries. The first basically involves applying $unwind both before and after the $lookup stage. This is partly because your "reference" is an embedded field within the array, and partly because it's the most efficient query form to use here, since it avoids a possible "join" result that could exceed the BSON limit ( 16MB for a document ):
Group.aggregate([
{ "$unwind": "$invitee" },
{ "$lookup": {
"from": User.collection.name,
"localField": "invitee.user_id",
"foreignField": "_id",
"as": "invitee.user_id"
}},
{ "$unwind": "$invitee.user_id" },
{ "$redact": {
"$cond": {
"if": { "$eq": ["$currentMove", "$invitee.user_id.currentMove"] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"topic": { "$first": "$topic" },
"currentMove": { "$first": "$currentMove" },
"invitee": { "$push": "$invitee" }
}}
]);
The key expression here is the $redact which is processed after the $lookup result is returned. This allows a logical comparison of the "currentMove" values from both the parent document and the "joined" detail for the User objects.
Since we $unwind the array content, we use $group with $push to reconstruct the array ( if you must ) and select the other fields of the original document using $first.
There are ways to examine the schema and generate such a stage, but that's not really in the scope of the question. An example can be seen on Querying after populate in Mongoose. Point being that if you want the fields returned, then you would construct this pipeline stage around using those expressions to return a document of the original shape.
Filter $lookup result
An alternate approach where you are certain that the "unfiltered" result of the "join" will not cause the document to exceed the BSON limit is to instead make a separate target array, and then reconstruct your "joined" array content using $map and $filter, and other array operators:
Group.aggregate([
{ "$lookup": {
"from": User.collection.name,
"localField": "invitee.user_id",
"foreignField": "_id",
"as": "inviteeT"
}},
{ "$addFields": {
"invitee": {
"$map": {
"input": {
"$filter": {
"input": "$inviteeT",
"as": "i",
"cond": { "$eq": ["$$i.currentMove","$currentMove"] }
}
},
"as": "i",
"in": {
"_id": {
"$arrayElemAt": [
"$invitee._id",
{ "$indexOfArray": ["$invitee.user_id", "$$i._id"] }
]
},
"user_id": "$$i",
"confirmed": {
"$arrayElemAt": [
"$invitee.confirmed",
{ "$indexOfArray": ["$invitee.user_id","$$i._id"] }
]
}
}
}
}
}},
{ "$project": { "inviteeT": 0 } },
{ "$match": { "invitee.0": { "$exists": true } } }
]);
Instead of the $redact which would be filtering "documents", we use $filter here with the expression to only return those members of the target array "inviteeT" which share the same "currentMove". Since this is just the "foreign" content, we "join" with the original array using $map and transposing the elements.
To do that "transposition" of values from the original array, we use the $arrayElemAt and $indexOfArray expressions. The $indexOfArray allows us to match up the target's "_id" values with the "user_id" values in the original array and get it's "index" position. We always know this returns a real match because the $lookup did that part for us.
The "index" value is then supplied to $arrayElemAt which similarly applies a "mapping" of the values as an array like "$invitee.confirmed" and returns the value matched at the same index. This is basically a "lookup" between the arrays.
Differing from the first pipeline example, we now still have the "inviteeT" array as well as our re-written "invitee" array courtesy of $addFields. So one way to get rid of that is to add an additional $project and exclude the unwanted "temporary" array. And of course since we did not $unwind and "filter", there are still possible results with no matching array entries at all. So the $match expression uses $exists to test for the 0 index being present in the array result, which means there is "at least one" result, and discards any documents with empty arrays.
MongoDB 3.6 "sub-query"
MongoDB 3.6 makes this a bit cleaner as a new syntax for $lookup allows a more expressive "pipeline" to be given in argument to select the results returned, rather than the simplistic "localField" and "foreignField" matching.
Group.aggregate([
{ "$lookup": {
"from": User.collection.name,
"let": {
"ids": "$invitee._id",
"users": "$invitee.user_id",
"confirmed": "$invitee.confirmed",
"currentMove": "$currentMove"
},
"pipeline": [
{ "$match": {
"$expr": {
"$and": [
{ "$in": ["$_id", "$$users"] },
{ "$eq": ["$currentMove", "$$currentMove"] }
]
}
}},
{ "$project": {
"_id": {
"$arrayElemAt": [
"$$ids",
{ "$indexOfArray": ["$$users", "$_id"] }
]
},
"user_id": "$$ROOT",
"confirmed": {
"$arrayElemAt": [
"$$confirmed",
{ "$indexOfArray": ["$$users", "$_id"] }
]
}
}}
],
"as": "invitee"
}},
{ "$match": { "invitee.0": { "$exists": true } } }
])
So there are some slightly "glitchy" things in there with the usage of mapping arrays of specific values for input due to how these are currently passed into the sub-pipeline via the "let" declaration. This should probably work cleaner, but on the current release candidate this is how it's actually required to be expressed in order to work.
With this new syntax the "let" allows us to declare "variables" from the current document which can then be referenced in the "pipeline" expression which will be executed in order to determine which results to return to the target array.
The $expr here essentially replaces the $redact or $filter conditions used before, as well as combining the "local" to "foreign" key matching which also requires us to declare such a variable. Here we mapped the "$invitee.user_id" values from the source document into a variable which we refer to as "$$users" in the rest of the expressions.
The $in operator here is a variant for the aggregation framework which returns a boolean condition where the first argument "value" is found in the second argument "array". So this is the "foreign key" filter part.
Since this is a "pipeline", we can add a $project stage in addition to the $match which selected the items from the foreign collection. So again we use a similar "transposition" technique to what was described before. This then gives us control of the "shape" of the documents returned in the array, so we don't manipulate the returned array "after" the $lookup like we did previously.
The same case applies though, since no matter what you do here the "sub-pipeline" can of course return no results when the filter conditions do not match. So again the same $exists test is used to discard those documents.
So it's all pretty cool, and once you get used to the power available in the server side "join" functionality of $lookup you likely will never look back. Whilst the syntax is a lot more terse than the "convenience" function that .populate() was introduced for, the reduced traffic load, far more advanced uses and general expressiveness basically make up for that.
As a complete example, I'm also including a self contained listing that demonstrates all of these. And if you run it with a MongoDB 3.6 compatible server attached, then you will even get that demonstration as well.
Needs a recent Node.js v8.x release to run with async/await ( or enabled in other supported versions ), but since that's now the LTS release you really should be running that anyway. At least install one to test :)
const mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.Promise = global.Promise;
mongoose.set('debug',true);
const uri = 'mongodb://localhost/rollgroup',
options = { useMongoClient: true };
const userSchema = new Schema({
name: String,
currentMove: Number
})
const groupSchema = new Schema({
name: String,
topic: String,
currentMove: Number,
invitee: [{
user_id: { type: Schema.Types.ObjectId, ref: 'User' },
confirmed: { type: Boolean, default: false }
}]
});
const User = mongoose.model('User', userSchema);
const Group = mongoose.model('Group', groupSchema);
function log(data) {
console.log(JSON.stringify(data, undefined, 2))
}
(async function() {
try {
const conn = await mongoose.connect(uri,options);
let { version } = await conn.db.admin().command({'buildInfo': 1});
// Clean data
await Promise.all(
Object.entries(conn.models).map(([k,m]) => m.remove() )
);
// Add some users
let users = await User.insertMany([
{ name: 'Bill', currentMove: 1 },
{ name: 'Ted', currentMove: 2 },
{ name: 'Fred', currentMove: 3 },
{ name: 'Sally', currentMove: 4 },
{ name: 'Harry', currentMove: 5 }
]);
await Group.create({
name: 'Group1',
topic: 'This stuff',
currentMove: 3,
invitee: users.map( u =>
({ user_id: u._id, confirmed: (u.currentMove === 3) })
)
});
await (async function() {
console.log('Unwinding example');
let result = await Group.aggregate([
{ "$unwind": "$invitee" },
{ "$lookup": {
"from": User.collection.name,
"localField": "invitee.user_id",
"foreignField": "_id",
"as": "invitee.user_id"
}},
{ "$unwind": "$invitee.user_id" },
{ "$redact": {
"$cond": {
"if": { "$eq": ["$currentMove", "$invitee.user_id.currentMove"] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"topic": { "$first": "$topic" },
"currentMove": { "$first": "$currentMove" },
"invitee": { "$push": "$invitee" }
}}
]);
log(result);
})();
await (async function() {
console.log('Using $filter example');
let result = await Group.aggregate([
{ "$lookup": {
"from": User.collection.name,
"localField": "invitee.user_id",
"foreignField": "_id",
"as": "inviteeT"
}},
{ "$addFields": {
"invitee": {
"$map": {
"input": {
"$filter": {
"input": "$inviteeT",
"as": "i",
"cond": { "$eq": ["$$i.currentMove","$currentMove"] }
}
},
"as": "i",
"in": {
"_id": {
"$arrayElemAt": [
"$invitee._id",
{ "$indexOfArray": ["$invitee.user_id", "$$i._id"] }
]
},
"user_id": "$$i",
"confirmed": {
"$arrayElemAt": [
"$invitee.confirmed",
{ "$indexOfArray": ["$invitee.user_id","$$i._id"] }
]
}
}
}
}
}},
{ "$project": { "inviteeT": 0 } },
{ "$match": { "invitee.0": { "$exists": true } } }
]);
log(result);
})();
await (async function() {
if (parseFloat(version.match(/\d\.\d/)[0]) >= 3.6) {
console.log('New $lookup example. Yay!');
let result = await Group.collection.aggregate([
{ "$lookup": {
"from": User.collection.name,
"let": {
"ids": "$invitee._id",
"users": "$invitee.user_id",
"confirmed": "$invitee.confirmed",
"currentMove": "$currentMove"
},
"pipeline": [
{ "$match": {
"$expr": {
"$and": [
{ "$in": ["$_id", "$$users"] },
{ "$eq": ["$currentMove", "$$currentMove"] }
]
}
}},
{ "$project": {
"_id": {
"$arrayElemAt": [
"$$ids",
{ "$indexOfArray": ["$$users", "$_id"] }
]
},
"user_id": "$$ROOT",
"confirmed": {
"$arrayElemAt": [
"$$confirmed",
{ "$indexOfArray": ["$$users", "$_id"] }
]
}
}}
],
"as": "invitee"
}},
{ "$match": { "invitee.0": { "$exists": true } } }
]).toArray();
log(result);
}
})();
await (async function() {
console.log("Horrible populate example :(");
let results = await Group.find();
results = await Promise.all(
results.map( r =>
User.populate(r,{
path: 'invitee.user_id',
match: { "$where": `this.currentMove === ${r.currentMove}` }
})
)
);
console.log("All members still there");
log(results);
// Then we clean it for null values
results = results.map( r =>
Object.assign(r,{
invitee: r.invitee.filter(i => i.user_id !== null)
})
);
console.log("Now they are filtered");
log(results);
})();
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})()
Gives the output for each example as:
Mongoose: users.remove({}, {})
Mongoose: groups.remove({}, {})
Mongoose: users.insertMany([ { __v: 0, name: 'Bill', currentMove: 1, _id: 5a0afda01643cf41789e500a }, { __v: 0, name: 'Ted', currentMove: 2, _id: 5a0afda01643cf41789e500b }, { __v: 0, name: 'Fred', currentMove: 3, _id: 5a0afda01643cf41789e500c }, { __v: 0, name: 'Sally', currentMove: 4, _id: 5a0afda01643cf41789e500d }, { __v: 0, name: 'Harry', currentMove: 5, _id: 5a0afda01643cf41789e500e } ], {})
Mongoose: groups.insert({ name: 'Group1', topic: 'This stuff', currentMove: 3, _id: ObjectId("5a0afda01643cf41789e500f"), invitee: [ { user_id: ObjectId("5a0afda01643cf41789e500a"), _id: ObjectId("5a0afda01643cf41789e5014"), confirmed: false }, { user_id: ObjectId("5a0afda01643cf41789e500b"), _id: ObjectId("5a0afda01643cf41789e5013"), confirmed: false }, { user_id: ObjectId("5a0afda01643cf41789e500c"), _id: ObjectId("5a0afda01643cf41789e5012"), confirmed: true }, { user_id: ObjectId("5a0afda01643cf41789e500d"), _id: ObjectId("5a0afda01643cf41789e5011"), confirmed: false }, { user_id: ObjectId("5a0afda01643cf41789e500e"), _id: ObjectId("5a0afda01643cf41789e5010"), confirmed: false } ], __v: 0 })
Unwinding example
Mongoose: groups.aggregate([ { '$unwind': '$invitee' }, { '$lookup': { from: 'users', localField: 'invitee.user_id', foreignField: '_id', as: 'invitee.user_id' } }, { '$unwind': '$invitee.user_id' }, { '$redact': { '$cond': { if: { '$eq': [ '$currentMove', '$invitee.user_id.currentMove' ] }, then: '$$KEEP', else: '$$PRUNE' } } }, { '$group': { _id: '$_id', name: { '$first': '$name' }, topic: { '$first': '$topic' }, currentMove: { '$first': '$currentMove' }, invitee: { '$push': '$invitee' } } } ], {})
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"invitee": [
{
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afda01643cf41789e5012",
"confirmed": true
}
]
}
]
Using $filter example
Mongoose: groups.aggregate([ { '$lookup': { from: 'users', localField: 'invitee.user_id', foreignField: '_id', as: 'inviteeT' } }, { '$addFields': { invitee: { '$map': { input: { '$filter': { input: '$inviteeT', as: 'i', cond: { '$eq': [ '$$i.currentMove', '$currentMove' ] } } }, as: 'i', in: { _id: { '$arrayElemAt': [ '$invitee._id', { '$indexOfArray': [ '$invitee.user_id', '$$i._id' ] } ] }, user_id: '$$i', confirmed: { '$arrayElemAt': [ '$invitee.confirmed', { '$indexOfArray': [ '$invitee.user_id', '$$i._id' ] } ] } } } } } }, { '$project': { inviteeT: 0 } }, { '$match': { 'invitee.0': { '$exists': true } } } ], {})
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"invitee": [
{
"_id": "5a0afda01643cf41789e5012",
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"confirmed": true
}
],
"__v": 0
}
]
New $lookup example. Yay!
Mongoose: groups.aggregate([ { '$lookup': { from: 'users', let: { ids: '$invitee._id', users: '$invitee.user_id', confirmed: '$invitee.confirmed', currentMove: '$currentMove' }, pipeline: [ { '$match': { '$expr': { '$and': [ { '$in': [ '$_id', '$$users' ] }, { '$eq': [ '$currentMove', '$$currentMove' ] } ] } } }, { '$project': { _id: { '$arrayElemAt': [ '$$ids', { '$indexOfArray': [ '$$users', '$_id' ] } ] }, user_id: '$$ROOT', confirmed: { '$arrayElemAt': [ '$$confirmed', { '$indexOfArray': [ '$$users', '$_id' ] } ] } } } ], as: 'invitee' } }, { '$match': { 'invitee.0': { '$exists': true } } } ])
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"invitee": [
{
"_id": "5a0afda01643cf41789e5012",
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"confirmed": true
}
],
"__v": 0
}
]
Horrible populate example :(
Mongoose: groups.find({}, { fields: {} })
Mongoose: users.find({ _id: { '$in': [ ObjectId("5a0afda01643cf41789e500a"), ObjectId("5a0afda01643cf41789e500b"), ObjectId("5a0afda01643cf41789e500c"), ObjectId("5a0afda01643cf41789e500d"), ObjectId("5a0afda01643cf41789e500e") ] }, '$where': 'this.currentMove === 3' }, { fields: {} })
All members still there
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"__v": 0,
"invitee": [
{
"user_id": null,
"_id": "5a0afda01643cf41789e5014",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afda01643cf41789e5013",
"confirmed": false
},
{
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afda01643cf41789e5012",
"confirmed": true
},
{
"user_id": null,
"_id": "5a0afda01643cf41789e5011",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afda01643cf41789e5010",
"confirmed": false
}
]
}
]
Now they are filtered
[
{
"_id": "5a0afda01643cf41789e500f",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"__v": 0,
"invitee": [
{
"user_id": {
"_id": "5a0afda01643cf41789e500c",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afda01643cf41789e5012",
"confirmed": true
}
]
}
]
Using populate()
So using .populate() here is actually pretty horrible. Sure it looks like less code, but it's actually doing a lot of things that simply are not needed, and all because the "join" does not happen on the server:
// Note that we cannot populate "here" since we need the returned value
let results = await Group.find();
// The value is only in context as we use `Array.map()` to process each result
results = await Promise.all(
results.map( r =>
User.populate(r,{
path: 'invitee.user_id',
match: { "$where": `this.currentMove === ${r.currentMove}` }
})
)
);
console.log("All members still there");
log(results);
// Then we clean it for null values
results = results.map( r =>
Object.assign(r,{
invitee: r.invitee.filter(i => i.user_id !== null)
})
);
console.log("Now they are filtered");
log(results);
So I also included that in the output above, as well as the whole code listing.
The problem becomes evident as you cannot "chain" the populate directly to the first query. You actually need to return the documents ( potentially ALL of them ) in order to use the current document value in a subsequent populate. And this MUST be processed for each document returned.
Not only that, but populate() is NOT going to "filter" the array to only those which match, even with the query condition. All it does is set the unmatched elements to null:
[
{
"_id": "5a0afa889f9f7e4064d8794d",
"name": "Group1",
"topic": "This stuff",
"currentMove": 3,
"__v": 0,
"invitee": [
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d87952",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d87951",
"confirmed": false
},
{
"user_id": {
"_id": "5a0afa889f9f7e4064d8794a",
"__v": 0,
"name": "Fred",
"currentMove": 3
},
"_id": "5a0afa889f9f7e4064d87950",
"confirmed": true
},
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d8794f",
"confirmed": false
},
{
"user_id": null,
"_id": "5a0afa889f9f7e4064d8794e",
"confirmed": false
}
]
}
]
This then needs an Array.filter() to be processed again for "each" document returned, which can finally remove the unwanted array items and give you the same result the other aggregation queries are doing.
So it's "really wasteful" and just not a good way to do things. Little point in having a database, when you're actually doing the majority of processing on the server. In fact, we may have well simply returned the populated result and then run an Array.filter() in order to remove the unwanted entries.
This is just not how you write fast and effective code. So the example here is sometimes "what looks simple" is actually doing a lot more damage than good.

Get length of array within array mongodb

I'm trying to count the number of votes per question for the following schema.
[
{
"_id": "564b9e13583087872176dbd2",
"question": "fav NFL team",
"choices": [
{
"text": "St. Louis Rams",
"_id": "564b9e13583087872176dbd7",
"votes": [
{
"ip": "::ffff:192.168.15.130",
"_id": "564b9e30583087872176dbd8"
},
{
"ip": "::ffff:192.168.1.1",
"_id": "564bb355e4e1b7200da92668"
}
]
},
{
"text": "Oakland Raiders",
"_id": "564b9e13583087872176dbd6",
"votes": [
{
"ip": "::ffff:192.168.1.135",
"_id": "564bb273e4e1b7200da92667"
}
]
},
{
"text": "Denver Broncos",
"_id": "564b9e13583087872176dbd5",
"votes": []
},
{
"text": "Kansas City Chiefs",
"_id": "564b9e13583087872176dbd4",
"votes": [
{
"ip": "::ffff:192.168.1.100",
"_id": "564bab48e4e1b7200da92666"
}
]
},
{
"text": "Detroit Lions",
"_id": "564b9e13583087872176dbd3",
"votes": [
{
"ip": "::ffff:192.168.15.1",
"_id": "564b9f41583087872176dbd9"
}
]
}
]
}
]
I'm assuming I am going to have to use aggregate and sum.
I was able to get the count for the choices array, but I'm not sure how to go deeper.
db.polls.aggregate([{$unwind: '$choices'}, {$group:{_id:'$_id', 'sum':{$sum:1}}}])
The vote count for "fav NFL team" would be 5.
Also, for reference here is my mongoose code that generated the schema
var mongoose = require('mongoose');
var voteSchema = new mongoose.Schema({
ip: 'String'
});
var choiceSchema = new mongoose.Schema({
text: String,
votes: [voteSchema]
});
exports.PollSchema = new mongoose.Schema({
question: {
type: String,
required: true
},
choices: [choiceSchema]
});
I figured out how to do it in the mongo shell; I needed another unwind.
db.polls.aggregate([
{$unwind: '$choices'},
{$unwind:'$choices.votes'},
{$group:{
_id:'$_id',
'sum':{
$sum:1
}
}}
])
And here it is in mongoose
Poll.aggregate([
{$unwind: '$choices'},
{$unwind: '$choices.votes'},
{$group:{
_id: '$_id',
'sum': {
$sum:1
}
}}
], function(err, result) {
if (err) {
console.log(err);
}
res.json(result);
});
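For reference, the same total can also be produced without any $unwind by summing the sizes of the nested arrays. A sketch only; $sum over an array expression needs MongoDB 3.2 or later:
Poll.aggregate([
  { $project: {
    // Total votes = sum of the votes array length for each choice
    sum: {
      $sum: {
        $map: {
          input: '$choices',
          in: { $size: '$$this.votes' }
        }
      }
    }
  }}
], function(err, result) {
  if (err) {
    console.log(err);
  }
  res.json(result);
});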

Filter subdocument by datetime

I've the following model
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
author: { type: Schema.Types.ObjectId }
});
var conversationSchema = new Schema({
title: { type: String },
author: { type : Schema.Types.ObjectId },
members: [ { type: Schema.Types.ObjectId } ],
creationDate: { type: Date, default: Date.now },
lastUpdate: { type: Date, default: Date.now },
comments: [ messageSchema ]
});
I want to create two methods to get the comments generated after a date by user or by conversationId.
By User
I tried with the following method
var query = {
members : { $all : [ userId, otherUserId ], "$size" : 2 }
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
When there are no comments after the specified date (at from) the method returns [] or null
By conversationId
The same happens when I try to get by conversationId
var query = { _id : conversationId
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
Is there any way to make the method return the conversation information with an empty comments array?
Thank you!
Sounds like a couple of problems here, so stepping through them all:
In order to get more than a single match "or" none from an array you need the aggregation framework or mapReduce to do this. You could try "projecting" with $elemMatch but this can only return the "first" match. i.e:
{ "a": [1,2,3] }
db.collection.find({ },{ "a": { "$elemMatch": { "$gte": 2 } } })
{ "a": [2] }
So standard projection does not work for this. It can return an "empty" array, but it can also only return the "first" element that is matched.
Moving along, you also have this in your code:
{ $all : [ userId, otherUserId ], "$site" : 2 }
Where $site is not a valid operator. I think you mean $size, but there are actually "two" operators with that name and your intent may not be clear here.
If you mean that the array you are testing must have "only two" elements, then this is the operator for you. If you meant that the matched conversation between the two people had to be equal to both in the match, then $all does this anyway so the $size becomes redundant in either case unless you don't want anyone else in the conversation.
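To illustrate the difference, a small sketch with userId and otherUserId as in the question:
// "Both people are in the conversation" -- $all alone is enough
{ members: { $all: [ userId, otherUserId ] } }

// "Both people are in the conversation and nobody else" -- add $size
{ members: { $all: [ userId, otherUserId ], $size: 2 } }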
On to the aggregation problem. You need to "filter" the content of the array in a "non-destructive way" in order to get more than one match or an empty array.
The best approach for this is with modern MongoDB features available from 2.6, which allows the array content to be filtered without processing $unwind:
Model.aggregate(
[
{ "$match": {
"members": { "$all": [userId,otherUserId] }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
],
function(err,result) {
}
);
That uses $map which can process an expression against each array element. In this case the values are tested under the $cond ternary to either return the array element where the condition is true, or otherwise return false as the element.
These are then "filtered" by the $setDifference operator which essentially compares the resulting array of $map to the other array [false]. This removes any false values from the result array and only leaves matched elements or no elements at all.
An alternative may have been $redact, but since your document contains "creationDate" at multiple levels, this messes with the logic used by its $$DESCEND operator. That rules this option out.
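For the curious, the $redact stage that would otherwise apply is sketched below. It misbehaves here precisely because the condition is evaluated against the top-level creationDate as well as each embedded comment's creationDate, so the conversation itself can be pruned:
// NOT usable for this document shape -- shown only to explain why it is ruled out
{ "$redact": {
  "$cond": [
    { "$gte": [ "$creationDate", from ] },
    "$$DESCEND",
    "$$PRUNE"
  ]
}}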
In earlier versions "not destroying" the array needs to be treated with care. So you need to do much the same "filter" of results in order to get the "empty" array you want:
Model.aggregate(
[
{ "$match": {
"$and": [
{ "members": userId },
{ "members": otherUserId }
}},
{ "$unwind": "$comments" },
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": {
"$addToSet": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
"$comments",
false
]
}
},
"matchedSize": {
"$sum": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
1,
0
]
}
}
}},
{ "$unwind": "$comments" },
{ "$match": {
"$or": [
{ "comments": { "$ne": false } },
{ "matchedSize": 0 }
]
}},
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": { "$push": "$comments" }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$cond": [
{ "$eq": [ "$comments", [false] ] },
{ "$const": [] },
"$comments"
]
}
}}
],
function(err,result) {
}
)
This does much of the same things, but longer. In order to look at the array content you need to $unwind the content. When you $group back, you look at each element to see if it matches the condition to decide what to return, also keeping a count of the matches.
This is going to put some false results in the array ( just one, with $addToSet ), or only an array with the single entry false where there are no matches. So you filter these out with $match, while also testing the matched "count" to see if no matches were found. If no match was found then you don't throw away that item.
Instead you replace the [false] arrays with empty arrays in a final $project.
So depending on your MongoDB version this is either "fast/easy" or "slow/hard" to process. Compelling reasons to update a version already many years old.
Working example
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/aggtest');
var memberSchema = new Schema({
name: { type: String }
});
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
});
var conversationSchema = new Schema({
members: [ { type: Schema.Types.ObjectId } ],
comments: [messageSchema]
});
var Member = mongoose.model( 'Member', memberSchema );
var Conversation = mongoose.model( 'Conversation', conversationSchema );
async.waterfall(
[
// Clean
function(callback) {
async.each([Member,Conversation],function(model,callback) {
model.remove({},callback);
},
function(err) {
callback(err);
});
},
// add some people
function(callback) {
async.map(["bill","ted","fred"],function(name,callback) {
Member.create({ "name": name },callback);
},callback);
},
// Create a conversation
function(names,callback) {
var conv = new Conversation();
names.forEach(function(el) {
conv.members.push(el._id);
});
conv.save(function(err,conv) {
callback(err,conv,names)
});
},
// add some comments
function(conv,names,callback) {
async.eachSeries(names,function(name,callback) {
Conversation.update(
{ "_id": conv._id },
{ "$push": { "comments": { "comment": name.name } } },
callback
);
},function(err) {
callback(err,names);
});
},
function(names,callback) {
Conversation.findOne({},function(err,conv) {
callback(err,names,conv.comments[1].creationDate);
});
},
function(names,from,callback) {
var ids = names.map(function(el) {
return el._id
});
var pipeline = [
{ "$match": {
"$and": [
{ "members": ids[0] },
{ "members": ids[1] }
]
}},
{ "$project": {
"members": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
];
//console.log(JSON.stringify(pipeline, undefined, 2 ));
Conversation.aggregate(
pipeline,
function(err,result) {
if(err) throw err;
console.log(JSON.stringify(result, undefined, 2 ));
callback(err);
}
)
}
],
function(err) {
if (err) throw err;
process.exit();
}
);
Which produces this output:
[
{
"_id": "55a63133dcbf671918b51a93",
"comments": [
{
"comment": "ted",
"_id": "55a63133dcbf671918b51a95",
"creationDate": "2015-07-15T10:08:51.217Z"
},
{
"comment": "fred",
"_id": "55a63133dcbf671918b51a96",
"creationDate": "2015-07-15T10:08:51.220Z"
}
],
"members": [
"55a63133dcbf671918b51a90",
"55a63133dcbf671918b51a91",
"55a63133dcbf671918b51a92"
]
}
]
Note the "comments" only contain the last two entries which are "greater than or equal" to the date which was used as input ( being the date from the second comment ).