I have a model that has an array of dynamic references.
// Question schema: each "targets" entry stores a dynamic reference --
// "kind" holds a model name and refPath tells mongoose to resolve
// "item" against whichever model that entry's targets.kind names.
var postSchema = new Schema({
  name: String,
  targets: [{
    kind: String,
    // refPath is resolved per array entry from the sibling "kind" value
    item: { type: ObjectId, refPath: 'targets.kind' }
  }]
});
I am using the targets property to store references to multiple different models — users, threads, attachments, etc.
Is it possible to populate only the references that I want?
// Attempted populate: the intent is to populate only array entries of a
// given kind, but `match` is applied to the FOREIGN collection, not to
// the posts array -- so this cannot work as written.
Post.find({}).populate({
  // Does not work
  // match: { 'targets.kind': 'Thread' }, // I want to populate only the references that match. ex: Thread, User, Attachment
  path: 'targets.item',
  model: 'targets.kind',
  select: '_id title',
});
Thanks
The one big lesson here should be that mongoose.set('debug', true) is your new "best friend". This will show the actual queries issued to MongoDB from the code you are writing, and it's very important because when you actually "see it", then it clears up any misconceptions you likely have.
The Logic Problem
Let's demonstrate why exactly what you are attempting fails:
// Demonstration listing: reproduces the failing `match` usage with two
// separate collections (foos, bars) referenced dynamically via refPath.
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost:27017/polypop';

// Log every query mongoose actually issues to MongoDB.
mongoose.set('debug', true);
mongoose.Promise = global.Promise;

const postSchema = new Schema({
  name: String,
  targets: [{
    kind: String,
    // Dynamic reference: the model name is read from targets.kind per entry
    item: { type: Schema.Types.ObjectId, refPath: 'targets.kind' }
  }]
});

const fooSchema = new Schema({
  name: String
})

const barSchema = new Schema({
  number: Number
});

const Post = mongoose.model('Post', postSchema);
const Foo = mongoose.model('Foo', fooSchema);
const Bar = mongoose.model('Bar', barSchema);

// Pretty-print helper for demo output.
const log = data => console.log(JSON.stringify(data, undefined, 2));

(async function() {
  try {
    const conn = await mongoose.connect(uri, { useNewUrlParser: true });

    // Clean all data
    await Promise.all(
      Object.entries(conn.models).map(([k,m]) => m.deleteMany())
    );

    // Create some things
    let [foo, bar] = await Promise.all(
      [{ _t: 'Foo', name: 'Bill' }, { _t: 'Bar', number: 1 }]
        .map(({ _t, ...d }) => mongoose.model(_t).create(d))
    );
    log([foo, bar]);

    // Add a Post
    let post = await Post.create({
      name: 'My Post',
      targets: [{ kind: 'Foo', item: foo }, { kind: 'Bar', item: bar }]
    });
    log(post);

    let found = await Post.findOne();
    log(found);

    // The match condition is sent to the FOREIGN collections (foos and
    // bars), where no document has a 'targets.kind' field -- so nothing
    // matches and every populated item comes back null.
    let result = await Post.findOne()
      .populate({
        match: { 'targets.kind': 'Foo' }, // here is the problem!
        path: 'targets.item',
      });
    log(result);
  } catch(e) {
    console.error(e);
  } finally {
    mongoose.disconnect();
  }
})()
So the comment there shows that the match is the problem with the logic, so let's look at the debug output and see why:
Mongoose: posts.deleteMany({}, {})
Mongoose: foos.deleteMany({}, {})
Mongoose: bars.deleteMany({}, {})
Mongoose: foos.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a0"), name: 'Bill', __v: 0 })
Mongoose: bars.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a1"), number: 1, __v: 0 })
[
{
"_id": "5bdbc70996ed8e3295b384a0",
"name": "Bill",
"__v": 0
},
{
"_id": "5bdbc70996ed8e3295b384a1",
"number": 1,
"__v": 0
}
]
Mongoose: posts.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a2"), name: 'My Post', targets: [ { _id: ObjectId("5bdbc70996ed8e3295b384a4"), kind: 'Foo', item: ObjectId("5bdbc70996ed8e3295b384a0") }, { _id: ObjectId("5bdbc70996ed8e3295b384a3"), kind: 'Bar', item: ObjectId("5bdbc70996ed8e3295b384a1") } ], __v: 0 })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": {
"_id": "5bdbc70996ed8e3295b384a0",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": {
"_id": "5bdbc70996ed8e3295b384a1",
"number": 1,
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": "5bdbc70996ed8e3295b384a0"
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": "5bdbc70996ed8e3295b384a1"
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: bars.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a1") ] } }, { projection: {} })
Mongoose: foos.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a0") ] } }, { projection: {} })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": null
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": null
}
],
"__v": 0
}
That's the full output to show that everything else is actually working, and in fact without the match you would get the populated data back for the items. But take a close look at the two queries being issued to the foo and bar collections:
Mongoose: bars.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a1") ] } }, { projection: {} })
Mongoose: foos.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a0") ] } }, { projection: {} })
So the 'targets.kind' that you include under match is actually being searched for on the foo and bar collections, and not in the posts collection as you seem to be expecting. Along with the rest of the output this should be giving you an idea of how populate() actually works, in that nothing ever says to specifically just return the "array entries" which are of kind: 'Foo' as the example goes.
This process of "filtering the array" actually isn't "really" even a natural MongoDB query, and with the exception of the "first and singular match" you actually would typically use .aggregate() and the $filter operator. You can get "singular" via the positional $ operator but if you wanted "all foos" where there was more than one, then it needs the $filter instead.
So the real core issue here is populate() is actually the wrong place and wrong operation to "filter the array". Instead you really want to "smartly" return only the array entries you want before you go doing anything else to "populate" the items.
Structural Problem
Noting from the listing above which is an allegory for what is hinted at in the question, there are "multiple models" being referred to in order to "join" and obtain the overall result. Whilst this may seem logical in "RDBMS land", it's certainly not the case nor practical or efficient to do so with MongoDB and the general "ilk" of "document databases".
The key thing to remember here is that "documents" in a "collection" need not all have the same "table structure" as you would with an RDBMS. The structure can vary, and whilst it's probably advisable to not "vary wildly", it's certainly very valid to store "polymorphic objects" within a single collection. Afterall, you actually want to reference all of these things back to the same parent, so why would they need to be in different collections? Simply put, they don't need to be at all:
// Revised listing: one "Target" collection with discriminators
// (Foo/Bar/Baz) instead of a separate collection per referenced type.
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost:27017/polypop';

// Log every query mongoose actually issues to MongoDB.
mongoose.set('debug', true);
mongoose.Promise = global.Promise;

const postSchema = new Schema({
  name: String,
  targets: [{
    kind: String,
    // Static reference: every item points into the single Target collection
    item: { type: Schema.Types.ObjectId, ref: 'Target' }
  }]
});

// Empty base schema; each discriminator adds its own fields on top.
const targetSchema = new Schema({});

const fooSchema = new Schema({
  name: String
});

const barSchema = new Schema({
  number: Number
});

const bazSchema = new Schema({
  title: String
});

// Pretty-print helper for demo output.
const log = data => console.log(JSON.stringify(data, undefined, 2));

const Post = mongoose.model('Post', postSchema);
const Target = mongoose.model('Target', targetSchema);
// Foo/Bar/Baz are discriminators of Target: same physical collection,
// distinguished by the automatic __t key.
const Foo = Target.discriminator('Foo', fooSchema);
const Bar = Target.discriminator('Bar', barSchema);
const Baz = Target.discriminator('Baz', bazSchema);

(async function() {
  try {
    const conn = await mongoose.connect(uri,{ useNewUrlParser: true });

    // Clean data - bit hacky but just a demo
    await Promise.all(
      Object.entries(conn.models).map(([k, m]) => m.deleteMany() )
    );

    // Insert some things
    let [foo1, bar, baz, foo2] = await Promise.all(
      [
        { _t: 'Foo', name: 'Bill' },
        { _t: 'Bar', number: 1 },
        { _t: 'Baz', title: 'Title' },
        { _t: 'Foo', name: 'Ted' }
      ].map(({ _t, ...d }) => mongoose.model(_t).create(d))
    );
    log([foo1, bar, baz, foo2]);

    // Add a Post
    let post = await Post.create({
      name: 'My Post',
      targets: [
        { kind: 'Foo', item: foo1 },
        { kind: 'Bar', item: bar },
        { kind: 'Baz', item: baz },
        { kind: 'Foo', item: foo2 }
      ]
    });
    log(post);

    let found = await Post.findOne();
    log(found);

    // populate() matching on the discriminator key: only 'Foo' items
    // populate; non-matching entries come back with item: null.
    let result1 = await Post.findOne()
      .populate({
        path: 'targets.item',
        match: { __t: 'Foo' }
      });
    log(result1);

    // Server-side alternative: a single $lookup replaces the extra
    // populate() round trip, and $filter removes unwanted entries.
    let result2 = await Post.aggregate([
      // Only get documents with a matching entry
      { "$match": {
        "targets.kind": "Foo"
      }},
      // Optionally filter the array
      { "$addFields": {
        "targets": {
          "$filter": {
            "input": "$targets",
            "cond": {
              "$eq": [ "$$this.kind", "Foo" ]
            }
          }
        }
      }},
      // Lookup from single source
      { "$lookup": {
        "from": Target.collection.name,
        "localField": "targets.item",
        "foreignField": "_id",
        "as": "matches"
      }},
      // Marry up arrays
      { "$project": {
        "name": 1,
        "targets": {
          "$map": {
            "input": "$targets",
            "in": {
              "kind": "$$this.kind",
              "item": {
                "$arrayElemAt": [
                  "$matches",
                  { "$indexOfArray": [ "$matches._id", "$$this.item" ] }
                ]
              }
            }
          }
        }
      }}
    ]);
    log(result2);

    // MongoDB 3.6+ form: the $lookup sub-pipeline can inspect __t too.
    let result3 = await Post.aggregate([
      // Only get documents with a matching entry
      { "$match": {
        "targets.kind": "Foo"
      }},
      // Optionally filter the array
      { "$addFields": {
        "targets": {
          "$filter": {
            "input": "$targets",
            "cond": {
              "$eq": [ "$$this.kind", "Foo" ]
            }
          }
        }
      }},
      // Lookup from single source with overkill of type check
      { "$lookup": {
        "from": Target.collection.name,
        "let": { "targets": "$targets" },
        "pipeline": [
          { "$match": {
            "$expr": {
              "$in": [ "$_id", "$$targets.item" ]
            },
            "__t": "Foo"
          }}
        ],
        "as": "matches"
      }},
      // Marry up arrays
      { "$project": {
        "name": 1,
        "targets": {
          "$map": {
            "input": "$targets",
            "in": {
              "kind": "$$this.kind",
              "item": {
                "$arrayElemAt": [
                  "$matches",
                  { "$indexOfArray": [ "$matches._id", "$$this.item" ] }
                ]
              }
            }
          }
        }
      }}
    ]);
    console.log(result3);
  } catch(e) {
    console.error(e);
  } finally {
    mongoose.disconnect();
  }
})()
That's a bit longer and has a few more concepts to get around, but the basic principle is that instead of using "multiple collections" for the different types we're only going to use one. The "mongoose" method for this uses "discriminators" in the model setup which is all relevant to this part of the code:
// Discriminator setup: a single "Target" base model (one collection),
// with Foo/Bar/Baz registered as discriminators of it rather than as
// independent models with their own collections.
const Post = mongoose.model('Post', postSchema);
const Target = mongoose.model('Target', targetSchema);
const Foo = Target.discriminator('Foo', fooSchema);
const Bar = Target.discriminator('Bar', barSchema);
const Baz = Target.discriminator('Baz', bazSchema);
Which is really simply calling .discriminator() from a "base model" for the "singular" collection rather than calling mongoose.model(). The really good thing about this is as far as the rest of your code is concerned, Baz and Bar etc are all just treated like a "model" transparently, but they are actually doing something really cool underneath.
So all of these "related things" ( they really are even if you don't think so yet ) are all actually kept in the same collection, but operations using the individual models take into account an "automatic" kind key. This is __t by default, but you can actually specify whatever you want in options.
The fact that these are all actually in the same collection though is really important, since you can basically readily query the same collection for the different types of data. Put simply:
Foo.find({})
Would actually call
targets.find({ __t: 'Foo' })
And does this automatically. But more importantly
Target.find({ __t: { "$in": [ 'Foo', 'Baz' ] } })
Would be returning all the expected results from a "single collection" with a "single request".
So taking a look at the revised populate() under this structure:
// With a single collection, match can use the discriminator key (__t)
// directly; entries of other kinds populate as null rather than being
// removed from the array.
let result1 = await Post.findOne()
  .populate({
    path: 'targets.item',
    match: { __t: 'Foo' }
  });
log(result1);
This shows instead in the logs:
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: targets.find({ __t: 'Foo', _id: { '$in': [ ObjectId("5bdbe2895b1b843fba050569"), ObjectId("5bdbe2895b1b843fba05056a"), ObjectId("5bdbe2895b1b843fba05056b"), ObjectId("5bdbe2895b1b843fba05056c") ] } }, { projection: {} })
Note how even though all "four" of the related ObjectId values are sent with the request, the additional constraint of __t: 'Foo' also constrains which documents are actually returned and married up. The result then becomes self evident as only the 'Foo' entries populated. But also note the "catch":
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba050569",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba05056c",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Filtering after Populate
This is actually a longer topic and more fully answered elsewhere, but the basics here as shown in the output above is that populate() really still does absolutely nothing about actually "filtering" the results in the array to only the desired matches.
The other thing is that populate() really isn't that great an idea from a "performance" perspective, since what is really happening is "another query" ( in our second form we optimized to just one more ) or possibly "many queries" depending on your structure are actually being issued to the database and the results are being reconstructed together on the client.
Overall, you end up returning a lot more data than you actually need and at best you are relying on manual client side filtering in order to discard those unwanted results. So the "ideal" position is to have the "server" do that sort of thing instead, and only return the data you actually need.
The populate() method was added as a "convenience" to the mongoose API a "very" long time ago. Since then MongoDB has moved on and now sports $lookup as a "native" way for performing a "join" on the server with a single request.
There are different ways to do this but just touching on "two" closely related to the existing populate() functionality but with improvements:
// Aggregation alternative to populate(): match, pre-filter the array,
// then perform the "join" server-side with a single $lookup.
let result2 = await Post.aggregate([
  // Only get documents with a matching entry
  { "$match": {
    "targets.kind": "Foo"
  }},
  // Optionally filter the array down to just the 'Foo' entries
  { "$addFields": {
    "targets": {
      "$filter": {
        "input": "$targets",
        "cond": {
          "$eq": [ "$$this.kind", "Foo" ]
        }
      }
    }
  }},
  // Lookup from single source
  { "$lookup": {
    "from": Target.collection.name,
    "localField": "targets.item",
    "foreignField": "_id",
    "as": "matches"
  }},
  // Marry up arrays: swap each item id for its looked-up document
  { "$project": {
    "name": 1,
    "targets": {
      "$map": {
        "input": "$targets",
        "in": {
          "kind": "$$this.kind",
          "item": {
            "$arrayElemAt": [
              "$matches",
              { "$indexOfArray": [ "$matches._id", "$$this.item" ] }
            ]
          }
        }
      }
    }
  }}
]);
log(result2);
The two basic "optimizations" there are using $filter in order to "pre-discard" items from the array which do not actually match the type we want. This can be totally optional as covered with a bit more detail later, but where possible then it's probably a good thing to do since we won't even be looking for matching _id values in the foreign collection for anything but 'Foo' things.
The other of course is the $lookup itself, which means instead of a separate round trip to the server we actually just make one and the "join" is done before any response is returned. Here we just look for the matching _id values in the foreign collection to the target.items array entry values. We already filtered those for 'Foo', so that is all that gets returned:
{
"_id": "5bdbe6aa2c4a2240c16802e2",
"name": "My Post",
"targets": [
{
"kind": "Foo",
"item": {
"_id": "5bdbe6aa2c4a2240c16802de",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"kind": "Foo",
"item": {
"_id": "5bdbe6aa2c4a2240c16802e1",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
]
}
For a "slight" variation on that we can actually even inspect the __t value within the $lookup expression using "sub-pipeline" processing with MongoDB 3.6 and greater. The main use case here would be if you choose to remove the kind from the parent Post altogether and simply rely on the "kind" information inherent to discriminator references used in storage:
// MongoDB 3.6+ variant: the $lookup "pipeline" form can also check the
// discriminator key (__t) itself inside the sub-pipeline.
let result3 = await Post.aggregate([
  // Only get documents with a matching entry
  { "$match": {
    "targets.kind": "Foo"
  }},
  // Optionally filter the array
  { "$addFields": {
    "targets": {
      "$filter": {
        "input": "$targets",
        "cond": {
          "$eq": [ "$$this.kind", "Foo" ]
        }
      }
    }
  }},
  // Lookup from single source with overkill of type check
  { "$lookup": {
    "from": Target.collection.name,
    "let": { "targets": "$targets" },
    "pipeline": [
      { "$match": {
        "$expr": {
          "$in": [ "$_id", "$$targets.item" ]
        },
        "__t": "Foo"
      }}
    ],
    "as": "matches"
  }},
  // Marry up arrays
  { "$project": {
    "name": 1,
    "targets": {
      "$map": {
        "input": "$targets",
        "in": {
          "kind": "$$this.kind",
          "item": {
            "$arrayElemAt": [
              "$matches",
              { "$indexOfArray": [ "$matches._id", "$$this.item" ] }
            ]
          }
        }
      }
    }
  }}
]);
log(result3);
This has the same "filtered" results and is similarly a "single request" and "single response".
The whole topic gets a bit wider, and even though aggregation pipelines may appear considerably more unwieldy than a simple populate() call, it's fairly trivial to write a wrapper which can abstract from your models and pretty much generate most of the data structure code required. You can see an overview of this in action at "Querying after populate in Mongoose", which in essence is the same question you are basically asking here once we sort out the initial issue of "multiple collection joins" and why you really don't need them.
The one caveat here is that $lookup actually has no way possible to "dynamically" determine which collection to "join" to. You need to include that information statically just as is done here, so this is another reason to actually favor "discriminators" over using multiple collections. It's not only "better performance", but it's actually the only way the most performant options will actually support what you are trying to do.
For reference, the "complete" (truncated due to max post length) output of the second listing would be:
Mongoose: posts.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba050569"), __t: 'Foo', name: 'Bill', __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056a"), __t: 'Bar', number: 1, __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056b"), __t: 'Baz', title: 'Title', __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056c"), __t: 'Foo', name: 'Ted', __v: 0 })
[
{
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056a",
"__t": "Bar",
"number": 1,
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056b",
"__t": "Baz",
"title": "Title",
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
]
Mongoose: posts.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056d"), name: 'My Post', targets: [ { _id: ObjectId("5bdbe2895b1b843fba050571"), kind: 'Foo', item: ObjectId("5bdbe2895b1b843fba050569") }, { _id: ObjectId("5bdbe2895b1b843fba050570"), kind: 'Bar', item: ObjectId("5bdbe2895b1b843fba05056a") }, { _id: ObjectId("5bdbe2895b1b843fba05056f"), kind: 'Baz', item: ObjectId("5bdbe2895b1b843fba05056b") }, { _id: ObjectId("5bdbe2895b1b843fba05056e"), kind: 'Foo', item: ObjectId("5bdbe2895b1b843fba05056c") } ], __v: 0 })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": {
"_id": "5bdbe2895b1b843fba05056a",
"__t": "Bar",
"number": 1,
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": {
"_id": "5bdbe2895b1b843fba05056b",
"__t": "Baz",
"title": "Title",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": "5bdbe2895b1b843fba050569"
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": "5bdbe2895b1b843fba05056a"
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": "5bdbe2895b1b843fba05056b"
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": "5bdbe2895b1b843fba05056c"
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: targets.find({ __t: 'Foo', _id: { '$in': [ ObjectId("5bdbe2895b1b843fba050569"), ObjectId("5bdbe2895b1b843fba05056a"), ObjectId("5bdbe2895b1b843fba05056b"), ObjectId("5bdbe2895b1b843fba05056c") ] } }, { projection: {} })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba050569",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba05056c",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.aggregate([ { '$match': { 'targets.kind': 'Foo' } }, { '$addFields': { targets: { '$filter': { input: '$targets', cond: { '$eq': [ '$$this.kind', 'Foo' ] } } } } }, { '$lookup': { from: 'targets', localField: 'targets.item', foreignField: '_id', as: 'matches' } }, { '$project': { name: 1, targets: { '$map': { input: '$targets', in: { kind: '$$this.kind', item: { '$arrayElemAt': [ '$matches', { '$indexOfArray': [ '$matches._id', '$$this.item' ] } ] } } } } } } ], {})
[
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
]
}
]
Mongoose: posts.aggregate([ { '$match': { 'targets.kind': 'Foo' } }, { '$addFields': { targets: { '$filter': { input: '$targets', cond: { '$eq': [ '$$this.kind', 'Foo' ] } } } } }, { '$lookup': { from: 'targets', let: { targets: '$targets' }, pipeline: [ { '$match': { '$expr': { '$in': [ '$_id', '$$targets.item' ] }, __t: 'Foo' } } ], as: 'matches' } }, { '$project': { name: 1, targets: { '$map': { input: '$targets', in: { kind: '$$this.kind', item: { '$arrayElemAt': [ '$matches', { '$indexOfArray': [ '$matches._id', '$$this.item' ] } ] } } } } } } ], {})
I try to find the best way to store different type entities.
I have a generic entity Person and two type of person PersonType1 and PersonType2 that inheritance from Person entity its field.
I need to store this people data to mongo db.
is it better to create a single collection or a collection to draw each type ?
What you seem to be basically talking about here is the general persistence of Polymorphism where classes vary from a base class in differing properties.
This is generally handled by a discriminator pattern in common implementations of data persistence, and is typically therefore within the same "collection", especially in a document oriented database that handles the differing properties quite well.
So generally speaking, under MongoDB the concept of storing in a single collection for these "inherited" objects is basically a good idea. There are advantages to having things that share common properties or other related data in the same collection, as the following demonstration shows.
Not an endorsement, but a simple example can be demonstrated through the .discriminator() constructor available to the mongoose library. Therefore as an example of benefits it is suited ( for me ) for a quick demonstration:
// Demonstration of the mongoose discriminator pattern: Person is the
// base model; Student and Teacher are stored in the SAME physical
// collection ("people"), distinguished by the automatic __t key.
//
// Fixes applied to the original listing:
//  - `if (err) callback(err);` without `return` invoked the step
//    callback twice on error and still logged an undefined result.
//  - `if (err) throw err;` inside async callbacks bypassed the series'
//    final handler, so mongoose.disconnect() never ran on error.
var util = require('util'),
    async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/school');

// Util function for base type: forwards any schema definition to the
// Schema constructor, then adds the fields every "person" shares.
function AbstractPersonSchema() {
  Schema.apply(this,arguments);
  this.add({
    name: String,
    age: Number,
    sex: { type: String, enum: [ 'Male', 'Female' ] }
  });
}
util.inherits( AbstractPersonSchema, Schema );

// Schema definitions
var personSchema = new AbstractPersonSchema();
var studentSchema = new AbstractPersonSchema({
  courses: [String]
});
var teacherSchema = new AbstractPersonSchema({
  department: String,
  teaches: [String]
});

// Model assignments: Student and Teacher are discriminators of Person,
// so all three read and write the one "people" collection.
var Person = mongoose.model( 'Person', personSchema ),
    Student = Person.discriminator( 'Student', studentSchema ),
    Teacher = Person.discriminator( 'Teacher', teacherSchema );

// Sample data for each type.
var normalPeople = [
  { "name": "Bill", "age": 48, "sex": "Male" },
  { "name": "Sarah", "age": 24, "sex": "Female" }
];

var students = [
  {
    "name": "Ted",
    "age": 21,
    "sex": "Male",
    "courses": ["Math","Science","English"]
  },
  {
    "name": "Julie",
    "age": 22,
    "sex": "Female",
    "courses": ["Art","History","English"]
  }
];

var teachers = [
  {
    "name": "Harry",
    "age": 35,
    "sex": "Male",
    "department": "Maths",
    "teaches": ["Math","Science","English"]
  },
  {
    "name": "Sally",
    "age": 32,
    "sex": "Female",
    "department": "History",
    "teaches": ["English","History"]
  }
];

async.series(
  [
    // Example cleanup: removing via the base model clears every type.
    function(callback) {
      Person.remove({},callback);
    },

    // Insert all sample data; each model stamps its own __t value.
    function(callback) {
      async.parallel(
        [
          function(callback) {
            async.each(normalPeople,function(person,callback) {
              Person.create(person,callback);
            },callback);
          },
          function(callback) {
            async.each(students,function(student,callback) {
              Student.create(student,callback);
            },callback);
          },
          function(callback) {
            async.each(teachers,function(teacher,callback) {
              Teacher.create(teacher,callback);
            },callback);
          }
        ],
        callback
      );
    },

    // Aggregate via the Teacher model: __t: 'Teacher' is applied
    // automatically, so only teacher documents are considered.
    function(callback) {
      console.log("Teachers per subject");
      Teacher.aggregate(
        [
          { "$unwind": "$teaches" },
          { "$group": {
            "_id": "$teaches",
            "count": { "$sum": 1 }
          }}
        ],
        function(err,result) {
          if (err) return callback(err); // return prevents double callback
          console.log(result);
          callback();
        }
      );
    },

    // Aggregate via the base model with an explicit __t check to mix
    // both Student and Teacher documents in one query.
    function(callback) {
      console.log("Students and teachers in subject");
      Person.aggregate(
        [
          { "$match": { "__t": { "$in": ["Teacher","Student"] } } },
          { "$project": {
            "name": 1,
            "__t": 1,
            // Normalize: teachers list "teaches", students "courses"
            "subject": {
              "$cond": [
                { "$eq": [ "$__t", "Teacher" ] },
                "$teaches",
                "$courses"
              ]
            }
          }},
          { "$unwind": "$subject" },
          { "$group": {
            "_id": "$subject",
            "teachers": {
              "$addToSet": {
                "$cond": [
                  { "$eq": [ "$__t", "Teacher" ] },
                  "$name",
                  false
                ]
              }
            },
            "students": {
              "$addToSet": {
                "$cond": [
                  { "$eq": [ "$__t", "Student" ] },
                  "$name",
                  false
                ]
              }
            }
          }},
          // Strip the `false` placeholders injected by $cond above
          { "$project": {
            "teachers": { "$setDifference": [ "$teachers", [false] ] },
            "students": { "$setDifference": [ "$students", [false] ] }
          }}
        ],
        function(err,results) {
          if (err) return callback(err); // return prevents double callback
          console.log(results);
          callback();
        }
      );
    },

    // Aggregate only students via the Student model.
    function(callback) {
      console.log("Average age of students");
      Student.aggregate(
        [
          { "$group": {
            "_id": null,
            "average_age": { "$avg": "$age" }
          }}
        ],
        function(err,results) {
          // Propagate through the series instead of throwing in a callback
          if (err) return callback(err);
          console.log(results);
          callback();
        }
      )
    },

    // Plain people have no __t at all, so match on its absence.
    function(callback) {
      console.log("Average age of normal people");
      Person.aggregate(
        [
          { "$match": { "__t": { "$exists": false } } },
          { "$group": {
            "_id": null,
            "average_age": { "$avg": "$age" }
          }}
        ],
        function(err,results) {
          // Propagate through the series instead of throwing in a callback
          if (err) return callback(err);
          console.log(results);
          callback();
        }
      );
    }
  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);
Which produces the output:
Teachers per subject
[ { _id: 'History', count: 1 },
{ _id: 'English', count: 2 },
{ _id: 'Science', count: 1 },
{ _id: 'Math', count: 1 } ]
Students and teachers in subject
[ { _id: 'History', teachers: [ 'Sally' ], students: [ 'Julie' ] },
{ _id: 'Art', teachers: [], students: [ 'Julie' ] },
{ _id: 'English',
teachers: [ 'Sally', 'Harry' ],
students: [ 'Julie', 'Ted' ] },
{ _id: 'Science', teachers: [ 'Harry' ], students: [ 'Ted' ] },
{ _id: 'Math', teachers: [ 'Harry' ], students: [ 'Ted' ] } ]
Average age of students
[ { _id: null, average_age: 21.5 } ]
Average age of normal people
[ { _id: null, average_age: 36 } ]
The particular things to notice there with usage of the discriminator pattern are that there are several "Model" definitions there for 'Person', 'Student' and 'Teacher'. Each one of these is instantiated from its own properties, inheriting from the base 'Person' definition.
Upon construction and storage, the approach taken here is that all items are essentially stored in the same physical collection ( "people" in the plural ), yet all have differing properties and there is a definition there in the coding that assigns the .discriminator() method based on the standard collection.
What this does is essentially add a field to each inherited "class" which represents the "model" type here:
{ "name": "Ted", "__t": "Student" }
The task now falls to the library implementation to read that "discriminator" value and correctly assign the defined "class/model" upon reading the information from the database. With that data in place, it is possible for libraries to correctly cast the data as a typed object to the intended class.
Implementations can also benefit from what the example also shows here. Notice the usage of differing queries using each of the 'Person', 'Student' and 'Teacher' models here. Wherever the 'Teacher' or 'Student' model is called, the library can "automagically" filter out results that do not match its discriminator type. This is useful for query operations to be coded specifically on the class/model type of the objects they wish to operate on, thus obviating the need to obscure code with noisy type checks where not required.
On the other hand, the base class/model can still be used with specific checks coded on the type discriminator value. This allows you to generally inspect all inherited types, or a sub-set of information as is required in a broader sense than dealing with a singular inheritance path.
So where such correlation is desired, then it absolutely makes sense to store in the same collection and utilize a discriminator pattern. Only if you never intend to do such analysis at all does it then make any sense to separate the storage into separate collections.
Remember that this is MongoDB and not a relational database. If you want to use the data "all together" then you design to keep it "all together" because you cannot perform joins, and you need to design around that concept.