Unable to populate and structure document with aggregate - mongodb

So I have a schema which has foreign keys to other collections in the database. The collection has around 60k posts, each post can have multiple categories, and there are around 200 categories. I'm trying to group the posts by the category's foreign key, populate the category details (name and slug), and count the posts in each category.
Here's how the main schema and category schema looks like:
const postSchema = new mongoose.Schema( {
post: {
type: mongoose.Schema.Types.ObjectId,
ref: 'PostDetails'
},
categories: [ {
category: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Category'
},
subCategories: [ {
subCategory: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Subcategory'
}
} ]
} ]
} );
const categorySchema = new mongoose.Schema( {
category: {
type: String,
},
categorySlug: {
type: String,
}
} );
I was successful in making the count but the returned data is not what I expected. The returned data shows the id of the categories and the count but no name and slug. Here's how it looks like:
[
{
"_id": [
"617acfd232c766589c23a90c"
],
"count": 876,
"category": []
}
]
I got the above output with the following query:
const aggregateStages = [
{
$group: {
_id: '$categories.category',
count: { $sum: 1 }
}
},
{
$lookup: {
from: "Category",
localField: "categories.category",
foreignField: "_id",
as: "category"
}
}
];
const categories = await Post.aggregate( aggregateStages ).exec();
I'm hoping to get the data as follows:
[
{
"_id": "617acfd232c766589c23a90c",
"count": 876,
"category": 'SomeCategory',
"categorySlug": 'some-category'
}
]
Where am I going wrong and how can I fix it?
SAMPLE DATA FROM DB AS REQUESTED BY MATT OESTREICH
POST DATA
{
"_id": "617adad39054bae2c983c34f",
"post": "617ad1c80597c78ed4cc151e",
"author": "617acc689b309fdbbbdfdfe0",
"categories": [{
"category": "617acfd232c766589c23a8d1",
"subCategories":[]
}]
}
CATEGORY DATA
{
"_id": "617acfd232c766589c23a8d1",
"category": "Lorem Ipsum",
"categorySlug": "lorem-ipsum"
}

Ok so, it looks like you can resolve this by using the $size operator. The $size operator will give you the length (or count) of elements in an array.
Live demo here
Database
db={
"post": [
{
"_id": ObjectId("617adad39054bae2c983c34f"),
"post": ObjectId("617ad1c80597c78ed4cc151e"),
"author": ObjectId("617acc689b309fdbbbdfdfe0"),
"categories": [
{
"category": ObjectId("617acfd232c766589c23a8d1"),
"subCategories": []
}
]
}
],
"categories": [
{
"_id": ObjectId("617acfd232c766589c23a8d1"),
"category": "Lorem Ipsum",
"categorySlug": "lorem-ipsum"
}
]
}
Query
// Join every post to the category documents it references and emit one
// summary row per post.
db.post.aggregate([
  {
    // "categories.category" addresses the category ids held inside the post's
    // `categories` array; every category document whose _id matches one of
    // them is collected into `found_categories`.
    $lookup: {
      from: "categories",
      localField: "categories.category",
      foreignField: "_id",
      as: "found_categories"
    }
  },
  {
    $project: {
      _id: "$_id",
      // Number of category documents matched for THIS post. It is not the
      // number of posts per category, because nothing is grouped here.
      count: { $size: "$found_categories" },
      // Only the first matched category is reported; any further matches
      // are dropped from the output.
      category: { $first: "$found_categories.category" },
      categorySlug: { $first: "$found_categories.categorySlug" }
    }
  }
])
Result
[
{
"_id": ObjectId("617adad39054bae2c983c34f"),
"category": "Lorem Ipsum",
"categorySlug": "lorem-ipsum",
"count": 1
}
]
Although, I do not believe this will give you what you are looking for if more than one category is found. Please let me know if it doesn't work and I will try to help fix it.

Related

How to use current field in second $match?

Let's say i have 2 collections
// Post collection:
[
{
"_id": "somepost1",
"author": "firstuser",
"title": "First post"
},
{
"_id": "somepost2",
"author": "firstuser",
"title": "Second post"
},
{
"_id": "somepost3",
"author": "firstuser",
"title": "Third post"
}
]
// User collection:
[
{
"_id": "firstuser",
"nickname": "John",
"posts": {
"voted": []
}
},
{
"_id": "seconduser",
"nickname": "Bob",
"posts": {
"voted": [
{
"_id": "somepost1",
"vote": "1"
},
{
"_id": "somepost3",
"vote": "-1"
}
]
}
}
]
And i need to get this result:
[
{
"_id": "somepost1",
"author": {
"_id": "firstuser",
"nickname": "John"
},
"title": "First post",
"myvote": "1"
},
{
"_id": "somepost2",
"author": {
"_id": "firstuser",
"nickname": "John"
},
"title": "Second post",
"myvote": "0"
},
{
"_id": "somepost3",
"author": {
"_id": "firstuser",
"nickname": "John"
},
"title": "Third post",
"myvote": "-1"
}
]
How can I write an aggregation that produces this output, using the dynamic _id of each post?
I have a problem using the current post's _id in the second $match, and with setting "myvote" to 0 if there is no element in "posts.voted" associated with the current post.
Here what i've tried: https://mongoplayground.net/p/v70ZUioVSpQ
db.post.aggregate([
{
$match: {
author: "firstuser"
}
},
{
$lookup: {
from: "user",
localField: "author",
foreignField: "_id",
as: "author"
}
},
{
$addFields: {
author: {
$arrayElemAt: [
"$author",
0
]
}
}
},
{
$lookup: {
from: "user",
localField: "_id",
foreignField: "posts.voted._id",
as: "Results"
}
},
{
$unwind: "$Results"
},
{
$unwind: "$Results.posts.voted"
},
{
$match: {
"Results.posts.voted._id": "ID OF CURRENT POST"
}
},
{
$project: {
_id: 1,
author: {
_id: 1,
nickname: 1
},
title: 1,
myvote: "$Results.posts.voted.vote"
}
}
])
From the $match docs:
The query syntax is identical to the read operation query syntax
The query syntax does not allow the use of document values, which is what you're trying to do.
What we can do is use $expr within the $match stage; this allows us to use aggregation operators, which in turn gives access to the document's values, like so:
{
$match: {
$expr: {
$eq: ['$Results.posts.voted._id', '$_id'],
}
},
},

How can I optimize the aggregate query I have written to find users and their last order details? It times out because the dataset is large

I have the query below. It links two collections and finds each user's last order date along with user details such as email, phone, etc. On a large data set, however, it gives me a timeout error. Any help would be much appreciated, and thanks in advance.
db.users.aggregate([
{
"$lookup": {
"from": "orders",
"let": {
"id": "$_id"
},
"pipeline": [
{
"$addFields": {
"owner": {
"$toObjectId": "$owner"
}
}
},
{
"$match": {
$expr: {
$eq: [
"$owner",
"$$id"
]
}
}
},
],
"as": "orders"
}
},
{
"$unwind": {
path: "$orders",
preserveNullAndEmptyArrays: false,
includeArrayIndex: "arrayIndex"
}
},
{
"$group": {
"_id": "$_id",
"order": {
"$last": "$orders.createdAt"
},
"userInfo": {
"$mergeObjects": {
name: "$name",
email: "$email",
phone: "$phone",
orderCount: "$orderCount"
}
}
}
},
{
"$project": {
name: "$userInfo.name",
email: "$userInfo.email",
phone: "$userInfo.phone",
orderCount: "$userInfo.orderCount",
lastOrder: "$order",
}
}
]
)
my documents look like the following for orders
{
"_id": ObjectId("607fbeeb0a752a66a7af40eb"),
"address": {
"loc": [
-1,
3
],
"_id": "5d35d55d3d081f486d0d401c",
"apartment": "",
"description": "ACcdg dfef"
},
"approvedAt": ISODate("2021-04-21T11:28:05.295+05:30"),
"assignedAt": null,
"billingAddress": {
"description": ""
},
"createdAt": ISODate("2021-04-21T11:28:04.449+05:30"),
"creditCard": "",
"deliveryDate": "04/21/21",
"deliveryDateObj": ISODate("2021-04-21T12:27:58.746+05:30"),
"owner": "609bd5831b912947ea51a9ac",
"products": [
"5a070c079b"
],
"updatedAt": ISODate("2021-04-21T11:28:05.295+05:30"),
}
and for users, it is like below
{
"_id": ObjectId("609bd5831b912947ea51a9ac"),
"updatedAt": ISODate("2021-05-12T18:47:55.291+05:30"),
"createdAt": ISODate("2021-05-12T18:47:55.213+05:30"),
"email": "1012#gmail.com",
"phone": "123",
"dob": "1996-04-10",
"password": "",
"stripeID": "",
"__t": "Customer",
"name": {
"first": "A",
"last": "b"
},
"orderCount": 1,
"__v": 0,
"forgottenPassword": ""
}
Convert _id to a string in the $lookup's let, so that you can remove the $addFields stage from the lookup pipeline.
Add a $project stage to the lookup pipeline so that it returns only the required fields.
Use $project to show the required fields and $max to get the last (maximum) createdAt date; you don't need the $unwind and $group operations.
// For every user, pull the creation dates of that user's orders and report
// the order count together with the most recent order date.
db.users.aggregate([
  {
    $lookup: {
      from: "orders",
      // orders.owner is stored as a string, so the user's ObjectId is
      // converted once here instead of converting `owner` on every order.
      let: { id: { $toString: "$_id" } },
      pipeline: [
        { $match: { $expr: { $eq: ["$owner", "$$id"] } } },
        // Keep only the field the outer stage needs.
        { $project: { _id: 0, createdAt: 1 } }
      ],
      as: "orders"
    }
  },
  {
    $project: {
      email: 1,
      name: 1,
      // Counted from the joined orders rather than read from the user document.
      orderCount: { $size: "$orders" },
      phone: 1,
      // Latest createdAt among the joined orders.
      lastOrder: { $max: "$orders.createdAt" }
    }
  }
])
Playground
SUGGESTION:
You can save the owner id in orders as an ObjectId instead of a string, and store it as an ObjectId whenever a new order arrives; that way you can avoid the $toString conversion operation.
Create an index on the owner field to make the lookup faster.
I have figured out that creating an index on the owner field, which is the field in orders that is compared against the user's _id, makes the difference: just after running db.orders.createIndex({ owner: 1 }), the query runs much faster and more smoothly.

Querying a referenced document in MongoDB using Mongoose

This is a document in the collection BlogPosts:
{
_id: ObjectId("..."),
post_title: "Hello World!",
post_body: "",
comments: [
{ user_id: ObjectId("123"), body: "nice post!" },
{ user_id: ObjectId("456"), body: "awesome!" },
]
}
I would like to display comments with the user's first name, which is found in the referenced document in the Users collection:
{
_id: ObjectId("123"),
first_name: "Marion",
last_name: "Smith",
email_address: "marion#example.com",
password: "..."
}
Is there a way to retrieve the BlogPosts document while including first_name from this referenced data?
For example, I'm looking for an output like this (each comment has a first name):
{
_id: ObjectId("..."),
post_title: "Hello World!",
post_body: "",
comments: [
{ user_id: ObjectId("..."), first_name: "Marion", body: "nice post!" },
{ user_id: ObjectId("..."), first_name: "Margaret", body: "awesome!" },
]
}
I'm using Mongoose.
You can use below aggregation
// Attach each commenter's first name to their comment, then rebuild the post.
db.collection.aggregate([
  // One document per comment so each can be joined individually.
  { $unwind: "$comments" },
  {
    // Find the user whose _id equals this comment's user_id.
    $lookup: {
      from: "users",
      let: { userId: "$comments.user_id" },
      pipeline: [
        { $match: { $expr: { $eq: ["$$userId", "$_id"] } } }
      ],
      as: "user"
    }
  },
  {
    // Copy the first name of the first joined user onto the comment.
    $addFields: {
      "comments.first_name": { $arrayElemAt: ["$user.first_name", 0] }
    }
  },
  {
    // Reassemble the comments array; only the fields listed here survive.
    $group: {
      _id: "$_id",
      comments: { $push: "$comments" },
      post_title: { $first: "$post_title" },
      post_body: { $first: "$post_body" }
    }
  }
])
I've since found a more straightforward approach, using just Populate.
BlogPosts
.findOne({_id: req.params.id})
.populate('comments.user_id', ['first_name', 'last_name'])
.then(post => console.log(post))
In the schema for BlogPosts, a ref should be defined for the comments.user_id field:
// NOTE(review): presumably requiring this file is what registers the 'User'
// model that the `ref` below points at — confirm against User.model.js.
const User = require('./User.model.js');

// Blog post schema whose comments reference their author by ObjectId.
const blogPostSchema = new Schema({
  post_title: { type: String },
  post_body: { type: String },
  comments: [{
    user_id: {
      type: Schema.ObjectId,
      // This `ref` is the important part: it names the model that
      // populate('comments.user_id') loads the referenced documents from.
      ref: 'User'
    }
  }]
});

const BlogPost = mongoose.model('BlogPost', blogPostSchema);
module.exports = BlogPost;

Mongoose populate single item in array

I have a model that has an array of dynamic references.
var postSchema = new Schema({
name: String,
targets: [{
kind: String,
item: { type: ObjectId, refPath: 'targets.kind' }
}]
});
I am using the targets property to store references to multiple different models, users, thread, attachments, etc.
Is it possible to populate only the references that I want?
Post.find({}).populate({
// Does not work
// match: { 'targets.kind': 'Thread' }, // I want to populate only the references that match. ex: Thread, User, Attachment
path: 'targets.item',
model: 'targets.kind',
select: '_id title',
});
Thanks
The one big lesson here should be that mongoose.set('debug', true) is your new "best friend". This will show the actual queries issued to MongoDB from the code you are writing, and it's very important because when you actually "see it", then it clears up any misconceptions you likely have.
The Logic Problem
Let's demonstrate why exactly what you are attempting fails:
// Self-contained reproduction of the question: a Post whose `targets` array
// uses `refPath` to reference documents in several different collections,
// populated with a `match` on 'targets.kind'. The `match` is deliberately
// left wrong here — the debug output of this script is what shows why.

// Bind `mongoose` explicitly; chaining the assignment inside the destructuring
// would create an implicit global (and throws in strict mode / ES modules).
const mongoose = require('mongoose');
const { Schema } = mongoose;

const uri = 'mongodb://localhost:27017/polypop';

// Print every query Mongoose sends to MongoDB.
mongoose.set('debug', true);
mongoose.Promise = global.Promise;

const postSchema = new Schema({
  name: String,
  targets: [{
    kind: String,
    // The model to populate from is read per element from `targets.kind`.
    item: { type: Schema.Types.ObjectId, refPath: 'targets.kind' }
  }]
});

const fooSchema = new Schema({
  name: String
});

const barSchema = new Schema({
  number: Number
});

const Post = mongoose.model('Post', postSchema);
const Foo = mongoose.model('Foo', fooSchema);
const Bar = mongoose.model('Bar', barSchema);

// Pretty-print any value as indented JSON.
const log = data => console.log(JSON.stringify(data, undefined, 2));

(async function() {
  try {
    const conn = await mongoose.connect(uri, { useNewUrlParser: true });

    // Clean all data
    await Promise.all(
      Object.values(conn.models).map(m => m.deleteMany())
    );

    // Create some things; `_t` only selects the model and is not stored.
    let [foo, bar] = await Promise.all(
      [{ _t: 'Foo', name: 'Bill' }, { _t: 'Bar', number: 1 }]
        .map(({ _t, ...d }) => mongoose.model(_t).create(d))
    );
    log([foo, bar]);

    // Add a Post
    let post = await Post.create({
      name: 'My Post',
      targets: [{ kind: 'Foo', item: foo }, { kind: 'Bar', item: bar }]
    });
    log(post);

    let found = await Post.findOne();
    log(found);

    let result = await Post.findOne()
      .populate({
        match: { 'targets.kind': 'Foo' }, // here is the problem!
        path: 'targets.item',
      });
    log(result);
  } catch(e) {
    console.error(e);
  } finally {
    // Wait for the connection to close so a failure here is not lost.
    await mongoose.disconnect();
  }
})();
So the comment there shows that the match is the problem with the logic, so let's look at the debug output and see why:
Mongoose: posts.deleteMany({}, {})
Mongoose: foos.deleteMany({}, {})
Mongoose: bars.deleteMany({}, {})
Mongoose: foos.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a0"), name: 'Bill', __v: 0 })
Mongoose: bars.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a1"), number: 1, __v: 0 })
[
{
"_id": "5bdbc70996ed8e3295b384a0",
"name": "Bill",
"__v": 0
},
{
"_id": "5bdbc70996ed8e3295b384a1",
"number": 1,
"__v": 0
}
]
Mongoose: posts.insertOne({ _id: ObjectId("5bdbc70996ed8e3295b384a2"), name: 'My Post', targets: [ { _id: ObjectId("5bdbc70996ed8e3295b384a4"), kind: 'Foo', item: ObjectId("5bdbc70996ed8e3295b384a0") }, { _id: ObjectId("5bdbc70996ed8e3295b384a3"), kind: 'Bar', item: ObjectId("5bdbc70996ed8e3295b384a1") } ], __v: 0 })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": {
"_id": "5bdbc70996ed8e3295b384a0",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": {
"_id": "5bdbc70996ed8e3295b384a1",
"number": 1,
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": "5bdbc70996ed8e3295b384a0"
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": "5bdbc70996ed8e3295b384a1"
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: bars.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a1") ] } }, { projection: {} })
Mongoose: foos.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a0") ] } }, { projection: {} })
{
"_id": "5bdbc70996ed8e3295b384a2",
"name": "My Post",
"targets": [
{
"_id": "5bdbc70996ed8e3295b384a4",
"kind": "Foo",
"item": null
},
{
"_id": "5bdbc70996ed8e3295b384a3",
"kind": "Bar",
"item": null
}
],
"__v": 0
}
That's the full output to show that everything else is actually working, and in fact without the match you would get the populated data back for the items. But take a close look at the two queries being issued to the foo and bar collections:
Mongoose: bars.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a1") ] } }, { projection: {} })
Mongoose: foos.find({ 'targets.kind': 'Foo', _id: { '$in': [ ObjectId("5bdbc70996ed8e3295b384a0") ] } }, { projection: {} })
So the 'targets.kind' that you include under match is actually being searched for on the foo and bar collections, and not in the posts collection as you seem to be expecting. Along with the rest of the output this should be giving you an idea of how populate() actually works, in that nothing ever says to specifically just return the "array entries" which are of kind: 'Foo' as the example goes.
This process of "filtering the array" actually isn't "really" even a natural MongoDB query, and with the exception of the "first and singular match" you actually would typically use .aggregate() and the $filter operator. You can get "singular" via the positional $ operator but if you wanted "all foos" where there was more than one, then it needs the $filter instead.
So the real core issue here is populate() is actually the wrong place and wrong operation to "filter the array". Instead you really want to "smartly" return only the array entries you want before you go doing anything else to "populate" the items.
Structural Problem
Noting from the listing above which is an allegory for what is hinted at in the question, there are "multiple models" being referred to in order to "join" and obtain the overall result. Whilst this may seem logical in "RDBMS land", it's certainly not the case nor practical or efficient to do so with MongoDB and the general "ilk" of "document databases".
The key thing to remember here is that "documents" in a "collection" need not all have the same "table structure" as you would with an RDBMS. The structure can vary, and whilst it's probably advisable to not "vary wildly", it's certainly very valid to store "polymorphic objects" within a single collection. After all, you actually want to reference all of these things back to the same parent, so why would they need to be in different collections? Simply put, they don't need to be at all:
// Self-contained demo of the "single collection" alternative: Foo, Bar and Baz
// are discriminators of one Target model, so a Post can reference all of them
// through a plain `ref` and filter by the discriminator key `__t`. The script
// runs the populate() variant and two equivalent $lookup pipelines.

// Bind `mongoose` explicitly; chaining the assignment inside the destructuring
// would create an implicit global (and throws in strict mode / ES modules).
const mongoose = require('mongoose');
const { Schema } = mongoose;

const uri = 'mongodb://localhost:27017/polypop';

// Print every query Mongoose sends to MongoDB.
mongoose.set('debug', true);
mongoose.Promise = global.Promise;

const postSchema = new Schema({
  name: String,
  targets: [{
    kind: String,
    item: { type: Schema.Types.ObjectId, ref: 'Target' }
  }]
});

// Empty base schema; the discriminators below add the per-type fields.
const targetSchema = new Schema({});

const fooSchema = new Schema({
  name: String
});

const barSchema = new Schema({
  number: Number
});

const bazSchema = new Schema({
  title: String
});

// Pretty-print any value as indented JSON.
const log = data => console.log(JSON.stringify(data, undefined, 2));

const Post = mongoose.model('Post', postSchema);
const Target = mongoose.model('Target', targetSchema);
const Foo = Target.discriminator('Foo', fooSchema);
const Bar = Target.discriminator('Bar', barSchema);
const Baz = Target.discriminator('Baz', bazSchema);

(async function() {
  try {
    const conn = await mongoose.connect(uri,{ useNewUrlParser: true });

    // Clean data - bit hacky but just a demo
    await Promise.all(
      Object.values(conn.models).map(m => m.deleteMany())
    );

    // Insert some things; `_t` only selects the model and is not stored.
    let [foo1, bar, baz, foo2] = await Promise.all(
      [
        { _t: 'Foo', name: 'Bill' },
        { _t: 'Bar', number: 1 },
        { _t: 'Baz', title: 'Title' },
        { _t: 'Foo', name: 'Ted' }
      ].map(({ _t, ...d }) => mongoose.model(_t).create(d))
    );
    log([foo1, bar, baz, foo2]);

    // Add a Post
    let post = await Post.create({
      name: 'My Post',
      targets: [
        { kind: 'Foo', item: foo1 },
        { kind: 'Bar', item: bar },
        { kind: 'Baz', item: baz },
        { kind: 'Foo', item: foo2 }
      ]
    });
    log(post);

    let found = await Post.findOne();
    log(found);

    // Variant 1: populate(), matching on the discriminator key of the
    // referenced documents.
    let result1 = await Post.findOne()
      .populate({
        path: 'targets.item',
        match: { __t: 'Foo' }
      });
    log(result1);

    // Variant 2: server-side join with localField/foreignField.
    let result2 = await Post.aggregate([
      // Only get documents with a matching entry
      { "$match": {
        "targets.kind": "Foo"
      }},
      // Optionally filter the array
      { "$addFields": {
        "targets": {
          "$filter": {
            "input": "$targets",
            "cond": {
              "$eq": [ "$$this.kind", "Foo" ]
            }
          }
        }
      }},
      // Lookup from single source
      { "$lookup": {
        "from": Target.collection.name,
        "localField": "targets.item",
        "foreignField": "_id",
        "as": "matches"
      }},
      // Marry up arrays
      { "$project": {
        "name": 1,
        "targets": {
          "$map": {
            "input": "$targets",
            "in": {
              "kind": "$$this.kind",
              "item": {
                "$arrayElemAt": [
                  "$matches",
                  { "$indexOfArray": [ "$matches._id", "$$this.item" ] }
                ]
              }
            }
          }
        }
      }}
    ]);
    log(result2);

    // Variant 3: sub-pipeline $lookup that also checks `__t` on the joined
    // documents.
    let result3 = await Post.aggregate([
      // Only get documents with a matching entry
      { "$match": {
        "targets.kind": "Foo"
      }},
      // Optionally filter the array
      { "$addFields": {
        "targets": {
          "$filter": {
            "input": "$targets",
            "cond": {
              "$eq": [ "$$this.kind", "Foo" ]
            }
          }
        }
      }},
      // Lookup from single source with overkill of type check
      { "$lookup": {
        "from": Target.collection.name,
        "let": { "targets": "$targets" },
        "pipeline": [
          { "$match": {
            "$expr": {
              "$in": [ "$_id", "$$targets.item" ]
            },
            "__t": "Foo"
          }}
        ],
        "as": "matches"
      }},
      // Marry up arrays
      { "$project": {
        "name": 1,
        "targets": {
          "$map": {
            "input": "$targets",
            "in": {
              "kind": "$$this.kind",
              "item": {
                "$arrayElemAt": [
                  "$matches",
                  { "$indexOfArray": [ "$matches._id", "$$this.item" ] }
                ]
              }
            }
          }
        }
      }}
    ]);
    // Use the JSON logger like every other result so nested documents are
    // printed in full rather than abbreviated by console.log.
    log(result3);
  } catch(e) {
    console.error(e);
  } finally {
    // Wait for the connection to close so a failure here is not lost.
    await mongoose.disconnect();
  }
})();
That's a bit longer and has a few more concepts to get around, but the basic principle is that instead of using "multiple collections" for the different types we're only going to use one. The "mongoose" method for this uses "discriminators" in the model setup which is all relevant to this part of the code:
const Post = mongoose.model('Post', postSchema);
const Target = mongoose.model('Target', targetSchema);
const Foo = Target.discriminator('Foo', fooSchema);
const Bar = Target.discriminator('Bar', barSchema);
const Baz = Target.discriminator('Baz', bazSchema);
Which is really simply calling .discriminator() from a "base model" for the "singular" collection rather than calling mongoose.model(). The really good thing about this is as far as the rest of your code is concerned, Baz and Bar etc are all just treated like a "model" transparently, but they are actually doing something really cool underneath.
So all of these "related things" ( they really are even if you don't think so yet ) are all actually kept in the same collection, but operations using the individual models take into account an "automatic" kind key. This is __t by default, but you can actually specify whatever you want in options.
The fact that these are all actually in the same collection though is really important, since you can basically readily query the same collection for the different types of data. Put simply:
Foo.find({})
Would actually call
targets.find({ __t: 'Foo' })
And does this automatically. But more importantly
Target.find({ __t: { "$in": [ 'Foo', 'Baz' ] } })
Would be returning all the expected results from a "single collection" with a "single request".
So taking a look at the revised populate() under this structure:
let result1 = await Post.findOne()
.populate({
path: 'targets.item',
match: { __t: 'Foo' }
});
log(result1);
This shows instead in the logs:
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: targets.find({ __t: 'Foo', _id: { '$in': [ ObjectId("5bdbe2895b1b843fba050569"), ObjectId("5bdbe2895b1b843fba05056a"), ObjectId("5bdbe2895b1b843fba05056b"), ObjectId("5bdbe2895b1b843fba05056c") ] } }, { projection: {} })
Note how even though all "four" of the related ObjectId values are sent with the request, the additional constraint of __t: 'Foo' also binds which documents are actually returned and married up. The result then becomes self-evident, as only the 'Foo' entries are populated. But also note the "catch":
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba050569",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba05056c",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Filtering after Populate
This is actually a longer topic and more fully answered elsewhere, but the basics here as shown in the output above is that populate() really still does absolutely nothing about actually "filtering" the results in the array to only the desired matches.
The other thing is that populate() really isn't that great an idea from a "performance" perspective, since what is really happening is "another query" ( in our second form we optimized to just one more ) or possibly "many queries" depending on your structure are actually being issued to the database and the results are being reconstructed together on the client.
Overall, you end up returning a lot more data than you actually need and at best you are relying on manual client side filtering in order to discard those unwanted results. So the "ideal" position is to have the "server" do that sort of thing instead, and only return the data you actually need.
The populate() method was added as a "convenience" to the mongoose API a "very" long time ago. Since then MongoDB has moved on and now sports $lookup as a "native" way for performing a "join" on the server with a single request.
There are different ways to do this but just touching on "two" closely related to the existing populate() functionality but with improvements:
let result2 = await Post.aggregate([
// Only get documents with a matching entry
{ "$match": {
"targets.kind": "Foo"
}},
// Optionally filter the array
{ "$addFields": {
"targets": {
"$filter": {
"input": "$targets",
"cond": {
"$eq": [ "$$this.kind", "Foo" ]
}
}
}
}},
// Lookup from single source
{ "$lookup": {
"from": Target.collection.name,
"localField": "targets.item",
"foreignField": "_id",
"as": "matches"
}},
// Marry up arrays
{ "$project": {
"name": 1,
"targets": {
"$map": {
"input": "$targets",
"in": {
"kind": "$$this.kind",
"item": {
"$arrayElemAt": [
"$matches",
{ "$indexOfArray": [ "$matches._id", "$$this.item" ] }
]
}
}
}
}
}}
]);
log(result2);
The two basic "optimizations" there are using $filter in order to "pre-discard" items from the array which do not actually match the type we want. This can be totally optional as covered with a bit more detail later, but where possible then it's probably a good thing to do since we won't even be looking for matching _id values in the foreign collection for anything but 'Foo' things.
The other of course is the $lookup itself, which means instead of a separate round trip to the server we actually just make one and the "join" is done before any response is returned. Here we just look for the matching _id values in the foreign collection to the targets.item array entry values. We already filtered those for 'Foo', so that is all that gets returned:
{
"_id": "5bdbe6aa2c4a2240c16802e2",
"name": "My Post",
"targets": [
{
"kind": "Foo",
"item": {
"_id": "5bdbe6aa2c4a2240c16802de",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"kind": "Foo",
"item": {
"_id": "5bdbe6aa2c4a2240c16802e1",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
]
}
For a "slight" variation on that we can actually even inspect the __t value within the $lookup expression using "sub-pipeline" processing with MongoDB 3.6 and greater. The main use case here would be if you choose to remove the kind from the parent Post altogether and simply rely on the "kind" information inherent to discriminator references used in storage:
let result3 = await Post.aggregate([
// Only get documents with a matching entry
{ "$match": {
"targets.kind": "Foo"
}},
// Optionally filter the array
{ "$addFields": {
"targets": {
"$filter": {
"input": "$targets",
"cond": {
"$eq": [ "$$this.kind", "Foo" ]
}
}
}
}},
// Lookup from single source with overkill of type check
{ "$lookup": {
"from": Target.collection.name,
"let": { "targets": "$targets" },
"pipeline": [
{ "$match": {
"$expr": {
"$in": [ "$_id", "$$targets.item" ]
},
"__t": "Foo"
}}
],
"as": "matches"
}},
// Marry up arrays
{ "$project": {
"name": 1,
"targets": {
"$map": {
"input": "$targets",
"in": {
"kind": "$$this.kind",
"item": {
"$arrayElemAt": [
"$matches",
{ "$indexOfArray": [ "$matches._id", "$$this.item" ] }
]
}
}
}
}
}}
]);
log(result3);
This has the same "filtered" results and is similarly a "single request" and "single response".
The whole topic gets a bit wider, and even though aggregation pipelines may appear considerably more unwieldy than a simple populate() call, it's fairly trivial to write a wrapper which can abstract from your models and pretty much generate most of the data structure code required. You can see an overview of this in action at "Querying after populate in Mongoose", which in essence is the same question you are basically asking here once we sort out the initial issue of "multiple collection joins" and why you really don't need them.
The one caveat here is that $lookup actually has no possible way to "dynamically" determine which collection to "join" to. You need to include that information statically just as is done here, so this is another reason to actually favor "discriminators" over using multiple collections. It's not only "better performance", but it's actually the only way the most performant options will actually support what you are trying to do.
For reference, the "complete" (truncated due to max post length) output of the second listing would be:
Mongoose: posts.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.deleteMany({}, {})
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba050569"), __t: 'Foo', name: 'Bill', __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056a"), __t: 'Bar', number: 1, __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056b"), __t: 'Baz', title: 'Title', __v: 0 })
Mongoose: targets.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056c"), __t: 'Foo', name: 'Ted', __v: 0 })
[
{
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056a",
"__t": "Bar",
"number": 1,
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056b",
"__t": "Baz",
"title": "Title",
"__v": 0
},
{
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
]
Mongoose: posts.insertOne({ _id: ObjectId("5bdbe2895b1b843fba05056d"), name: 'My Post', targets: [ { _id: ObjectId("5bdbe2895b1b843fba050571"), kind: 'Foo', item: ObjectId("5bdbe2895b1b843fba050569") }, { _id: ObjectId("5bdbe2895b1b843fba050570"), kind: 'Bar', item: ObjectId("5bdbe2895b1b843fba05056a") }, { _id: ObjectId("5bdbe2895b1b843fba05056f"), kind: 'Baz', item: ObjectId("5bdbe2895b1b843fba05056b") }, { _id: ObjectId("5bdbe2895b1b843fba05056e"), kind: 'Foo', item: ObjectId("5bdbe2895b1b843fba05056c") } ], __v: 0 })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": {
"_id": "5bdbe2895b1b843fba05056a",
"__t": "Bar",
"number": 1,
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": {
"_id": "5bdbe2895b1b843fba05056b",
"__t": "Baz",
"title": "Title",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": "5bdbe2895b1b843fba050569"
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": "5bdbe2895b1b843fba05056a"
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": "5bdbe2895b1b843fba05056b"
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": "5bdbe2895b1b843fba05056c"
}
],
"__v": 0
}
Mongoose: posts.findOne({}, { projection: {} })
Mongoose: targets.find({ __t: 'Foo', _id: { '$in': [ ObjectId("5bdbe2895b1b843fba050569"), ObjectId("5bdbe2895b1b843fba05056a"), ObjectId("5bdbe2895b1b843fba05056b"), ObjectId("5bdbe2895b1b843fba05056c") ] } }, { projection: {} })
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"_id": "5bdbe2895b1b843fba050571",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba050569",
"name": "Bill",
"__v": 0
}
},
{
"_id": "5bdbe2895b1b843fba050570",
"kind": "Bar",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056f",
"kind": "Baz",
"item": null
},
{
"_id": "5bdbe2895b1b843fba05056e",
"kind": "Foo",
"item": {
"__t": "Foo",
"_id": "5bdbe2895b1b843fba05056c",
"name": "Ted",
"__v": 0
}
}
],
"__v": 0
}
Mongoose: posts.aggregate([ { '$match': { 'targets.kind': 'Foo' } }, { '$addFields': { targets: { '$filter': { input: '$targets', cond: { '$eq': [ '$$this.kind', 'Foo' ] } } } } }, { '$lookup': { from: 'targets', localField: 'targets.item', foreignField: '_id', as: 'matches' } }, { '$project': { name: 1, targets: { '$map': { input: '$targets', in: { kind: '$$this.kind', item: { '$arrayElemAt': [ '$matches', { '$indexOfArray': [ '$matches._id', '$$this.item' ] } ] } } } } } } ], {})
[
{
"_id": "5bdbe2895b1b843fba05056d",
"name": "My Post",
"targets": [
{
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba050569",
"__t": "Foo",
"name": "Bill",
"__v": 0
}
},
{
"kind": "Foo",
"item": {
"_id": "5bdbe2895b1b843fba05056c",
"__t": "Foo",
"name": "Ted",
"__v": 0
}
}
]
}
]
Mongoose: posts.aggregate([ { '$match': { 'targets.kind': 'Foo' } }, { '$addFields': { targets: { '$filter': { input: '$targets', cond: { '$eq': [ '$$this.kind', 'Foo' ] } } } } }, { '$lookup': { from: 'targets', let: { targets: '$targets' }, pipeline: [ { '$match': { '$expr': { '$in': [ '$_id', '$$targets.item' ] }, __t: 'Foo' } } ], as: 'matches' } }, { '$project': { name: 1, targets: { '$map': { input: '$targets', in: { kind: '$$this.kind', item: { '$arrayElemAt': [ '$matches', { '$indexOfArray': [ '$matches._id', '$$this.item' ] } ] } } } } } } ], {})

How to resolve the many-to-many relation keeping the order of ID array in mongoDB

I have two collections posts and tags on mongoDB.
There is a many-to-many relationship between these collections.
A post can belong to some tags, and a tag can contain some posts.
I am looking for an efficient query method to join posts to tags keeping the order of postIds.
If the data schema is inappropriate, I can change it.
The mongoDB version is 3.6.5
Sample data
// Sample data: three posts keyed by string _id values.
db.posts.insertMany([
{ _id: 'post001', title: 'this is post001' },
{ _id: 'post002', title: 'this is post002' },
{ _id: 'post003', title: 'this is post003' }
])
// One tag whose postIds array references the posts above; note the order
// (post003, post001, post002) differs from the insertion order of the posts.
db.tags.insertMany([
{ _id: 'tag001', postIds: ['post003', 'post001', 'post002'] }
])
Desired result
{
"_id": "tag001",
"postIds": [ "post003", "post001", "post002" ],
"posts": [
{ "_id": "post003", "title": "this is post003" },
{ "_id": "post001", "title": "this is post001" },
{ "_id": "post002", "title": "this is post002" }
]
}
What I tried
I tried a query which uses $lookup.
// Attempted query: join posts onto each tag by matching the entries of the
// tag's postIds array against posts._id, storing the joined docs as `posts`.
db.tags.aggregate([
{ $lookup: {
from: 'posts',
localField: 'postIds',
foreignField: '_id',
as: 'posts'
}}
])
However, I got a result which is different from what I want.
{
"_id": "tag001",
"postIds": [ "post003", "post001", "post002" ],
"posts": [
{ "_id": "post001", "title": "this is post001" },
{ "_id": "post002", "title": "this is post002" },
{ "_id": "post003", "title": "this is post003" }
]
}
In MongoDB you would attempt to model your data such that you avoid joins (as in $lookups) altogether, e.g. by storing the tags alongside the posts.
// Alternative model: each post carries the ids of its tags in a `tags` array,
// so no separate tags collection is needed to relate the two.
db.posts.insertMany([
{ _id: 'post001', title: 'this is post001', tags: [ "tag001", "tag002" ] },
{ _id: 'post002', title: 'this is post002', tags: [ "tag001" ] },
{ _id: 'post003', title: 'this is post003', tags: [ "tag002" ] }
])
With this structure in place you could get the desired result like this:
// Group posts by tag: produces one output document per distinct tag id,
// carrying both the ids and the documents of the posts that reference it.
db.posts.aggregate([
  // Emit one document per (post, tag) pair so posts can be grouped by tag.
  { $unwind: "$tags" },
  {
    $group: {
      // The tag id becomes the group key.
      _id: "$tags",
      // Named `postIds` (not `postsIds`) so the field matches the desired
      // result shape and the name used in the surrounding explanation.
      postIds: { $push: "$_id" },
      // The unwound post documents belonging to this tag.
      posts: { $push: "$$ROOT" }
    }
  }
])
In this case, I would doubt that you even need the postIds field in the result as it would be contained in the posts array anyway.
You can use a combination of $map and $filter to re-order elements in the posts array in a projection stage:
// Join posts onto each tag, then rebuild `posts` so that it follows the
// order of the tag's own `postIds` array.
db.tags.aggregate([
  // Stage 1: collect every referenced post into `posts` (order not guaranteed).
  {
    $lookup: {
      from: 'posts',
      localField: 'postIds',
      foreignField: '_id',
      as: 'posts'
    }
  },
  // Stage 2: walk `postIds` in order and, for each id, take the first joined
  // post whose _id equals it.
  {
    $project: {
      _id: 1,
      postIds: 1,
      posts: {
        $map: {
          input: "$postIds",
          as: "wantedId",
          in: {
            $arrayElemAt: [
              {
                $filter: {
                  input: "$posts",
                  as: "candidate",
                  cond: { $eq: ["$$candidate._id", "$$wantedId"] }
                }
              },
              0
            ]
          }
        }
      }
    }
  }
])
Any missing posts will be filled with null to keep the indexes consistent with postIds.