MongoDB multiple $lookup and $group output - mongodb

I'm quite new to MongoDB and I'm trying to build a kind of leaderboard from two related collections plus a third one whose documents reference one of the other two, depending on which property is set.
Schema can be found here
Consider a schema like the following one:
tree: { _id, company_id: string, company_name }
link: { _id, company_id: string, url: string }
analytics: { _id, tree_id: string, link_id: string, views: number, clicks: number, date: string }
An analytics document has either tree_id and views, or link_id and clicks, but not both pairs at once.
What I'm trying to achieve right now is a kind of "leaderboard" of the total clicks + views: starting from the analytics collection, joining it with both tree and link, and finally retrieving the sum of clicks and views per company.
I have already managed to retrieve the sum of them for a specific company_id, with the following code
db.analytics.aggregate([{
$lookup: {
from: "trees",
as: "trees",
localField: "tree_id",
foreignField: "_id"
}
}, {
$lookup: {
from: "links",
as: "links",
localField: "link_id",
foreignField: "_id"
}
}, {
$match: {
$or: [
{"trees.company_id": "1"},
{"links.company_id": "1"}
]
}
}, {
$group: {
_id: null,
views_count: {
$sum: "$views"
},
clicks_count: {
$sum: "$clicks"
}
}
}])
But I can't find a way to get a list of results like
{ company_id: 1, company_name: "foo", clicks: 100, views: 200 },
{ company_id: 2, company_name: "bar", clicks: 200, views: 200 }
and so on.
What I've tried so far is grouping by different _id, which is not working as I would expect
db.analytics.aggregate([{
$lookup: {
from: "trees",
as: "trees",
localField: "tree_id",
foreignField: "_id"
}
}, {
$lookup: {
from: "links",
as: "links",
localField: "link_id",
foreignField: "_id"
}
}, {
$group: {
_id: "$trees.company_id",
views_count: {
$sum: "$views"
},
clicks_count: {
$sum: "$clicks"
}
}
}])
Which does not assign clicks_count to a specific entry, but outputs something like
{ "_id" : [ "1" ], "views_count" : 6, "clicks_count" : 0 }
{ "_id" : [ ], "views_count" : 0, "clicks_count" : 48 }
{ "_id" : [ "2" ], "views_count" : 10, "clicks_count" : 0 }
I'm not even sure that this schema could be the best solution, so I will also appreciate any design suggestions or similar stuff.
Based on the comment below, I tried to deconstruct trees before grouping the results, but it ended up outputting only the company_id and views_count, without counting clicks, as follows:
{ "_id" : "2", "views_count" : 10, "clicks_count" : 0 }
{ "_id" : "1", "views_count" : 6, "clicks_count" : 0 }

Use $addFields to add a company field: if trees.company_id is not an empty array [], take trees; otherwise take links.
Use $arrayElemAt to get the first element of that array.
Then $group by company_id and sum your counts.
db.analytics.aggregate([
{ $lookup: { //... } },
{ $lookup: { //... } },
{
$addFields: {
company: {
$arrayElemAt: [
{ $cond: [{ $ne: ["$trees.company_id", []] }, "$trees", "$links"] },
0
]
}
}
},
{
$group: {
_id: "$company.company_id",
company_name: { $first: "$company.company_name" },
views_count: { $sum: "$views" },
clicks_count: { $sum: "$clicks" }
}
}
])
Playground

Related

MongoDB lookup when foreign field is an array

I've searched the internet and StackOverflow, but I cannot find the answer or even the question.
I have two collections, reports and users. I want my query to return all reports and indicate if the specified user has that report as a favorite in their array.
Reports Collection
{ _id: 1, name:"Report One"}
{ _id: 2, name:"Report Two"}
{ _id: 3, name:"Report Three"}
Users Collection
{_id: 1, name:"Mike", favorites: [1,3]}
{_id: 2, name:"Tim", favorites: [2,3]}
Desired Result for users.name="Mike"
{ _id: 1, name:"Report One", favorite: true}
{ _id: 2, name:"Report Two", favorite: false}
{ _id: 3, name:"Report Three", favorite: true}
All of the answers I can find use $unwind on the local (reports) field, but in this case the local field isn't an array. The foreign field is the array.
How can I unwind the foreign field? Is there a better way to do this?
I saw online that someone suggested making another collection favorites that would contain:
{ _id: 1, userId: 1, reportId: 1 }
{ _id: 2, userId: 1, reportId: 3 }
{ _id: 3, userId: 2, reportId: 2 }
{ _id: 4, userId: 2, reportId: 3 }
This method seems like it should be unnecessary. It should be simple to join onto an ID in a foreign array, right?
You can use $lookup with a custom pipeline, which will give you 0 or 1 results, and then use $size to convert the array to a single boolean value:
db.reports.aggregate([
{
$lookup: {
from: "users",
let: { report_id: "$_id" },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: [ "$name", "Mike" ] },
{ $in: [ "$$report_id", "$favorites" ] }
]
}
}
}
],
as: "users"
}
},
{
$project: {
_id: 1,
name: 1,
favorite: { $eq: [ { $size: "$users" }, 1 ] }
}
}
])
Alternatively, if you need to use a MongoDB version lower than 3.6, you can use a regular $lookup and then use $filter to get only those users whose name is Mike:
db.reports.aggregate([
{
$lookup: {
from: "users",
localField: "_id",
foreignField: "favorites",
as: "users"
}
},
{
$project: {
_id: 1,
name: 1,
favorite: { $eq: [ { $size: { $filter: { input: "$users", as: "u", cond: { $eq: [ "$$u.name", "Mike" ] } } } }, 1 ] }
}
}
])
{
"_id" : ObjectId("611fc392cfadfbba65d4f4bd"),
"t_name" : "Bahadur",
"t_age" : "22",
"trch" : "java",
"StudentsDetails" : [
{
"_id" : ObjectId("611fc41ccfadfbba65d4f4be"),
"s_name" : "Asin",
"s_age" : "18",
"trch" : "java",
"tsid" : ObjectId("611fc392cfadfbba65d4f4bd")
},
{
"_id" : ObjectId("611fc8f1a815fb2c737ae31f"),
"s_name" : "sonu",
"s_age" : "18",
"tsid" : ObjectId("611fc392cfadfbba65d4f4bd")
},
{
"_id" : ObjectId("611fc915a815fb2c737ae320"),
"s_name" : "monu",
"s_age" : "19",
"tsid" : ObjectId("611fc392cfadfbba65d4f4bd")
}
]
}
Create Trainer Collection
Create Scholar Collection
//query
db.Trainer.aggregate(
[
{
$lookup:
{
from: "scholar",
localField: "_id",
foreignField: "tsid",
as: "StudentsDetails"
}
}
]
).pretty();

Issues with merging arrays of objects in MongoDB

I am trying to build an aggregation query in MongoDB that will merge arrays from
2 different collections (one of the collections is a TTL collection), and I am facing 2 issues that I can't resolve.
First Issue:
I would like to merge the takenSeats fields of my temp collection and my permanent collection and store the result in place of the current takenSeats field. Using the aggregation at the bottom I managed to merge the arrays with the $push operator, but I can't replace the takenSeats field of my permanent document with the result.
Second Issue:
If I don't have any documents in my temp collection, how can I still receive the document from the permanent one?
Sample of document in the permanent collection: (extracting data from one document)
{
"_id" : ObjectId("5b6b656818883ec018d1542d"),
"showsHall" : [
ObjectId("5b64cb758ad5f81a6cb7e6ae")
],
"movie" : [
ObjectId("5b6b614218883ec018d15428")
],
"takenSeats" : [
{
"id" : 11
},
{
"id" : 12
}
],
"showDate" : "8/14/2018",
"showStartTime" : "3:00 PM",
"showEndTime" : "5:00 PM",
"creteDate" : ISODate("2018-08-08T21:49:28.020Z"),
"__v" : 0
}
From the TTL collection: (extracting data from multiple documents)
{
"_id" : ObjectId("5b6f35023f64851baa70c61b"),
"createdAt" : ISODate("2018-08-11T19:12:02.951Z"),
"showId" : [
ObjectId("5b6b656818883ec018d1542d")
],
"takenSeats" : [
{
"id" : 22
},
{
"id" : 25
}
]
}
This is the aggregation that I used:
db.getCollection('shows').aggregate([
{ $match: { _id: ObjectId("5b6b656818883ec018d1542d") } },
{
$lookup: {
from: "temp",
localField: "_id",
foreignField: "showId",
as: "fromItems"
}
},
{ $unwind: "$fromItems" },
{ "$project": {"takenSeats": { "$setUnion": ["$takenSeats", "$fromItems.takenSeats"]}, _id: 1, showsHall: 1, movie: 1, takenSeats: 1 , showDate: 1, showStartTime: 1, showEndTime: 1 }},
{$unwind:"$takenSeats"},
{$group:{_id: "$_id", takenSeats: {$push : "$takenSeats"} }},
])
Result:
[Edit]
I managed to keep my original data with the $first operator.
But now I can't resolve issue no. 2 (avoid pushing a value when it is null). I tried to use preserveNullAndEmptyArrays
in both of the $unwind stages, but the result is an array containing a single null.
The result I want is that values are pushed to the new array only if there are values to push.
This is my aggregation :
db.getCollection('shows').aggregate([
{ $match: { _id: ObjectId("5b6b656818883ec018d1542d") } },
{
$lookup: {
from: "temp",
localField: "_id",
foreignField: "showId",
as: "fromItems"
}
},
{ $unwind:{path:"$fromItems" ,preserveNullAndEmptyArrays:true}},
{ "$project": {"takenSeats": { "$setUnion": ["$takenSeats", "$fromItems.takenSeats"]}, _id: 1, showsHall: 1, movie: 1, showDate: 1, showStartTime: 1, showEndTime: 1 }},
{$unwind:{path:"$takenSeats" ,preserveNullAndEmptyArrays:true}},
{$group:{
_id: "$_id",
showsHall : { $first: '$showsHall' },
movie : { $first: '$movie' },
showDate : { $first: '$showDate' },
showStartTime : { $first: '$showStartTime' },
showEndTime : { $first: '$showEndTime' },
takenSeats: {$push : "$takenSeats"}
}
}
])
This is the result that I am getting if there are no documents in the temp collection:
{
"_id" : ObjectId("5b6b656818883ec018d1542d"),
"showsHall" : [
ObjectId("5b64cb758ad5f81a6cb7e6ae")
],
"movie" : [
ObjectId("5b6b614218883ec018d15428")
],
"showDate" : "8/14/2018",
"showStartTime" : "3:00 PM",
"showEndTime" : "5:00 PM",
"takenSeats" : [
null
]
}
To solve issue 2, add an $ifNull condition:
db.getCollection('shows').aggregate([
{ $match: { _id: ObjectId("5b6b656818883ec018d1542d") } },
{
$lookup: {
from: "tempShows",
localField: "_id",
foreignField: "showId",
as: "fromItems"
}
},
{ $unwind:{path:"$fromItems" ,preserveNullAndEmptyArrays:true}},
{ "$project": {"takenSeats": { $ifNull: [{ "$setUnion": ["$takenSeats", "$fromItems.takenSeats"]}, '$takenSeats'] } ,_id: 1, showsHall: 1, movie: 1, showDate: 1, showStartTime: 1, showEndTime: 1 }},
{$unwind:{path:"$takenSeats" ,preserveNullAndEmptyArrays:true}},
{$group:{
_id: "$_id",
showsHall : { $first: '$showsHall' },
movie : { $first: '$movie' },
showDate : { $first: '$showDate' },
showStartTime : { $first: '$showStartTime' },
showEndTime : { $first: '$showEndTime' },
takenSeats: {$push : "$takenSeats"}
}
}
])

Mongodb - $group with $addToSet and then $lookup

I've got the following query
db.getCollection('transportations').aggregate(
{
$group: {
_id: null,
departure_city_id: { $addToSet: "$departure.city_id" },
departure_station_id: { $addToSet: "$departure.station_id" }
}
}
);
and the result is
{
"_id" : null,
"departure_city_id" : [
ObjectId("5a2f5378334c4442ab5a63ea"),
ObjectId("59dae1efe408157cc1585fea"),
ObjectId("5a5bbfdc35628410f9fdcde9")
],
"departure_station_id" : [
ObjectId("5a2f53d1334c4442ab5a63ee"),
ObjectId("5a2f53c5334c4442ab5a63ed"),
ObjectId("5a5bc13435628410f9fdcdea")
]
}
Now I want to look up each departure_city_id in the collection "areas" to get the "name" of the area, and each departure_station_id in the collection "stations" to get the "name" of the station.
The result could be something like this
{
"_id" : null,
"departure_city_id" : [
{
_id: ObjectId("5a2f5378334c4442ab5a63ea"),
name: "City 1"
},
{
_id: ObjectId("59dae1efe408157cc1585fea"),
name: "City 2"
},
{
_id: ObjectId("5a5bbfdc35628410f9fdcde9"),
name: "City 3"
}
],
"departure_station_id" : [
{
_id: ObjectId("5a2f53d1334c4442ab5a63ee"),
name: "Station 1"
},
{
_id: ObjectId("5a2f53c5334c4442ab5a63ed"),
name: "Station 2"
},
{
_id: ObjectId("5a5bc13435628410f9fdcdea"),
name: "Station 3"
}
]
}
The $lookup aggregation pipeline stage now works directly with an array (as of version 3.3.4).
See: lookup between local (multiple)array of values and foreign (single) value
The answer of the question is just:
db.getCollection('transportations').aggregate(
{
$group: {
_id: null,
departure_city_id: { $addToSet: "$departure.city_id" },
departure_station_id: { $addToSet: "$departure.station_id" }
}
},
{
$lookup: {
from: "areas",
localField: "departure_city_id",
foreignField: "_id",
as: "departure_city_id"
}
},
{
$lookup: {
from: "stations",
localField: "departure_station_id",
foreignField: "_id",
as: "departure_station_id"
}
}
)

Aggregate pipeline Match -> Lookup -> Unwind -> Match issue

I am puzzled as to why the code below doesn't work. Can anyone explain, please?
For some context: My goal is to get the score associated with an answer option for a survey database where answers are stored in a separate collection from the questions. The questions collection contains an array of answer options, and these answer options have a score.
Running this query:
db.answers.aggregate([
{
$match: {
userId: "abc",
questionId: ObjectId("598be01d4efd70a81c1c5ad4")
}
},
{
$lookup: {
from: "questions",
localField: "questionId",
foreignField: "_id",
as: "question"
}
},
{
$unwind: "$question"
},
{
$unwind: "$question.options"
},
{
$unwind: "$answers"
}
])
I get:
{
"_id" : ObjectId("598e588e0c5e24452c9ee769"),
"userId" : "abc",
"questionId" : ObjectId("598be01d4efd70a81c1c5ad4"),
"answers" : {
"id" : 20
},
"question" : {
"_id" : ObjectId("598be01d4efd70a81c1c5ad4"),
"options" : {
"id" : 10,
"score" : "12"
}
}
}
{
"_id" : ObjectId("598e588e0c5e24452c9ee769"),
"userId" : "abc",
"questionId" : ObjectId("598be01d4efd70a81c1c5ad4"),
"answers" : {
"id" : 20
},
"question" : {
"_id" : ObjectId("598be01d4efd70a81c1c5ad4"),
"options" : {
"id" : 20,
"score" : "4"
}
}
}
All great. If I now add to the original query a match that's supposed to find the answer option having the same id as the answer (e.g. questions.options.id == answers.id), things don't work as I would expect.
The final pipeline is:
db.answers.aggregate([
{
$match: {
userId: "abc",
questionId: ObjectId("598be01d4efd70a81c1c5ad4")
}
},
{
$lookup: {
from: "questions",
localField: "questionId",
foreignField: "_id",
as: "question"
}
},
{
$unwind: "$question"
},
{
$unwind: "$question.options"
},
{
$unwind: "$answers"
},
{
$match: {
"question.options.id": "$answers.id"
}
},
{
$project: {
_id: 0,
score: "$question.options.score"
}
}
])
This returns an empty result. But if I change the RHS of the $match from "$answers.id" to 20, it returns the expected score: 4. I tried everything I could think of, but couldn't get it to work and can't understand why it doesn't work.
I was able to get it to work with the following pipeline:
{
$match: {
userId: "abc",
questionId: ObjectId("598be01d4efd70a81c1c5ad4")
}
},
{
$lookup: {
from: "questions",
localField: "questionId",
foreignField: "_id",
as: "question"
}
},
{
$unwind: "$question"
},
{
$unwind: "$question.options"
},
{
$unwind: "$answers"
},
{
$addFields: {
areEqual: { $eq: [ "$question.options.id", "$answers.id" ] }
}
},
{
$match: {
areEqual: true
}
},
{
$project: {
_id: 0,
score: "$question.options.score"
}
}
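I think the reason it didn't work with a direct match is the fact that question.options.id doesn't actually reference the intended field... I needed to use $question.options.id, which wouldn't work as the LHS of a $match, hence the need to add an extra helper attribute. (A plain $match compares a field against a literal value, so "$answers.id" on the right-hand side is treated as the string "$answers.id" rather than as a reference to another field; on MongoDB 3.6+ the same comparison can be written directly with $expr.)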

MongoDB's aggregation from nested key returns nothing

(Edit : this question was edited to better reflect the issue, which might be a little more complicated than the proposed related question.)
Let's say I have these two collections
products
{
_id: 'AAAA',
components: [
{ type: 'foo', items: [
{ itemId: 'item1', qty: 2 },
{ itemId: 'item2', qty: 1 }
] },
{ type: 'bar', items: [
{ itemId: 'item3', qty: 8 }
] }
]
}
items
{
_id: 'item1',
name: 'Foo Item'
}
{
_id: 'item2',
name: 'Bar Item'
}
{
_id: 'item3',
name: 'Buz Item'
}
And that I perform this query
db['products'].aggregate([
{ $lookup: {
from: 'items',
localField: 'components.items.itemId',
foreignField: '_id',
as: 'componentItems'
} }
]);
I get this
{
_id: 'AAAA',
components: [
{ type: 'foo', items: [
{ itemId: 'item1', qty: 2 },
{ itemId: 'item2', qty: 1 }
] }
{ type: 'bar', items: [
{ itemId: 'item3', qty: 8 }
] }
],
componentItems: [ ]
}
Why doesn't the aggregation read the local field value? How can I retrieve the foreign document without losing my original document structure?
Edit
I have read the jira issue and seen the proposed answer, however I don't know how this applies. This is not merely an array, but values from an object, inside an array. I am not sure how I can unwind this, and how to put it back together without losing the document structure.
Edit 2
The problem that I have is that I'm not sure how to group the results back together. With this query :
db['products'].aggregate([
{ $unwind: '$components' },
{ $unwind: '$components.items' },
{ $lookup: {
from: 'items',
localField: 'components.items.itemId',
foreignField: '_id',
as: 'componentsItems'
} }
]);
I get the "correct" result of
{ "_id" : "AAAA", "components" : { "type" : "foo", "items" : { "itemId" : "item1", "qty" : 2 } }, "componentsItems" : [ { "_id" : "item1", "name" : "Foo Item" } ] }
{ "_id" : "AAAA", "components" : { "type" : "foo", "items" : { "itemId" : "item2", "qty" : 1 } }, "componentsItems" : [ { "_id" : "item2", "name" : "Bar Item" } ] }
{ "_id" : "AAAA", "components" : { "type" : "bar", "items" : { "itemId" : "item3", "qty" : 8 } }, "componentsItems" : [ { "_id" : "item3", "name" : "Buz Item" } ] }
But, while I can unwind components.items, I cannot seem to undo this, as $group complains that
"the group aggregate field name 'components.items' cannot be used because $group's field names cannot contain '.'"
db['products'].aggregate([
{ $unwind: '$components' },
{ $unwind: '$components.items' },
{ $lookup: {
from: 'items',
localField: 'components.items.itemId',
foreignField: '_id',
as: 'componentsItems'
} },
{ "$group": {
"components.type": "$components.type",
"components.items": { $push: "$components.items" },
"componentsItems": { $push: "$componentsItems" }
} },
{ "$group": {
"_id": "$_id",
"components": { $push: "$components" },
"componentsItems": { $push: "$componentsItems" }
} }
]);
Edit 3
This query is, thus far, the closest that I found, except that components are not grouped back by type.
db['products'].aggregate([
{ $unwind: '$components' },
{ $unwind: '$components.items' },
{ $lookup: {
from: 'items',
localField: 'components.items.itemId',
foreignField: '_id',
as: 'componentsItems'
} },
{ $unwind: '$componentsItems' },
{ $group: {
"_id": "$_id",
"components": {
$push: {
"type": "$components.type",
"items": "$components.items"
}
},
"componentsItems": { $addToSet: "$componentsItems" }
} }
]);
Also: I am concerned that using $unwind and $group may affect the order of the components, which should be preserved. AFAIK, MongoDB preserves array order when storing documents. I'd hate for this functionality to be broken by the awkwardness of $lookup.
Here is my long and awkward solution :
db['products'].aggregate([
// unwind all... because $lookup cannot work with multi-values
{ $unwind: '$components' },
{ $unwind: '$components.items' },
// lookup... This is a 1:1 relationship but who cares, right?
{ $lookup: {
from: 'items',
localField: 'components.items.itemId',
foreignField: '_id',
as: 'componentsItems'
} },
// our 1:1 relationship is now an array, so this is required
// before grouping, so we don't end up with array of arrays
{ $unwind: '$componentsItems' },
// Group 1: put "components.items" in a temporary array
// and filter duplicates from "componentsItems"
{ $group: {
"_id": {
"i": "$_id",
"t": "$components.type"
},
"items": {
$push: "$components.items"
},
"componentsItems": { $addToSet: "$componentsItems" }
} },
// undo $push...
{ $unwind: "$componentsItems" },
// Group 2: put everything back together
{ $group: {
"_id": "$_id.i",
"items": {
$push: {
"type": "$_id.t",
"items": "$items"
}
},
"componentsItems": { $push: "$componentsItems" }
} }
]);
Edit
A better solution :
db['products'].aggregate([
// Return document, added a collection of "itemId"
{ $project: {
"_id": 1,
"components": 1,
"componentItemId": "$components.items.itemId"
} },
// Since there was two arrays, the field is an array of arrays...
{ $unwind: "$componentItemId" },
{ $unwind: "$componentItemId" },
// make 1:1 lookup...
{ $lookup: {
from: 'items',
localField: 'componentItemId',
foreignField: '_id',
as: 'componentsItems'
} },
// ... extract the 1:1 reference...
{ $unwind: "$componentsItems" },
// group back, ignoring the "componentItemId" field
{ $group: {
"_id": "$_id",
"components": { $first: "$components" },
"componentItems": { $addToSet: "$componentsItems" }
}}
]);
I'm not sure if there is a better solution yet, and I am concerned about performance, but this seems to be the only solution I can think of.
The downside is that documents cannot be dynamic, and this query will need to be modified whenever the schema changes.
Update
This seems to be resolved in MongoDB 3.3.4 (not released at the time of writing this answer).