Lookup and aggregate multiple levels of subdocument in Mongodb - mongodb

I've tried many answers to similar problems using $lookup, $unwind, and $match, but I can't get this to work for my sub-sub-subdocument situation.
I have this collection, Things:
{
"_id" : ObjectId("5a7241f7912cfc256468cb27"),
"name" : "Fortress of Solitude",
"alias" : "fortress_of_solitude",
},
{
"_id" : ObjectId("5a7247ec548c9ad042f579e2"),
"name" : "Batcave",
"alias" : "batcave",
},
{
"_id" : ObjectId("6a7247bc548c9ad042f579e8"),
"name" : "Oz",
"alias" : "oz",
},
and this one-document collection, Venues:
{
"_id" : ObjectId("5b9acabbbf71f39223f8de6e"),
"name" : "The Office",
"floors" : [
{
"name" : "1st Floor",
"places" : [
{
"name" : "Front Entrance",
"alias" : "front_entrance"
}
]
},
{
"name" : "2nd Floor",
"places" : [
{
"name" : "Batcave",
"alias" : "batcave"
},
{
"name" : "Oz",
"alias" : "oz"
}
]
}
]
}
I want to return all the Things, but with the Venue's floors.places.name aggregated with each Thing if it exists if the aliases match between Things and Venues. So, I want to return:
{
"_id" : ObjectId("5a7241f7912cfc256468cb27"),
"name" : "Fortress of Solitude",
"alias" : "fortress_of_solitude",
<-- nothing added here because
<-- it's not found in Venues
},
{
"_id" : ObjectId("5a7247ec548c9ad042f579e2"),
"name" : "Batcave",
"alias" : "batcave",
"floors" : [ <-- this should be
{ <-- returned
"places" : [ <-- because
{ <-- the alias
name" : "Batcave" <-- matches
} <-- in Venues
] <--
} <--
] <--
},
{
"_id" : ObjectId("6a7247bc548c9ad042f579e8"),
"name" : "Oz",
"alias" : "oz",
"floors" : [ <-- this should be
{ <-- returned
"places" : [ <-- because
{ <-- the alias
name" : "Oz" <-- matches
} <-- in Venues
] <--
} <--
] <--
}
I've gotten as far as the following query, but it only returns the entire Venues.floors array as an aggregate onto each Thing, which is way too much extraneous data aggregated. I just want to merge each relevant floor.place sub-subsubdocument from Venues into its corresponding Thing if it exists in Venues.
db.getCollection('things').aggregate([
{$lookup: {from: "venues",localField: "alias",foreignField: "floors.places.alias",as: "matches"}},
{
$replaceRoot: { newRoot: { $mergeObjects: [ { $arrayElemAt: [ "$matches", 0 ] }, "$$ROOT" ] } }
},
{ $project: { matches: 0 } }
])
I'm struggling with existing answers, which seem to change at MongoDB version 3.2, 3.4, 3.6, or 4.2 to include or not include $unwind, $pipeline, and other terms. Can someone explain how to get a sub-sub-subdocument aggregated like this? Thanks!

You can try this :
db.things.aggregate([
{
$lookup:
{
from: "venues",
let: { alias: "$alias" },
pipeline: [
{ $unwind: { path: "$floors", preserveNullAndEmptyArrays: true } },
{ $match: { $expr: { $in: ['$$alias', '$floors.places.alias'] } } },
/** Below stages are only if you've docs like doc 2 in Venues */
{ $addFields: { 'floors.places': { $filter: { input: '$floors.places', cond: { $eq: ['$$this.alias', '$$alias'] } } } } },
{ $group: { _id: '$_id', name: { $first: '$name' }, floors: { $push: '$floors' } } },
{$project : {'floors.places.alias': 1, _id :0}} // Optional
],
as: "matches"
}
}
])
Test : MongoDB-Playground

Since MongoDB v3.6, we may perform uncorrelated sub-queries which gives us more flexibility to join two collections.
Try this:
db.things.aggregate([
{
$lookup: {
from: "venues",
let: {
"alias": "$alias"
},
pipeline: [
{
$unwind: "$floors"
},
{
$project: {
_id: 0,
places: {
$filter: {
input: "$floors.places",
cond: {
$eq: [
"$$alias",
"$$this.alias"
]
}
}
}
}
},
{
$match: {
"places.0": {
$exists: true
}
}
},
{
$unset: "places.name"
}
],
as: "floors"
}
}
])
MongoPlayground

Related

Query nested array from document

Given the following document data in collection called 'blah'...
[
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"procedureCode" : "code1",
"description" : "Description 1",
"coding" : [
{
"system" : "ABC",
"code" : "L111"
},
{
"system" : "DEFG",
"code" : "S222"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"procedureCode" : "code2",
"description" : "Description 2",
"coding" : [
{
"system" : "ABC",
"code" : "L999"
},
{
"system" : "DEFG",
"code" : "X3333"
}
]
}
]
What I want to get is all of the coding elements where system is ABC for all parents, and an array of codes like so.
[
{ "code": "L111" },
{ "code": "L999" },
]
If I use db.getCollection('blah').find({"coding.system": "ABC"}) I get the parent document with any child in the coding array of ICD.
If I use...
db.getCollection("blah")
.find({ "coding.system": "ABC" })
.projection({ "coding.code": 1 })
I do get the parent documents which have a child with a system of "ABC", but the coding for "DEFG" seems to come along for the ride too.
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "S102"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "X382"
}
]
}
I have also tried experimenting with:
db.getCollection("blah").aggregate(
{ $unwind: "$coding" },
{ $match: { "system": "ICD" } }
);
.. as per this page: mongoDB query to find the document in nested array
... but go no where fast with that approach. i.e. no records at all.
What query do I need, please, to achieve something like this..?
[
{ "code": "L111" },
{ "code": "L999" },
...
]
or even better, this..?
[
"L111",
"L999",
...
]
db.collection.aggregate([
{
$match: { "coding.system": "ABC" }
},
{
$unwind: "$coding"
},
{
$match: { "coding.system": "ABC" }
},
{
$project: { code: "$coding.code" }
}
])
mongoplayground
db.collection.aggregate([
{
$match: { "coding.system": "ABC" }
},
{
$unwind: "$coding"
},
{
$match: { "coding.system": "ABC" }
},
{
$group: {
_id: null,
coding: { $push: "$coding.code" }
}
}
])
mongoplayground
Instead of $unwind, $match you can also use $filter:
db.collection.aggregate([
{ $match: { "coding.system": "ABC" } },
{
$project: {
coding: {
$filter: {
input: "$coding",
cond: { $eq: [ "$$this.system", "ABC" ] }
}
}
}
}
])

How to $lookup by avoiding null values in mongodb aggregate

In here i'm using $lookup to to a left join from other collections, the query works fine but when some records missing values it returns
errmsg : $in requires an array as a second argument, found: null
Heres the querying document structure :
{
"no" : "2020921008981",
"sale" : {
"soldItems" : [
{
"itemId" : "5b55ac7f0550de00210a3b24",
},
{
"itemId" : "5b55ac7f0550de00215584re",
}
],
"bills" : [
{
"billNo" : "2020921053467",
"insurancePlanId" : "160",
},
{
"billNo" : "2020921053467",
"insurancePlanId" : "170",
}
],
"visitIds" : [
5b55ac7f0550de00210a3b24, 5b55ac7f0550de00210a3b24
]
}
}
the query :
db.case.aggregate([
{
$lookup: {
from: "insurance",
let: { ipids: "$sale.bill.insurancePlanId" },
pipeline: [
{
$unwind: "$coveragePlans"
},
{
$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }
},
{
$project: { _id: 0, name: 1 }
}
],
as: "insurances"
}
},
{
$lookup: {
from: "item",
let: { iid: "$salesOrder.purchaseItems.itemRefId" },
pipeline: [
{
$match: {
$expr: {
$in: ["$_id", {
$map: {
input: "$$iid",
in: { $toObjectId: "$$this" }
}
}
]
}
}
}
],
as: "items"
}
}
])
insurance collection :
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "HIJKL"
"coveragePlans" : [
{
"_id" : "160",
"name" : "UVWZ",
},
{
"_id" : "161",
"name" : "LMNO",
}
]
},
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "WXYZ"
"coveragePlans" : [
{
"_id" : "169",
"name" : "5ABC",
},
{
"_id" : "170",
"name" : "4XYZ",
}
]
}
item collection :
{
"_id" : ObjectId("5b55ac7f0550de00210a3b24"),
"code" : "ABCDE"
},
{
"_id" : ObjectId("5b55ac7f0550de00215584re"),
"code" : "PQRST"
}
How to avoid this and do null checks effectively before pipe-lining into the next stages? Tried with { $match: { "fieldName": { $exists: true, $ne: null } } } but it returns mongo error regarding the format. If its the way to go please mention the stage i should put that.. Thanks in advance
You can use $ifNull operator
let: { ipids: {$ifNull:["$sale.bill.insurancePlanId", [] ]} },
EDIT: To skip empty "$salesOrder.purchaseItems.itemRefId" values
let: { iid: {$filter: {input:"$salesOrder.purchaseItems.itemRefId", cond:{$ne:["$$this", ""]}}} },
You can get around that by not using $in.
It looks like this $map is executed separately for every document in the items collection. If you were to run the map in an $addFields stage, you could used the simple form of lookup to match the added field to _id, which would automagically handle missing, null, and array.
Remove the added field with a $project stage if necessary.
db.case.aggregate([
{$lookup: {
from: "insurance",
let: { ipids: "$sale.bill.insurancePlanId" },
pipeline: [
{$unwind: "$coveragePlans"},
{$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }},
{$project: { _id: 0, name: 1 }}
],
as: "insurances"
}}
{$addFields:{
matchArray:{$map: {
input: "$$iid",
in: { $toObjectId: "$$this" }
}}
}},
{$lookup: {
from: "item",
localField: "matchArray",
foreignField:"_id",
as: "items"
}},
{$project:{
arrayField: 0
}}
])

Conditional lookup for nested localfield

I want to get user's friends and their information. But only wanna join for users where "status" equals to 2.
Here is my object
{
"user_id" : ObjectId("5d2f574b1c27807fd7eb133d"),
"relationship" : [
{
"user_id" : ObjectId("5d2f57661c27807fd7eb133e"),
"status" : 2
},
{
"user_id" : ObjectId("5d2f57c01c27807fd7eb133f"),
"status" : 1
}
]
}
My query
db.getCollection('relationships').aggregate([
{$match: {
user_id: ObjectId("5d2f574b1c27807fd7eb133d")}
},
{ $lookup: {
from: "users",
as: "users",
let: { id: "$_id" },
pipeline: [
{ $match: {
$expr: { $and: [
{ $eq: [ "$relationship.user_id", "$$id" ] },
{ $eq: [ "$relationship.status", 2 ] }
] }
} }
],
} }
])
expected output
{
"user_id" : ObjectId("5d2f574b1c27807fd7eb133d"),
"friends" : [
{
"_id" : ObjectId("5d2f57661c27807fd7eb133e"),
"name" : "Mike"
}
]
}
What's the proper way to do this ?

mongodb aggregation lookup with multiple conditions and ids

Having the following collections and data on them
db.a.insert([
{ "_id" : ObjectId("5b56989172ebcb00105e8f41"), "items" : [{id:ObjectId("5b56989172ebcb00105e8f41"), "instock" : 120}]},
{ "_id" : ObjectId("5b56989172ebcb00105e8f42"), "items" : [{id:ObjectId("5b56989172ebcb00105e8f42"), "instock" : 120}] },
{ "_id" : ObjectId("5b56989172ebcb00105e8f43"), "items" : [{ObjectId("5b56989172ebcb00105e8f43"), "instock" : 80}] }
])
db.b.insert([
{ "_id" : ObjectId("5b56989172ebcb00105e8f41")},
{ "_id" : ObjectId("5b56989172ebcb00105e8f42")},
{ "_id" : ObjectId("5b56989172ebcb00105e8f43")},
{ "_id" : ObjectId("5b56989172ebcb00105e8f44")},
{ "_id" : ObjectId("5b56989172ebcb00105e8f45")}
])
executing an lookup aggregation like
db.b.aggregate([
{
$lookup:
{
from: "b",
let: { bId: "$_id", qty: 100 },
pipeline: [
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$items.id", "$$bId" ] },
{ $gte: [ "$instock", "$$qty" ] }
]
}
}
}
],
as: "a"
}
}
])
does not bring any results in the expected lookup operation. Is there any restriction to use ObjectId as a comparison? In the official documentations does not say any about it and it works like a charm with any other kind of data type, like strings
I am not sure if this is a bug in mongodb or not but the query only works after adding an $unwind stage first.
db.b.aggregate([
{
$lookup:
{
from: "a",
let: { bId: "$_id", qty: 100 },
pipeline: [
{
$unwind: {
path: "$items"
}
},
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$items.id", "$$bId" ] },
{ $gte: [ "$items.instock", "$$qty" ] },
]
}
}
}
],
as: "a"
}
}
]);
Note: Join Conditions and Uncorrelated Sub-queries were added in mongo 3.6

Mongo how to $lookup with DBRef

I have a trouble(/(愒o愒)/~~). Suppose that collection A is
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"bid" : [
DBRef("B", ObjectId("582abcd85d2dfa67f44127e0")),
DBRef("B", ObjectId("582abcd85d2dfa67f44127e1"))
]
}
and Collection B:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e0"),
"status" : NumberInt(1),
"seq" : NumberInt(0)
},
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"status" : NumberInt(1),
"seq" : NumberInt(0)
}
I don't know how to $lookup the 'bid'. I tried
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid", foreignField: "_id", as: "bs"}},
]
)
and
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid.$id", foreignField: "_id", as: "bs"}},
]
)
but it doesn't work. Anybody can help? Thanks.
Actually, the other answer is wrong. It is possible to do a lookup on a DBref field within your aggregator, and you don't need mapreduce for that.
Solution
db.A.aggregate([
{
$project: {
B_fk: {
$map: {
input: {
$map: {
input:"$bid",
in: {
$arrayElemAt: [{$objectToArray: "$$this"}, 1]
},
}
},
in: "$$this.v"}},
}
},
{
$lookup: {
from:"B",
localField:"B_fk",
foreignField:"_id",
as:"B"
}
}
])
result
{
"_id" : ObjectId("59bb79df1e9c00162566f581"),
"B_fk" : null,
"B" : [ ]
},
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"B_fk" : [
ObjectId("582abcd85d2dfa67f44127e0"),
ObjectId("582abcd85d2dfa67f44127e1")
],
"B" : [
{
"_id" : ObjectId("582abcd85d2dfa67f44127e0"),
"status" : NumberInt("1"),
"seq" : NumberInt("0")
}
]
}
Short Explanation
Loop through the DBRefs with $map, break each DBref into an array, keep only the $id field, then get rid of the k:v format with $$this.v, keeping only the ObjectId and removing all the rest. You can now lookup on the ObjectId.
Step-by-step Explanation
Within the aggregator, a DBRef BSON type can be handled like an object, with two or three fields (ref, id, and db).
If you do:
db.A.aggregate([
{
$project: {
First_DBref_as_array: {$objectToArray:{$arrayElemAt:["$bid",0]}},
Second_DBref_as_array: {$objectToArray:{$arrayElemAt:["$bid",1]}},
}
},
])
This is the result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"First_DBref_as_array : [
{
"k" : "$ref",
"v" : "B"
},
{
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
],
"Second_DBref_as_array" : [
{
"k" : "$ref",
"v" : "B"
},
{
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
]
}
Once you have transformed a dbref into an array, you can get rid of the useless fields by querying only the value at index 1, like this:
db.A.aggregate([
{
$project: {
First_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
Second_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
}
},
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"First_DBref_as_array" : {
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
},
"Second_DBref_as_array" : {
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
}
Then you can get finally to the value you want by pointing to "$myvalue.v", just like this
db.A.aggregate([
{
$project: {
first_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
second_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
}
},
{
$project: {
first_DBref_as_ObjectId: "$first_DBref_as_array.v",
second_DBref_as_ObjectId: "$second_DBref_as_array.v"
}
}
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"first_DBref_as_ObjectId" : ObjectId("582abcd85d2dfa67f44127e0"),
"second_DBref_as_ObjectId" : ObjectId("582abcd85d2dfa67f44127e0")
}
Obviously, in a normal pipeline, you don't need all these redundant steps, using a nested $map, you can get to the same result in one go :
db.A.aggregate([
{
$project: {
B_fk: { $map : {input: { $map: { input:"$bid",
in: { $arrayElemAt: [{$objectToArray: "$$this"}, 1 ]}, } },
in: "$$this.v"}},
}
},
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"B_fk" : [
ObjectId("582abcd85d2dfa67f44127e0"),
ObjectId("582abcd85d2dfa67f44127e1")
]
}
I hope the explanation is clear enough, if not feel free to ask.
As of mongoDB 3.4, this is not possible. You can't use DBRef in the aggregation pipeline, except in the $match stage.
I strongly recommend you to get rid of DBRef and switch to manual references. However, if you really need to keep DBRef, here is an (ugly) solution:
first, create a new collection named "C", where DBRefs are replaced by their Ids using mapReduce:
db.A.mapReduce(
function() {
var key = this._id;
var value = [];
for ( var index = 0; index < this.bid.length; index++){
value.push(this.bid[index].$id);
}
emit(key, value);
},
function(key,values) {
return values;
},
{
"query": {},
"out": "C"
}
)
then, run your aggregation query on the new "C" collection:
db.C.aggregate([
{
$unwind:"$value"
},
{
$lookup:{
from:"B",
localField:"value",
foreignField:"_id",
as:"bs"
}
}
]);
output:
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"value":ObjectId("582abcd85d2dfa67f44127e0"),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e0"),
"status":1,
"seq":0
}
]
}{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"value":ObjectId("582abcd85d2dfa67f44127e1"),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"status":1,
"seq":0
}
]
}
Just in case someone is coming here in 2021:
Starting with MongoDB 4.3.3 the second query of the OP does work:
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid.$id", foreignField: "_id", as: "bs"}},
]
)
The result is:
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"bid":DBRef("B", "ObjectId("582abcd85d2dfa67f44127e0")),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e0")",
"status":1,
"seq":0
}
]
}{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"bid":DBRef("B", "ObjectId("582abcd85d2dfa67f44127e1")),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"status":1,
"seq":0
}
]
}
See SERVER-14466 for more information.