MongoDB - Aggregate (join two collection) with nested array as join key - mongodb

I have two collections. (updated)
I need to join them and return only a few fields.
As I understand, I should use aggregate and projection features.
I tried to find examples but couldn't find the right one; in my case the foreign key is placed in an array inside a nested field.
It is not easy for me, I’m new at this, and I couldn’t win against this MongoDB query.
In the code block below I wrote the simplified equivalent models (doc1, doc2) and expected results.
Does anyone have any ideas?
db.doc1.deleteMany({});
db.doc2.deleteMany({});
// two related documents
db.doc1.insert( [
{
"version" : 123456,
"doc" : {
"code":"A1",
"name":"some document A1",
"doc2CodeArray":[
{"code":"B01"},
{"code":"B02"},
{"code":"B03"},
{"code":"B04"},
{"code":"B05"},
{"code":"B06"}
]
}
},
{
"version" : 123457,
"doc" : {
"code":"A2",
"name":"some document A2",
"doc2CodeArray":[
{"code":"B07"},
{"code":"B08"},
{"code":"B09"},
{"code":"B10"},
{"code":"B11"},
{"code":"B12"}
]
}
},
{
"version" : 123457,
"doc" : {
"code":"A2",
"name":"some document A2",
"doc2CodeArray":null
}
}
]);
db.doc2.insert( [
{
"version" : 567890,
"doc" : {
"code":"B01",
"valueArray":[{"valueType":"int","valueData":"1"}],
"doc2Type":{"code":"C1"}
}
},
{"version" : 567890,"doc" : { "code":"B02", "valueArray":[{"valueType":"int","valueData":"2","isDefault":false}],"doc2Type":{"code":"C2","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B03", "valueArray":[{"valueType":"int","valueData":"3","isDefault":false}],"doc2Type":{"code":"C3","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B04", "valueArray":[{"valueType":"int","valueData":"4","isDefault":false}],"doc2Type":{"code":"C4","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B05", "valueArray":[{"valueType":"int","valueData":"5","isDefault":false}],"doc2Type":{"code":"C5","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B06", "valueArray":[{"valueType":"int","valueData":"6","isDefault":false},
{"valueType":"str","valueData":"F","isDefault":false}],"doc2Type":{"code":"C6","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B07", "valueArray":[{"valueType":"int","valueData":"1","isDefault":false}],"doc2Type":{"code":"C1","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B08", "valueArray":[{"valueType":"int","valueData":"2","isDefault":false}],"doc2Type":{"code":"C2","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B09", "valueArray":[{"valueType":"int","valueData":"3","isDefault":false}],"doc2Type":{"code":"C3","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B10", "valueArray":[{"valueType":"int","valueData":"4","isDefault":false}],"doc2Type":{"code":"C4","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B11", "valueArray":[{"valueType":"int","valueData":"5","isDefault":false}],"doc2Type":{"code":"C5","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B12", "valueArray":[{"valueType":"int","valueData":"6","isDefault":false}],"doc2Type":{"code":"C6","addInfo":"1234567890"}}},
]);
The result that I want
I need to join the collections and select only a few properties.
{
"doc":{code:"A1"},
"doc2Items":[
{"code":"C2", "value":{"value":"3"}},
{"code":"C3", "value":{"value":"4"}},
]
},
{
"doc":{code:"A2"},
"doc2Items":[
{"code":"C2", "value":{"value":"2"}},
{"code":"C3", "value":{"value":"3"}},
{"code":"C4", "value":{"value":"4"}},
]
}

https://mongoplayground.net/p/1-1SU8SgbTQ
// Join doc1 with doc2 through the codes nested in doc.doc2CodeArray, then
// flatten the result so the contents of "doc" become the root document.
db.doc1.aggregate([
  // 1. Attach every doc2 document whose doc.code equals one of the
  //    doc.doc2CodeArray[].code values; the matches are stored inside "doc".
  {
    "$lookup": {
      "from": "doc2",
      "localField": "doc.doc2CodeArray.code",
      "foreignField": "doc.code",
      "as": "doc.doc2Items"
    }
  },
  // 2. Promote the embedded "doc" object (now carrying doc2Items) to the root.
  { "$replaceRoot": { "newRoot": "$doc" } },
  // 3. Reduce each joined doc2 document to its type code and value array.
  {
    "$addFields": {
      "doc2Items": {
        "$map": {
          "input": "$doc2Items",
          "as": "item",
          "in": {
            "doc2TypeCode": "$$item.doc.doc2Type.code",
            "doc2ValueArray": "$$item.doc.valueArray"
          }
        }
      }
    }
  },
  // 4. The join keys are no longer needed in the output.
  { "$unset": "doc2CodeArray" }
])

1. $lookup - Join doc1 with doc2.
Pipeline:
1.1. $match - Match the doc2 documents whose doc.code is within the doc.doc2CodeArray.code array (from doc1).
1.2. $project - Shape the documents to be returned in doc2Items.
2. $project - Shape the final output document.
// Join doc1 with doc2 via a sub-pipeline and return only the selected fields.
db.doc1.aggregate([
  {
    $lookup: {
      from: "doc2",
      let: {
        // doc.doc2CodeArray may be null or absent. $in raises
        // "$in requires an array as a second argument" for a non-array value,
        // so fall back to an empty list, which simply matches nothing.
        doc2CodeArray: { $ifNull: ["$doc.doc2CodeArray.code", []] }
      },
      pipeline: [
        // Keep the doc2 documents whose code is referenced by this doc1 document.
        {
          $match: {
            $expr: {
              $in: ["$doc.code", "$$doc2CodeArray"]
            }
          }
        },
        // Shape each joined document before it is stored in doc2Items.
        {
          $project: {
            _id: 0,
            "doc2TypeCode": "$doc.doc2Type.code",
            "doc2ValueArray": "$doc.valueArray"
          }
        }
      ],
      as: "doc2Items"
    }
  },
  // Shape the final output document.
  {
    $project: {
      _id: 0,
      "doc1Code": "$doc.code",
      "doc1Name": "$doc.name",
      "doc2Items": "$doc2Items"
    }
  }
])
Sample Mongo Playground

Over the last few days I dived deep into reading.
And I have got an acceptable result.
I updated my question with an example of more relevant data, and I have given my solution in the example below.
// Join doc1 with doc2 and return, per doc1 document, only doc.code plus the
// type code and first value of the joined doc2 items of type C2, C3 or C4.
db.doc1.aggregate([
  {
    $match: {
      // Filter by doc.code prefix. "^" anchors the pattern to the start of the
      // string; without it the regex matches an "A" anywhere in the code.
      "doc.code": { $regex: "^A", $options: "i" },
      // Skip documents that have no codes to join on.
      "doc.doc2CodeArray": { $ne: null }
      //"doc.code": "A1"
    }
  },
  {
    $lookup: { // join doc1 & doc2
      from: "doc2",
      localField: "doc.doc2CodeArray.code",
      foreignField: "doc.code",
      as: "doc2Items"
    }
  },
  {
    $project: {
      _id: 0,
      "doc.code": 1,
      "doc2Items": { // items from doc2
        $map: { // not all props from doc2 are needed - reshape with $map
          "input": {
            $filter: { // keep only the items with the codes C2, C3, C4
              "input": "$doc2Items",
              "as": "d0",
              // $in replaces the chain of three $eq comparisons.
              "cond": { $in: ["$$d0.doc.doc2Type.code", ["C2", "C3", "C4"]] }
            }
          },
          "as": "d",
          "in": {
            "code": "$$d.doc.doc2Type.code",
            "value": {
              $arrayElemAt: [ // only the first record of the nested array
                {
                  $map: { // only valueData of each record of the nested array
                    "input": "$$d.doc.valueArray",
                    "as": "d2",
                    "in": {
                      "value": "$$d2.valueData"
                    }
                  }
                },
                0
              ]
            }
          }
        } // map
      }
    } // project
  }
])
Now I read the needed data two times faster than before.
I think I am beginning to understand the MongoDB query philosophy.
Thanks, everyone, for the answers and tips! 'Yong Shun' and 'qtxo' helped me find the correct solution.

Related

Use regex in expr in MongoDB aggregation pipeline

I have Country Table
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e0"),
"county" : "india"
},
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e1"),
"county" : "china"
}
And City Table
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e0"),
"county_name" : "India"
},
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e1"),
"county_name" : "China"
}
In the Country table the country name is in lowercase, and in the City table the country name is capitalized. So how can I use a LIKE-style (case-insensitive) condition to join both collections on the matching name?
E.g., get the data of india by matching it with India.
db.getCollection('countryTable').aggregate([
{
$lookup: {
from: "cityTable",
let: { county: "$county" },
pipeline: [{
$match: {
"$expr": {
"$regexMatch": {
"input": "$county_name",
"regex": "$$county",
"options": "i"
}
}
}
}],
as: "citydetails"
}
},
{ $unwind: "$citydetails" }
])
With MongoDB v4.0, you can do the $match in subpipeline with $toLower
// Case-insensitive join: both names are lower-cased before being compared.
db.countryTable.aggregate([
  {
    "$lookup": {
      "from": "cityTable",
      // Expose the country's name to the sub-pipeline.
      "let": { "countryName": "$county" },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$eq": [
                { "$toLower": "$county_name" },
                { "$toLower": "$$countryName" }
              ]
            }
          }
        }
      ],
      "as": "citydetails"
    }
  },
  // Emit one output document per matched city.
  { "$unwind": "$citydetails" }
])
Here is the Mongo playground for your reference.

How do I fetch only the first element from the array?

How do I fetch only the first element from the "topicsName" array?
Data I have input:
{
"_id" : ObjectId("606b7046a0ccf72222c00c2f"),
"groupId" : ObjectId("5f06cca74e51ba15f5167b86"),
"insertedAt" : "2021-04-05T20:17:10.144521Z",
"isActive" : true,
"staffId" : [
"606b6c34a0ccf72222c5a4df",
"606b6c48a0ccf722228aa035"
],
"subjectName" : "Maths",
"teamId" : ObjectId("6069a6a9a0ccf704e7f4b537"),
"updatedAt" : "2022-04-29T07:57:31.072067Z",
"syllabus" : [
{
"chapterId" : "626b9b94ae6cd2092024f3ee",
"chapterName" : "chap1",
"topicsName" : [
{
"topicId" : "626b9b94ae6cd2092024f3ef",
"topicName" : "1.1"
},
{
"topicId" : "626b9b94ae6cd2092024f3f0",
"topicName" : "1.2"
}
]
},
{
"chapterId" : "626b9b94ae6cd2092024f3f1",
"chapterName" : "chap2",
"topicsName" : [
{
"topicId" : "626b9b94ae6cd2092024f3f2",
"topicName" : "2.1"
},
{
"topicId" : "626b9b94ae6cd2092024f3f3",
"topicName" : "2.2"
}
]
}
]
}
The Query I used to try to fetch the element:- "topicId" : "626b9b94ae6cd2092024f3ef" from the
"topicsName" array.
db.subject_staff_database
.find(
{ _id: ObjectId("606b7046a0ccf72222c00c2f") },
{
syllabus: {
$elemMatch: {
chapterId: "626b9b94ae6cd2092024f3f1",
topicsName: { $elemMatch: { topicId: "626b9b94ae6cd2092024f3f2" } },
},
},
}
)
.pretty();
I was trying to fetch only the first element from the "topicsName" array, but it fetched both the elements in that array.
You can do the following in an aggregation pipeline.
$match to locate the documents that contain the given id
$reduce to flatten the syllabus and topicsName arrays
$filter to get the expected element
// Return only the topic entry with the requested topicId.
db.collection.aggregate([
  // Keep the documents that contain the topic somewhere in their syllabus.
  { $match: { "syllabus.topicsName.topicId": "626b9b94ae6cd2092024f3ef" } },
  // "$syllabus.topicsName" yields one topics array per chapter; concatenate
  // them into a single flat list.
  {
    $project: {
      result: {
        $reduce: {
          input: "$syllabus.topicsName",
          initialValue: [],
          in: { $concatArrays: ["$$value", "$$this"] }
        }
      }
    }
  },
  // Keep only the requested topic from the flattened list.
  {
    $project: {
      result: {
        $filter: {
          input: "$result",
          as: "topic",
          cond: { $eq: ["$$topic.topicId", "626b9b94ae6cd2092024f3ef"] }
        }
      }
    }
  }
])
Here is the Mongo playground for your reference.
Welcome Ganesh Sowdepalli,
You are not only asking to "fetch only the first element from the array", but to fetch only the matching element of a nested array property of an object item in array.
Edit: (according to #ray's comment)
One way to do it is using an aggregation pipeline:
// Return the matching chapter with only the matching topic inside it.
db.subject_staff_database.aggregate([
  { $match: { "_id": ObjectId("606b7046a0ccf72222c00c2f") } },
  // Keep only the chapter(s) with the requested chapterId.
  {
    $project: {
      syllabus: {
        $filter: {
          input: "$syllabus",
          as: "chapter",
          cond: { $eq: ["$$chapter.chapterId", "626b9b94ae6cd2092024f3f1"] }
        }
      }
    }
  },
  // One document per remaining chapter, so the topics can be filtered per chapter.
  { $unwind: "$syllabus" },
  // Inside the chapter, keep only the topic with the requested topicId.
  {
    $project: {
      "syllabus.topicsName": {
        $filter: {
          input: "$syllabus.topicsName",
          as: "topic",
          cond: { $eq: ["$$topic.topicId", "626b9b94ae6cd2092024f3f2"] }
        }
      },
      "syllabus.chapterId": 1,
      "syllabus.chapterName": 1,
      _id: 0
    }
  }
])
As you can see on this playground example.
If you want the actual first element, not by _id, look here on my first understanding to your question.
The aggregation pipeline allows us to do several operation on the results.
Since syllabus is an array that may contain more than one matching chapterId, we need to $filter it for the items we want.

Aggregation $filter is not working after $lookup

I am trying to filter data after the lookup operator. I am not getting the expected behaviour out of my query.
My gateway collection is
{ "_id" : "18001887", "mac_id" : "18001887", group_id: "0" }
{ "_id" : "18001888", "mac_id" : "18001888", group_id: "1" }
{ "_id" : "18001889", "mac_id" : "18001889", group_id: "0" }
My commands collection is
{
"_id" : ObjectId("615581dcb9ebca6c37eb39e4"),
"org_id" : 0,
"mac_id" : "18001887",
"config" : {
"user_info" : [
{
"user_id" : 1,
"user_pwd" : "123456",
"mapped_id" : 1
},
{
"user_id" : 2,
"user_pwd" : "123123",
"mapped_id" : 3
}
]
}
}
{
"_id" : ObjectId("615581dcb9ebca6c37eb39e4"),
"org_id" : 0,
"mac_id" : "18001889",
"config" : {
"slave_id" : 1
}
}
I want to fetch the commands of gateways with group_id = 0 and "config.user_info.mapped_id" = 1.
I wrote the below query but it doesn't seem to work
gateway_model.aggregate([
{
$match: {
group_id: "0"
},
},
{
$project: {
_id: 0,
mac_id: 1
}
},
{
$lookup: {
from: "commands",
localField: "mac_id",
foreignField: "mac_id",
as: "childs"
}
},
{
$project: {
mac_id: 1,
childs: {
$filter: {
"input": "$childs",
"as": "child",
"cond": {"$eq": ["$$child.config.user_info.mapped_id", 1]},
}
}
}
}
])
Above query returns gateways with group_id 0 and childs is an empty array.
The field user_info is array and you are checking equal-to condition in $filter operation, You can change your $filter condition as per below,
When we access mapped_id from array field $$child.config.user_info.mapped_id, it will return array of ids so we need to use $in condition
$ifNull to check if user_info field is not present then it will return blank array
$in operator to check is 1 in mapped_id's array
{
$project: {
mac_id: 1,
childs: {
$filter: {
"input": "$childs",
"as": "child",
"cond": {
"$in": [
1,
{ $ifNull: ["$$child.config.user_info.mapped_id", []] }
]
}
}
}
}
}
Playground
The second option, and the right way to handle this situation, is $lookup using a pipeline:
let to pass mac_id to pipeline
check $expr condition for mac_id
match mapped_id condition
// Fetch the gateways of group "0" together with their commands that have a
// user_info entry with mapped_id 1.
db.gateway.aggregate([
  { $match: { group_id: "0" } },
  {
    $lookup: {
      from: "commands",
      // Expose the gateway's mac_id to the sub-pipeline.
      let: { gatewayMac: "$mac_id" },
      pipeline: [
        {
          $match: {
            // Join condition between the command and the gateway.
            $expr: { $eq: ["$mac_id", "$$gatewayMac"] },
            // Plain query condition on the joined command documents.
            "config.user_info.mapped_id": 1
          }
        }
      ],
      as: "childs"
    }
  },
  { $project: { _id: 0, mac_id: 1, childs: 1 } }
])
Playground
If you want to filter user_info array then you can add one more stage after $match stage in $lookup stage,
{
$addFields: {
"config.user_info": {
$filter: {
input: "$config.user_info",
cond: { $eq: ["$$this.mapped_id", 1] }
}
}
}
}
Playground

How to $lookup by avoiding null values in mongodb aggregate

Here I'm using $lookup to do a left join from other collections. The query works fine, but when some records are missing values it returns
errmsg : $in requires an array as a second argument, found: null
Heres the querying document structure :
{
"no" : "2020921008981",
"sale" : {
"soldItems" : [
{
"itemId" : "5b55ac7f0550de00210a3b24",
},
{
"itemId" : "5b55ac7f0550de00215584re",
}
],
"bills" : [
{
"billNo" : "2020921053467",
"insurancePlanId" : "160",
},
{
"billNo" : "2020921053467",
"insurancePlanId" : "170",
}
],
"visitIds" : [
5b55ac7f0550de00210a3b24, 5b55ac7f0550de00210a3b24
]
}
}
the query :
db.case.aggregate([
{
$lookup: {
from: "insurance",
let: { ipids: "$sale.bill.insurancePlanId" },
pipeline: [
{
$unwind: "$coveragePlans"
},
{
$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }
},
{
$project: { _id: 0, name: 1 }
}
],
as: "insurances"
}
},
{
$lookup: {
from: "item",
let: { iid: "$salesOrder.purchaseItems.itemRefId" },
pipeline: [
{
$match: {
$expr: {
$in: ["$_id", {
$map: {
input: "$$iid",
in: { $toObjectId: "$$this" }
}
}
]
}
}
}
],
as: "items"
}
}
])
insurance collection :
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "HIJKL"
"coveragePlans" : [
{
"_id" : "160",
"name" : "UVWZ",
},
{
"_id" : "161",
"name" : "LMNO",
}
]
},
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "WXYZ"
"coveragePlans" : [
{
"_id" : "169",
"name" : "5ABC",
},
{
"_id" : "170",
"name" : "4XYZ",
}
]
}
item collection :
{
"_id" : ObjectId("5b55ac7f0550de00210a3b24"),
"code" : "ABCDE"
},
{
"_id" : ObjectId("5b55ac7f0550de00215584re"),
"code" : "PQRST"
}
How can I avoid this and do null checks effectively before piping into the next stages? I tried { $match: { "fieldName": { $exists: true, $ne: null } } } but it returns a Mongo error regarding the format. If that's the way to go, please mention the stage where I should put it. Thanks in advance.
You can use $ifNull operator
let: { ipids: {$ifNull:["$sale.bill.insurancePlanId", [] ]} },
EDIT: To skip empty "$salesOrder.purchaseItems.itemRefId" values
let: { iid: {$filter: {input:"$salesOrder.purchaseItems.itemRefId", cond:{$ne:["$$this", ""]}}} },
You can get around that by not using $in.
It looks like this $map is executed separately for every document in the items collection. If you were to run the map in an $addFields stage, you could used the simple form of lookup to match the added field to _id, which would automagically handle missing, null, and array.
Remove the added field with a $project stage if necessary.
// Left-join insurances and items onto each case without failing on records
// whose join keys are missing or null.
db.case.aggregate([
  {$lookup: {
    from: "insurance",
    // $in requires an array as its second argument; default to [] when the
    // case has no insurance plan ids.
    let: { ipids: { $ifNull: ["$sale.bill.insurancePlanId", []] } },
    pipeline: [
      {$unwind: "$coveragePlans"},
      {$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }},
      {$project: { _id: 0, name: 1 }}
    ],
    as: "insurances"
  }},
  // Convert the item reference ids to ObjectIds once per case document.
  // A "$$" variable only exists inside a $lookup "let" scope, so the field
  // path is referenced directly here.
  {$addFields: {
    matchArray: {$map: {
      input: "$salesOrder.purchaseItems.itemRefId",
      in: { $toObjectId: "$$this" }
    }}
  }},
  // Simple-form lookup: no $in is involved, so a missing or null matchArray
  // does not raise an error.
  {$lookup: {
    from: "item",
    localField: "matchArray",
    foreignField: "_id",
    as: "items"
  }},
  // Remove the helper field added above.
  {$project: {
    matchArray: 0
  }}
])

How to make lookup between two collections when an item in an array exists in the other collection?

In Lookup with a pipeline, I would like to get the linked records from an array in the parent document.
// Orders
[{
"_id" : ObjectId("5b5b91a25c68de2538620689"),
"Name" : "Test",
"Products" : [
ObjectId("5b5b919a5c68de2538620688"),
ObjectId("5b5b925a5c68de2538621a15")
]
}]
// Products
[
{
"_id": ObjectId("5b5b919a5c68de2538620688"),
"ProductName": "P1"
},
{
"_id": ObjectId("5b5b925a5c68de2538621a15"),
"ProductName": "P2"
}
,
{
"_id": ObjectId("5b5b925a5c68de2538621a55"),
"ProductName": "P3"
}
]
How to make a lookup between Orders and Products when Products field is an array!
I tried this query
db.getCollection("Orders").
aggregate(
[
{
$lookup:
{
from: "Products",
let: { localId: "$_id" , prods: "$Products" },
pipeline: [
{
"$match":
{
"_id" : { $in: "$$prods" }
}
},
{
$project:
{
"_id": "$_id",
"name": "$prods" ,
}
}
],
as: "linkedData"
}
},
{
"$skip": 0
},
{
"$limit": 1
},
]
)
This is not working because $in is expecting an array, and even though $$prods is an array, it is not accepting it.
Is my whole approach correct? How to make this magic join ?
You were going in the right direction. The only thing you missed is using $expr with the $in aggregation operator, which allows matching on fields of the document.
// Attach to each order the products whose _id is listed in its Products array.
db.getCollection("Orders").aggregate([
  {
    "$lookup": {
      "from": "Products",
      // Only the product id list is used by the sub-pipeline. $ifNull guards
      // orders without a Products array, for which $in would raise an error.
      "let": { "prods": { "$ifNull": ["$Products", []] } },
      "pipeline": [
        // $expr is required to compare a field with a "let" variable.
        { "$match": { "$expr": { "$in": ["$_id", "$$prods"] } } },
        { "$project": { "_id": 1, "name": "$ProductName" } }
      ],
      "as": "linkedData"
    }
  },
  { "$skip": 0 },
  { "$limit": 1 }
])
See the docs here
You just need regular $lookup, the documentation states that:
If your localField is an array, you may want to add an $unwind stage to your pipeline. Otherwise, the equality condition between the localField and foreignField is foreignField: { $in: [ localField.elem1, localField.elem2, ... ] }.
So for below aggregation:
// Simple-form lookup: the array of ids in "Products" is matched element by
// element against Products._id, and the field is overwritten with the
// matching product documents.
db.Orders.aggregate([
  {
    "$lookup": {
      "from": "Products",
      "localField": "Products",
      "foreignField": "_id",
      "as": "Products"
    }
  }
])
you'll get following result for your sample data:
{
"_id" : ObjectId("5b5b91a25c68de2538620689"),
"Name" : "Test",
"Products" : [
{
"_id" : ObjectId("5b5b919a5c68de2538620688"),
"ProductName" : "P1"
},
{
"_id" : ObjectId("5b5b925a5c68de2538621a15"),
"ProductName" : "P2"
}
]
}
Have you tried $unwind before the lookup? Use $unwind to break up the array and then do the lookup.