Mongodb $lookup with nested document - mongodb

I am trying to create a join using mongo's lookup. I have these three collections.
orderTracking
{
_id: ObjectId("59fb7815b3b8429f4750b0df"),
itemName : "Hamam Soap",
TrackLocation: [{locationId: 1, at:"2017-10-11"},
{locationId: 2,at:"2017-10-13"}],
userId : 12,
price: 20
}
locationType
{
_id: ObjectId("59b2111345cb72345a35fefd"),
locationId : 1,
productTypeName: "Warehouse"
},{
_id: ObjectId("59af8ce445cb72345a35feea"),
locationId : 2,
productTypeName: "On Transit"
}
User
{
_id: ObjectId("59a504eb6171b554c02292a9"),
"userId":12,
"userName" : "Shahabaz Shafi",
"dateOfBirth" : "1992-01-01",
"addres": {
"country" : "India",
"state" : "Karnataka",
"city" : "Bengaluru"
}
}
and I am trying to flatten them into this kind of output.
{
"userName" : "Shahabaz Shafi",
"userId":12,
"dateOfBirth" : "1992-01-01",
"country" : "India",
"state" : "Karnataka",
"city" : "Bengaluru",
"locationType" : [ {productTypeName: "Warehouse",at:"2017-10-11"}, {productTypeName: "On Transit",at:"2017-10-13"}]
}
Edit: 15-11-2018 Updated output
Made some changes to the output columns
{
"userName":"Shahabaz Shafi",
"userId":12,
"dateOfBirth":"1992-01-01",
"country":"India",
"state":"Karnataka",
"city":"Bengaluru",
"items":[
{
"itemName":"Hamam Soap",
"userId":12,
"price":20,
"TrackLocation":[
{
"locationId":1,
"at":"2017-10-11",
"productTypeName":"Warehouse"
},
{
"locationId":2,
"at":"2017-10-13",
"productTypeName":"On Transit"
}
]
}
]
}
How do I approach this ?
PS : I am also using compass

You can use below aggregation with mongodb 3.6 and above
// Flatten each user together with the tracking history of their orders.
// The `let`/`pipeline` form of $lookup used here needs MongoDB 3.6 or newer.
db.User.aggregate([
  {
    // Join the user's orders; the sub-pipeline emits one row per tracked location.
    $lookup: {
      from: "orderTracking",
      let: { userId: "$userId" },
      pipeline: [
        // Keep only the orders that belong to the outer user.
        { $match: { $expr: { $eq: ["$userId", "$$userId"] } } },
        // One document per TrackLocation entry.
        { $unwind: "$TrackLocation" },
        {
          // Resolve the entry's locationId to its locationType document.
          $lookup: {
            from: "locationType",
            let: { location: "$TrackLocation.locationId" },
            pipeline: [
              { $match: { $expr: { $eq: ["$locationId", "$$location"] } } }
            ],
            as: "locationType"
          }
        },
        {
          // Reduce each row to the location name and its timestamp.
          $project: {
            _id: 0,
            productTypeName: { $arrayElemAt: ["$locationType.productTypeName", 0] },
            at: "$TrackLocation.at"
          }
        }
      ],
      as: "locationType"
    }
  },
  // Hoist the fields of the nested `addres` document to the top level ...
  { $replaceRoot: { newRoot: { $mergeObjects: ["$addres", "$$ROOT"] } } },
  // ... and drop the nested copy that $$ROOT carried along.
  { $project: { addres: 0 } }
])
Output
[
{
"_id": ObjectId("59a504eb6171b554c02292a9"),
"city": "Bengaluru",
"country": "India",
"dateOfBirth": "1992-01-01",
"locationType": [
{
"at": "2017-10-11",
"productTypeName": "Warehouse"
},
{
"at": "2017-10-13",
"productTypeName": "On Transit"
}
],
"state": "Karnataka",
"userId": 12,
"userName": "Shahabaz Shafi"
}
]

Related

How to iterate list in mongodb $lookup and pipeline

I have two collections, i.e. parent and childnodes.
{
"_id" : "5e6cd8c1996ddf1c28e14505",
"parentList" : [
{
"_id" : "5e6c70e8996ddf1c28e14504",
"startDate" : "2020-02-25T14:01:58.697Z",
"active_id" : "child_vesrion_1",
"child_id" : "5e5e2cd4e972a95b6c32b5bf30"
},
{
"_id" : "5e6c70e8996ddf1c28e14506",
"startDate" : "2020-02-25T14:01:58.697Z",
"active_id" : "child_vesrion_1",
"child_id" : "5e5e2cd4e972a95b6c32b5bf31"
}
]
}
And childnodes are;
{
"_id" : "5e5e2cd4e972a95b6c32b5bf31",
"startDate" : "2020-03-25T14:01:58.697Z",
"endDate" : null,
"child_vesrion_1" : {
"childName" : "test3",
"createdDate" : "2020-02-25T14:01:58.697Z",
"text" : "test3 text",
"type" : "test3 type"
},
"child_vesrion_2" : {
"childName" : "Test4",
"createdDate" : "2020-02-25T14:01:58.697Z",
"text" : "test4 text",
"type" : "test4 type"
},
"active" : "child_vesrion_1"
},
{
"_id" : "5e5e2cd4e972a95b6c32b5bf30",
"startDate" : "2020-02-25T14:01:58.697Z",
"endDate" : null,
"child_vesrion_1" : {
"childName" : "test1",
"createdDate" : "2020-02-25T14:01:58.697Z",
"text" : "test1 text",
"type" : "test1 type"
},
"child_vesrion_2" : {
"childName" : "test2",
"createdDate" : "2020-02-25T14:01:58.697Z",
"text" : "test2 text",
"type" : "test2 type"
},
"active" : "child_vesrion_1"
}
Here is my query;
db.parent.aggregate([
{ $match: { "_id": "5e6cd8c1996ddf1c28e14505" } },
{
$lookup: {
from: "childnodes",
let: { "child_id": "$parentList.child_id", "activeid": "$parentList.active_id" },
pipeline: [
{ $match: { "$expr": { $eq: ["$_id", "$$child_id"] } } },
{
$project: {
"child_id": "$_id",
"start_date": "$startDate",
"current_version_Key": "$active",
"active_child_name": {
"$reduce": {
"input": { "$objectToArray": "$$ROOT" },
"initialValue": "",
"in": {
"$cond": [{ "$eq": ["$$this.k", "$$activeid"] },
"$$this.v.childName",
"$$value"
]
}
}
},
"text": {
"$reduce": {
"input": { "$objectToArray": "$$ROOT" },
"initialValue": "",
"in": {
"$cond": [{ "$eq": ["$$this.k", "$$activeid"] },
"$$this.v.text",
"$$value"
]
}
}
},
"type": {
"$reduce": {
"input": { "$objectToArray": "$$ROOT" },
"initialValue": "",
"in": {
"$cond": [{ "$eq": ["$$this.k", "$$activeid"] },
"$$this.v.type",
"$$value"
]
}
}
}
}
}
],
as: "finalList",
},
},
{
$project: {
parentList: 0,
},
},
]);
I am expecting results like;
{
"_id": "5e6cd8c1996ddf1c28e14505",
"finalList": [
{
"child_id": "5e5e2cd4e972a95b6c32b5bf30",
"start_date": "2020-02-25T14:01:58.697Z",
"current_version_Key": "child_vesrion_1",
"active_child_name": "test1",
"text": "test1 text",
"type": "test1 type",
},
{
"child_id": "5e5e2cd4e972a95b6c32b5bf31",
"start_date": "2020-02-25T14:01:58.697Z",
"current_version_Key": "child_vesrion_1",
"active_child_name": "test3",
"text": "test3 text",
"type": "test3 type",
}
]
}
But I am not getting anything in finalList; it is returning an empty array.
I have tried different approaches, but none of them helped. I am a bit new to MongoDB, so any help on this would be appreciated.
You were so close. Your parentList is an array, so when you define child_id and activeid inside the $lookup's let, they are arrays as well, and $eq never matches a single _id against an array.
If we add an $unwind before the $lookup and a $group at the end, your query works as expected.
Try this one:
// Resolve every parentList entry to its child node and return the active
// version's details collected under `finalList`.
db.parent.aggregate([
  // Restrict the work to the single parent document of interest.
  {
    $match: {
      "_id": "5e6cd8c1996ddf1c28e14505"
    }
  },
  // parentList is an array: emit one document per entry so that the `let`
  // variables below are scalars rather than arrays.
  {
    $unwind: "$parentList"
  },
  {
    $lookup: {
      from: "childnodes",
      let: {
        "child_id": "$parentList.child_id",
        "activeid": "$parentList.active_id"
      },
      pipeline: [
        // Pick the child node referenced by this parentList entry.
        {
          $match: {
            "$expr": {
              $eq: [
                "$_id",
                "$$child_id"
              ]
            }
          }
        },
        // Turn the child document into k/v pairs and keep the pair whose key
        // equals the active version id (e.g. "child_vesrion_1").
        // $filter is given no `as`, so the element variable is `$$this`.
        {
          $addFields: {
            child_version: {
              $arrayElemAt: [
                {
                  $filter: {
                    input: {
                      $objectToArray: "$$ROOT"
                    },
                    cond: {
                      $eq: [
                        "$$this.k",
                        "$$activeid"
                      ]
                    }
                  }
                },
                0
              ]
            }
          }
        },
        // Shape the result; `.v` is the sub-document of the active version.
        {
          $project: {
            "_id": 0,
            "child_id": "$_id",
            "start_date": "$startDate",
            "current_version_Key": "$active",
            "active_child_name": "$child_version.v.childName",
            "text": "$child_version.v.text",
            "type": "$child_version.v.type"
          }
        }
      ],
      as: "finalList"
    }
  },
  // Each lookup yields a one-element array; flatten it before regrouping.
  {
    $unwind: "$finalList"
  },
  // Reassemble one document per parent. The array is named `finalList`
  // (not `parentList`) so the output matches the shape asked for.
  {
    $group: {
      _id: "$_id",
      finalList: {
        $push: "$finalList"
      }
    }
  }
])
MongoPlayground

How to push all values in single array in mongodb

Colleges
/* 1 createdAt:5/9/2019, 7:00:04 PM*/
{
"_id" : ObjectId("5cd42b5c65b41027845938ae"),
"clgID" : "100",
"name" : "Anna University"
},
/* 2 createdAt:5/9/2019, 7:00:04 PM*/
{
"_id" : ObjectId("5cd42b5c65b41027845938ad"),
"clgID" : "200",
"name" : "National"
}
Subjects:
/* 1 createdAt:5/9/2019, 7:03:24 PM*/
{
"_id" : ObjectId("5cd42c2465b41027845938b0"),
"name" : "Hindi",
"members" : {
"student" : [
"123"
]
},
"college" : {
"collegeID" : "100"
}
},
/* 2 createdAt:5/9/2019, 7:03:24 PM*/
{
"_id" : ObjectId("5cd42c2465b41027845938af"),
"name" : "English",
"members" : {
"student" : [
"456",
"789"
]
},
"college" : {
"collegeID" : "100"
}
}
Here I have two collections, and I want to join them on clgID from the Colleges collection and college.collegeID from the Subjects collection; then I want to take the members.student values and push them into a single array per college.collegeID.
My Expected Output
{
"GroupDetails" : [ ],
"clgName" : "National"
},
{
"GroupDetails" : [
"123",
"456",
"789"
],
"clgName" : "Anna University"
}
My Code
db.Colleges.aggregate([
{ $match : { "clgID" : { $in : ["100", "200"] } } },
{ $lookup: { from: "Subjects", localField: "clgID", foreignField: "college.collegeID", as: "GroupDetails" } },
//{ $unwind: "$GroupDetails" },
{ $project: { '_id' : false, 'clgName' : '$name', 'GroupDetails.members.student' : true } }
])
I am getting like this
/* 1 */
{
"GroupDetails" : [ ],
"clgName" : "National"
},
/* 2 */
{
"GroupDetails" : [
{
"members" : {
"student" : [
"456"
]
}
},
{
"members" : {
"student" : [
"123"
]
}
}
],
"clgName" : "Anna University"
}
You can use below aggregation with mongodb 3.6 and above
// Collect the student ids of all subjects of each college into one flat
// array. Uses the `let`/`pipeline` form of $lookup (MongoDB 3.6+).
db.Colleges.aggregate([
  { "$match": { "clgID": { "$in": ["100", "200"] } } },
  { "$lookup": {
    "from": "Subjects",
    "let": { "clgId": "$clgID" },
    "pipeline": [
      // Subjects that belong to the outer college.
      { "$match": { "$expr": { "$eq": ["$$clgId", "$college.collegeID"] } } },
      // One row per college holding an array of student arrays.
      { "$group": {
        "_id": "$college.collegeID",
        "groupDetails": { "$push": "$members.student" }
      }},
      // Flatten the array of arrays into a single array.
      { "$project": {
        "groupDetails": {
          "$reduce": {
            "input": "$groupDetails",
            "initialValue": [],
            "in": { "$concatArrays": ["$$this", "$$value"] }
          }
        }
      }}
    ],
    "as": "clg"
  }},
  // Keep colleges without any subject (their `clg` array is empty).
  { "$unwind": { "path": "$clg", "preserveNullAndEmptyArrays": true } },
  // Emit exactly the requested shape: no _id, `clgName` and `GroupDetails`,
  // with an empty array for colleges that have no subjects.
  { "$project": {
    "_id": 0,
    "clgName": "$name",
    "GroupDetails": { "$ifNull": ["$clg.groupDetails", []] }
  }}
])
MongoPlayground
Or with MongoDB 3.4, which has $reduce but not the pipeline form of $lookup
// Same result using the localField/foreignField form of $lookup.
db.Colleges.aggregate([
  { "$match": { "clgID": { "$in": ["100", "200"] }}},
  { "$lookup": {
    "from": "Subjects",
    "localField": "clgID",
    "foreignField": "college.collegeID",
    "as": "clg"
  }},
  // One row per joined subject; colleges without subjects are kept.
  { "$unwind": { "path": "$clg", "preserveNullAndEmptyArrays": true }},
  // Regroup per college, gathering the student arrays of its subjects.
  { "$group": {
    "_id": { "clgId": "$clg.college.collegeID", "_id": "$_id" },
    "groupDetails": { "$push": "$clg.members.student" },
    "clgName": { "$first": "$name" }
  }},
  // Flatten the array of arrays and emit exactly the requested shape:
  // no _id, `clgName` and `GroupDetails`.
  { "$project": {
    "_id": 0,
    "clgName": 1,
    "GroupDetails": {
      "$reduce": {
        "input": "$groupDetails",
        "initialValue": [],
        "in": { "$concatArrays": ["$$this", "$$value"] }
      }
    }
  }}
])
MongoPlayground

mongo aggregation framework group by quarter/half year/year

I have a database with this schema structure :
{
"name" : "Carl",
"city" : "paris",
"time" : "1-2018",
"notes" : [
"A",
"A",
"B",
"C",
"D"
]
}
And this query using the aggregation framework :
db.getCollection('collection').aggregate(
[{
"$match": {
"$and": [{
"$or": [ {
"time": "1-2018"
}, {
"time": "2-2018"
} ]
}, {
"name": "Carl"
}, {
"city": "paris"
}]
}
}, {
"$unwind": "$notes"
}, {
"$group": {
"_id": {
"notes": "$notes",
"time": "$time"
},
"count": {
"$sum": 1
}
}
}
, {
"$group": {
"_id": "$_id.time",
"count": {
"$sum": 1
}
}
}, {
"$project": {
"_id": 0,
"time": "$_id",
"count": 1
}
}])
It is working correctly and I'm getting these results:
{
"count" : 4.0,
"time" : "2-2018"
}
{
"count" : 4.0,
"time" : "1-2018"
}
My issue is that I'd like to keep the same match stage but group by quarter.
Here is the result I'd like to have:
{
"count" : 8.0,
"time" : "1-2018" // here quarter 1
}
Thanks

Rewind data of two nested array field after $unwind and $lookup and $filter on date range in $project

{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("591068be1f4c6c79a442a788"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e5")
},
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("593a33dccfaa652df543d924"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e4")
}
]
},
{
"_id" : ObjectId("5944cb7142a04740357020b9"),
"sold_at" : ISODate("2017-06-17T06:25:53.332Z"),
"sold_to" : ObjectId("5927d4a59e58ba0c61066f3b"),
"sold_products" : [
{
"product_dp" : 500,
"quantity" : 1,
"price" : 5650,
"product_id" : ObjectId("593191ed53a2741dd9bffeb5"),
"_id" : ObjectId("5944cb7142a04740357020ba")
}
]
}
]
}
I have a User schema like this. I want the details of each product_id reference, with a date-range search criterion on the sold_at date field.
My expected data looks like the following when I search for sold_at on 2017-06-11:
{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id": {
_id:ObjectId("hsfgg123412yh3gy1u2g3"),
name: "Product1",
code: "FG0154"
},
}
]
}
]
}
The product details need to be populated in product_id, and the sales_history array needs to be filtered by date range.
You can try below aggregation query.
$filter sales history on date range followed by $unwinding sales history & sold_products.
$lookup sold_products to get the product details.
$group back sold_products & sales history
// Filter sales_history to a date range, populate each sold product's
// product_id with the referenced product document, then rebuild the
// original nesting.
db.collection.aggregate([
  // Keep only the sales_history entries sold on 2017-06-11 (UTC).
  {
    "$project": {
      "name": 1,
      "sales_history": {
        "$filter": {
          "input": "$sales_history",
          "as": "history",
          "cond": {
            "$and": [
              {
                "$gte": [
                  "$$history.sold_at",
                  ISODate("2017-06-11T00:00:00.000Z")
                ]
              },
              {
                "$lt": [
                  "$$history.sold_at",
                  ISODate("2017-06-12T00:00:00.000Z")
                ]
              }
            ]
          }
        }
      }
    }
  },
  // One document per sale, then one per sold product, so that $lookup
  // sees a single product_id at a time.
  {
    "$unwind": "$sales_history"
  },
  {
    "$unwind": "$sales_history.sold_products"
  },
  {
    "$lookup": {
      // `lookupcollection` is a placeholder: it must be a variable holding
      // the name of the product collection (or be replaced by that name
      // as a string).
      "from": lookupcollection,
      "localField": "sales_history.sold_products.product_id",
      "foreignField": "_id",
      "as": "sales_history.sold_products.product_id"
    }
  },
  // $lookup always yields an array; unwrap it so product_id becomes the
  // embedded product document. Line items whose product no longer exists
  // are kept (without a product_id) rather than silently dropped.
  {
    "$unwind": {
      "path": "$sales_history.sold_products.product_id",
      "preserveNullAndEmptyArrays": true
    }
  },
  // Rebuild sold_products per sale ...
  {
    "$group": {
      "_id": {
        "_id": "$_id",
        "sales_history_id": "$sales_history._id"
      },
      "name": {
        "$first": "$name"
      },
      "sold_at": {
        "$first": "$sales_history.sold_at"
      },
      "sold_to": {
        "$first": "$sales_history.sold_to"
      },
      "sold_products": {
        "$push": "$sales_history.sold_products"
      }
    }
  },
  // ... and then sales_history per user.
  {
    "$group": {
      "_id": "$_id._id",
      "name": {
        "$first": "$name"
      },
      "sales_history": {
        "$push": {
          "_id": "$_id.sales_history_id",
          "sold_at": "$sold_at",
          "sold_to": "$sold_to",
          "sold_products": "$sold_products"
        }
      }
    }
  }
]);

display corresponding fields in mongo in a list

This is my stored data:
{ "_id" : ObjectId("57080a7b01351177a4113f63"), "title" : "Data Scientist", "url" : "https://www.Pinterest.com/jobs/732?t=nu6xow", "timestamp" : "2016-04-08 19:46:03", "company" : "Pinterest", "state" : " CA", "todays_date" : "04/08/2016", "city_name" : "San+Francisco", "location" : "San Francisco, CA", "team" : "T0BT323QS", "search_word" : "Data+scientist"}
{ "_id" : ObjectId("57080a7b01351177a4113f64"), "title" : "Director of Analytics / Data Mining", "url" : "http://www.Pinterest.com/careers-position-data-mining-leader", "timestamp" : "2016-04-08 19:46:03", "company" : "Pinterest", "state" : " CA", "todays_date" : "04/08/2016", "city_name" : "San+Francisco", "location" : "Silicon Valley, CA", "team" : "T0BT323QS", "search_word" : "Data+scientist"}
{ "_id" : ObjectId("57080a7d01351177a4113f65"), "title" : "Senior Real World Data Scientist", "url" : "http://www.Pinterest.com/careers/detail/00443369/Senior-Real-World-Data-Scientist?src=JB-12568", "timestamp" : "2016-04-08 19:46:05", "company" : "Pinterest", "state" : " CA", "todays_date" : "04/08/2016", "city_name" : "San+Francisco", "location" : "South San Francisco, CA", "team" : "T0BT323QS", "search_word" : "Data+scientist"}
This is my query:
db.Books.aggregate([{$match:{"timestamp":{
$gte: "2016-04-08 19:46:03", $lt: "2016-04-08 19:46:06"}}}
,{ "$group": {
"_id": "$company",
"count": { "$sum": 1 },
"urls": {
"$addToSet": "$url"
}
}},
{ "$sort": { "count": -1 } },
{ "$limit": 10 },
{ "$project": {
"count": 1,
"urls": { "$slice": ["$urls",0, 3] }
}}
])
This is the output:
{
"_id" : "Pinterest",
"urls" : [
"https://www.Pinterest.com/jobs/732?t=nu6xow",
"http://www.Pinterest.com/careers-position-data-mining-leader",
"http://www.Pinterest.com/careers/detail/00443369/Senior-Real-World-Data-Scientist?src=JB-12568"
]
}
However, along with "url" I want it to display the corresponding "title" and "location" fields. Something like this:
{
"_id" : "Pinterest",
"urls" : [
[
"https://www.Pinterest.com/jobs/732?t=nu6xow",
"Data Scientist","San Francisco, CA"
],[
"http://www.Pinterest.com/careers-position-data-mining-leader",
"Director of Analytics / Data Mining","Silicon Valley, CA"
],[
"http://www.Pinterest.com/careers/detail/00443369/Senior-Real-World-Data-Scientist?src=JB-12568",
"Senior Real World Data Scientist",
"South San Francisco, CA"
]
]}
For the MongoDB 2.6 through 3.2 releases, you would need some help from $map:
// Per company: count the matching postings and collect
// [url, title, location] arrays, keeping three of them for the ten
// companies with the most postings.
db.Books.aggregate([
  {
    $match: {
      timestamp: {
        $gte: "2016-04-08 19:46:03",
        $lt: "2016-04-08 19:46:06"
      }
    }
  },
  {
    $group: {
      _id: "$company",
      count: { $sum: 1 },
      urls: {
        $push: {
          // Build a three-element array by mapping over placeholder
          // labels: "A" -> url, "B" -> title, anything else -> location.
          $map: {
            input: ["A", "B", "C"],
            as: "el",
            in: {
              $cond: [
                { $eq: ["$$el", "A"] },
                "$url",
                {
                  $cond: [
                    { $eq: ["$$el", "B"] },
                    "$title",
                    "$location"
                  ]
                }
              ]
            }
          }
        }
      }
    }
  },
  { $sort: { count: -1 } },
  { $limit: 10 },
  {
    $project: {
      count: 1,
      // Start at index 0 and keep at most three entries.
      urls: { $slice: ["$urls", 0, 3] }
    }
  }
])
And that is how you would get each item notated as an array.
You probably really should be doing this though instead:
// Per company: count the matching postings and collect
// { url, title, location } sub-documents, keeping three of them for the
// ten companies with the most postings.
db.Books.aggregate([
  {
    $match: {
      timestamp: {
        $gte: "2016-04-08 19:46:03",
        $lt: "2016-04-08 19:46:06"
      }
    }
  },
  {
    $group: {
      _id: "$company",
      count: { $sum: 1 },
      urls: {
        // Each entry keeps its fields addressable by key.
        $push: {
          url: "$url",
          title: "$title",
          location: "$location"
        }
      }
    }
  },
  { $sort: { count: -1 } },
  { $limit: 10 },
  {
    $project: {
      count: 1,
      // Start at index 0 and keep at most three entries.
      urls: { $slice: ["$urls", 0, 3] }
    }
  }
])
Since it does actually identify the fields by key. But if for some reason you prefer the array format, then you can do it that way.
For $addToSet then just replace the $push with $addToSet, but if not all fields are unique, then $group on the "url" property first:
// Variant that de-duplicates by url before building the per-company list.
db.Books.aggregate([
  {
    $match: {
      timestamp: {
        $gte: "2016-04-08 19:46:03",
        $lt: "2016-04-08 19:46:06"
      }
    }
  },
  {
    // One row per (company, url); the first title/location seen wins.
    $group: {
      _id: {
        company: "$company",
        url: "$url"
      },
      title: { $first: "$title" },
      location: { $first: "$location" },
      count: { $sum: 1 }
    }
  },
  {
    // Roll up to one row per company, adding up the per-url counts.
    $group: {
      _id: "$_id.company",
      count: { $sum: "$count" },
      urls: {
        $push: {
          // Build a three-element array by mapping over placeholder
          // labels: "A" -> url, "B" -> title, anything else -> location.
          $map: {
            input: ["A", "B", "C"],
            as: "el",
            in: {
              $cond: [
                { $eq: ["$$el", "A"] },
                "$_id.url",
                {
                  $cond: [
                    { $eq: ["$$el", "B"] },
                    "$title",
                    "$location"
                  ]
                }
              ]
            }
          }
        }
      }
    }
  },
  { $sort: { count: -1 } },
  { $limit: 10 },
  {
    $project: {
      count: 1,
      // Start at index 0 and keep at most three entries.
      urls: { $slice: ["$urls", 0, 3] }
    }
  }
])
The way to get a similar document is to $push a document with the selected fields to the urls array:
// Per company: count the matching postings and collect
// { url, title, location } sub-documents, keeping three of them for the
// ten companies with the most postings.
// Runs against `Books`, the collection used in the question.
db.Books.aggregate([
  { "$match": {
    "timestamp": {
      "$gte": "2016-04-08 19:46:03", "$lt": "2016-04-08 19:46:06"
    }
  }},
  { "$group": {
    "_id": "$company",
    "count": { "$sum": 1 },
    "urls": {
      "$push": { "url": "$url", "title": "$title", "location": "$location" }
    }
  }},
  { "$sort": { "count": -1 } },
  { "$limit": 10 },
  { "$project": {
    "count": 1,
    // Start at index 0 and keep at most three entries.
    "urls": { "$slice": ["$urls", 0, 3] }
  }}
])
then you will be able to get document shaped like this one:
{
"_id" : "Pinterest",
"count" : 3,
"urls" : [
{
"url" : "https://www.Pinterest.com/jobs/732?t=nu6xow",
"title" : "Data Scientist",
"location" : "San Francisco, CA"
},
{
"url" : "http://www.Pinterest.com/careers-position-data-mining-leader",
"title" : "Director of Analytics / Data Mining",
"location" : "Silicon Valley, CA"
},
{
"url" : "http://www.Pinterest.com/careers/detail/00443369/Senior-Real-World-Data-Scientist?src=JB-12568",
"title" : "Senior Real World Data Scientist",
"location" : "South San Francisco, CA"
}
]
}
Any questions welcome!
Have fun!