MongoDB join data inside an array of objects - mongodb

I have document like this in a collection called diagnoses :
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" : ObjectId("58345e0e996d340bd8126149"),
"instructions" : "Take 2 daily, after meals."
},
{
"drug" : ObjectId("5836bc0b291918eb42966320"),
"instructions" : "Take 1 daily, after meals."
}
]
}
The drug id inside the prescription object array is from a separate collection called drugs, see sample document below :
{
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
}
I am trying to create a mongodb query using the native node.js driver to get a result like this:
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" :
{
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
},
"instructions" : "Take 2 daily, after meals."
},
...
]
}
Any advice on how to approach a similar result like this is much appreciated, thanks.

Using MongoDB 3.4.4 and newer
With the aggregation framework, the $lookup operator supports arrays as the localField:
// Replace every prescription[].drug ObjectId with the matching document from
// the "drugs" collection, without an $unwind/$group round-trip.
db.diagnoses.aggregate([
    // Substitute [] for a null or missing prescription so that $map below
    // always receives an array.
    { "$addFields": {
        "prescription": { "$ifNull": [ "$prescription", [ ] ] }
    } },
    // With an array-valued localField each element is matched against
    // foreignField; the matching drug documents land in the scratch field "drugs".
    { "$lookup": {
        "from": "drugs",
        "localField": "prescription.drug",
        "foreignField": "_id",
        "as": "drugs"
    } },
    { "$addFields": {
        "prescription": {
            "$map": {
                "input": "$prescription",
                "in": {
                    "$let": {
                        // Position of this element's drug id among the joined
                        // documents; $indexOfArray yields -1 when it is absent.
                        "vars": {
                            "idx": { "$indexOfArray": [ "$drugs._id", "$$this.drug" ] }
                        },
                        "in": {
                            "$mergeObjects": [
                                "$$this",
                                // Guard the -1 case: $arrayElemAt treats a negative
                                // index as "count from the end", which would attach
                                // the last joined drug to an unmatched id. Keep the
                                // original id instead when nothing matched.
                                { "drug": {
                                    "$cond": [
                                        { "$gte": [ "$$idx", 0 ] },
                                        { "$arrayElemAt": [ "$drugs", "$$idx" ] },
                                        "$$this.drug"
                                    ]
                                } }
                            ]
                        }
                    }
                }
            }
        }
    } },
    // Drop the scratch field produced by $lookup.
    { "$project": { "drugs": 0 } }
])
For older MongoDB versions:
You can create a pipeline that first flattens the prescription array using the $unwind operator, followed by a $lookup pipeline step that does a "left outer join" on the "drugs" collection. Apply another $unwind operation on the array created in the "joined" field. Finally, $group the flattened documents back together, since the first $unwind operator outputs one document for each element in the prescription array.
Assembling the above pipeline, run the following aggregate operation:
// Unwind the prescription array, join one drug per element, then regroup the
// elements back into one document per diagnosis.
db.diagnoses.aggregate([
{
// Carry the listed fields forward and substitute [] for a null/missing prescription.
"$project": {
"patientid": 1,
"doctorid": 1,
"medicalcondition": 1,
"diagnosis": 1,
"addmissiondate": 1,
"dischargedate": 1,
"bhtno": 1,
"prescription": { "$ifNull" : [ "$prescription", [ ] ] }
}
},
{
// One output document per prescription element; documents with an empty
// prescription array are still emitted because of preserveNullAndEmptyArrays.
"$unwind": {
"path": "$prescription",
"preserveNullAndEmptyArrays": true
}
},
{
// Overwrite prescription.drug (an ObjectId) with the array of matching drug documents.
"$lookup": {
"from": "drugs",
"localField": "prescription.drug",
"foreignField": "_id",
"as": "prescription.drug"
}
},
// Turn the joined array into a single embedded document.
// NOTE(review): this $unwind has no preserveNullAndEmptyArrays, so a document
// whose lookup matched nothing (including those preserved by the first $unwind)
// is not emitted from here on - confirm that is intended.
{ "$unwind": "$prescription.drug" },
{
// Reassemble one document per original _id, collecting the joined elements.
"$group": {
"_id": "$_id",
"patientid" : { "$first": "$patientid" },
"doctorid" : { "$first": "$doctorid" },
"medicalcondition" : { "$first": "$medicalcondition" },
"diagnosis" : { "$first": "$diagnosis" },
"addmissiondate" : { "$first": "$addmissiondate" },
"dischargedate" : { "$first": "$dischargedate" },
"bhtno" : { "$first": "$bhtno" },
"prescription" : { "$push": "$prescription" }
}
}
])
Sample Output
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" : {
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
},
"instructions" : "Take 2 daily, after meals."
},
{
"drug" : {
"_id" : ObjectId("5836bc0b291918eb42966320"),
"genericname" : "Paracetamol Tab 100mg",
"type" : "Y",
"isbrand" : false
},
"instructions" : "Take 1 daily, after meals."
}
]
}

In MongoDB 3.6 or later versions
It seems that
$lookup will overwrite the original array instead of merging it.
A working solution (a workaround, if you prefer) is to create a different field,
and then merge two fields, as shown below:
// Join the drug documents into a scratch field, then build merged_drug_info:
// a copy of the prescription array in which each drug id is replaced by the
// matching drug document.
db.diagnoses.aggregate([
    { "$lookup": {
        "from": "drugs",
        "localField": "prescription.drug",
        "foreignField": "_id",
        "as": "prescription_drug_info"
    } },
    { "$addFields": {
        "merged_drug_info": {
            "$map": {
                "input": "$prescription",
                "in": {
                    "$let": {
                        // Locate this element's drug id among the joined documents.
                        // The link is prescription[].drug -> drugs._id; the elements
                        // have no _id of their own, and $arrayElemAt needs an integer
                        // position, not an ObjectId.
                        "vars": {
                            "idx": {
                                "$indexOfArray": [ "$prescription_drug_info._id", "$$this.drug" ]
                            }
                        },
                        "in": {
                            "$mergeObjects": [
                                "$$this",
                                // $indexOfArray yields -1 when the id is absent, and a
                                // negative index makes $arrayElemAt count from the end,
                                // so keep the raw id in that case.
                                { "drug": {
                                    "$cond": [
                                        { "$gte": [ "$$idx", 0 ] },
                                        { "$arrayElemAt": [ "$prescription_drug_info", "$$idx" ] },
                                        "$$this.drug"
                                    ]
                                } }
                            ]
                        }
                    }
                }
            }
        }
    } }
])
This would add two more fields and the name of the desired field
will be merged_drug_info. We can then add $project stage to filter
out excessive fields and $set stage to rename the field:
...
{ "$set": { "prescription": "$merged_drug_info" } },
{ "$project": { "prescription_drug_info": 0, "merged_drug_info": 0 } }
...

Related

mongodb aggregation multiple lookup with conditions

I have 3 collection.
// Sample data. db_type holds the NAME of the collection to join, so it must be
// a string literal; a bare b / c is an undefined identifier in the shell.
db.a.insert([
    { "_id" : ObjectId("5b56989172ebcb11105e8f41"), "db_type" : "b", "number" : 1 },
    { "_id" : ObjectId("5b56989172ebcb11105e8f42"), "db_type" : "c", "number" : 2 }
])
db.b.insert([
    { "_id" : ObjectId("5b56989172ebcb11105e8f43"), "number" : 1, "value" : "111" }
])
db.c.insert([
    { "_id" : ObjectId("5b56989172ebcb11105e8f44"), "number" : 2, "value" : "222" }
])
I want to make a lookup query that gets values from each collection according to db_type.
What should I do in this case?
result :
{ "_id" : ObjectId("5b56989172ebcb11105e8f41"), "db_type" : "b", "number" : 1, "value" : "111"}
{ "_id" : ObjectId("5b56989172ebcb11105e8f42"), "db_type" : "c", "number" : 2, "value" : "222"}
clogged part...
db.getCollection('a').aggregate([
{
"$lookup":{
"from": "b" or "c", // I want to give condition here.
"localField": "number",
"foreignField": "number",
"as": "result"
}
},
])
In your case there are only two collections, b and c, to look up from, so you can simply do two separate lookups and use $setUnion to combine the results.
// Run one $lookup per candidate collection. Each sub-pipeline matches only when
// the outer document's db_type names that collection AND the numbers are equal,
// so at most one of bLookup / cLookup is non-empty; $setUnion then merges them.
db.a.aggregate([
    { "$lookup": {
        "from": "b",
        "let": { "db_type": "$db_type", "number": "$number" },
        "pipeline": [
            { "$match": { "$expr": { "$and": [
                { "$eq": [ "$$db_type", "b" ] },
                { "$eq": [ "$$number", "$number" ] }
            ] } } }
        ],
        "as": "bLookup"
    } },
    { "$lookup": {
        "from": "c",
        "let": { "db_type": "$db_type", "number": "$number" },
        "pipeline": [
            { "$match": { "$expr": { "$and": [
                { "$eq": [ "$$db_type", "c" ] },
                { "$eq": [ "$$number", "$number" ] }
            ] } } }
        ],
        "as": "cLookup"
    } },
    { "$addFields": {
        "allLookup": { "$setUnion": [ "$bLookup", "$cLookup" ] }
    } }
])
Here is the Mongo playground for your reference.

exclude fields in $lookup aggregation

I am querying between 3 collections I want to exclude _id everywhere in output
My output is:
{
"_id" : ObjectId("5b6aed5f9bcdb5d4ae64aef5"),
"userID" : "1",
"skills" : [
{
"_id" : ObjectId("5b766b5f1365a4940bb6050f"),
"skillID" : "javaid",
"skillname" : "जावा",
"languageID" : "hindiid"
},
{
"_id" : ObjectId("5b766b8c1365a4940bb60535"),
"skillID" : "pythonid",
"skillname" : "पायथन",
"languageID" : "hindiid"
}
],
"gender" : {
"_id" : ObjectId("5b7687cd2a2329043e2383d5"),
"genderID" : "femaleid",
"gendername" : "महिला",
"languageID" : "hindiid"
}
}
Query:
db.User.aggregate([
{ "$match": { "userID":"1" }},
{ "$lookup":{
"from": "Skill",
"pipeline": [
{ "$match": { "languageID": "hindiid", "skillID": { "$in": [ "javaid","pythonid" ] }}},
],
"as": "skills"
}},
{ "$lookup": {
"from": "Gender",
"pipeline": [
{ "$match": { "languageID": "hindiid", "genderID" : "femaleid" }},
],
"as": "gender"
}},
{ "$unwind": { "path": "$gender", "preserveNullAndEmptyArrays": true }},
{ "$project": { "userID": 1, "skills": 1, "gender": 1 }}
])
In the output, every object has an _id. For example, every object in the skills list has an _id. I want to exclude the _id field everywhere. How can I exclude it?
In mongodb 3.6 you can use projection ($project) inside $lookup pipeline... Something like this
// Same query as in the question, with _id suppressed at every level: inside
// each $lookup sub-pipeline for the joined documents, and in the final
// $project for the User document itself.
db.User.aggregate([
    { "$match": { "userID": "1" } },
    { "$lookup": {
        "from": "Skill",
        "pipeline": [
            { "$match": { "languageID": "hindiid", "skillID": { "$in": [ "javaid", "pythonid" ] } } },
            // Strip _id from every joined skill document.
            { "$project": { "_id": 0 } }
        ],
        "as": "skills"
    } },
    { "$lookup": {
        "from": "Gender",
        "pipeline": [
            { "$match": { "languageID": "hindiid", "genderID": "femaleid" } },
            // Strip _id from the joined gender document.
            { "$project": { "_id": 0 } }
        ],
        "as": "gender"
    } },
    { "$unwind": { "path": "$gender", "preserveNullAndEmptyArrays": true } },
    // _id is included by default in an inclusion projection, so it has to be
    // switched off explicitly for the top-level document.
    { "$project": { "_id": 0, "userID": 1, "skills": 1, "gender": 1 } }
])

$lookup nested array in mongodb

I am struggling with the newish (lovely) lookup operator in MongoDB. I have 3 collections:
artists
{
"_id" : ObjectId("5b0d2b2c7ac4792df69a9942"),
"name" : "Dream Theater",
"started_in" : NumberInt(1985),
"active" : true,
"country" : "US",
"current_members" : [
ObjectId("5b0d2a7c7ac4792df69a9941")
],
"previous_members" : [
ObjectId("5b0d2bf57ac4792df69a9954")
],
"albums" : [
ObjectId("5b0d16ee7ac4792df69a9924"),
ObjectId("5b0d47667ac4792df69a9994")
],
"genres" : [
"prog metal",
"prog rock"
]
}
Albums
{
"_id" : ObjectId("5b0d16ee7ac4792df69a9924"),
"title" : "Images and words",
"released" : ISODate("1992-07-07T00:00:00.000+0000"),
"songs" : [
ObjectId("5b0d15ab7ac4792df69a9916"),
ObjectId("5b0d15ee7ac4792df69a991e"),
ObjectId("5b0d2db37ac4792df69a995d"),
ObjectId("5b0d2dbe7ac4792df69a995e"),
ObjectId("5b0d2dcb7ac4792df69a995f"),
ObjectId("5b0d2dd87ac4792df69a9960"),
ObjectId("5b0d2de27ac4792df69a9961"),
ObjectId("5b0d2dec7ac4792df69a9962")
],
"type" : "LP"
}
{
"title" : "Awake",
"released" : ISODate("1994-10-04T00:00:00.000+0000"),
"songs" : [
ObjectId("5b0d470d7ac4792df69a9991")
],
"type" : "LP",
"_id" : ObjectId("5b0d47667ac4792df69a9994")
}
Songs
{
"_id" : ObjectId("5b0d15ab7ac4792df69a9916"),
"title" : "Pull me under"
}
{
"_id" : ObjectId("5b0d15ee7ac4792df69a991e"),
"title" : "Another day"
}
{
"title" : "Take the time",
"_id" : ObjectId("5b0d2db37ac4792df69a995d")
}
{
"title" : "Surrounded",
"_id" : ObjectId("5b0d2dbe7ac4792df69a995e")
}
{
"title" : "Metropolis - part I",
"_id" : ObjectId("5b0d2dcb7ac4792df69a995f")
}
{
"title" : "Under a glass moon",
"_id" : ObjectId("5b0d2dd87ac4792df69a9960")
}
{
"title" : "Wait for sleep",
"_id" : ObjectId("5b0d2de27ac4792df69a9961")
}
{
"title" : "Learning to live",
"_id" : ObjectId("5b0d2dec7ac4792df69a9962")
}
{
"title" : "6:00",
"_id" : ObjectId("5b0d470d7ac4792df69a9991")
}
I can easily do an aggregation with $lookup to get the detailed albums array, but how do I get also the detailed songs in the corresponding albums?
I would like to extend the following query:
db.artists.aggregate([ {
$lookup: {
from: "albums",
localField: "albums",
foreignField: "_id",
as: "albums"
}
}]).pretty()
If you have mongodb version 3.6 then you can try with nested $lookup aggregation...
// Two-level join: artists -> albums -> songs, using a $lookup nested inside the
// sub-pipeline of another $lookup.
// NOTE(review): Albums / Songs look like model objects (e.g. Mongoose) whose
// .collection.name gives the collection name; in the plain shell use the
// collection names as string literals - confirm against the caller.
db.collection.aggregate([
{ "$lookup": {
"from": Albums.collection.name,
// Expose the outer document's albums id array to the sub-pipeline as $$albums.
"let": { "albums": "$albums" },
"pipeline": [
// Keep only the albums whose _id is listed in the outer albums array.
{ "$match": { "$expr": { "$in": [ "$_id", "$$albums" ] } } },
{ "$lookup": {
"from": Songs.collection.name,
// Here the "outer" document is the album, so $$songs is the album's songs id array.
"let": { "songs": "$songs" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$songs" ] } } }
],
// Overwrite the album's id array with the joined song documents.
"as": "songs"
}}
],
// Overwrite the artist's id array with the joined album documents.
"as": "albums"
}}
])
And for long-winded explanation you can go through $lookup multiple levels without $unwind?
Or If you have mongodb version prior to 3.6
// Pre-3.6 variant: join albums, unwind them so that each album's songs can be
// joined with a plain localField/foreignField $lookup, then regroup per artist.
db.collection.aggregate([
    { "$lookup": {
        "from": Albums.collection.name,
        "localField": "albums",
        "foreignField": "_id",
        "as": "albums"
    }},
    // One document per (artist, album) pair.
    { "$unwind": "$albums" },
    // Replace the album's array of song ids with the joined song documents.
    { "$lookup": {
        "from": Songs.collection.name,
        "localField": "albums.songs",
        "foreignField": "_id",
        "as": "albums.songs"
    }},
    // $group only keeps what is listed explicitly, so every artist field from
    // the sample document is carried over with $first; otherwise
    // current_members, previous_members and genres would be lost.
    { "$group": {
        "_id": "$_id",
        "name": { "$first": "$name" },
        "started_in": { "$first": "$started_in" },
        "active": { "$first": "$active" },
        "country": { "$first": "$country" },
        "current_members": { "$first": "$current_members" },
        "previous_members": { "$first": "$previous_members" },
        "genres": { "$first": "$genres" },
        "albums": {
            "$push": {
                "_id": "$albums._id",
                "title": "$albums.title",
                "released": "$albums.released",
                "type": "$albums.type",
                "songs": "$albums.songs"
            }
        }
    }}
])

Rewind data of two nested array field after $unwind and $lookup and $filter on date range in $project

{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("591068be1f4c6c79a442a788"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e5")
},
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("593a33dccfaa652df543d924"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e4")
}
]
},
{
"_id" : ObjectId("5944cb7142a04740357020b9"),
"sold_at" : ISODate("2017-06-17T06:25:53.332Z"),
"sold_to" : ObjectId("5927d4a59e58ba0c61066f3b"),
"sold_products" : [
{
"product_dp" : 500,
"quantity" : 1,
"price" : 5650,
"product_id" : ObjectId("593191ed53a2741dd9bffeb5"),
"_id" : ObjectId("5944cb7142a04740357020ba")
}
]
}
]
}
I have User schema like this. I want detail of product_id reference, with a date range search criteria on sold_at date field.
My expected data like following when I searched in sold_at at: 2017-06-11
{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id": {
_id:ObjectId("hsfgg123412yh3gy1u2g3"),
name: "Product1",
code: "FG0154"
},
}
]
}
]
}
Product detail need to be populate in product_id, sales_history array need to be filtered in date range.
You can try below aggregation query.
$filter sales history on date range followed by $unwinding sales history & sold_products.
$lookup sold_products to get the product details.
$group back sold_products & sales history
// Filter sales_history to a date range, populate product_id on every sold
// product, then rebuild the two nested arrays.
// `lookupcollection` is a placeholder for the name of the products collection.
db.collection.aggregate([
    // Keep only the sales_history entries with sold_at inside [start, end).
    { "$project": {
        "name": 1,
        "sales_history": {
            "$filter": {
                "input": "$sales_history",
                "as": "history",
                "cond": {
                    "$and": [
                        { "$gte": [ "$$history.sold_at", ISODate("2017-06-11T00:00:00.000Z") ] },
                        { "$lt": [ "$$history.sold_at", ISODate("2017-06-12T00:00:00.000Z") ] }
                    ]
                }
            }
        }
    } },
    // Flatten both array levels so that each document holds one sold product.
    { "$unwind": "$sales_history" },
    { "$unwind": "$sales_history.sold_products" },
    { "$lookup": {
        "from": lookupcollection,
        "localField": "sales_history.sold_products.product_id",
        "foreignField": "_id",
        "as": "sales_history.sold_products.product_id"
    } },
    // $lookup always writes an array; unwind it so product_id is the embedded
    // product document, as in the expected result. With
    // preserveNullAndEmptyArrays a sold product whose product no longer exists
    // is kept (without product_id) instead of being dropped.
    { "$unwind": {
        "path": "$sales_history.sold_products.product_id",
        "preserveNullAndEmptyArrays": true
    } },
    // First regroup: one document per sales_history entry, collecting its products.
    { "$group": {
        "_id": { "_id": "$_id", "sales_history_id": "$sales_history._id" },
        "name": { "$first": "$name" },
        "sold_at": { "$first": "$sales_history.sold_at" },
        "sold_to": { "$first": "$sales_history.sold_to" },
        "sold_products": { "$push": "$sales_history.sold_products" }
    } },
    // Second regroup: one document per user, collecting the sales_history entries.
    { "$group": {
        "_id": "$_id._id",
        "name": { "$first": "$name" },
        "sales_history": {
            "$push": {
                "_id": "$_id.sales_history_id",
                "sold_at": "$sold_at",
                "sold_to": "$sold_to",
                "sold_products": "$sold_products"
            }
        }
    } }
]);

Weighted Average rating through mongodb

Is it possible to do a query to sort by "weighted average"
There is 5 values from 1-5 possible. Weighted average is
(n5*5 + n4*4 + n3*3 + n2*2 + n1*1) / (n5+n4+n3+n2+n1)
Where n5 would be the count of objects with rating: 5
I have the following example. If you find better structure to store I am happy to hear.
{
"_id" : "wPg4jzJsEFXNxR5Wf",
"caveId" : "56424a93819e7419112c883e",
"data" : [
{
"value" : 1
},
{
"value" : 3
},
{
"value" : 4
},
{
"value" : 2
}
]
}
{
"_id" : "oSrtv33MgnkJFvNan",
"caveId" : "56424a93819e7419112c949f",
"data" : [
{
"value" : 1
},
{
"value" : 4
},
{
"value" : 4
},
{
"value" : 2
}
]
}
{
"_id" : "gJRMMQPwDwjFrL7zz",
"caveId" : "56424a93819e7419112c8727",
"data" : [
{
"value" : 5
},
{
"value" : 1
},
{
"value" : 4
}
]
}
Example of _ID: oSrtv33MgnkJFvNan (Second one)
(2*4 + 1*2 + 1*1)/(2+1+1) = 2.75
Then I would want to sort all the documents by that value.
Order would be
gJRMMQPwDwjFrL7zz: value: 3.33
oSrtv33MgnkJFvNan: value 2.75
wPg4jzJsEFXNxR5Wf: value 2.5
Well the answer is really both "yes" and "no" in respect to can MongoDB sort data from calculation like this. It can of course do it, but possibly not in a practical way for your purpose.
The two tools MongoDB has to do any sort of calculation are the aggregation framework and mapReduce. The former currently lacks the operators to really handle this in a practical way. The second can be "tricked" into sorting, as an artifact of how mapReduce works, by putting the component to be sorted in the grouping key (even if there is no actual grouping).
So you can basically apply the math with something like this:
// Compute the weighted average of data[].value per document and emit it as
// part of the key, so the result comes back ordered by that key.
db.data.mapReduce(
// Map function: `this` is the current document.
function() {
// All rating values of this document, repeats included.
var vals = this.data.map(function(el){ return el.value }),
// uniq maps each distinct rating value to the number of times it occurs.
uniq = {};
vals.forEach(function(el) {
if (!uniq.hasOwnProperty(el)) {
uniq[el] = 1;
} else {
uniq[el]++;
}
});
// weight = sum(count * value) / sum(count).
// Object.keys yields the values as strings; `*` coerces them back to numbers.
var weight = Array.sum(Object.keys(uniq).map(function(key) {
return uniq[key] * key
})) / Array.sum(Object.keys(uniq).map(function(key) {
return uniq[key];
}))
// Move the original _id into the key and remove it from the emitted value.
var id = this._id;
delete this._id;
emit({ "weight": weight, "orig": id },this);
},
// Reduce function: intentionally empty.
// NOTE(review): presumably never invoked, since each key embeds the document's
// own _id and is therefore emitted once - confirm.
function() {},
// Return the results inline rather than writing them to a collection.
{ "out": { "inline": 1 } }
)
Which gives you this output:
{
"results" : [
{
"_id" : {
"weight" : 2.5,
"orig" : "wPg4jzJsEFXNxR5Wf"
},
"value" : {
"caveId" : "56424a93819e7419112c883e",
"data" : [
{
"value" : 1
},
{
"value" : 3
},
{
"value" : 4
},
{
"value" : 2
}
]
}
},
{
"_id" : {
"weight" : 2.75,
"orig" : "oSrtv33MgnkJFvNan"
},
"value" : {
"caveId" : "56424a93819e7419112c949f",
"data" : [
{
"value" : 1
},
{
"value" : 4
},
{
"value" : 4
},
{
"value" : 2
}
]
}
},
{
"_id" : {
"weight" : 3.3333333333333335,
"orig" : "gJRMMQPwDwjFrL7zz"
},
"value" : {
"caveId" : "56424a93819e7419112c8727",
"data" : [
{
"value" : 5
},
{
"value" : 1
},
{
"value" : 4
}
]
}
}
]
}
So all the results are sorted, but of course the restriction applies that mapReduce can only produce "inline" output that is under the 16MB BSON limit, or alternately write the results out to another collection.
Even with new features being added to the aggregation framework that can assist here ( from current development series 3.1.x ) this would still require some juggling with $unwind in order to get the "sum" of elements in any way ( no such feature as a "reduce" function yet ), which does not make it a stable or practical alternative.
So you can do it with mapReduce, but for my money I would have another process that calculates this to run periodically ( or triggered on updates ) and update a standard "weight" field on the document, that could then be used directly for sorting.
Having a value in place in your documents is always the most performant option.
For the curious, you can grab a development branch release of MongoDB ( 3.1.x series ), or any release after that and apply an aggregation pipeline like this:
// Weighted average of data[].value per document, sorted ascending by weight.
db.data.aggregate([
    // conv: the distinct rating values ($setUnion with [] de-duplicates);
    // orig: every rating value, repeats included.
    { "$project": {
        "caveId": 1,
        "data": 1,
        "conv": {
            "$setUnion": [
                { "$map": { "input": "$data", "as": "el", "in": "$$el.value" } },
                []
            ]
        },
        "orig": { "$map": { "input": "$data", "as": "el", "in": "$$el.value" } }
    }},
    // counts[i] = number of occurrences of conv[i] in orig.
    { "$project": {
        "caveId": 1,
        "data": 1,
        "conv": 1,
        "orig": 1,
        "counts": {
            "$map": {
                "input": "$conv",
                "as": "el",
                "in": {
                    "$size": {
                        "$filter": {
                            "input": "$orig",
                            "as": "o",
                            "cond": { "$eq": [ "$$o", "$$el" ] }
                        }
                    }
                }
            }
        }
    }},
    // includeArrayIndex takes the NAME of the field that receives the element's
    // position (released servers reject a boolean). After this stage "conv" is
    // the distinct value itself and "convIndex" is its position in the array.
    { "$unwind": { "path": "$conv", "includeArrayIndex": "convIndex" } },
    // mult = sum over distinct values of value * count, pairing each value with
    // its count through the shared index.
    { "$group": {
        "_id": "$_id",
        "caveId": { "$first": "$caveId" },
        "data": { "$first": "$data" },
        "counts": { "$first": "$counts" },
        "mult": {
            "$sum": {
                "$multiply": [
                    "$conv",
                    { "$arrayElemAt": [ "$counts", "$convIndex" ] }
                ]
            }
        }
    }},
    // count = total number of ratings (sum of the per-value counts).
    { "$unwind": "$counts" },
    { "$group": {
        "_id": "$_id",
        "caveId": { "$first": "$caveId" },
        "data": { "$first": "$data" },
        "count": { "$sum": "$counts" },
        "mult": { "$first": "$mult" }
    }},
    { "$project": {
        "data": 1,
        "weight": { "$divide": [ "$mult", "$count" ] }
    }},
    { "$sort": { "weight": 1 } }
])
But even with helpers like $filter and "includeArrayIndex" in $unwind and the $arrayElemAt operator using that index later to match up the distinct elements with their counts, the usage of $unwind in any way makes this a non-performant solution.
It may become practical in the future if operators like $map can produce index values needed for pairing and with the introduction of any methods to similarly do an "in-line sum" operation or other math on array results without processing $unwind. But as of writing this does not exist, even in development.