MongoDB projection on specific nested properties

"data" : {
"visits" : {
"daily" : {
"2018-09-05" : 3586,
"2018-09-06" : 2969,
"2018-09-07" : 2624,
"2018-09-08" : 2803,
"2018-09-09" : 3439,
"2018-09-10" : 3655
}
}
},
I have a property structure like this in MongoDB. What I am trying to do is: given a start date and an end date, for example (2018-09-06 - 2018-09-07),
I want to get a result in this format:
"data" : {
"visits" : {
"daily" : {
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
}
},
Is there an efficient way to do this dynamically? I can do it by listing each key in the projection, like {"data.visits.daily.2018-09-06": 1, "data.visits.daily.2018-09-07": 1}, and that works, but it doesn't seem like a good solution.

Using MongoDB 3.4.4 and newer versions:
db.collection.aggregate([
{ "$addFields": {
"data.visits.daily": {
"$arrayToObject": {
"$filter": {
"input": { "$objectToArray": "$data.visits.daily" },
"as": "el",
"cond": {
"$and": [
{ "$gte": ["$$el.k", "2018-09-06"] },
{ "$lte": ["$$el.k", "2018-09-07"] },
]
}
}
}
}
} }
])
The above pipeline will yield the final output
{
"data" : {
"visits" : {
"daily" : {
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
}
}
}
Explanations
The pipeline can be decomposed to show each individual operator's results.
$objectToArray
$objectToArray enables you to transform a document with dynamic keys
into an array that contains an element for each field/value pair in the original document. Each element in the returned array is a document with two fields, k and v.
Running the pipeline with just the operator in a $project stage
db.collection.aggregate([
{ "$project": {
"keys": { "$objectToArray": "$data.visits.daily" }
} }
])
yields
{
"_id" : ObjectId("5bab6d09b1951fef20a5dce4"),
"keys" : [
{
"k" : "2018-09-05",
"v" : 3586
},
{
"k" : "2018-09-06",
"v" : 2969
},
{
"k" : "2018-09-07",
"v" : 2624
},
{
"k" : "2018-09-08",
"v" : 2803
},
{
"k" : "2018-09-09",
"v" : 3439
},
{
"k" : "2018-09-10",
"v" : 3655
}
]
}
$filter
The $filter operator acts as a filtering mechanism for the array produced by the $objectToArray operator: it selects a subset of the array to return based on the specified condition, which
becomes your query.
Consider the following pipeline, which returns an array of the key/value pairs that match the condition "2018-09-06" <= key <= "2018-09-07"
db.collection.aggregate([
{ "$project": {
"keys": {
"$filter": {
"input": { "$objectToArray": "$data.visits.daily" },
"as": "el",
"cond": {
"$and": [
{ "$gte": ["$$el.k", "2018-09-06"] },
{ "$lte": ["$$el.k", "2018-09-07"] },
]
}
}
}
} }
])
which yields
{
"_id" : ObjectId("5bab6d09b1951fef20a5dce4"),
"keys" : [
{
"k" : "2018-09-06",
"v" : 2969
},
{
"k" : "2018-09-07",
"v" : 2624
}
]
}
$arrayToObject
This will transform the filtered array above from
[
{
"k" : "2018-09-06",
"v" : 2969
},
{
"k" : "2018-09-07",
"v" : 2624
}
]
back into a document with the dynamic keys
{
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
so running the pipeline
db.collection.aggregate([
{ "$project": {
"keys": {
"$arrayToObject": {
"$filter": {
"input": { "$objectToArray": "$data.visits.daily" },
"as": "el",
"cond": {
"$and": [
{ "$gte": ["$$el.k", "2018-09-06"] },
{ "$lte": ["$$el.k", "2018-09-07"] },
]
}
}
}
}
} }
])
will produce
{
"_id" : ObjectId("5bab6d09b1951fef20a5dce4"),
"keys" : {
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
}
But of course you would want to preserve the original schema, i.e. the current fields, so you would need to use $addFields instead of the $project stages used above for illustration.
$addFields
This is equivalent to a $project stage that explicitly specifies all existing fields in the input documents and adds the new fields. Specifying an existing field name in an $addFields operation causes the original field to be replaced, and you need to use dot notation to update the embedded data.visits.daily field with the dynamic keys.
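Since the keys are ISO-formatted date strings that compare correctly as plain strings, the range boundaries can simply be passed in as variables, so nothing in the pipeline needs to be hard-coded. A minimal sketch of the same pipeline with the boundaries parameterized (the startDate/endDate shell variables are just placeholders):
var startDate = "2018-09-06",
    endDate   = "2018-09-07";

db.collection.aggregate([
  { "$addFields": {
    "data.visits.daily": {
      "$arrayToObject": {
        "$filter": {
          "input": { "$objectToArray": "$data.visits.daily" },
          "as": "el",
          "cond": {
            "$and": [
              { "$gte": ["$$el.k", startDate] },  // lower bound of the range
              { "$lte": ["$$el.k", endDate] }     // upper bound of the range
            ]
          }
        }
      }
    }
  } }
])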

You can achieve this using the following aggregation (note that $dateFromString used below is not available in 3.4.x; it requires a newer server version):
var startdate = "2018-09-06";
var enddate = "2018-09-09";
db['01'].aggregate(
[
{
$project: {
daily:{$objectToArray:"$data.visits.daily"}
}
},
{
$unwind: {
path : "$daily",
}
},
{
$addFields: {
"date": {$dateFromString:{dateString:"$daily.k",format:"%Y-%m-%d"}}
}
},
{
$match: {
$and:[{date:{$gte:new Date(startdate)}},{date:{$lte:new Date(enddate)}}]
}
},
{
$group: {
_id:"_id",
daily:{$push:"$daily"}
}
},
{
$project: {
"data.visits.daily":{$arrayToObject:"$daily"}
}
},
]
);

Related

Querying a multi-nested array in MongoDb 3.4.2

MongoDB Version - 3.4.2
I'm querying the Sitecore Analytics database, trying to retrieve all users that are associated with a given List Id.
The example dataset I have follows the default Sitecore Analytics setup:
"Tags" : {
"Entries" : {
"ContactLists" : {
"Values" : {
"0" : {
"Value" : "{1E2D1AB7-72A0-4FF7-906B-DCDC020B87D2}",
"DateTime" : ISODate("2020-10-23T17:38:13.891Z")
},
"1" : {
"Value" : "{28BECCD3-476B-4B1D-9A75-02E59EF21286}",
"DateTime" : ISODate("2018-04-18T14:22:41.763Z")
},
"2" : {
"Value" : "{2C2BB0C3-483D-490E-B93A-9155BFBBE5DC}",
"DateTime" : ISODate("2018-05-10T14:26:08.494Z")
},
"3" : {
"Value" : "{DBE480F6-E305-4B35-9E6D-CBED64F4E44F}",
"DateTime" : ISODate("2018-10-27T02:41:28.776Z")
},
}
}
}
},
I want to iterate through all the entries within Values without having to specify 0/1/2/3, avoiding the following:
db.getCollection('Contacts').find({"Tags.Entries.ContactLists.Values.1.Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"})
I've tried the following:
db.getCollection('Contacts').find({"Tags.Entries.ContactLists.Values": {$elemMatch : {"Value":"{28BECCD3-476B-4B1D-9A75-02E59EF21286}"}}})
db.getCollection('Contacts').find({'Tags' : {$elemMatch : {$all : ['{28BECCD3-476B-4B1D-9A75-02E59EF21286}']}}})
db.getCollection('Contacts').forEach(function (doc) {
for(var i in doc.Tags.Entries.ContactLists.Values)
{
doc.Tags.Entries.ContactLists.Values[i].Value = "{28BECCD3-476B-4B1D-9A75-02E59EF21286}";
}
})
And a few other variations which I cannot recall now, none of which work.
Any ideas if this is possible, or on how to do this?
I want the outcome to filter the results, showing only the entries containing the matching GUID.
Many thanks!
Demo - https://mongoplayground.net/p/upgYxgzPwJQ
It can be done using an aggregation pipeline:
Use $objectToArray to convert the Values object to an array
Use $filter to filter that array
db.collection.aggregate([
{
$addFields: {
filteredValue: {
$filter: {
input: {
$objectToArray: "$Tags.Entries.ContactLists.Values"
},
as: "val",
cond: {
$eq: [ // filter condition
"$$val.v.Value",
"{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
]
}
}
}
}
}
])
Output -
[
{
"Tags": {
"Entries": {
"ContactLists": {
"Values": {
"0": {
"DateTime": ISODate("2020-10-23T17:38:13.891Z"),
"Value": "{1E2D1AB7-72A0-4FF7-906B-DCDC020B87D2}"
},
"1": {
"DateTime": ISODate("2018-04-18T14:22:41.763Z"),
"Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
},
"2": {
"DateTime": ISODate("2018-05-10T14:26:08.494Z"),
"Value": "{2C2BB0C3-483D-490E-B93A-9155BFBBE5DC}"
},
"3": {
"DateTime": ISODate("2018-10-27T02:41:28.776Z"),
"Value": "{DBE480F6-E305-4B35-9E6D-CBED64F4E44F}"
}
}
}
}
},
"_id": ObjectId("5a934e000102030405000000"),
"filteredValue": [
{
"k": "1",
"v": {
"DateTime": ISODate("2018-04-18T14:22:41.763Z"),
"Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
}
}
]
}
]
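If you want only the matching entries back inside Values itself (the "showing only the entries containing the matching GUID" part of the question) rather than in a separate k/v array, a sketch of the same idea that rewrites the object in place; note that $arrayToObject needs at least MongoDB 3.4.4, while the question mentions 3.4.2:
db.collection.aggregate([
  {
    $addFields: {
      "Tags.Entries.ContactLists.Values": {
        $arrayToObject: {
          $filter: {
            input: { $objectToArray: "$Tags.Entries.ContactLists.Values" },
            as: "val",
            cond: {
              // keep only the entries whose Value matches the requested GUID
              $eq: [ "$$val.v.Value", "{28BECCD3-476B-4B1D-9A75-02E59EF21286}" ]
            }
          }
        }
      }
    }
  }
])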
You cannot use $elemMatch because Values is not an array, but an object. You can solve the problem with an aggregation pipeline:
$addFields to add a new field Values_Array that will be the array representation of the Values object
$objectToArray to transform the Values object into an array
$match to find all documents that have the requested value in the new Values_Array field
$project to specify which properties to return from the result
db.getCollection('Contacts').aggregate([
{
"$addFields": {
"Values_Array": {
"$objectToArray": "$Tags.Entries.ContactLists.Values"
}
}
},
{
"$match": {
"Values_Array.v.Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
}
},
{
"$project": {
"Tags": 1
}
}
])
Here is the working example: https://mongoplayground.net/p/2gY-vu3Qrvz

Filter array in subdocument array field

I am trying to fetch an element from an array in MongoDB. I think the aggregation $filter is the right operator to apply, but I have tried many times already and I still cannot find where the problem is. Could you give me a hand?
MongoDB sample data:
{
"_id" : 12,
"items" : [
{
"columns" : [
{
"title" : "hhh",
"value" : 10
},
{
"title" : "hahaha",
"value" : 20
}
]
},
{
"columns" : [
{
"title" : "hiii",
"value" : 50
}
]
}
]
}
My solution:
db.myCollection.aggregate([
{
$project: {
items: {
$filter: {
input: "$items",
as: "item",
cond: { $eq: [ "$$item.columns.title", "hahaha" ]}
}
}
}
}
]).pretty()
My result:
{
"_id" : 15,
"items" : [
{
"columns" : [ ]
},
{
"columns" : [ ]
}
]
}
Expected result:
{
"_id" : 15,
"items" : [
{
"columns" : [
{
"title" : "hahaha",
"value" : 20
}
]
},
{
"columns" : []
}
]
}
I have checked the Mongo reference:
https://docs.mongodb.com/manual/reference/operator/aggregation/filter/#example
MongoDB version:3.4.1
Testing environment: Mongo Shell
You need to use the $map array operator to $filter the sub-array in your subdocuments. You should also do this in an $addFields aggregation pipeline stage to automatically include all other fields in the query result if you need them.
You can also replace the $addFields stage with $project as you were doing, but in this case you will need to explicitly include all other fields (see the sketch after the pipeline below).
let value = "hahaha";
db.coll.aggregate([
{
"$addFields": {
"items": {
"$map": {
"input": "$items",
"as": "item",
"in": {
"columns": {
"$filter": {
"input": "$$item.columns",
"as": "elt",
"cond": { "$eq": [ "$$elt.title", value ] }
}
}
}
}
}
}
}
])
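For completeness, the $project variant mentioned above could look roughly like this; a sketch only, and any other fields you still want in the output would have to be listed explicitly in the $project:
let value = "hahaha";
db.coll.aggregate([
  {
    "$project": {
      // _id is kept by default; every other field you still need must be listed here
      "items": {
        "$map": {
          "input": "$items",
          "as": "item",
          "in": {
            "columns": {
              "$filter": {
                "input": "$$item.columns",
                "as": "elt",
                "cond": { "$eq": [ "$$elt.title", value ] }
              }
            }
          }
        }
      }
    }
  }
])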

MongoDB join data inside an array of objects

I have a document like this in a collection called diagnoses:
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" : ObjectId("58345e0e996d340bd8126149"),
"instructions" : "Take 2 daily, after meals."
},
{
"drug" : ObjectId("5836bc0b291918eb42966320"),
"instructions" : "Take 1 daily, after meals."
}
]
}
The drug id inside the prescription object array references a separate collection called drugs; see a sample document below:
{
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
}
I am trying to create a mongodb query using the native node.js driver to get a result like this:
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" :
{
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
},
"instructions" : "Take 2 daily, after meals."
},
...
]
}
Any advice on how to approach a similar result like this is much appreciated, thanks.
Using MongoDB 3.4.4 and newer
With the aggregation framework, the $lookup operator supports arrays
db.diagnoses.aggregate([
{ "$addFields": {
"prescription": { "$ifNull" : [ "$prescription", [ ] ] }
} },
{ "$lookup": {
"from": "drugs",
"localField": "prescription.drug",
"foreignField": "_id",
"as": "drugs"
} },
{ "$addFields": {
"prescription": {
"$map": {
"input": "$prescription",
"in": {
"$mergeObjects": [
"$$this",
{ "drug": {
"$arrayElemAt": [
"$drugs",
{
"$indexOfArray": [
"$drugs._id",
"$$this.drug"
]
}
]
} }
]
}
}
}
} },
{ "$project": { "drugs": 0 } }
])
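Since the question mentions the native Node.js driver, here is a minimal sketch of how the pipeline above could be run from application code. It assumes a driver with the 3.x+ API, an already-connected MongoClient instance named client, and a placeholder dbName:
const pipeline = [
  /* the $addFields / $lookup / $project stages shown above */
];

client.db(dbName)
  .collection('diagnoses')
  .aggregate(pipeline)
  .toArray()                          // materialize the cursor into an array of documents
  .then(docs => console.log(docs))
  .catch(err => console.error(err));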
For older MongoDB versions:
You can create a pipeline that first flattens the prescription array using the $unwind operator, followed by a $lookup pipeline step that does a "left outer join" on the "drugs" collection. Apply another $unwind operation on the array created in the "joined" field, then $group the documents that were flattened by the first $unwind (which outputs one document for each element in the prescription array).
Assembling the above pipeline, run the following aggregate operation:
db.diagnoses.aggregate([
{
"$project": {
"patientid": 1,
"doctorid": 1,
"medicalcondition": 1,
"diagnosis": 1,
"addmissiondate": 1,
"dischargedate": 1,
"bhtno": 1,
"prescription": { "$ifNull" : [ "$prescription", [ ] ] }
}
},
{
"$unwind": {
"path": "$prescription",
"preserveNullAndEmptyArrays": true
}
},
{
"$lookup": {
"from": "drugs",
"localField": "prescription.drug",
"foreignField": "_id",
"as": "prescription.drug"
}
},
{ "$unwind": "$prescription.drug" },
{
"$group": {
"_id": "$_id",
"patientid" : { "$first": "$patientid" },
"doctorid" : { "$first": "$doctorid" },
"medicalcondition" : { "$first": "$medicalcondition" },
"diagnosis" : { "$first": "$diagnosis" },
"addmissiondate" : { "$first": "$addmissiondate" },
"dischargedate" : { "$first": "$dischargedate" },
"bhtno" : { "$first": "$bhtno" },
"prescription" : { "$push": "$prescription" }
}
}
])
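One caveat worth noting about the older-version pipeline: the bare $unwind on "$prescription.drug" will drop a prescription entry (and documents whose prescription array was empty) whenever the $lookup finds no matching drug. If you need to keep those, the same option already used on the first $unwind can be applied here as well, e.g.:
{ "$unwind": {
    "path": "$prescription.drug",
    "preserveNullAndEmptyArrays": true
} }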
Sample Output
{
"_id" : ObjectId("582d43d18ec3f432f3260682"),
"patientid" : ObjectId("582aacff3894c3afd7ad4677"),
"doctorid" : ObjectId("582a80c93894c3afd7ad4675"),
"medicalcondition" : "high fever, cough, runny nose.",
"diagnosis" : "Viral Flu",
"addmissiondate" : "2016-01-12",
"dischargedate" : "2016-01-16",
"bhtno" : "125",
"prescription" : [
{
"drug" : {
"_id" : ObjectId("58345e0e996d340bd8126149"),
"genericname" : "Paracetamol Tab 500mg",
"type" : "X",
"isbrand" : false
},
"instructions" : "Take 2 daily, after meals."
},
{
"drug" : {
"_id" : ObjectId("5836bc0b291918eb42966320"),
"genericname" : "Paracetamol Tab 100mg",
"type" : "Y",
"isbrand" : false
},
"instructions" : "Take 1 daily, after meals."
}
]
}
In MongoDB 3.6 or later versions
It seems that
$lookup will overwrite the original array instead of merging it.
A working solution (a workaround, if you prefer) is to write the joined documents to a different field,
and then merge the two fields, as shown below:
db.diagnoses.aggregate([
{ "$lookup": {
"from": "drugs",
"localField": "prescription.drug",
"foreignField": "_id",
"as": "prescription_drug_info"
} },
{ "$addFields": {
"merged_drug_info": {
"$map": {
"input": "$prescription",
"in": {
"$mergeObjects": [
"$$this",
{ "$arrayElemAt": [
"$prescription_drug_info._id",
"$$this._id"
] }
]
}
}
}
} }
])
This adds two extra fields, and the desired merged array ends up in merged_drug_info. We can then add a $set stage to move it back onto prescription and a $project stage to filter
out the excess fields:
...
{ "$set": { "prescription": "$merged_drug_info" } },
{ "$project": { "prescription_drug_info": 0, "merged_drug_info": 0 } }
...

Mongodb Aggregation Rows to Columns

I have the following dataset. I need to group the documents by Account, and then turn each Element_Fieldname into a column.
var collection = [
{
Account:12345,
Element_Fieldname:"cars",
Element_Value:true
},
{
Account:12345,
Element_Fieldname:"boats",
Element_Value:false
}
]
This was my attempt to convert rows to columns, but it's not working.
db.getCollection('my_collection').aggregate([{
$match : {
Element_Fieldname : {
$in : ["cars", "boats"]
}
}
}, {
$group : {
_id : "$Account",
values : {
$addToSet : {
field : "$Element_Fieldname",
value : "$Element_Value"
}
}
}
}, {
$project : {
Account : "$_id",
cars : {
"$cond" : [{
$eq : ["$Element_Fieldname", "cars"]
}, "$Element_Value", null]
},
boats : {
"$cond" : [{
$eq : ["$Element_Fieldname", "day_before_water_bottles"]
}, "$Element_Value", null]
},
}
}
])
This just gives me null in my cars and boats fields. Any help would be great.
And this is my desired results:
var desiredResult = [
{
Account:12345,
cars:true,
boats:false
}
]
This is a bit tricky, but you will get what you need :-)
Please add a $match stage at the top of the aggregation pipeline.
db.collection.aggregate([{
$project : {
_id : 0,
"Account" : 1,
car : {
$cond : [{
$eq : ["$Element_Fieldname", "cars"]
}, "$Element_Value", null]
},
boats : {
$cond : [{
$eq : ["$Element_Fieldname", "boats"]
}, "$Element_Value", null]
},
}
},
{
$group : {
_id : "$Account",
carData : {
$addToSet : "$car"
},
boatsData : {
$addToSet : "$boats"
}
}
}, {
$unwind : "$carData"
}, {
$match : {
carData : {
$ne : null
}
}
}, {
$unwind : "$boatsData"
}, {
$match : {
boatsData : {
$ne : null
}
}
},
])
and the result:
{
"_id" : 12345,
"carData" : true,
"boatsData" : false
}
It is not possible to do the type of computation you are describing with the aggregation framework, however there is a proposed $arrayToObject expression (since implemented in MongoDB 3.4.4) which gives you the functionality to peek into the key names and create new keys/values dynamically.
For example, you could do
db.collection.aggregate([
{
"$match": { "Element_Fieldname":{ "$in": ["cars", "boats"] } }
},
{
"$group": {
"_id": "$Account",
"attrs": {
"$push": {
"key": "$Element_Fieldname",
"val": "$Element_Value"
}
}
}
},
{
"$project": {
"Account": "$_id",
"_id": 0,
"newAttrs": {
"$arrayToObject": {
"$map": {
"input": "$attrs",
"as": "el",
in: ["$$el.key", "$$el.val"]
}
}
}
}
},
{
"$project": {
"Account": 1,
"cars": "$newAttrs.cars",
"boats": "$newAttrs.boats"
}
}
])
Vote for this jira ticket https://jira.mongodb.org/browse/SERVER-23310 to get this feature.
As a workaround, mapReduce seems like the available option. Consider running the following map-reduce operation:
db.collection.mapReduce(
function() {
var obj = {};
obj[this.Element_Fieldname] = this.Element_Value;
emit(this.Account, obj);
},
function(key, values) {
var obj = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(key) {
obj[key] = value[key];
});
});
return obj;
},
{ "out": { "inline": 1 } }
)
Result:
{
"_id" : 12345,
"value" : {
"cars" : true,
"boats" : false
}
}

Weighted Average rating through mongodb

Is it possible to do a query that sorts by "weighted average"?
There are 5 possible values, from 1 to 5. The weighted average is
(n5*5 + n4*4 + n3*3 + n2*2 + n1*1) / (n5+n4+n3+n2+n1)
where n5 is the count of objects with rating 5.
I have the following example. If you can suggest a better structure to store this in, I am happy to hear it.
{
"_id" : "wPg4jzJsEFXNxR5Wf",
"caveId" : "56424a93819e7419112c883e",
"data" : [
{
"value" : 1
},
{
"value" : 3
},
{
"value" : 4
},
{
"value" : 2
}
]
}
{
"_id" : "oSrtv33MgnkJFvNan",
"caveId" : "56424a93819e7419112c949f",
"data" : [
{
"value" : 1
},
{
"value" : 4
},
{
"value" : 4
},
{
"value" : 2
}
]
}
{
"_id" : "gJRMMQPwDwjFrL7zz",
"caveId" : "56424a93819e7419112c8727",
"data" : [
{
"value" : 5
},
{
"value" : 1
},
{
"value" : 4
}
]
}
Example for _id oSrtv33MgnkJFvNan (the second one):
(2*4 + 1*2 + 1*1)/(2+1+1) = 2.75
Then I would want to sort all the documents by that value.
Order would be
gJRMMQPwDwjFrL7zz: value: 3.33
oSrtv33MgnkJFvNan: value 2.75
wPg4jzJsEFXNxR5Wf: value 2.5
Well, the answer is really both "yes" and "no" with respect to whether MongoDB can sort data by a calculation like this. It can of course do it, but possibly not in a practical way for your purpose.
The two tools MongoDB has to do any sort of calculation are the aggregation framework and mapReduce. The former currently lacks the operators to really handle this in a practical way. The latter can be "tricked" into sorting, as an artifact of how mapReduce works, by putting the component to be sorted in the grouping key (even if there is no actual grouping).
So you can basically apply the math with something like this:
db.data.mapReduce(
function() {
var vals = this.data.map(function(el){ return el.value }),
uniq = {};
vals.forEach(function(el) {
if (!uniq.hasOwnProperty(el)) {
uniq[el] = 1;
} else {
uniq[el]++;
}
});
var weight = Array.sum(Object.keys(uniq).map(function(key) {
return uniq[key] * key
})) / Array.sum(Object.keys(uniq).map(function(key) {
return uniq[key];
}))
var id = this._id;
delete this._id;
emit({ "weight": weight, "orig": id },this);
},
function() {},
{ "out": { "inline": 1 } }
)
Which gives you this output:
{
"results" : [
{
"_id" : {
"weight" : 2.5,
"orig" : "wPg4jzJsEFXNxR5Wf"
},
"value" : {
"caveId" : "56424a93819e7419112c883e",
"data" : [
{
"value" : 1
},
{
"value" : 3
},
{
"value" : 4
},
{
"value" : 2
}
]
}
},
{
"_id" : {
"weight" : 2.75,
"orig" : "oSrtv33MgnkJFvNan"
},
"value" : {
"caveId" : "56424a93819e7419112c949f",
"data" : [
{
"value" : 1
},
{
"value" : 4
},
{
"value" : 4
},
{
"value" : 2
}
]
}
},
{
"_id" : {
"weight" : 3.3333333333333335,
"orig" : "gJRMMQPwDwjFrL7zz"
},
"value" : {
"caveId" : "56424a93819e7419112c8727",
"data" : [
{
"value" : 5
},
{
"value" : 1
},
{
"value" : 4
}
]
}
}
]
}
So all the results are sorted, but of course the restriction applies that mapReduce can only produce "inline" output that stays under the 16MB BSON limit, or alternatively write the results out to another collection.
Even with new features being added to the aggregation framework that can assist here (from the current development series, 3.1.x), this would still require some juggling with $unwind in order to get the "sum" of elements in any way (there is no such thing as a "reduce" function yet), which does not make it a stable or practical alternative.
So you can do it with mapReduce, but for my money I would have another process that calculates this, run periodically (or triggered on updates), and update a standard "weight" field on the document, which could then be used directly for sorting.
Having a value in place in your documents is always the most performant option.
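As a rough sketch of that "precompute and store" idea, run from the shell against the sample collection above. Because every individual rating is stored in data, the count-weighted formula works out to the plain mean of the stored values:
db.data.find().forEach(function(doc) {
    var vals = doc.data.map(function(el) { return el.value; });
    // same result as (n5*5 + ... + n1*1) / (n5 + ... + n1), since each rating is stored once
    var weight = Array.sum(vals) / vals.length;
    db.data.update({ "_id": doc._id }, { "$set": { "weight": weight } });
});

// sorting then becomes an ordinary (and indexable) query
db.data.find().sort({ "weight": -1 })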
For the curious, you can grab a development branch release of MongoDB ( 3.1.x series ), or any release after that and apply an aggregation pipeline like this:
db.data.aggregate([
{ "$project": {
"caveId": 1,
"data": 1,
"conv": {
"$setUnion": [
{ "$map": {
"input": "$data",
"as": "el",
"in": "$$el.value"
}},
[]
]
},
"orig": {
"$map": {
"input": "$data",
"as": "el",
"in": "$$el.value"
}
}
}},
{ "$project": {
"caveId": 1,
"data": 1,
"conv": 1,
"orig": 1,
"counts": { "$map": {
"input": "$conv",
"as": "el",
"in": {
"$size": {
"$filter": {
"input": "$orig",
"as": "o",
"cond": {
"$eq": [ "$$o", "$$el" ]
}
}
}
}
}}
}},
{ "$unwind": { "path": "$conv", "includeArrayIndex": true } },
{ "$group": {
"_id": "$_id",
"caveId": { "$first": "$caveId" },
"data": { "$first": "$data" },
"counts": { "$first": "$counts" },
"mult": {
"$sum": {
"$multiply": [
"$conv.value",
{ "$arrayElemAt": [ "$counts", "$conv.index" ] }
]
}
}
}},
{ "$unwind": "$counts" },
{ "$group": {
"_id": "$_id",
"caveId": { "$first": "$caveId" },
"data": { "$first": "$data" },
"count": { "$sum": "$counts" },
"mult": { "$first": "$mult" }
}},
{ "$project": {
"data": 1,
"weight": { "$divide": [ "$mult", "$count" ] }
}},
{ "$sort": { "weight": 1 } }
])
But even with helpers like $filter, "includeArrayIndex" in $unwind, and the $arrayElemAt operator using that index later to match up the distinct elements with their counts, the use of $unwind in any form makes this a non-performant solution.
It may become practical in the future if operators like $map can produce the index values needed for pairing, and with the introduction of methods to do an "in-line sum" or other math on array results without resorting to $unwind. But as of writing this does not exist, even in development.
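For what it is worth, because the formula reduces to the plain mean of the stored values (as noted in the sketch above), the $avg expression can be applied directly to the array on newer servers, which makes the whole thing much simpler than the pipeline above. A sketch, assuming MongoDB 3.4 or newer for $addFields (the $avg array expression itself is available from 3.2 in a $project):
db.data.aggregate([
    // "$data.value" resolves to the array of rating values, e.g. [1, 4, 4, 2]
    { "$addFields": { "weight": { "$avg": "$data.value" } } },
    { "$sort": { "weight": -1 } }
])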