Get max from unwound arrays - mongodb

I have a collection of documents where I want to find the maximum values of each of the ratios of every possible pair of fields in the data object. For example:
Documents:
[
{ data: { a: 1, b: 5, c: 2 } },
{ data: { a: 4, b: 1, c: 1 } },
{ data: { a: 2, b: 4, c: 3 } }
]
Desired output:
{
a: { a: 1, b: 4, c: 4 },
b: { a: 5, b: 1, c: 2.5 },
c: { a: 2, b: 1, c: 1 }
}
So the output a.b is the largest of the a:b ratios 1/5, 4/1, and 2/4.
So I figure I first use $objectToArray to convert data, then $unwind on the result, but I'm having a hard time figuring out how to group everything together. The number of documents I have won't be too large, but the number of keys in data can be in the low thousands, so I'm not sure how well Mongo will be able to handle doing a bunch of $lookup's and comparing the values like that.

You can try following aggregation:
// For every document, enumerate all n x n (numerator, denominator) key pairs of
// `data`, then keep the largest numerator/denominator ratio seen for each pair
// across all documents. Emits one document per numerator key,
// e.g. { a: { a: 1, b: 4, c: 4 } }.
db.col.aggregate([
  // { a: 1, b: 5 }  ->  [ { k: "a", v: 1 }, { k: "b", v: 5 } ]
  {
    $addFields: { data: { $objectToArray: "$data" } }
  },
  {
    $project: {
      pairs: {
        $map: {
          // One flat index per pair: 0 .. n*n - 1
          input: { $range: [ 0, { $multiply: [ { $size: "$data" }, { $size: "$data" } ] } ] },
          as: "index",
          in: {
            $let: {
              vars: {
                // Decode the flat index into (row, column) = (left, right)
                leftIndex: { $floor: { $divide: [ "$$index", { $size: "$data" } ] } },
                rightIndex: { $mod: [ "$$index", { $size: "$data" } ] }
              },
              in: {
                l: { $arrayElemAt: [ "$data", "$$leftIndex" ] },
                r: { $arrayElemAt: [ "$data", "$$rightIndex" ] }
              }
            }
          }
        }
      }
    }
  },
  { $unwind: "$pairs" },
  {
    $group: {
      _id: { l: "$pairs.l.k", r: "$pairs.r.k" },
      value: {
        $max: {
          // $divide raises an error on a zero divisor, which would abort the
          // whole aggregation. Yield null instead; $max ignores null values.
          $cond: [
            { $eq: [ "$pairs.r.v", 0 ] },
            null,
            { $divide: [ "$pairs.l.v", "$pairs.r.v" ] }
          ]
        }
      }
    }
  },
  // Sort so that the keys are pushed in a stable (left, right) order below.
  {
    $sort: { "_id.l": 1, "_id.r": 1 }
  },
  {
    $group: {
      _id: "$_id.l",
      values: { $push: { k: "$_id.r", v: "$value" } }
    }
  },
  // [ { k: "a", v: 1 }, ... ]  ->  { a: 1, ... }
  {
    $addFields: { values: { $arrayToObject: "$values" } }
  },
  // Wrap as a one-element k/v array so $arrayToObject can use the numerator
  // key as the field name.
  {
    $project: {
      root: [ { k: "$_id", v: "$values" } ]
    }
  },
  {
    $sort: { "root.k": 1 }
  },
  {
    $replaceRoot: {
      newRoot: { $arrayToObject: "$root" }
    }
  }
])
You need $objectToArray and $arrayToObject to convert between objects and arrays. The key point is that for each document you have to generate n×n pairs (3×3=9 in this case). You can iterate over them with the $range operator: $floor of $divide gives the left index and $mod gives the right index, producing index pairs (0,0)...(2,2). Then $group with $max yields the maximum ratio for each pair of keys (a with b, and so on). Finally, $replaceRoot produces the final shape.
Outputs:
{ "a" : { "a" : 1, "b" : 4, "c" : 4 } }
{ "b" : { "a" : 5, "b" : 1, "c" : 2.5 } }
{ "c" : { "a" : 2, "b" : 1, "c" : 1 } }

Related

mongo / mongoose aggregation pipeline query for survey data

I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b'],
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // keep only responses for the requested survey
{ $project: { // convert the answers object into an array of { k, v } pairs
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// Unfinished sketch from here on: maybe a $facet?
// Conceptually, the next stage should bucket the questions by type,
// then perform the right kind of aggregation for each type:
// if $in [$$answers.k, ['question1', 'question2']] -> group by k, v and count
// if $in [$$answers.k, ['question3']] -> unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the by day/month based on the createdAt time
// Counts how often each answer value was given, per question, and returns a
// single document shaped like { question1: { a: 2, b: 1 }, ... }.
// Numeric questions are counted per distinct value as well (not averaged).
db.collection.aggregate([
  {
    $match: {}
  },
  // answers object -> array of { k: <questionId>, v: <answer> }
  {
    $project: { answers: { $objectToArray: "$answers" } }
  },
  {
    $unwind: "$answers"
  },
  // Multi-select answers are arrays: emit one document per selected option.
  // Non-array answers pass through $unwind as a single document.
  {
    $unwind: "$answers.v"
  },
  // Count every (question, answer value) combination.
  {
    $group: {
      _id: "$answers",
      c: { $sum: 1 }
    }
  },
  {
    $group: {
      _id: "$_id.k",
      // $arrayToObject requires string keys; numeric answers (e.g. question4)
      // would make it fail, so stringify the answer value first.
      v: { $push: { k: { $toString: "$_id.v" }, v: "$c" } }
    }
  },
  // Collapse all questions into one document.
  {
    $group: {
      _id: null,
      v: { $push: { k: "$_id", v: { $arrayToObject: "$v" } } }
    }
  },
  {
    $set: { v: { $arrayToObject: "$v" } }
  },
  {
    $replaceWith: "$v"
  }
])
mongoplayground
// Aggregates survey answers grouped by question type, producing one document:
//   { radio: { q: { value: count } }, multi: { q: { value: count } }, avg: { q: mean } }
// The type is inferred from the stored answer: array -> "multi",
// string -> "radio", number -> "avg", anything else -> "other".
db.collection.aggregate([
  {
    $match: {}
  },
  // answers object -> array of { k: <questionId>, v: <answer> }
  {
    $project: { answers: { $objectToArray: "$answers" } }
  },
  {
    $unwind: "$answers"
  },
  // Tag each answer with its type BEFORE unwinding the value, otherwise the
  // multi-select arrays could no longer be told apart from radio strings.
  {
    $set: {
      "answers.type": {
        $switch: {
          branches: [
            {
              case: { $isArray: "$answers.v" },
              then: "multi"
            },
            {
              case: { $eq: [ { $type: "$answers.v" }, "string" ] },
              then: "radio"
            },
            {
              case: { $isNumber: "$answers.v" },
              then: "avg"
            }
          ],
          default: "other"
        }
      }
    }
  },
  // One document per selected option for multi-select answers.
  {
    $unwind: "$answers.v"
  },
  // Count every (question, value, type) combination.
  {
    $group: {
      _id: "$answers",
      c: { $sum: 1 }
    }
  },
  {
    $group: {
      _id: "$_id.k",
      type: { $first: "$_id.type" },
      v: {
        $push: {
          // $arrayToObject needs string keys, so numeric answers are stringified.
          k: { $toString: "$_id.v" },
          v: "$c"
        }
      }
    }
  },
  // Group questions under their type ...
  {
    $group: {
      _id: "$type",
      v: {
        $push: {
          k: "$_id",
          v: { $arrayToObject: "$v" }
        }
      }
    }
  },
  // ... and all types into a single document.
  {
    $group: {
      _id: null,
      v: {
        $push: {
          k: "$_id",
          v: { $arrayToObject: "$v" }
        }
      }
    }
  },
  {
    $set: { v: { $arrayToObject: "$v" } }
  },
  {
    $replaceWith: "$v"
  },
  // Turn each numeric question's { "<value>": <count> } histogram into its mean.
  {
    $set: {
      avg: {
        $arrayToObject: {
          $map: {
            input: { $objectToArray: "$avg" },
            as: "s",
            in: {
              k: "$$s.k",
              v: {
                $let: {
                  vars: { counts: { $objectToArray: "$$s.v" } },
                  in: {
                    // Weighted mean: sum(value * count) / sum(count).
                    // Averaging the value * count products directly is only
                    // correct when every distinct value occurs exactly once.
                    $divide: [
                      {
                        $sum: {
                          $map: {
                            input: "$$counts",
                            as: "x",
                            // $toDouble (not $toInt) so that non-integer
                            // answers such as "2.5" are parsed as well.
                            in: { $multiply: [ { $toDouble: "$$x.k" }, "$$x.v" ] }
                          }
                        }
                      },
                      { $sum: "$$counts.v" }
                    ]
                  }
                }
              }
            }
          }
        }
      }
    }
  }
])
mongoplayground

Move field to a specific object in an array in MongoDB

Here's my data:
{
foos: [
{ _id: 1 },
{ _id: 2 },
{ _id: 3 }
],
bar: "baz"
}
Now I wanna move field bar into foos object with _id = 2 to have this:
{
foos: [
{ _id: 1 },
{ _id: 2, bar: "baz" },
{ _id: 3 },
]
}
How can I do this using aggregation framework?
Use $map to iterate over foos: when an element's _id is 2, merge { bar: "$bar" } into it with $mergeObjects; otherwise return the element unchanged.
// Rebuilds `foos`, copying the top-level `bar` into the element whose _id is 2.
// Only `foos` (and _id) are projected, so the top-level `bar` is dropped.
db.collection.aggregate([
  {
    $project: {
      foos: {
        $map: {
          input: "$foos",
          as: "foo",
          in: {
            $cond: {
              if: { $eq: [ "$$foo._id", 2 ] },
              then: { $mergeObjects: [ "$$foo", { bar: "$bar" } ] },
              else: "$$foo"
            }
          }
        }
      }
    }
  }
])
Playground

mongodb average arrays across many documents

Using mongodb, I have a collection of documents where each document has a fixed length vector of floating point values such as below:
items = [
{"id": "1", "vec": [1, 2, 0]},
{"id": "2", "vec": [6, 4, 1]},
{"id": "3", "vec": [3, 2, 2]},
]
I would like to take the row wise average of these vectors. In this example I would expect the result to return
[ (1 + 6 + 3) / 3, (2 + 4 + 2) / 3, (0 + 1 + 2) / 3 ]
This answer is very close to what I am looking for, but as far as I can tell it will only work on vectors of size 2. mongoDB - average on array values
An answer has been provided that is not very performant for large arrays. For context I am using ~700 dimension vectors.
This should work: https://mongoplayground.net/p/PKXqmmW31nW
// Element-wise average of `vec` across all documents.
// Hard-coded for vectors of length 3 (indexes 0, 1 and 2).
[
  {
    // Collect each position's values from every document into its own array.
    $group: {
      _id: null,
      a: { $push: { $arrayElemAt: [ "$vec", 0 ] } },
      b: { $push: { $arrayElemAt: [ "$vec", 1 ] } },
      c: { $push: { $arrayElemAt: [ "$vec", 2 ] } }
    }
  },
  {
    // Average each collected array.
    $project: {
      a: { $avg: "$a" },
      b: { $avg: "$b" },
      c: { $avg: "$c" }
    }
  }
]
Which outputs:
[
{
"_id": null,
"a": 3.3333333333333335,
"b": 2.6666666666666665,
"c": 1
}
]
Here's a more efficient version that does not use the $avg operator. I'll leave the other answer up for reference.
https://mongoplayground.net/p/rVERc8YjKZv
// Element-wise average of `vec` across all documents, computed as
// per-position sum divided by the number of documents.
// Hard-coded for vectors of length 3 (indexes 0, 1 and 2).
db.collection.aggregate([
  {
    // Running sums per position plus a document counter.
    $group: {
      _id: null,
      a: { $sum: { $arrayElemAt: [ "$vec", 0 ] } },
      b: { $sum: { $arrayElemAt: [ "$vec", 1 ] } },
      c: { $sum: { $arrayElemAt: [ "$vec", 2 ] } },
      totalDocuments: { $sum: 1 }
    }
  },
  {
    // sum / count for every position
    $project: {
      a: { $divide: [ "$a", "$totalDocuments" ] },
      b: { $divide: [ "$b", "$totalDocuments" ] },
      c: { $divide: [ "$c", "$totalDocuments" ] }
    }
  }
])
You can use $unwind to get values into separate documents, the key is to keep the index of the values. Then you can use $group by the index and calculate the average using the $avg operator.
// Element-wise average of `vec` across all documents, for any vector length.
db.collection.aggregate([
  // One document per vector element; `i` records the element's position.
  { $unwind: { path: "$vec", includeArrayIndex: "i" } },

  // Average all values that share the same position.
  // After this stage every average already exists, one document per position.
  { $group: { _id: "$i", avg: { $avg: "$vec" } } },

  // The remaining stages only reassemble the averages into a single array,
  // ordered by position.
  { $sort: { _id: 1 } },
  { $group: { _id: null, avg: { $push: "$avg" } } }
])
Mongo Playground

MongoDB aggregation filter based on max value

Suppose I have a document structure where one of the fields, X, is an array of objects as shown below.
"X" : [
{
"A" : "abc",
"B" : 123
},
{
"A" : "wer",
"B" : 124
},
{
"A" : "fgh",
"B" : 124
}
]
How can I project only the document where field B has the highest value? And if the maximum value is shared by several documents, I just want to return one of them (not important which one). In this case the result could look like:
"X" : [
{
"A" : "wer",
"B" : 124
}
]
What about this one:
// Reduces X to the single element with the highest B, keeping X an array
// (a one-element array, as in the requested output).
db.collection.aggregate([
  {
    // Keep every element whose B equals the maximum B of the array.
    $set: {
      X: {
        $filter: {
          input: "$X",
          cond: { $eq: [ "$$this.B", { $max: "$X.B" } ] }
        }
      }
    }
  },
  // Several elements may share the maximum: keep only the first one.
  // $slice (unlike $arrayElemAt) returns an array, so X stays an array.
  { $set: { X: { $slice: [ "$X", 1 ] } } }
])
You can use $reduce
// Folds X down to the one element with the largest B.
// X becomes that element (an object), not an array.
db.collection.aggregate([
  {
    $project: {
      X: {
        $reduce: {
          input: "$X",
          // Start from an empty object, i.e. one without a B field.
          initialValue: {},
          in: {
            $cond: {
              // Is the current element's B larger than the best one so far?
              if: { $gt: [ "$$this.B", "$$value.B" ] },
              then: "$$this",   // yes: the current element becomes the best
              else: "$$value"   // no: keep the previously returned element
            }
          }
        }
      }
    }
  }
])
Mongo Playground
Updated answer:
Another option that results in the full object being returned at the end:
// Per original document, returns the element of X with the highest B.
[
  // One document per element of X.
  { $unwind: "$X" },
  // Highest B first.
  { $sort: { "X.B": -1 } },
  // Regroup by the original document and keep the first (= highest) element.
  {
    $group: {
      _id: { _id: "$_id" },
      X: { $first: "$X" }
    }
  }
]
Original answer:
You can use the $max operator (https://docs.mongodb.com/manual/reference/operator/aggregation/max/).
// Replaces X with the maximum B value found in the array
// (the value only, not the element that holds it).
[
  {
    $project: {
      X: { $max: "$X.B" }
    }
  }
]

How to get several combinations of documents where sums of properties reach a certain value in Mongodb?

If we imagine this kind of document structure :
[
{
id: 1,
name: "",
values : {
a: 24,
b: 42
}
},
{
id: 2,
name: "",
values : {
a: 43,
b: 53
}
},
{
id: 3,
name: "",
values : {
a: 33,
b: 25
}
},
{
id: 4,
name: "",
values : {
a: 89,
b: 2
}
}
// ...
]
Is it possible to get one or more lists of documents where, for example, the sum of the $.values.a equals 100 and the sum of the $.values.b equals 120? Or, if not, is it possible to sort the best fits within some kind of threshold?
For example, the best output can be something like that :
[
{
id: 1,
name: "",
values : {
a: 24,
b: 42
}
},
{
id: 2,
name: "",
values : {
a: 43,
b: 53
}
},
{
id: 3,
name: "",
values : {
a: 33,
b: 25
}
}
]
There is no native implementation for this...
But you can get the desired results if your data meets some requirements:
Your collection does not hold too much data (this solution scales badly)
Your id field is unique
Your collection has an index on the id field
Explanation
We sort by id
With $lookup on the same collection (it's important for `id` to be indexed), we pick the next 10 documents after the current document: L i = (Doc i+1 ... Doc i+10)
With $reduce, we accumulate from i ... i+n while a < 100 and b < 120, i.e. until a >= 100 or b >= 120
With $facet, we separate the lists that hit exactly a=100, b=120 (equals) from those within the threshold (±10 for values.a and values.b)
Last steps, if any equals exists, we ignore threshold. Otherwise, we take threshold.
// For each document, greedily accumulates the following documents (by id)
// while sum(values.a) < 100 and sum(values.b) < 120, then returns the
// accumulated lists that hit a = 100 / b = 120 exactly or, if there are none,
// those inside the threshold window.
db.collection.aggregate([
  { $sort: { id: 1 } },

  // Attach up to 10 documents whose id is greater than the current one.
  {
    $lookup: {
      from: "collection",
      let: { id: "$id" },
      pipeline: [
        { $sort: { id: 1 } },
        { $match: { $expr: { $gt: [ "$id", "$$id" ] } } },
        { $limit: 10 }
      ],
      as: "bucket"
    }
  },

  // Fold the bucket into { a: <sum>, b: <sum>, data: [ <documents used> ] },
  // seeded with the current document itself.
  {
    $replaceRoot: {
      newRoot: {
        $reduce: {
          input: "$bucket",
          initialValue: {
            a: "$values.a",
            b: "$values.b",
            data: [
              { _id: "$_id", id: "$id", name: "$name", values: "$values" }
            ]
          },
          in: {
            $let: {
              vars: {
                // The next document is taken only while both running sums
                // are still below their targets.
                take: {
                  $and: [
                    { $lt: [ "$$value.a", 100 ] },
                    { $lt: [ "$$value.b", 120 ] }
                  ]
                }
              },
              in: {
                a: { $add: [ "$$value.a", { $cond: [ "$$take", "$$this.values.a", 0 ] } ] },
                b: { $add: [ "$$value.b", { $cond: [ "$$take", "$$this.values.b", 0 ] } ] },
                data: {
                  $concatArrays: [
                    "$$value.data",
                    { $cond: [ "$$take", [ "$$this" ], [] ] }
                  ]
                }
              }
            }
          }
        }
      }
    }
  },

  // Split candidates into exact matches and near matches.
  {
    $facet: {
      equals: [
        { $match: { a: 100, b: 120 } }
      ],
      threshold: [
        {
          $match: {
            a: { $gte: 90, $lt: 110 },
            b: { $gte: 110, $lt: 130 }
          }
        }
      ]
    }
  },

  // Prefer exact matches; fall back to the near matches when there are none.
  {
    $project: {
      result: {
        $cond: [
          { $gt: [ { $size: "$equals" }, 0 ] },
          "$equals",
          "$threshold"
        ]
      }
    }
  },

  // One output document per candidate list.
  { $unwind: "$result" }
])
MongoPlayground