Related
I am trying to write a query to get all of the results of some survey data stored in MongoDB. The tricky part is that some questions are radio questions with a single answer, some are multi-select questions, and some are numeric values that need to be averaged, so I want to perform a different aggregation depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b'],
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
// Unfinished sketch of the survey pipeline: it filters to one survey and turns
// the answers object into an array of { k, v } pairs; the remaining stages are
// only outlined in the comments below.
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// maybe facet here?
// conceptually, in the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k, ['question1', 'question2']] group by k, v and count
// if $in [$$answers.k, ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the results by day/month based on the createdAt time.
// Counts how often each answer value occurs for every question and returns a
// single document shaped like { question1: { a: 2, b: 1 }, question2: { ... } }.
// Multi-select answers (arrays) are counted per selected option.
// Numeric answers are tallied per distinct value; they are NOT averaged here.
db.collection.aggregate([
  // Matches every document; put a filter here to restrict the result set.
  { $match: {} },

  // Turn { question1: 'a', ... } into [ { k: 'question1', v: 'a' }, ... ].
  { $project: { answers: { $objectToArray: "$answers" } } },

  // One document per (response, question).
  { $unwind: "$answers" },

  // One document per selected option; non-array values pass through unchanged.
  { $unwind: "$answers.v" },

  // Count identical (question, value) pairs.
  { $group: { _id: "$answers", c: { $sum: 1 } } },

  // Collect the per-value counts of each question as k/v pairs.
  {
    $group: {
      _id: "$_id.k",
      // $arrayToObject only accepts string keys, so numeric answers
      // (e.g. question4: 3) must be converted before they are used as "k".
      v: { $push: { k: { $toString: "$_id.v" }, v: "$c" } }
    }
  },

  // Collapse all questions into one array of { k: question, v: counts }.
  {
    $group: {
      _id: null,
      v: { $push: { k: "$_id", v: { $arrayToObject: "$v" } } }
    }
  },

  // Build the final object and promote it to the document root.
  { $set: { v: { $arrayToObject: "$v" } } },
  { $replaceWith: "$v" }
])
mongoplayground
// Buckets every question by the type of its answers and aggregates accordingly:
//   radio: string answers, counted per value
//   multi: array answers, counted per selected option
//   avg:   numeric answers, reduced to their arithmetic mean
// Result shape: { radio: { question1: { a: 2, b: 1 }, ... },
//                 multi: { question3: { ... } },
//                 avg:   { question4: 2 } }
db.collection.aggregate([
  // Matches every document; put a filter here to restrict the result set.
  { $match: {} },

  // Turn the answers object into an array of { k: question, v: answer } pairs.
  { $project: { answers: { $objectToArray: "$answers" } } },
  { $unwind: "$answers" },

  // Tag each answer with its question type, derived from the value's type.
  // The array check must run before $unwind flattens multi-select answers.
  {
    $set: {
      "answers.type": {
        $switch: {
          branches: [
            { case: { $isArray: "$answers.v" }, then: "multi" },
            { case: { $eq: [ { $type: "$answers.v" }, "string" ] }, then: "radio" },
            { case: { $isNumber: "$answers.v" }, then: "avg" }
          ],
          default: "other"
        }
      }
    }
  },

  // One document per selected option; scalar values pass through unchanged.
  { $unwind: "$answers.v" },

  // Count identical (question, value, type) triples.
  { $group: { _id: "$answers", c: { $sum: 1 } } },

  // Per question: remember its type and collect value -> count pairs.
  // Keys are stringified because $arrayToObject only accepts string keys.
  {
    $group: {
      _id: "$_id.k",
      type: { $first: "$_id.type" },
      v: { $push: { k: { $toString: "$_id.v" }, v: "$c" } }
    }
  },

  // Per type: collect question -> counts pairs.
  {
    $group: {
      _id: "$type",
      v: { $push: { k: "$_id", v: { $arrayToObject: "$v" } } }
    }
  },

  // Single document: collect type -> questions pairs.
  {
    $group: {
      _id: null,
      v: { $push: { k: "$_id", v: { $arrayToObject: "$v" } } }
    }
  },
  { $set: { v: { $arrayToObject: "$v" } } },
  { $replaceWith: "$v" },

  // Replace the value -> count histogram of every "avg" question by its mean.
  {
    $set: {
      avg: {
        $arrayToObject: {
          $map: {
            input: { $objectToArray: "$avg" },
            as: "s",
            in: {
              k: "$$s.k",
              v: {
                $let: {
                  // pairs = [ { k: "<value>", v: <occurrences> }, ... ]
                  vars: { pairs: { $objectToArray: "$$s.v" } },
                  in: {
                    // Weighted mean: sum(value * count) / sum(count).
                    // Averaging the products over the distinct values is only
                    // correct when every value occurs exactly once.
                    $divide: [
                      {
                        $sum: {
                          $map: {
                            input: "$$pairs",
                            as: "x",
                            // $toDouble also accepts non-integer values such as "2.5".
                            in: { $multiply: [ { $toDouble: "$$x.k" }, "$$x.v" ] }
                          }
                        }
                      },
                      { $sum: "$$pairs.v" }
                    ]
                  }
                }
              }
            }
          }
        }
      }
    }
  }
])
mongoplayground
A: The number of documents (_ids) for each date, grouped by date.
B: The number of documents counted in A whose details array has at least one element.
A document counts as 1 if details has any element and as 0 otherwise. For example, a document like the following is counted in A but excluded from B:
{
_id: ObjectId
details: array //no elements
createdAt: Date
}
C: The count of B becomes C, except for documents in B that have specific details.salesManagerIds.
details.salesManagerIds is provided as an array.
For examples,
[ObjectId("612f57184205db63a3396a9e"), ObjectId("612cb021278f621a222087d7")]
I made query as follows.
https://mongoplayground.net/p/6sBxAmO_31y
It goes well until B. How can I write a query to get C ?
If you write and execute a query that can obtain C through the link above, you should get the following result.
[
{
"A": 2,
"B": 1,
"C": 1,
"_id": "2018-05-19"
},
{
"A": 3,
"B": 3,
"C": 1,
"_id": "2018-05-18"
}
]
use $filter
// Per calendar day (UTC date string of createdAt), newest day first:
//   A = number of documents
//   B = number of documents whose details is a non-empty array
//   C = number of documents counted in B that have at least one details element
//       whose salesManagerId is not one of the two listed ObjectIds
// NOTE(review): the question text spells the field "salesManagerIds"; confirm
// the actual field name against the collection.
db.collection.aggregate([
  {
    $group: {
      _id: { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } },

      A: { $sum: 1 },

      B: {
        $sum: {
          $cond: {
            if: {
              $and: [
                { $isArray: "$details" },
                { $gt: [ { $size: "$details" }, 0 ] }
              ]
            },
            then: 1,
            else: 0
          }
        }
      },

      C: {
        $sum: {
          $cond: {
            if: {
              $and: [
                { $isArray: "$details" },
                { $gt: [ { $size: "$details" }, 0 ] },
                {
                  // At least one element survives the exclusion filter.
                  $gt: [
                    {
                      $size: {
                        $filter: {
                          input: "$details",
                          as: "detail",
                          cond: {
                            $not: [
                              {
                                $in: [
                                  "$$detail.salesManagerId",
                                  [
                                    ObjectId("612f57184205db63a3396a9e"),
                                    ObjectId("612cb021278f621a222087d7")
                                  ]
                                ]
                              }
                            ]
                          }
                        }
                      }
                    },
                    0
                  ]
                }
              ]
            },
            then: 1,
            else: 0
          }
        }
      }
    }
  },
  { $sort: { _id: -1 } }
])
mongoplayground
I want to get related data based on current item processing.
Sample:
[
{ field1: 1, field2: 2, value: 12 },
{ field1: 1, field2: 2, value: 21 },
{ field1: 1, value: 1 },
{ field2: 2, value: 2 },
{ field1: 2, field2: 3, value: 23 }
];
and result:
[
{
_id: { field1: 1, field2: 2 },
value: [12, 21],
relatedValue: [1, 2], // of item 1 and 2 because field 1 = 1 or field 2 = 2
},
];
Sample query:
// Attempt to group documents by (field1, field2) and attach related documents
// to every pushed entry. This query is rejected by the server as written.
db.collectionA.aggregate([
{
$match: { field1: 1 }
},
{
"$group":{
"_id":{
"field1":"$field1",
"field2":"$field2"
},
"alerts":{
"$push":{
"_id":"$_id",
"value":"$value",
"relatedData": {
// $unionWith is a pipeline stage, not an expression, so it cannot be
// used inside $group / $push.
"$unionWith": {
"coll": "collectionA",
"pipeline": [{
"$match": {
"$or": [
// NOTE(review): in a plain $match these compare against the literal
// strings "$field1" / "$field2", not against field values.
{ "field1": "$field1" },
{ "field2": "$field2" }
]
}
}]
}
}
}
}
}
}
])
I tried to run this query but it fails with an error. Please help me fix it or suggest another solution.
// Edited: value should be array because I want to group data by field1, field2 and push all value of group to an array
You're trying to use $unionWith within $group but it is a "pipeline stage" meaning it can't be used like that, the same way you can't use $group within a $group.
Additionally this stage is used to "union" two collections and not to populate data based on value matches ( which it seems you're trying to do here ), for this case you want to use $lookup, like so:
// For every (field1, field2) group, returns the group's own values plus the
// values of other documents that share field1 or field2 with a group member.
db.collection.aggregate([
  // Self-join: attach to each document the "value" of every OTHER document
  // whose field1 or field2 equals this document's.
  {
    $lookup: {
      from: "collection",
      let: { f1: "$field1", f2: "$field2", selfId: "$_id" },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                {
                  $or: [
                    { $eq: [ "$$f1", "$field1" ] },
                    { $eq: [ "$$f2", "$field2" ] }
                  ]
                },
                // Do not relate a document to itself.
                { $ne: [ "$$selfId", "$_id" ] }
              ]
            }
          }
        },
        { $project: { value: 1 } }
      ],
      as: "relatedData"
    }
  },

  // Group by the field pair; relatedValue becomes an array of arrays
  // (one inner array of related values per grouped document).
  {
    $group: {
      _id: { field1: "$field1", field2: "$field2" },
      values: { $push: "$value" },
      relatedValue: {
        $push: {
          $map: { input: "$relatedData", as: "rel", in: "$$rel.value" }
        }
      }
    }
  },

  // Flatten and de-duplicate the related values, then drop those that already
  // appear in the group's own values.
  {
    $project: {
      field1: "$_id.field1",
      field2: "$_id.field2",
      values: 1,
      relatedValues: {
        $setDifference: [
          {
            $reduce: {
              input: "$relatedValue",
              initialValue: [],
              in: { $setUnion: [ "$$this", "$$value" ] }
            }
          },
          "$values"
        ]
      }
    }
  }
])
Mongo Playground
I am new to mongodb and wanted to convert my array to object using pipeline. For example,
{
field1: [1,2,3,4,5],
field2: ['a','b','c','d','e'],
}
I want the above document to be converted to,
{
fields: [
{
field1: 1,
field2: 'a'
},
......
{
field1: 5,
field2: 'e'
}
]
}
Any idea how I can achieve this?
You can use $unwind to separate your arrays.
And then format your new list with $project without forgetting to remove the duplicates created by the $unwind.
// Pairs field1[i] with field2[i] and returns { _id, fields: [ { field1, field2 }, ... ] }.
// The stages are passed as one pipeline array (the documented form, which also
// leaves room for an options argument) rather than as separate arguments.
db.collection.aggregate([
  // Unwind both arrays, remembering each element's position. After the second
  // $unwind every document holds one (field1[i], field2[j]) combination.
  { $unwind: { path: "$field1", includeArrayIndex: "field1_index" } },
  { $unwind: { path: "$field2", includeArrayIndex: "field2_index" } },

  // Shape the pair and flag whether both elements came from the same position.
  {
    $project: {
      fields: { field1: "$field1", field2: "$field2" },
      position: "$field1_index",
      diff: { $cmp: [ "$field1_index", "$field2_index" ] }
    }
  },

  // Keep only the combinations where i == j; this removes the cross-product
  // duplicates created by the double $unwind.
  { $match: { diff: 0 } },

  // $push keeps the order in which documents arrive, so sort by the original
  // array position to get a deterministic result.
  { $sort: { _id: 1, position: 1 } },

  // Reassemble one document per original _id.
  { $group: { _id: "$_id", fields: { $push: "$fields" } } }
])
Try it here
You can use $zip and $map and $reduce to achieve this:
// Adds a "fields" array that pairs field1[i] with field2[i]:
//   { field1: [1, 2], field2: ['a', 'b'] }
//     -> fields: [ { field1: 1, field2: 'a' }, { field1: 2, field2: 'b' } ]
// $zip stops at the shorter array, so surplus elements of the longer one are dropped.
// The original field1 / field2 arrays are kept because this is $addFields.
db.collection.aggregate([
  {
    $addFields: {
      fields: {
        // $map keeps the input order; building the array with
        // $reduce + $concatArrays([ [pair], "$$value" ]) would prepend each
        // pair and return the list reversed.
        $map: {
          // [ [field1[0], field2[0]], [field1[1], field2[1]], ... ]
          input: { $zip: { inputs: [ "$field1", "$field2" ] } },
          as: "pair",
          in: {
            field1: { $arrayElemAt: [ "$$pair", 0 ] },
            field2: { $arrayElemAt: [ "$$pair", 1 ] }
          }
        }
      }
    }
  }
])
MongoPlayground
Make sure both field1 and field2 are of equal length or you will lose some data.
I want to group objects in the array by same value for specified field and produce a count.
I have the following mongodb document (non-relevant fields are not present).
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
...otherFields
}
The following is what I want.
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
newArrayField: [
{ fieldA: value1, count: 1 },
{ fieldA: value2, count: 2 },
],
...otherFields
}
Here I grouped embedded documents by fieldA.
I know how to do it with unwind and 2 group stages the following way. (irrelevant stages are omitted)
Concrete example
// document structure
{
_id: ObjectId(...),
type: "test",
results: [
{ choice: "a" },
{ choice: "b" },
{ choice: "a" }
]
}
// Counts, per document, how often each results.choice occurs and returns
// { _id: { _id, type }, groupedResults: [ { count, choice }, ... ] }.
db.test.aggregate([
  // Matches every document; put a filter here to restrict the result set.
  { $match: {} },

  // One document per results element; documents with a missing or empty
  // results array are kept (they end up as a single entry without a choice).
  { $unwind: { path: "$results", preserveNullAndEmptyArrays: true } },

  // Count occurrences of each choice within each original document.
  {
    $group: {
      _id: { _id: "$_id", type: "$type", choice: "$results.choice" },
      count: { $sum: 1 }
    }
  },

  // Reassemble one document per original _id. After the previous $group only
  // _id and count exist, so the key is built from $_id.* alone; a reference to
  // "$results.choice" here would point at a field that is no longer present.
  {
    $group: {
      _id: { _id: "$_id._id", type: "$_id.type" },
      groupedResults: { $push: { count: "$count", choice: "$_id.choice" } }
    }
  }
])
You can use below aggregation
// Adds newArrayField: one { fieldA, count } entry per distinct fieldA value
// found in arrayField, without unwinding the array.
db.test.aggregate([
  {
    $addFields: {
      newArrayField: {
        $map: {
          // Distinct fieldA values of the embedded documents.
          input: { $setUnion: [ "$arrayField.fieldA" ] },
          as: "distinctValue",
          in: {
            fieldA: "$$distinctValue",
            // Number of embedded documents carrying this fieldA value.
            count: {
              $size: {
                $filter: {
                  input: "$arrayField",
                  as: "element",
                  cond: { $eq: [ "$$element.fieldA", "$$distinctValue" ] }
                }
              }
            }
          }
        }
      }
    }
  }
])
The below adds a new array field, which is generated by:
Using $setUnion to get unique set of array items, with inner $map to
extract only the choice field
Using $map on the unique set of items,
with inner $reduce on the original array, to sum all items where
choice matches
Pipeline:
// Adds newArrayField: one { choice, count } entry per distinct results.choice,
// computed in place with $setUnion / $map / $reduce.
db.test.aggregate([
  {
    $addFields: {
      newArrayField: {
        $map: {
          // Distinct { choice } objects taken from the results array.
          input: {
            $setUnion: [
              {
                $map: {
                  input: "$results",
                  in: { choice: "$$this.choice" }
                }
              }
            ]
          },
          as: "distinct",
          in: {
            choice: '$$distinct.choice',
            // Walk the whole results array and add 1 for every matching element.
            count: {
              $reduce: {
                input: "$results",
                initialValue: 0,
                in: {
                  $sum: [
                    "$$value",
                    {
                      $cond: [
                        { $eq: [ "$$this.choice", "$$distinct.choice" ] },
                        1,
                        0
                      ]
                    }
                  ]
                }
              }
            }
          }
        }
      }
    }
  }
])
The $reduce will iterate over the results array n times, where n is the number of unique values of choice, so the performance will depend on that.