Mongo DB aggregation result in the provided manner - mongodb

the following data in my mongo-db
Input:
[
{
'id':1,
"year": "2022-10-01",
"Area":{
"Education":'Engineering'
}
},
{
'id':2,
"year": "2022-10-01",
"Area":{
"Education":'Commerce'
}
},
{
'id':3,
"year": "2022-10-01",
"Area":{
"Education":'Arts'
}
},
{
'id':4,
"year": "2022-10-01",
"Area":{
"Education":'Arts'
}
},
{
'id':5,
"year": "2022-01-01",
"Area":{
"Education":'Engineering'
}
},
{
'id':6,
"year": "2022-01-01",
"Area":{
"Education":'Engineering'
}
}
]
there are records of several years based on each Education field, final result to be in this form where first is grouped by date and then grouped by Education field and the count of each education field in each year
Outcome in this manner:
{
"2022-10-01": {
"Education": {
"Engineering": 1,
"Commerce": 1,
"Arts": 2
}
},
"2021-01-01": {
"Education": {
"Engineering": 2,
"Commerce": 0,
"Arts": 0
}
}
}

Try this one:
db.collection.aggregate([
{ $group: { _id: { year: "$year", Education: "$Area.Education" }, count: { $sum: 1 } } },
{ $group: { _id: "$_id.year", Education: { $push: { k: "$_id.Education", v: "$count" } } } },
{ $set: { k: "$_id", Education: { $arrayToObject: "$Education" } } },
{ $group: { _id: null, root: { $push: { k: "$_id", v: { Education: "$Education" } } } } },
{ $replaceWith: { $arrayToObject: "$root" } }
])
Mongo Playground
It is a bit difficult to count values which do not exist, because there could be infinite numbers of them. Maybe like this:
db.collection.aggregate([
{ $group: { _id: { year: "$year", Education: "$Area.Education" }, count: { $sum: 1 } } },
{ $group: { _id: "$_id.year", Education: { $push: { k: "$_id.Education", v: "$count" } } } },
{
$set: {
Education: {
$map: {
input: ["Engineering", "Arts", "Commerce"],
in: {
k: "$$this",
v: {
$let: {
vars: {
val: {
$first: {
$filter: {
input: "$Education",
as: "area",
cond: { $eq: ["$$this", "$$area.k"] }
}
}
}
},
in: { $ifNull: ["$$val.v", 0] }
}
}
}
}
}
}
},
{ $set: { k: "$_id", Education: { $arrayToObject: "$Education" } } },
{ $group: { _id: null, root: { $push: { k: "$_id", v: { Education: "$Education" } } } } },
{ $replaceWith: { $arrayToObject: "$root" } }
])

Related

How to get this pipeline to return exactly one document?

I am running the following aggregation pipeline:
const agg = [
{
'$match': {
'aaa': 'bbb'
}
}, {
'$group': {
'_id': '',
'total': {
'$sum': '$num'
}
}
}
];
My problem is, when $match matches nothing, the pipeline returns 0 documents. How do I get the pipeline to always return 1 document?
In MongoDB version 6.0 you can do it like this one:
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{
$group: {
_id: null,
total: { $sum: "$num" }
}
},
{
$densify: {
field: "total",
range: { step: 1, bounds: [0, 0] }
}
},
{ $set: { _id: { $cond: [{ $eq: [{ $type: "$_id" }, "missing"] }, MaxKey, "$_id"] } } },
{ $sort: { _id: 1 } },
{ $limit: 1 }
])
In version < 6.0 you can try this one:
db.collection.aggregate([
{
$facet: {
data: [
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } }
],
default: [
{ $limit: 1 },
{ $group: { _id: null, total: { $sum: 0 } } },
{ $set: { _id: MaxKey } }
]
}
},
{ $replaceWith: { $mergeObjects: [{ $first: "$default" }, { $first: "$data" }] } },
])
Or this one:
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } },
{
$unionWith: {
coll: "collection",
pipeline: [
{ $limit: 1 },
{ $set: { _id: MaxKey, total: 0 } },
{ $project: { _id: 1, total: 1 } }
]
}
},
{ $sort: { _id: 1 } },
{ $limit: 1 }
])

mongoose complex aggregation pipeline question

I am trying to finish up a data aggregation pipeline and having issues getting the data into the correct format. I'm not even sure if this is possible to do in one pipeline.
The original data looks like this:
[
{
answers: {
'question1': 'a',
'question2': 'c',
'question3': ['a','b'],
'question4': 1
},
createdAt: 2022-03-04T07:30:40.517Z,
},
{
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['a','c']
'question4': 2
},
createdAt: 2022-03-04T07:30:40.518Z,
}
]
I've got my pipeline so far with this:
{ $project: {
"answers": { $objectToArray: "$answers" },
"date": { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" }}
}},
{ $unwind: "$answers" },
{ $unwind: "$answers.v" },
{
$group: {
_id: { answers : "$answers", date: "$date"},
c: { $sum: 1 }}
},
and the data now looks like this:
{
_id: {
answers: { k: 'q3', v: 'b' },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q3', v: 'a' },
date: '2022-03-04'
},
count: 2
},
{
_id: {
answers: { k: 'q4', v: 1 },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q1', v: 'b' },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q4', v: 2 },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q2', v: 'c' },
date: '2022-03-04'
},
count: 2
},
{
_id: {
answers: { k: 'q3', v: 'c' },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q1', v: 'a' },
date: '2022-03-04'
},
count: 1
}
I would like to get a result that looks something like this:
{
'dates': [
{
'date': '2022-03-04',
'q1': { 'a': 1, 'b': 1 }
'q2': { 'c': 2 },
'q3': { 'a': 2, 'b': 1, 'c': 1 },
'q4': { '1': 1, '2': 1 }
}
]
'totals': { // this would be the totals across all the dates
'q1': { 'a': 1, 'b': 1 }
'q2': { 'c': 2 },
'q3': { 'a': 2, 'b': 1, 'c': 1 },
'q4': { '1': 1, '2': 1 }
}
}
any help would be greatly appreciated, even if I can't get both the totals and breakdown in 1 query.
here is the mongoplaygroud I've been working on
Not that simple. An important stage you have to use is $facet in order to get totals and dates
Maybe with $setWindowFields the aggregation pipeline could be a little simpler, but that a quick guess.
db.collection.aggregate([
{
$project: {
_id: 0,
answers: { $objectToArray: "$answers" },
date: { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } }
}
},
{ $unwind: "$answers" },
{ $unwind: "$answers.v" },
{
$group: {
_id: {
answer: "$answers.v",
question: "$answers.k",
date: "$date"
},
count: { $sum: 1 }
}
},
{
$facet: {
dates: [
{
$group: {
_id: { question: "$_id.question", date: "$_id.date" },
count: {
$push: {
k: { $toString: "$_id.answer" },
v: "$count"
}
}
}
},
{
$group: {
_id: "$_id.date",
count: {
$push: {
k: "$_id.question",
v: { $arrayToObject: "$count" }
}
}
}
},
{
$replaceWith: {
$mergeObjects: [
{ date: "$_id" },
{ $arrayToObject: "$count" }
]
}
}
],
totals: [
{
$group: {
_id: { answer: "$_id.answer", question: "$_id.question" },
v: { $push: "$count" }
}
},
{
$group: {
_id: "$_id.question",
count: {
$push: {
k: { $toString: "$_id.answer" },
v: { $sum: "$v" }
}
}
}
},
{
$project: {
_id: 0,
k: "$_id",
v: { $arrayToObject: "$count" }
}
}
]
}
},
{ $set: { totals: { $arrayToObject: "$totals" } } }
])
Mongo Playground

mongo / mongoose aggregation pipeline query for survey data

I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b']
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// maybe facet here?
// conceptually, In the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k ['question1, 'question2']] group by k, v and count
// if $in [$$answers.k ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the by day/month based on the createdAt time
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { "$sum": 1 }
}
},
{
$group: {
_id: "$_id.k",
v: { "$push": { k: "$_id.v", v: "$c" } }
}
},
{
$group: {
_id: null,
v: { "$push": { k: "$_id", v: { "$arrayToObject": "$v" } } }
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
}
])
mongoplayground
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$set: {
"answers.type": {
$switch: {
branches: [
{
case: { $isArray: "$answers.v" },
then: "multi"
},
{
case: { $eq: [ { $type: "$answers.v" }, "string" ] },
then: "radio"
},
{
case: { $isNumber: "$answers.v" },
then: "avg"
}
],
default: "other"
}
}
}
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.k",
type: { $first: "$_id.type" },
v: {
$push: {
k: { $toString: "$_id.v" },
v: "$c"
}
}
}
},
{
$group: {
_id: "$type",
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$group: {
_id: null,
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
},
{
$set: {
avg: {
$arrayToObject: {
$map: {
input: { $objectToArray: "$avg" },
as: "s",
in: {
k: "$$s.k",
v: {
$avg: {
$map: {
input: { $objectToArray: "$$s.v" },
as: "x",
in: { $multiply: [ { $toInt: "$$x.k" }, "$$x.v" ] }
}
}
}
}
}
}
}
}
}
])
mongoplayground

Mongoose subquery

I have a collection that looks like below:
[
{
"orderNum": "100",
"createdTime": ISODate("2020-12-01T21:00:00.000Z"),
"amount": 100,
"memo": "100memo",
"list": [
1
]
},
{
"orderNum": "200",
"createdTime": ISODate("2020-12-01T21:01:00.000Z"),
"amount": 200,
"memo": "200memo",
"list": [
1,
2
]
},
{
"orderNum": "300",
"createdTime": ISODate("2020-12-01T21:02:00.000Z"),
"amount": 300,
"memo": "300memo"
},
{
"orderNum": "400",
"createdTime": ISODate("2020-12-01T21:03:00.000Z"),
"amount": 400,
"memo": "400memo"
},
]
and I'm trying to get the total amount of orders that were created before order# 300 (so order#100 and #200, total amount is 300).
Does anyone know how to get it via Mongoose?
You can use this one:
db.collection.aggregate([
{ $sort: { orderNum: 1 } }, // by default the order of documents in a collection is undetermined
{ $group: { _id: null, data: { $push: "$$ROOT" } } }, // put all documents into one document
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } }, // cut desired elementes from array
{ $unwind: "$data" }, // transform back to documents
{ $replaceRoot: { newRoot: "$data" } },
{ $group: { _id: null, total_amount: { $sum: "$amount" } } } // make summary
])
Actually it is not needed to $unwind and $group, so the shortcut would be this:
db.collection.aggregate([
{ $sort: { orderNum: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } },
{ $project: { total_amount: { $sum: "$data.amount" } } }
])
But the answer from #turivishal is even better.
Update for additional field
{
$set: {
data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] },
memo: { $arrayElemAt: [ "$data.memo", { $indexOfArray: ["$data.orderNum", "300"] } ] }
}
}
or
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } },
{ $set: { memo: { $last: { "$data.memo" } } },
$match orderNum less than 300
$group by null and get totalAmount using $sum of amount
YourSchemaModel.aggregate([
{ $match: { orderNum: { $lt: "300" } } },
{
$group: {
_id: null,
totalAmount: { $sum: "$amount" }
}
}
])
Playground

Sum of two int array in aggregation query grouped by another field

I'm new to MongoDB, and having some problems. My document contains fixed size int array that i should sum.
Can mongo sum two int array in a query grouped by another field? In my case, string date.
example data:
{d:[1,2], date:"17-01-2020"} {d:[3,4], date:"17-01-2020"} {d:[5,6], date:"18-01-2020"}
query result that i want:
{d:[4, 6], date:"17-01-2020"} {d:[5,6], date:"18-01-2020"}
If you have fixes size of two values then you can use this one:
db.collection.aggregate([
{ $group: { _id: null, data: { $push: "$d" } } },
{
$set: {
d: [
{$sum:{ $map: { input: "$data", in: { $first: "$$this" } } }},
{$sum:{ $map: { input: "$data", in: { $last: "$$this" } } }}
]
}
},
{$unset: "data"}
])
For any length use this one:
db.collection.aggregate([
{ $group: { _id: null, data: { $push: "$d" } } },
{
$set: {
d: {
$map: {
input: { $range: [0, { $size: { $first: "$data" } }] },
as: "idx",
in: { $sum: { $map: { input: "$data", in: { $arrayElemAt: ["$$this", "$$idx"] } } } }
}
}
}
}
])
The first array determines the length of all the other arrays.
Another approach is this one:
db.collection.aggregate([
{ $unwind: { path: "$d", includeArrayIndex: "idx" } },
{ $group: { _id: "$idx", d: { $sum: "$d" } } },
{ $sort: { _id: 1 } },
{ $group: { _id: null, d: { $push: "$d" } } }
])
Or if you like to add other fields:
db.collection.aggregate([
{ $unwind: { path: "$d", includeArrayIndex: "idx" } },
{ $group: { _id: "$idx", d: { $sum: "$d" }, date: { $first: "$date" } } },
{ $sort: { _id: 1 } },
{ $group: { _id: "$date", d: { $push: "$d" } } },
{ $project: { d: 1, date: "$_id" } }
])
This works even if the arrays do not all have the same length.