I am trying to finish up a data aggregation pipeline and having issues getting the data into the correct format. I'm not even sure if this is possible to do in one pipeline.
The original data looks like this:
[
{
answers: {
'question1': 'a',
'question2': 'c',
'question3': ['a','b'],
'question4': 1
},
createdAt: 2022-03-04T07:30:40.517Z,
},
{
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['a','c']
'question4': 2
},
createdAt: 2022-03-04T07:30:40.518Z,
}
]
I've got my pipeline so far with this:
{ $project: {
"answers": { $objectToArray: "$answers" },
"date": { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" }}
}},
{ $unwind: "$answers" },
{ $unwind: "$answers.v" },
{
$group: {
_id: { answers : "$answers", date: "$date"},
c: { $sum: 1 }}
},
and the data now looks like this:
{
_id: {
answers: { k: 'q3', v: 'b' },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q3', v: 'a' },
date: '2022-03-04'
},
count: 2
},
{
_id: {
answers: { k: 'q4', v: 1 },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q1', v: 'b' },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q4', v: 2 },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q2', v: 'c' },
date: '2022-03-04'
},
count: 2
},
{
_id: {
answers: { k: 'q3', v: 'c' },
date: '2022-03-04'
},
count: 1
},
{
_id: {
answers: { k: 'q1', v: 'a' },
date: '2022-03-04'
},
count: 1
}
I would like to get a result that looks something like this:
{
'dates': [
{
'date': '2022-03-04',
'q1': { 'a': 1, 'b': 1 }
'q2': { 'c': 2 },
'q3': { 'a': 2, 'b': 1, 'c': 1 },
'q4': { '1': 1, '2': 1 }
}
]
'totals': { // this would be the totals across all the dates
'q1': { 'a': 1, 'b': 1 }
'q2': { 'c': 2 },
'q3': { 'a': 2, 'b': 1, 'c': 1 },
'q4': { '1': 1, '2': 1 }
}
}
any help would be greatly appreciated, even if I can't get both the totals and breakdown in 1 query.
here is the mongoplaygroud I've been working on
Not that simple. An important stage you have to use is $facet in order to get totals and dates
Maybe with $setWindowFields the aggregation pipeline could be a little simpler, but that a quick guess.
db.collection.aggregate([
{
$project: {
_id: 0,
answers: { $objectToArray: "$answers" },
date: { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } }
}
},
{ $unwind: "$answers" },
{ $unwind: "$answers.v" },
{
$group: {
_id: {
answer: "$answers.v",
question: "$answers.k",
date: "$date"
},
count: { $sum: 1 }
}
},
{
$facet: {
dates: [
{
$group: {
_id: { question: "$_id.question", date: "$_id.date" },
count: {
$push: {
k: { $toString: "$_id.answer" },
v: "$count"
}
}
}
},
{
$group: {
_id: "$_id.date",
count: {
$push: {
k: "$_id.question",
v: { $arrayToObject: "$count" }
}
}
}
},
{
$replaceWith: {
$mergeObjects: [
{ date: "$_id" },
{ $arrayToObject: "$count" }
]
}
}
],
totals: [
{
$group: {
_id: { answer: "$_id.answer", question: "$_id.question" },
v: { $push: "$count" }
}
},
{
$group: {
_id: "$_id.question",
count: {
$push: {
k: { $toString: "$_id.answer" },
v: { $sum: "$v" }
}
}
}
},
{
$project: {
_id: 0,
k: "$_id",
v: { $arrayToObject: "$count" }
}
}
]
}
},
{ $set: { totals: { $arrayToObject: "$totals" } } }
])
Mongo Playground
Related
I am running the following aggregation pipeline:
const agg = [
{
'$match': {
'aaa': 'bbb'
}
}, {
'$group': {
'_id': '',
'total': {
'$sum': '$num'
}
}
}
];
My problem is, when $match matches nothing, the pipeline returns 0 documents. How do I get the pipeline to always return 1 document?
In MongoDB version 6.0 you can do it like this one:
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{
$group: {
_id: null,
total: { $sum: "$num" }
}
},
{
$densify: {
field: "total",
range: { step: 1, bounds: [0, 0] }
}
},
{ $set: { _id: { $cond: [{ $eq: [{ $type: "$_id" }, "missing"] }, MaxKey, "$_id"] } } },
{ $sort: { _id: 1 } },
{ $limit: 1 }
])
In version < 6.0 you can try this one:
db.collection.aggregate([
{
$facet: {
data: [
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } }
],
default: [
{ $limit: 1 },
{ $group: { _id: null, total: { $sum: 0 } } },
{ $set: { _id: MaxKey } }
]
}
},
{ $replaceWith: { $mergeObjects: [{ $first: "$default" }, { $first: "$data" }] } },
])
Or this one:
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } },
{
$unionWith: {
coll: "collection",
pipeline: [
{ $limit: 1 },
{ $set: { _id: MaxKey, total: 0 } },
{ $project: { _id: 1, total: 1 } }
]
}
},
{ $sort: { _id: 1 } },
{ $limit: 1 }
])
I need to assign a default value of zero on days with zero repair, but this is the result.
[
{ day: 21, month: 10, year: 2022, count: 2 },
{ day: 28, month: 10, year: 2022, count: 1 },
{ day: 24, month: 10, year: 2022, count: 2 }
]
I just need to access the weekly repair data, 0 should be the default on non-repair days
const result = await Repair.aggregate([
{
$match: {
createdDate: {
$gte: new Date(fromDate),
$lte: new Date(toDate),
},
},
},
{
$group: {
_id: {
day: "$day",
year: "$year",
month: "$month",
},
count: {
$sum: 1,
},
},
},
{
$project: {
_id: 0,
day: "$_id.day",
month: "$_id.month",
year: "$_id.year",
count: "$count",
},
},
]);
Without valid sample input data, it is difficult to give exact solution, but would be like this one:
db.collection.aggregate([
{
$match: {
createdDate: {
$gte: new Date(fromDate),
$lte: new Date(toDate),
},
},
},
{
$group: {
_id: { $dateTrunc: { date: "$createdDate", unit: "day" } },
count: { $sum: 1 },
},
},
{ $set: { createdDate: "$_id" } },
{
$densify: {
field: "createdDate",
range: {
step: 1,
unit: "day",
bounds: "full"
}
}
},
{
$fill: {
sortBy: { createdDate: 1 },
output: { count: { value: 0 } }
}
}
]);
Mongo Playground
Update
With MongoDB version 5 the code is a bit more complex. Would be this one:
db.collection.aggregate([
{
$match: {
createdDate: {
$gt: new Date("2022-10-23T00:00:00.000Z"),
$lt: new Date("2022-10-30T00:00:00.000Z")
}
}
},
{
$facet: {
repairs: [
{
$group: {
_id: { $dateTrunc: { date: "$createdDate", unit: "day" } },
count: { $count: {} }
}
},
{
$project: {
date: "$_id",
count: "$count",
_id: 0
}
}
]
}
},
{
$set: {
allDays: {
$range: [
0,
{
$add: [
{
$dateDiff: {
startDate: { $min: "$repairs.date" },
endDate: { $max: "$repairs.date" },
/*
or
startDate: new Date("2022-10-23T00:00:00.000Z"),
endDate: new Date("2022-10-30T00:00:00.000Z"),
*/
unit: "day",
}
},
1
]
}
]
}
}
},
{
$set: {
allDays: {
$map: {
input: "$allDays",
in: {
$dateAdd: {
startDate: { $min: "$repairs.date" },
unit: "day",
amount: "$$this"
}
}
}
}
}
},
{
$project: {
repairs: {
$map: {
input: "$allDays",
as: "day",
in: {
$mergeObjects: [
{ date: "$$day", count: 0 },
{
$first: {
$filter: {
input: "$repairs",
cond: {
$eq: [
"$$day",
"$$repairs.date"
]
},
as: "repairs"
}
}
}
]
}
}
}
}
},
{
$project: {
repairs: {
$map: {
input: "$repairs",
in: "$$this.count"
}
}
}
}
])
Mongo Playground
The result cannot be simple [ 2, 0, 0, 0, 1, 2 ], the result is always a JSON document, i.e. field and values. But you can do
db.collection.aggregate([...]).toArray().shift().repairs
the following data in my mongo-db
Input:
[
{
'id':1,
"year": "2022-10-01",
"Area":{
"Education":'Engineering'
}
},
{
'id':2,
"year": "2022-10-01",
"Area":{
"Education":'Commerce'
}
},
{
'id':3,
"year": "2022-10-01",
"Area":{
"Education":'Arts'
}
},
{
'id':4,
"year": "2022-10-01",
"Area":{
"Education":'Arts'
}
},
{
'id':5,
"year": "2022-01-01",
"Area":{
"Education":'Engineering'
}
},
{
'id':6,
"year": "2022-01-01",
"Area":{
"Education":'Engineering'
}
}
]
there are records of several years based on each Education field, final result to be in this form where first is grouped by date and then grouped by Education field and the count of each education field in each year
Outcome in this manner:
{
"2022-10-01": {
"Education": {
"Engineering": 1,
"Commerce": 1,
"Arts": 2
}
},
"2021-01-01": {
"Education": {
"Engineering": 2,
"Commerce": 0,
"Arts": 0
}
}
}
Try this one:
db.collection.aggregate([
{ $group: { _id: { year: "$year", Education: "$Area.Education" }, count: { $sum: 1 } } },
{ $group: { _id: "$_id.year", Education: { $push: { k: "$_id.Education", v: "$count" } } } },
{ $set: { k: "$_id", Education: { $arrayToObject: "$Education" } } },
{ $group: { _id: null, root: { $push: { k: "$_id", v: { Education: "$Education" } } } } },
{ $replaceWith: { $arrayToObject: "$root" } }
])
Mongo Playground
It is a bit difficult to count values which do not exist, because there could be infinite numbers of them. Maybe like this:
db.collection.aggregate([
{ $group: { _id: { year: "$year", Education: "$Area.Education" }, count: { $sum: 1 } } },
{ $group: { _id: "$_id.year", Education: { $push: { k: "$_id.Education", v: "$count" } } } },
{
$set: {
Education: {
$map: {
input: ["Engineering", "Arts", "Commerce"],
in: {
k: "$$this",
v: {
$let: {
vars: {
val: {
$first: {
$filter: {
input: "$Education",
as: "area",
cond: { $eq: ["$$this", "$$area.k"] }
}
}
}
},
in: { $ifNull: ["$$val.v", 0] }
}
}
}
}
}
}
},
{ $set: { k: "$_id", Education: { $arrayToObject: "$Education" } } },
{ $group: { _id: null, root: { $push: { k: "$_id", v: { Education: "$Education" } } } } },
{ $replaceWith: { $arrayToObject: "$root" } }
])
code currently in use
db.events.aggregate([
{ '$match':
{ thingId: 'node_0002',
eventState: 'test',
deviceId: {'$in': ['ARCT-1', 'ARCT-2', 'ARSS-1', 'ARSS-2', 'ARSW-1', 'ARCO-1']},
collectTimeText: {'$gte': '2022-04-01T00:00:00+09:00', '$lte': '2022-04-19T23:59:59+09:00' }}},
{ '$group': {
_id: {'$dateFromString': { dateString: { '$substr': [ '$collectTimeText', 0, 10 ] }}
},
list: { '$addToSet': {'deviceId':'$deviceId'} }},
},
{'$unwind':'$list'},
{ '$group': {
_id: {'$dateToString': {'format': "%Y-%m", date: "$_id"}
},
list: { '$push': '$list' }},
},
{'$sort': { _id: 1 } }], {})
result of using the code
{ _id: '2022-04',
list:
[ { deviceId: 'ARCT-2' },
{ deviceId: 'ARSS-1' },
{ deviceId: 'ARCT-1' },
{ deviceId: 'ARSW-1' },
{ deviceId: 'ARCO-1' },
{ deviceId: 'ARSS-2' },
{ deviceId: 'ARCT-2' },
{ deviceId: 'ARSS-1' },
{ deviceId: 'ARCT-1' },
{ deviceId: 'ARSW-1' },
{ deviceId: 'ARCO-1' },
{ deviceId: 'ARSS-2' },
{ deviceId: 'ARCT-2' },
{ deviceId: 'ARSS-1' },
{ deviceId: 'ARCT-1' },
{ deviceId: 'ARSW-1' },
{ deviceId: 'ARCO-1' },
{ deviceId: 'ARSS-2' } ] }
I want an output like the code below. Help
{ _id: '2022-04',
list:
{ 'ARCO-1': 3,
'ARCT-1': 3,
'ARSS-1': 3,
'ARCT-2': 3,
'ARSS-2': 3,
'ARSW-1': 3 } }
This is the code for generating statistics.
How do I get the results I want?
Did I write the code wrong in the first place?
When I added $project , I got the following result. How to check the count
db.events.aggregate([
{ '$match':
{ thingId: 'node_0002',
eventState: 'test',
deviceId: {'$in': ['ARCT-1', 'ARCT-2', 'ARSS-1', 'ARSS-2', 'ARSW-1', 'ARCO-1']},
collectTimeText: {'$gte': '2022-04-01T00:00:00+09:00', '$lte': '2022-04-19T23:59:59+09:00' }}},
{ '$group': {
_id: {'$dateFromString': { dateString: { '$substr': [ '$collectTimeText', 0, 10 ] }}
},
list: { '$addToSet': {'deviceId':'$deviceId'} }},
},
{'$unwind':'$list'},
{ '$group': {
_id: {'$dateToString': {'format': "%Y-%m", date: "$_id"}
},
list: { '$push': '$list' }},
},
{'$project':{
'list':{
'$arrayToObject':{
'$map': {
'input': '$list',
'as': 'el',
'in': {
'k': '$$el.deviceId',
'v': {'$sum':1}
}
}
}
}
}},
{'$sort': { _id: 1 } }], {})
Below is the result of adding $project.
{ _id: '2022-04',
list:
{ 'ARCO-1': 1,
'ARCT-1': 1,
'ARSS-1': 1,
'ARCT-2': 1,
'ARSS-2': 1,
'ARSW-1': 1 } }
I don't know what to do.... It seems like I'm almost there, but I don't know...
Maybe not all...?
$group twice and then $arrayToObject
db.collection.aggregate([
{
$group: {
_id: {
date: "$date",
deviceId: "$deviceId"
},
count: {
$sum: 1
}
}
},
{
$group: {
_id: "$_id.date",
list: {
$push: {
k: "$_id.deviceId",
v: "$count"
}
}
}
},
{
$set: {
list: {
$arrayToObject: "$list"
}
}
}
])
mongoplayground
I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b']
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// maybe facet here?
// conceptually, In the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k ['question1, 'question2']] group by k, v and count
// if $in [$$answers.k ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the by day/month based on the createdAt time
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { "$sum": 1 }
}
},
{
$group: {
_id: "$_id.k",
v: { "$push": { k: "$_id.v", v: "$c" } }
}
},
{
$group: {
_id: null,
v: { "$push": { k: "$_id", v: { "$arrayToObject": "$v" } } }
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
}
])
mongoplayground
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$set: {
"answers.type": {
$switch: {
branches: [
{
case: { $isArray: "$answers.v" },
then: "multi"
},
{
case: { $eq: [ { $type: "$answers.v" }, "string" ] },
then: "radio"
},
{
case: { $isNumber: "$answers.v" },
then: "avg"
}
],
default: "other"
}
}
}
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.k",
type: { $first: "$_id.type" },
v: {
$push: {
k: { $toString: "$_id.v" },
v: "$c"
}
}
}
},
{
$group: {
_id: "$type",
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$group: {
_id: null,
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
},
{
$set: {
avg: {
$arrayToObject: {
$map: {
input: { $objectToArray: "$avg" },
as: "s",
in: {
k: "$$s.k",
v: {
$avg: {
$map: {
input: { $objectToArray: "$$s.v" },
as: "x",
in: { $multiply: [ { $toInt: "$$x.k" }, "$$x.v" ] }
}
}
}
}
}
}
}
}
}
])
mongoplayground