Mongo groupby three fields, get unique count for one field

Mongo groupby three fields, get unique count for one field - mongodb

I have records stored in my collection as follows:
{
"sessionId" : "f960e3db-838c-42aa-95ce-a807096f7036",
"date" : "12-02-2020",
"hour" : "13",
"month" : "02",
"time" : "13:46:50",
"weekDay" : "Wednesday",
}
I want to group the above records by 'date', 'hour', getting the number of unique'sessionId' per hour. Something like below:
{
"12-02-2020": {
00: 23,//hour:unique number of sessions in that hour
01: 3,
04: 33,
05: 0,
10: 1
},
"13-02-2020": {
00: 2,//hour:unique number of sessions in that hour
03: 33,
09: 23,
05: 6,
10: 1
}
}
Can anyone please formulate the query for the above?

It is often a challenge when you desire dynamic field names and arrays, I found this solution:
db.collection.aggregate([
// group by hour
{
$group: {
_id: { date: "$date", hour: "$hour" },
sessions: { $addToSet: "$sessionId" }
}
},
// count the sessions
{ $set: { sessions: { $size: "$sessions" } } },
// group by day
{
$group: {
_id: "$_id.date",
hour: { $push: "$_id.hour" },
sessions: { $push: "$sessions" }
}
},
// transform result
{ $set: { data: { $range: [0, { $size: "$hour" }] } } },
{
$set: {
data: {
$map: {
input: "$data",
as: "idx",
in: {
k: { $arrayElemAt: ["$hour", "$$idx"] },
v: { $arrayElemAt: ["$sessions", "$$idx"] }
}
}
}
}
},
// transform day and hour values
{ $set: { v: { $arrayToObject: "$data" } } },
{ $project: { data: { k: "$_id", v: "$v" } } },
{ $set: { data: { $arrayToObject: "$data" } } },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo playground

You can try as below :
db.collection.aggregate([
/** group based on session & date & hour to get unique docs based on session */
{ $group: { _id: { session: "$sessionId", date: "$date", hour: "$hour" } } },
/** group on date & hour & count no.of docs */
{
$group: {
_id: { date: "$_id.date", hour: "$_id.hour" },
count: { $sum: 1 }
}
},
/** transform into entire data of each doc into data field with converting ['k':k..., 'v':v...] into {k,v} */
{
$project: {
data: {
$arrayToObject: [
[
{
k: "$_id.date",
v: { $arrayToObject: [[{ k: "$_id.hour", v: "$count" }]] }
}
]
]
}
}
},
/** replace root of each doc with new root as data */
{
$replaceRoot: {
newRoot: "$data"
}
}
]);
Test : MongoDB-Playground

Related

mongo / mongoose aggregation pipeline query for survey data

I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b']
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// maybe facet here?
// conceptually, In the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k ['question1, 'question2']] group by k, v and count
// if $in [$$answers.k ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the by day/month based on the createdAt time

db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { "$sum": 1 }
}
},
{
$group: {
_id: "$_id.k",
v: { "$push": { k: "$_id.v", v: "$c" } }
}
},
{
$group: {
_id: null,
v: { "$push": { k: "$_id", v: { "$arrayToObject": "$v" } } }
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
}
])
mongoplayground
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$set: {
"answers.type": {
$switch: {
branches: [
{
case: { $isArray: "$answers.v" },
then: "multi"
},
{
case: { $eq: [ { $type: "$answers.v" }, "string" ] },
then: "radio"
},
{
case: { $isNumber: "$answers.v" },
then: "avg"
}
],
default: "other"
}
}
}
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.k",
type: { $first: "$_id.type" },
v: {
$push: {
k: { $toString: "$_id.v" },
v: "$c"
}
}
}
},
{
$group: {
_id: "$type",
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$group: {
_id: null,
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
},
{
$set: {
avg: {
$arrayToObject: {
$map: {
input: { $objectToArray: "$avg" },
as: "s",
in: {
k: "$$s.k",
v: {
$avg: {
$map: {
input: { $objectToArray: "$$s.v" },
as: "x",
in: { $multiply: [ { $toInt: "$$x.k" }, "$$x.v" ] }
}
}
}
}
}
}
}
}
}
])
mongoplayground

Combine arrays in MongoDB $group aggregation

I am using Mongo daily bucketing pattern. Each daily document contains an array with value calculated for every hour for that day:
{
meter: 'meterId',
date: 'dailyBucket',
hourlyConsumption: [0,0,1,1,1,2,2,2,4,4,4,4,3,3,3...] // array with 24 values for every hour of a day
}
Now in one of my aggregation queries, I would like to group documents for the same day of multiple meters and get a result like this:
INPUT (consumption of multiple meters in a same day)
{
meter: 'MeterA',
date: '2021-05-01',
hourlyConsumption: [0,0,1,1,1,2,2,2,4,4,4,4,3,3,3...]
},
{
meter: 'MeterB',
date: '2021-05-01',
hourlyConsumption: [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10...]
}
RESULT (combined into single document)
{
date: '2021-05-01',
hourlyConsumption: [10,10,11,11,11,12,12,12,14,14,14,14,13,13,13...]
}
is there a way to achieve this without using $accumulator?

You can use $reduce
db.collection.aggregate([
{
$group: {
_id: "$date",
hourlyConsumption: { $push: "$hourlyConsumption" }
}
},
{
$set: {
hourlyConsumption: {
$reduce: {
input: "$hourlyConsumption",
initialValue: [],
in: { $map: { input: { $range: [ 0, 23 ] },
as: "h",
in: {
$sum: [
{ $arrayElemAt: [ "$$value", "$$h" ] },
{ $arrayElemAt: [ "$$this", "$$h" ] }
]
}
}
}
}
}
}
}
])
Mongo Playground
Or you use $unwind and $group:
db.collection.aggregate([
{
$unwind: {
path: "$hourlyConsumption",
includeArrayIndex: "hour"
}
},
{
$group: {
_id: {
date: "$date",
hour: "$hour"
},
hourlyConsumption: { $sum: "$hourlyConsumption" }
}
},
{ $sort: { "_id.hour": 1 } },
{
$group: {
_id: "$_id.date",
hourlyConsumption: { $push: "$hourlyConsumption" }
}
}
])
Mongo Playground
However, when you use $unwind, then you actually contradict your bucketing design pattern.

how to label the field in MongoDB and make a sum with respect to date

Need to format the date and make a sum per day but sometimes a or b values are not available.
in the end, get one document with respect to date and sum. I'm using MongoDB 4.2.
Data Structure:
{
"data": {
"11-10-2001": {
"a": 17.281150000000001,
"b": 11.864060000000006
},
"13-10-2020": {
"b": 2.7616699999999994
},
"12-10-2001": {
"b": 4.0809599999999997
},
"09-10-2001": {
"b": 4.1286300000000005
},
"17-10-2001": {
"a": 15.140560000000123,
"b": 5.017139999999998
},
"18-10-2001": {
"b": 1.975189999999997,
"a": 7.093789999999976
}
}
}
Expected Output one document that contains the day and sum:
{
{
day: 11-10-2001,
sum : 29.145
},
{
day: 13-10-201,
sum : 2.7616699
},
{
day: 12-10-2001,
sum : 4.0809599999999997
},
{
day: 17-10-2001,
sum : 20.114
},
{
day: 18-10-2001,
sum : 9.145
}
}

You can try,
$map to iterate loop of data object after converting to array using $objectToArray
add key day, and sum, $reduce to loop of number object after converting to array using $objectToArray, $add to sum the value of number
$unwind deconstruct data array
$replaceRoot to replace data object to root
db.collection.aggregate([
{
$addFields: {
data: {
$map: {
input: { $objectToArray: "$data" },
in: {
day: "$$this.k",
sum: {
$reduce: {
input: { $objectToArray: "$$this.v" },
initialValue: 0,
in: { $add: ["$$this.v", "$$value"] }
}
}
}
}
}
}
},
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
])
Playground

You can do like following
db.collection.aggregate([
{
$project: { data: { "$objectToArray": "$data" } }
},
{
$unwind: "$data"
},
{
"$replaceRoot": { "newRoot": "$data" }
},
{
$addFields: { v: { "$objectToArray": "$v" } }
},
{
$addFields: {
v: {
$reduce: {
input: "$v",
initialValue: 0,
in: {
$add: [ "$$this.v", "$$value" ]
}
}
}
}
},
{
$group: {
_id: null,
data: {
$push: {
day: "$k",
sum: "$v"
}
}
}
}
])
Working Mongo playground

MongoDB -How to get last 30 days date from given date and that last 30 days date should be current date?

I am the beginner of MongoDB
Here I mentioned below my one document
{
"_id" : ObjectId("5e5bc292361b710c7727718e"),
"branch_id" : "BR5cc825dac42dac3aae49ff91",
"inventory" : [
{
"inventory_stock_id" : "wewe123",
"stock_name" : "xxxxx",
"stock_point" : "27",
"stock_type" : "yyyy",
"batch" : [
{
"quantity" : 40,
"manf_date" : "10-01-2020",
"exp_date" : "01-04-2020"
}
]
}
]
}
I want to get last 30 days from "exp_date" but it should be equal to current date
Here I mentioned exp_date: "01-04-2020" and the past 30 days of date is today date( "02-03-2020")
db.collection.find({"inventory.batch.exp_date" : {"$lte":"01-04-2020","$eq":"02-03-2020"}})
I don't know how to get last 30 days of exp_date and equal to current date
so anyone help me to solve this issue.

Usually it is a bad approach to store/compare Date values as strings.
You can do it like this. First convert the strings to proper Date objects:
db.collection.updateMany(
{},
[
{
$set: {
inventory: {
$map: {
input: "$inventory",
as: "inventory",
in: {
$mergeObjects: [
"$$inventory",
{
batch: {
$map: {
input: "$$inventory.batch",
in: {
quantity: "$$this.quantity",
manf_date: { $dateFromString: { dateString: "$$this.manf_date", format: "%d-%m-%Y" } },
exp_date: { $dateFromString: { dateString: "$$this.exp_date", format: "%d-%m-%Y" } }
}
}
}
}
]
}
}
}
}
}
]
)
When you have to work with Date values, then I recommend the Moment.js library.
The query would be this one:
db.collection.find(
{
"inventory.batch": {
$elemMatch: {
exp_date: {
$eq: moment().utc().add(30, 'days').startOf('day').toDate()
}
}
}
}
)
or as aggregation:
db.collection.aggregate([
{
$match: {
"inventory.batch": {
$elemMatch: {
exp_date: {
$eq: moment().utc().add(30, 'days').startOf('day').toDate()
}
}
}
}
}
])
Note, by default $dateFromString uses UTC times, whereas moment() uses your local time by default. Thus you have to use either moment().utc() or you specify the timezone field at $dateFromString.
In case you insist to keep the string values as Date, you can also use
db.collection.find(
{
"inventory.batch": {
$elemMatch: {
exp_date: {
$eq: moment().add(30, 'days').startOf('day').format("DD-MM-YYYY")
}
}
}
}
)
However, this will fail if you query with $gte, $ge, $lt, $lte operators.
Update
If you have not access to moments then you can run in purely in the aggregation:
db.collection.aggregate([
{ $unwind: "$inventory" },
{ $set: { ts: { $dateToParts: { date: { $add: ["$$NOW", { $multiply: [1000, 60, 60, 24, 30] }] } } } } },
{
$set: {
ts: {
$dateFromParts: {
year: "$ts.year",
month: "$ts.month",
day: "$ts.day",
timezone: "UTC"
}
}
}
},
{ $set: { matches: { $in: ["$ts", "$inventory.batch.exp_date"] } } },
{
$group: {
_id: { _id: "$_id", branch_id: "$branch_id" },
inventory: { $push: "$$ROOT.inventory" },
matches: { $push: "$$ROOT.matches" }
}
},
{ $match: { $expr: { $anyElementTrue: "$matches" } } },
{ $replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", "$_id"] } } },
{$unset: "matches"}
])
Or, if you like to write all on a single aggregation:
db.collection.aggregate([
{ $unwind: "$inventory" },
{
$set: {
"inventory.batch": {
$map: {
input: "$inventory.batch",
in: {
quantity: "$$this.quantity",
manf_date: { $dateFromString: { dateString: "$$this.manf_date", format: "%d-%m-%Y" } },
exp_date: { $dateFromString: { dateString: "$$this.exp_date", format: "%d-%m-%Y" } }
}
}
}
}
},
{ $set: { ts: { $dateToParts: { date: { $add: ["$$NOW", { $multiply: [1000, 60, 60, 24, 30] }] } } } } },
{
$set: {
ts: {
$dateFromParts: {
year: "$ts.year",
month: "$ts.month",
day: "$ts.day",
timezone: "UTC"
}
}
}
},
{ $set: { matches: { $in: ["$ts", "$inventory.batch.exp_date"] } } },
{
$group: {
_id: { _id: "$_id", branch_id: "$branch_id" },
inventory: { $push: "$$ROOT.inventory" },
matches: { $push: "$$ROOT.matches" }
}
},
{ $match: { $expr: { $anyElementTrue: "$matches" } } },
{ $replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", "$_id"] } } },
{ $unset: "matches" }
])

MongoDB - aggregating with nested objects, and changeable keys

I have a document which describes counts of different things observed by a camera within a 15 minute period. It looks like this:
{
"_id" : ObjectId("5b1a709a83552d002516ac19"),
"start" : ISODate("2018-06-08T11:45:00.000Z"),
"end" : ISODate("2018-06-08T12:00:00.000Z"),
"recording" : ObjectId("5b1a654683552d002516ac16"),
"data" : {
"counts" : {
"5b434d05da1f0e00252566be" : 12,
"5b434d05da1f0e00252566cc" : 4,
"5b434d05da1f0e00252566ca" : 1
}
}
}
The keys inside the data.counts object change with each document and refer to additional data that is fetched at a later date. There are unlimited number of keys inside data.counts (but usually about 20)
I am trying to aggregate all these 15 minute documents up to daily aggregated documents.
I have this query at the moment to do that:
db.getCollection("segments").aggregate([
{$match:{
"recording": ObjectId("5bf7f68ad8293a00261dd83f")
}},
{$project:{
"start": 1,
"recording": 1,
"data": 1
}},
{$group:{
_id: { $dateToString: { format: "%Y-%m-%d", date: "$start" } },
"segments": { $push: "$$ROOT" }
}},
{$sort: {_id: -1}},
]);
This does the grouping and returns all the segments in an array.
I want to also aggregate the information inside data.counts, so that I get the sum of values for all keys that are the same within the daily group.
This would save me from having another service loop through each 15 minute segment summing values with the same keys. E.g. the query would return something like this:
{
"_id" : "2019-02-27",
"counts" : {
"5b434d05da1f0e00252566be" : 351,
"5b434d05da1f0e00252566cc" : 194,
"5b434d05da1f0e00252566ca" : 111
... any other keys that were found within a day
}
}
How might I amend the query I already have, or use a different query?
Thanks!

You could use the $facet pipeline stage to create two sub-pipelines; one for segments and another for counts. These sub-pipelines can be joined by using $zip to stitch them together and $map to merge each 2-element array produced from zip. Note this will only work correctly if the sub-pipelines output sorted arrays of the same size, which is why we group and sort by start_date in each sub-pipeline.
Here's the query:
db.getCollection("segments").aggregate([{
$match: {
recording: ObjectId("5b1a654683552d002516ac16")
}
}, {
$project: {
start: 1,
recording: 1,
data: 1,
start_date: { $dateToString: { format: "%Y-%m-%d", date: "$start" }}
}
}, {
$facet: {
segments_pipeline: [{
$group: {
_id: "$start_date",
segments: {
$push: {
start: "$start",
recording: "$recording",
data: "$data"
}
}
}
}, {
$sort: {
_id: -1
}
}],
counts_pipeline: [{
$project: {
start_date: "$start_date",
count: { $objectToArray: "$data.counts" }
}
}, {
$unwind: "$count"
}, {
$group: {
_id: {
start_date: "$start_date",
count_id: "$count.k"
},
count_sum: { $sum: "$count.v" }
}
}, {
$group: {
_id: "$_id.start_date",
counts: {
$push: {
$arrayToObject: [[{
k: "$_id.count_id",
v: "$count_sum"
}]]
}
}
}
}, {
$project: {
counts: { $mergeObjects: "$counts" }
}
}, {
$sort: {
_id: -1
}
}]
}
}, {
$project: {
result: {
$map: {
input: { $zip: { inputs: ["$segments_pipeline", "$counts_pipeline"] }},
in: { $mergeObjects: "$$this" }
}
}
}
}, {
$unwind: "$result"
}, {
$replaceRoot: {
newRoot: "$result"
}
}])
Try it out here: Mongoplayground.

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

Mongo groupby three fields, get unique count for one field - mongodb

Related

mongo / mongoose aggregation pipeline query for survey data

Combine arrays in MongoDB $group aggregation

how to label the field in MongoDB and make a sum with respect to date

MongoDB -How to get last 30 days date from given date and that last 30 days date should be current date?

MongoDB - aggregating with nested objects, and changeable keys

Categories

Resources