MongoDB - aggregating with nested objects, and changeable keys - mongodb

I have a document which describes counts of different things observed by a camera within a 15 minute period. It looks like this:
{
"_id" : ObjectId("5b1a709a83552d002516ac19"),
"start" : ISODate("2018-06-08T11:45:00.000Z"),
"end" : ISODate("2018-06-08T12:00:00.000Z"),
"recording" : ObjectId("5b1a654683552d002516ac16"),
"data" : {
"counts" : {
"5b434d05da1f0e00252566be" : 12,
"5b434d05da1f0e00252566cc" : 4,
"5b434d05da1f0e00252566ca" : 1
}
}
}
The keys inside the data.counts object change with each document and refer to additional data that is fetched at a later date. There are unlimited number of keys inside data.counts (but usually about 20)
I am trying to aggregate all these 15 minute documents up to daily aggregated documents.
I have this query at the moment to do that:
db.getCollection("segments").aggregate([
{$match:{
"recording": ObjectId("5bf7f68ad8293a00261dd83f")
}},
{$project:{
"start": 1,
"recording": 1,
"data": 1
}},
{$group:{
_id: { $dateToString: { format: "%Y-%m-%d", date: "$start" } },
"segments": { $push: "$$ROOT" }
}},
{$sort: {_id: -1}},
]);
This does the grouping and returns all the segments in an array.
I want to also aggregate the information inside data.counts, so that I get the sum of values for all keys that are the same within the daily group.
This would save me from having another service loop through each 15 minute segment summing values with the same keys. E.g. the query would return something like this:
{
"_id" : "2019-02-27",
"counts" : {
"5b434d05da1f0e00252566be" : 351,
"5b434d05da1f0e00252566cc" : 194,
"5b434d05da1f0e00252566ca" : 111
... any other keys that were found within a day
}
}
How might I amend the query I already have, or use a different query?
Thanks!

You could use the $facet pipeline stage to create two sub-pipelines; one for segments and another for counts. These sub-pipelines can be joined by using $zip to stitch them together and $map to merge each 2-element array produced from zip. Note this will only work correctly if the sub-pipelines output sorted arrays of the same size, which is why we group and sort by start_date in each sub-pipeline.
Here's the query:
db.getCollection("segments").aggregate([{
$match: {
recording: ObjectId("5b1a654683552d002516ac16")
}
}, {
$project: {
start: 1,
recording: 1,
data: 1,
start_date: { $dateToString: { format: "%Y-%m-%d", date: "$start" }}
}
}, {
$facet: {
segments_pipeline: [{
$group: {
_id: "$start_date",
segments: {
$push: {
start: "$start",
recording: "$recording",
data: "$data"
}
}
}
}, {
$sort: {
_id: -1
}
}],
counts_pipeline: [{
$project: {
start_date: "$start_date",
count: { $objectToArray: "$data.counts" }
}
}, {
$unwind: "$count"
}, {
$group: {
_id: {
start_date: "$start_date",
count_id: "$count.k"
},
count_sum: { $sum: "$count.v" }
}
}, {
$group: {
_id: "$_id.start_date",
counts: {
$push: {
$arrayToObject: [[{
k: "$_id.count_id",
v: "$count_sum"
}]]
}
}
}
}, {
$project: {
counts: { $mergeObjects: "$counts" }
}
}, {
$sort: {
_id: -1
}
}]
}
}, {
$project: {
result: {
$map: {
input: { $zip: { inputs: ["$segments_pipeline", "$counts_pipeline"] }},
in: { $mergeObjects: "$$this" }
}
}
}
}, {
$unwind: "$result"
}, {
$replaceRoot: {
newRoot: "$result"
}
}])
Try it out here: Mongoplayground.

Related

Flatten group of multiple fields in mongo?

I want to get the sum of amounts grouped by address and date:
db.coolCollection.aggregate([
{ $group:
{ _id :
{ address:"$address",
date: { $dateFromString: { dateString: "$block.time"}}}},
sum : { $sum:{ "$amount" }}} ])
Great, except the results look like this:
{
_id: {
address: "abc123",
date: 2021-03-22T00:00:00.000+00:00
},
sum: 48645
}
I want this:
{
address: "abc123",
date: 2021-03-22T00:00:00.000+00:00,
sum: 48645
}
Usually you'd just add a $project stage to restructure, here is how to do is using $replaceRoot under the assumption the _id can contain many fields you don't want to manually convert:
db.collection.aggregate([
{
$replaceRoot: {
newRoot: {
$mergeObjects: [
"$_id",
"$$ROOT"
]
}
}
},
{
$project: {
_id: 0
}
}
])
Mongo Playground

Combine arrays in MongoDB $group aggregation

I am using Mongo daily bucketing pattern. Each daily document contains an array with value calculated for every hour for that day:
{
meter: 'meterId',
date: 'dailyBucket',
hourlyConsumption: [0,0,1,1,1,2,2,2,4,4,4,4,3,3,3...] // array with 24 values for every hour of a day
}
Now in one of my aggregation queries, I would like to group documents for the same day of multiple meters and get a result like this:
INPUT (consumption of multiple meters in a same day)
{
meter: 'MeterA',
date: '2021-05-01',
hourlyConsumption: [0,0,1,1,1,2,2,2,4,4,4,4,3,3,3...]
},
{
meter: 'MeterB',
date: '2021-05-01',
hourlyConsumption: [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10...]
}
RESULT (combined into single document)
{
date: '2021-05-01',
hourlyConsumption: [10,10,11,11,11,12,12,12,14,14,14,14,13,13,13...]
}
is there a way to achieve this without using $accumulator?
You can use $reduce
db.collection.aggregate([
{
$group: {
_id: "$date",
hourlyConsumption: { $push: "$hourlyConsumption" }
}
},
{
$set: {
hourlyConsumption: {
$reduce: {
input: "$hourlyConsumption",
initialValue: [],
in: { $map: { input: { $range: [ 0, 23 ] },
as: "h",
in: {
$sum: [
{ $arrayElemAt: [ "$$value", "$$h" ] },
{ $arrayElemAt: [ "$$this", "$$h" ] }
]
}
}
}
}
}
}
}
])
Mongo Playground
Or you use $unwind and $group:
db.collection.aggregate([
{
$unwind: {
path: "$hourlyConsumption",
includeArrayIndex: "hour"
}
},
{
$group: {
_id: {
date: "$date",
hour: "$hour"
},
hourlyConsumption: { $sum: "$hourlyConsumption" }
}
},
{ $sort: { "_id.hour": 1 } },
{
$group: {
_id: "$_id.date",
hourlyConsumption: { $push: "$hourlyConsumption" }
}
}
])
Mongo Playground
However, when you use $unwind, then you actually contradict your bucketing design pattern.

How to group mongodb aggregate array of objects data To sum numbers on the same date

I'm trying to create charts but I can't combine the data to be like "Expected result below"
What can I use to return "Expected result".
I tried using $group in $group and $reduce and it didn't work well.
I hope someone can help me solve this task
Current result is
[
{
"_id":"5fd4c3586e83b334d97c5218",
"consumption":5,
"charts":[
{
"date":"2020-10",
"consumption":1
},
{
"date":"2020-10",
"consumption":1
},
{
"date":"2020-11",
"consumption":1
},
{
"date":"2020-11",
"consumption":1
}
]
}
]
Expected result is
[
{
"_id":"5fd4c3586e83b334d97c5218",
"consumption":5,
"charts":[
{
"date":"2020-10",
"consumption":2
},
{
"date":"2020-11",
"consumption":2
},
}
]
You need to go with aggregations
db.collection.aggregate([
{ $unwind: "$charts" },
{
$group: {
_id: { _id: "$_id", month: "$charts.date.month", year: "$charts.date.year" },
consumption: { $first: "$consumption" },
total: { $sum: "$charts.consumption" },
date: { $first: "$charts.date" }
}
},
{
$group: {
_id: "$_id._id",
consumption: { $first: "$consumption" },
charts: {
$push: {
date: "$date",
consumption: "$total"
}
}
}
}
])
Working Mongo playground

Aggregate by all days of month mongodb

Hey i need to get the sum of all totalPrice group by days
I get this result
but i need to fetch all rest days of month even if it returns 0
i need solution
this is my code
Order.aggregate([
{ $project: { yearMonthDay: { $dateToString: { format: "%Y-%m-%d", date: '$created' }}, totalPrice:"$totalPrice" }},
{ $group: { _id: "$yearMonthDay", count: { $sum: 1 }, total: {"$sum": "$totalPrice"} }},
{ $sort: { _id: -1 } },
{ $group: { _id: null, stats: { $push: "$$ROOT" }}},
{
$project: {
results: {
$map: {
input:{ $range:[16,31] },
as: 'day',
in: {
$let: {
vars: {
dateIndex: {
"$indexOfArray": ["$stats._id", {$dateToString:{ date:{$dateFromParts:{'year':2020, 'month':5, 'day':"$$day"}}, format:'%Y-%m-%d'}}]
}
},
in: {
$cond: {
if: { $ne: ["$$dateIndex", -1] },
then: { $arrayElemAt: ["$stats", "$$dateIndex"] },
else: { _id: {$dateToString:{ date:{$dateFromParts:{'year':2020, 'month':5, 'day':"$$day"}}, format:'%Y-%m-%d'}, count: 0, total: 0 } }
}
}
}
}
}
}
}
},
{ $unwind: "$results" },
{ $replaceRoot: { newRoot: "$results"}}
]
This query should work for you.
db.collectionName.aggregate([
{ $project: { yearMonthDay: { $dateToString: { format: "%Y-%m-%d", date: '$created' }}, totalPrice:"$totalPrice" }},
{ $group: { _id: "$yearMonthDay", count: { $sum: 1 }, total: {"$sum": "$totalPrice"} }},
{ $sort: { _id: -1 } },
{ $group: { _id: null, stats: { $push: "$$ROOT" }},
{
$project: {
results: {
$map: {
input: ["2020-05-16","2020-05-15","2020-05-14","2020-05-13","2020-05-12"],
as: "date",
in: {
$let: {
vars: {
dateIndex: {
"$indexOfArray": ["$stats._id", "$$date"]
}
},
in: {
$cond: {
if: { $ne: ["$$dateIndex", -1] },
then: { $arrayElemAt: ["$stats", "$$dateIndex"] },
else: { _id: "$$date", count: 0, total: 0 }
}
}
}
}
}
}
}
},
{ $unwind: "$results" },
{ $replaceRoot: { newRoot: "$results"}}
])
The First 3 steps is same as yours.
{ $group: { _id: null, stats: { $push: "$$ROOT" }} will push previous stage results into an arrray stats which we will use for lookup in later stage.
In last stage, we will create possible date range and iterate over that.
for each key in range.
"$indexOfArray": ["$stats._id", "$$date"] will check if date is present in stats array or not
Then we will use that index to fetch value from stats array otherwise push default values.
As these results are still under results, we will unwind that array and move to root.
If you server version is above 3.6,
we can simplify date range creation part as well. let's initialize input arrays as days using $range.
input:{ $range:[16,31] },
as: 'day'
and modifiy dateIndex part like this
dateIndex: {
"$indexOfArray": ["$stats._id", {$dateToString:{ date:{$dateFromParts:{'year':2020, 'month':5, 'day':"$$day"}}, format:'%Y-%m-%d'}]
}
And change default value part as well similarly.
else: { _id: {$dateToString:{ date:{$dateFromParts:{'year':2020, 'month':5, 'day':"$$day"}}, format:'%Y-%m-%d'}}, count: 0, total: 0 }
Or alternatively, we can also use concat for generating keys
dateIndex: {
"$indexOfArray": ["$stats._id", {$concat:["2020-05","-", {$convert:{input:"$$day", to:"string"}}]}]
}
// And default value
else: { _id: {$concat:["2020-05","-", {$convert:{input:"$$day", to:"string"}}]}, count: 0, total: 0 }
Similarly, you can run another loop for months as well.

Mongo groupby three fields, get unique count for one field

I have records stored in my collection as follows:
{
"sessionId" : "f960e3db-838c-42aa-95ce-a807096f7036",
"date" : "12-02-2020",
"hour" : "13",
"month" : "02",
"time" : "13:46:50",
"weekDay" : "Wednesday",
}
I want to group the above records by 'date', 'hour', getting the number of unique'sessionId' per hour. Something like below:
{
"12-02-2020": {
00: 23,//hour:unique number of sessions in that hour
01: 3,
04: 33,
05: 0,
10: 1
},
"13-02-2020": {
00: 2,//hour:unique number of sessions in that hour
03: 33,
09: 23,
05: 6,
10: 1
}
}
Can anyone please formulate the query for the above?
It is often a challenge when you desire dynamic field names and arrays, I found this solution:
db.collection.aggregate([
// group by hour
{
$group: {
_id: { date: "$date", hour: "$hour" },
sessions: { $addToSet: "$sessionId" }
}
},
// count the sessions
{ $set: { sessions: { $size: "$sessions" } } },
// group by day
{
$group: {
_id: "$_id.date",
hour: { $push: "$_id.hour" },
sessions: { $push: "$sessions" }
}
},
// transform result
{ $set: { data: { $range: [0, { $size: "$hour" }] } } },
{
$set: {
data: {
$map: {
input: "$data",
as: "idx",
in: {
k: { $arrayElemAt: ["$hour", "$$idx"] },
v: { $arrayElemAt: ["$sessions", "$$idx"] }
}
}
}
}
},
// transform day and hour values
{ $set: { v: { $arrayToObject: "$data" } } },
{ $project: { data: { k: "$_id", v: "$v" } } },
{ $set: { data: { $arrayToObject: "$data" } } },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo playground
You can try as below :
db.collection.aggregate([
/** group based on session & date & hour to get unique docs based on session */
{ $group: { _id: { session: "$sessionId", date: "$date", hour: "$hour" } } },
/** group on date & hour & count no.of docs */
{
$group: {
_id: { date: "$_id.date", hour: "$_id.hour" },
count: { $sum: 1 }
}
},
/** transform into entire data of each doc into data field with converting ['k':k..., 'v':v...] into {k,v} */
{
$project: {
data: {
$arrayToObject: [
[
{
k: "$_id.date",
v: { $arrayToObject: [[{ k: "$_id.hour", v: "$count" }]] }
}
]
]
}
}
},
/** replace root of each doc with new root as data */
{
$replaceRoot: {
newRoot: "$data"
}
}
]);
Test : MongoDB-Playground