I have a structure like this coming from IOT sensor device
[{
"_id" : ObjectId("5e8e1af3f1563046e084cf65"),
"value" : 462.2382850719,
"start" : 1586293200001,
"year" : 2020,
"month" : "04",
"day" : "07",
"hour" : "21",
"channelId" : 3462
},
{
"_id" : ObjectId("5e8e1af3f1563046e084cf64"),
"value" : 1636.8770905333,
"start" : 1586289600001,
"year" : 2020,
"month" : "04",
"day" : "07",
"hour" : "19",
"channelId" : 3462
},
{
"_id" : ObjectId("5e8e1af3f1563046e084cf63"),
"value" : 1665.4116577475,
"start" : 1586286000001,
"year" : 2020,
"month" : "04",
"day" : "07",
"hour" : "20",
"channelId" : 3462
}]
I want to group this structure first by channelId, then by year, month, day and hour, so that I end up with this sort of nested structure:
{"channel_id":XXX,"aggregates":[2020:[04:[01:[00:AVG_VALUE,01:AVG_VALUE...],...],...]}
Similar like this
output = [
{
channelId: 3462,
"value": '',
aggregates: [
{
year: 2020,
months: [
{
month: 4,
value: '',
days: [
{
day: 7,
value: '',
hours: [
{ hour: 19, value: '' },
{ hour: 20, value: '' },
{ hour: 21, value: '' }
]
}
]
}
]
}
]
}
]
I can do a one-level $group and push $$ROOT into it, but I don't know how to nest the groups. Do I have to use reduce logic or something similar?
Any help is appreciated.
Check if the solution meets your requirements:
// Builds the nested channel -> year -> month -> day -> hour structure by
// grouping from the finest level (hour) up to the coarsest (channel).
// At every level `value` is the $avg of the `value`s one level below, i.e.
// an average of averages rather than a document-weighted average.
//
// $group does not guarantee the order of its output documents, and $push
// appends elements in the order documents arrive. A $sort is therefore placed
// before each roll-up so the hours/days/months/aggregates arrays come out in
// ascending order. In the sample input month/day/hour are zero-padded
// strings, so their string order matches chronological order.
db.collection.aggregate([
  // 1. One document per channel/year/month/day/hour holding the hourly average.
  {
    $group: {
      _id: {
        "channelId": "$channelId",
        "year": "$year",
        "month": "$month",
        "day": "$day",
        "hour": "$hour"
      },
      value: { $avg: "$value" }
    }
  },
  { $sort: { "_id.hour": 1 } },
  // 2. Roll hours up into days.
  {
    $group: {
      _id: {
        "channelId": "$_id.channelId",
        "year": "$_id.year",
        "month": "$_id.month",
        "day": "$_id.day"
      },
      value: { $avg: "$value" },
      hours: {
        $push: {
          hour: "$_id.hour",
          value: "$value"
        }
      }
    }
  },
  { $sort: { "_id.day": 1 } },
  // 3. Roll days up into months.
  {
    $group: {
      _id: {
        "channelId": "$_id.channelId",
        "year": "$_id.year",
        "month": "$_id.month"
      },
      value: { $avg: "$value" },
      days: {
        $push: {
          day: "$_id.day",
          value: "$value",
          hours: "$hours"
        }
      }
    }
  },
  { $sort: { "_id.month": 1 } },
  // 4. Roll months up into years.
  {
    $group: {
      _id: {
        "channelId": "$_id.channelId",
        "year": "$_id.year"
      },
      value: { $avg: "$value" },
      months: {
        $push: {
          month: "$_id.month",
          value: "$value",
          days: "$days"
        }
      }
    }
  },
  { $sort: { "_id.year": 1 } },
  // 5. Roll years up into one document per channel.
  {
    $group: {
      _id: {
        "channelId": "$_id.channelId"
      },
      channelId: { $first: "$_id.channelId" },
      value: { $avg: "$value" },
      aggregates: {
        $push: {
          year: "$_id.year",
          value: "$value",
          months: "$months"
        }
      }
    }
  },
  // 6. Drop the grouping key; channelId is already exposed as a top-level field.
  {
    $project: {
      _id: 0
    }
  }
])
MongoPlayground
Related
I have time-series data in MongoDB. For every sensor I first compute the difference between its max and min reading of the day, and I then want to sum those differences per day between two given dates. I am using the query below:
// For asset code "h" inside a fixed time window, compute per day and per meter
// the spread (max - min) of activeEnergy.
db.ts_events.aggregate([
// Keep only documents for the wanted asset code whose timestamp lies between
// the two ISODate bounds (both inclusive).
{ $match: {
"metadata.assetCode": { $in: [
"h"
]
},
"timestamp": { $gte: ISODate("2022-07-01T02:39:02.000+0000"), $lte: ISODate("2022-07-01T06:30:00.000+0000")
}
}
},
// Split the timestamp into its parts (year, month, day, ...) and keep only the
// fields the later stages read.
{
$project: {
date: {
$dateToParts: { date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
}
},
// One group per (calendar day, meter), tracking the highest and lowest reading.
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: { $max: "$activeEnergy"
},
minValue: { $min: "$activeEnergy"
},
}
},
// Daily spread per meter; the field name is spelled `differnce` throughout.
{
$addFields: {
differnce: { $subtract: [
"$maxValue",
"$minValue"
]
},
}
},
])
I get the following output
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "B"
},
"maxValue" : 1979.78,
"minValue" : 1979.77,
"differnce" : 0.009999999999990905
}
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "A"
},
"maxValue" : 7108.01,
"minValue" : 7098.18,
"differnce" : 9.829999999999927
}
I want to calculate the sum of both meters' differences. How can I do that?
Apart from this, I am facing one more problem, which I am adding in this edited version: as you can see, the date is in ISODate format, but I will be receiving it in Unix epoch format.
I tried to tweak the query, but it is not working:
// Variant of the daily-spread query that tries to derive the time window from
// Unix epoch milliseconds instead of ISODate literals.
db.ts_events.aggregate([
// Split the timestamp into parts, keep the fields used later, and add the
// window bounds to every document as Date values converted from epoch millis.
{
$project: {
date: {
$dateToParts: {
date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
"metadata.assetCode": 1,
"timestamp": 1,
startDate: {
$toDate: 1656686342000
},
endDate: {
$toDate: 1656700200000
}
}
},
// NOTE(review): in a plain $match, "$startDate" and "$endDate" are compared as
// literal strings, not as references to the fields added above. Comparing
// `timestamp` against other fields of the same document needs $expr, or the
// Date values can be passed to $match directly, e.g. new Date(1656686342000).
// This is the likely reason the query returns nothing.
{
$match: {
"metadata.assetCode": {
$in: [
"h"
]
},
"timestamp": {
$gte: "$startDate", $lte: "$endDate"
}
}
},
// One group per (calendar day, meter), tracking the highest and lowest reading.
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: {
$max: "$activeEnergy"
},
minValue: {
$min: "$activeEnergy"
},
}
},
// Daily spread per meter; the field name is spelled `differnce` throughout.
{
$addFields: {
differnce: {
$subtract: [
"$maxValue",
"$minValue"
]
},
}
},
// Regroup by day only: keep the per-meter documents in `res` and add up their
// spreads into `differnceSum`.
{
$group: {
_id: "$_id.date", res: {
$push: '$$ROOT'
}, differnceSum: {
$sum: '$differnce'
}
}
}
])
Can you help me solve the problem?
One option is to add one more step like this (depending on your expected output format):
This step groups your separate documents into a single document, which allows you to sum their values together. Be careful when grouping this way: the result is now one big document, and a MongoDB document has a size limit (16 MB).
We use $$ROOT to keep the original document structure (here, inside a new array):
{$group: {_id: 0, res: {$push: '$$ROOT'}, differnceSum: {$sum: $differnce'}}}
Below an extract of my data
{ _id: ObjectId("1"), date: 2021-07-01T08:00.000+00:00, price: 10, id: 1}
{ _id: ObjectId("2"), date: 2021-07-01T08:20.000+00:00, price: 10.5, id: 1}
{ _id: ObjectId("3"), date: 2021-07-01T08:40.000+00:00, price: 9.8, id: 1}
{ _id: ObjectId("4"), date: 2021-07-01T09:00.000+00:00, price: 10.2, id: 1}
{ _id: ObjectId("5"), date: 2021-07-01T09:20.000+00:00, price: 10.9, id: 1}
{ _id: ObjectId("6"), date: 2021-07-01T09:40.000+00:00, price: 13, id: 1}
{ _id: ObjectId("7"), date: 2021-07-02T07:00.000+00:00, price: 10, id: 2}
{ _id: ObjectId("8"), date: 2021-07-02T07:20.000+00:00, price: 10.2, id: 2}
{ _id: ObjectId("9"), date: 2021-07-02T07:40.000+00:00, price: 8, id: 2}
{ _id: ObjectId("10"), date: 2021-07-02T08:00.000+00:00, price: 10.7, id: 2}
{ _id: ObjectId("11"), date: 2021-07-02T08:20.000+00:00, price: 10, id: 2}
{ _id: ObjectId("12"), date: 2021-07-02T08:40.000+00:00, price: 11, id: 2}
{ _id: ObjectId("13"), date: 2021-07-03T08:00.000+00:00, price: 11.5, id: 2}
{ _id: ObjectId("14"), date: 2021-07-03T08:20.000+00:00, price: 10.8, id: 2}
Is there a way to select, for each id, the documents that fall within the first hour of each day?
The result based on the extract would be
{ _id: ObjectId("1"), date: 2021-07-01T08:00.000+00:00, price: 10, id: 1}
{ _id: ObjectId("2"), date: 2021-07-01T08:20.000+00:00, price: 10.5, id: 1}
{ _id: ObjectId("3"), date: 2021-07-01T08:40.000+00:00, price: 9.8, id: 1}
{ _id: ObjectId("7"), date: 2021-07-02T07:00.000+00:00, price: 10, id: 2}
{ _id: ObjectId("8"), date: 2021-07-02T07:20.000+00:00, price: 10.2, id: 2}
{ _id: ObjectId("9"), date: 2021-07-02T07:40.000+00:00, price: 8, id: 2}
{ _id: ObjectId("13"), date: 2021-07-03T08:00.000+00:00, price: 11.5, id: 2}
{ _id: ObjectId("14"), date: 2021-07-03T08:20.000+00:00, price: 10.8, id: 2}
What the query does:
converts string dates to date objects
groups by year and day
collects all documents of the day in day-info
finds the minimum hour for each day
filters day-info, keeping only the documents that fall in the minimum hour
restores the original document structure
Test code here
Query (if you have dates, and not strings, skip the first $set stage)
// Keeps, for every calendar day, only the documents whose hour equals the
// earliest hour seen on that day.
// NOTE(review): the grouping key is (year, dayOfYear) only. If several ids can
// share a day and each needs its own first hour, `id` would also have to be
// part of the key. Confirm against the real data.
aggregate(
[ {
// Parse the string `date` into a real Date (skip this stage if it already is one).
"$set" : {
"date" : {
"$dateFromString" : {
"dateString" : "$date"
}
}
}
}, {
// One group per calendar day: collect the day's documents and find its earliest hour.
"$group" : {
"_id" : {
"year" : {
"$year" : "$date"
},
"day" : {
"$dayOfYear" : "$date"
}
},
"day-info" : {
"$push" : "$$ROOT"
},
"min-hour" : {
"$min" : {
"$hour" : "$date"
}
}
}
}, {
// Keep only the collected documents whose hour equals that earliest hour.
"$project" : {
"day-info" : {
"$filter" : {
"input" : "$day-info",
"as" : "d",
"cond" : {
"$eq" : [ {
"$hour" : "$$d.date"
}, "$min-hour" ]
}
}
}
}
}, {
// Back to one output document per surviving input document.
"$unwind" : {
"path" : "$day-info"
}
}, {
// Promote the original document to the top level again.
"$replaceRoot" : {
"newRoot" : "$day-info"
}
} ]
)
Edit (to keep the documents whose date is less than 1 hour after the minimum date of each day, you can do this)
Test code here
Query
// Keeps, for every calendar day, the documents dated less than one hour after
// the earliest document of that day, then returns them sorted by date.
db.collection.aggregate([
// Parse the string `date` into a real Date (skip this stage if it already is one).
{
"$set": {
"date": {
"$dateFromString": {
"dateString": "$date"
}
}
}
},
// One group per calendar day: collect the day's documents and its earliest date.
{
"$group": {
"_id": {
"year": {
"$year": "$date"
},
"day": {
"$dayOfYear": "$date"
}
},
"day-info": {
"$push": "$$ROOT"
},
"min-date": {
"$min": "$date"
}
}
},
// Subtracting two dates yields milliseconds; 60 * 60 * 1000 is one hour, so
// this keeps documents strictly less than one hour after the day's first one.
{
"$set": {
"day-info": {
"$filter": {
"input": "$day-info",
"as": "d",
"cond": {
"$lt": [
{
"$subtract": [
"$$d.date",
"$min-date"
]
},
{
"$multiply": [
60,
60,
1000
]
}
]
}
}
}
}
},
// Back to one output document per surviving input document.
{
"$unwind": {
"path": "$day-info"
}
},
// Promote the original document to the top level again.
{
"$replaceRoot": {
"newRoot": "$day-info"
}
},
// Chronological output order.
{
"$sort": {
"date": 1
}
}
])
Mongo 4.2
Have a bunch of data for server instance and CPU usage.
{
"_id" : "1",
"instance" : "172.00.00.01",
"client" : "A",
"date" : ISODate("2020-12-06T22:47:00.163Z"),
"app" : 0.0133317
}
{
"_id" : "2",
"instance" : "172.00.00.01",
"client" : "A",
"date" : ISODate("2020-12-06T22:47:03.163Z"),
"app" : 0.0400000
}
{
"_id" : "3",
"instance" : "172.00.00.02",
"client" : "B",
"date" : ISODate("2020-12-06T22:47:00.163Z"),
"app" : 0.0800000
}
This is the result I want: data grouped per minute, with one field per instance holding that instance's MAX "app" value.
{
"_id" : {
"minute" : 47,
"hour" : 22,
"day" : 6,
"month" : 12,
"year" : 2020
},
"172.00.00.01": 0.0400000,
"172.00.00.02": 0.0800000
}
So my aggregate needs to group to get minutes:
// Groups by wall-clock minute (minute/hour/day/month/year evaluated in the
// America/Chicago timezone) plus instance and client, keeping the highest
// `app` value seen in each group.
{
"$group": {
"_id": {
"minute": {
"$minute": {
"date": "$date",
"timezone": "America/Chicago"
}
},
"hour": {
"$hour": {
"date": "$date",
"timezone": "America/Chicago"
}
},
"day": {
"$dayOfMonth": {
"date": "$date",
"timezone": "America/Chicago"
}
},
"month": {
"$month": {
"date": "$date",
"timezone": "America/Chicago"
}
},
"year": {
"$year": {
"date": "$date",
"timezone": "America/Chicago"
}
},
instance: '$instance',
client: '$client'
},
app: {
$max: '$app'
}
}
}
That pipeline gives me this as result:
{
"_id" : {
"minute" : 47,
"hour" : 22,
"day" : 6,
"month" : 12,
"year" : 2020,
"instance" : "172.00.00.01",
"client" : "A"
},
"app" : 0.040000
},
{
"_id" : {
"minute" : 47,
"hour" : 22,
"day" : 6,
"month" : 12,
"year" : 2020,
"instance" : "172.00.00.02",
"client" : "B"
},
"app" : 0.080000
}
So to get to the desired output, I understand I need to group again without instance/client to combine the two, but how do I dynamically get the instance variable as field name? These do not work!
// Four attempts at using the value of `instance` as the output field name.
// NOTE(review): the key of an $addFields entry is taken as a literal field
// name and is never evaluated as an expression, so none of these can work.
// A computed key has to be built with $arrayToObject from {k, v} pairs.
// Attempt 1: bracketed field path as the key.
{
"$addFields": {
"[$instance]": "$app"
}
},
// Attempt 2: bracketed variable syntax as the key.
{
"$addFields": {
"[$$instance]": "$app"
}
},
// Attempt 3: bare field path as the key.
{
"$addFields": {
"$instance": "$app"
}
},
// Attempt 4: bare variable syntax as the key.
{
"$addFields": {
"$$instance": "$app"
}
},
Keep your existing $group stage and append the following stages after it:
// Regroup by minute only (instance and client dropped from the key) and
// collect one {k: instance, v: app} pair per instance.
{
$group: {
_id: {
day: "$_id.day",
hour: "$_id.hour",
minute: "$_id.minute",
month: "$_id.month",
"year": "$_id.year"
},
data: {
$push: { k: "$_id.instance", v: "$app" }
}
}
},
// Turn the {k, v} pairs into an object whose keys are the instance values.
{
$addFields: {
data: { "$arrayToObject": "$data" }
}
},
// Lift those keys to the top level next to _id. $$ROOT is listed last, so its
// fields win if a key collides.
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [ "$data", "$$ROOT" ]
}
}
},
// Remove the helper field now that its content is merged into the root.
{
$project: {
data: 0
}
}
Working Mongo playground
I currently have an aggregation pipeline:
// Skeleton of the existing pipeline; the contents of the first three stages
// are elided. The placeholders are kept as comments inside balanced braces,
// and the stage and call are closed, so the snippet parses.
db.getCollection('forms').aggregate([
  { $unwind: { /* unwind */ } },
  {
    $match: {
      // some matches
    }
  },
  {
    $project: {
      // some projections
    }
  },
  {
    // Finally, group the results: one count per (year, month, raceEthnicity).
    $group: {
      _id: {
        year: { $year: '$createdAt' },
        month: { $month: '$createdAt' },
        raceEthnicity: '$demographic.raceEthnicity'
      },
      count: { $sum: 1 },
    }
  }
])
My current results are similar to:
[{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Asian"
},
"count" : 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Multiracial"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "White"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
"raceEthnicity" : "White"
},
"count" : 33.0
}]
Is there a way to add a new stage on the pipeline to "merge" results of the same year/month into a single object?
I want to achieve something like:
{
"_id" : {
"year" : 2020,
"month" : 11,
},
"Asian" : 1.0,
"Multiracial": 3.0,
"White": 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
},
"White": 33
}
Is it possible? How can I do that?
Add this one to your aggregation pipeline.
// Pivots the per-(year, month, raceEthnicity) counts into one document per
// (year, month) with one field per ethnicity, without listing the ethnicities.
db.collection.aggregate([
// Express each count as a {k, v} pair keyed by the ethnicity.
{ $set: { "data": { k: "$_id.raceEthnicity", v: "$count" } } },
// Collect all pairs belonging to the same year/month.
{ $group: { _id: { year: "$_id.year", month: "$_id.month" }, data: { $push: "$data" } } },
// Turn the pairs into an object whose keys are the ethnicity names.
{ $set: { "data": { $arrayToObject: "$data" } } },
// Lift those keys to the top level next to _id.
{ $replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", "$data"] } } },
// Remove the helper field.
{ $unset: "data" }
])
Unlike the solution from @wak786, you don't need to know all ethnicities at design time; it works for arbitrary ethnicities.
Add these stages to your pipeline.
// Pivots the per-(year, month, raceEthnicity) counts into one document per
// (year, month). The output ethnicities are hard-coded in the $group stage, so
// any value other than Asian, Multiracial or White is not carried over.
db.collection.aggregate([
// Add a top-level field named after the document's ethnicity, holding its count.
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$$ROOT",
{
$arrayToObject: [
[
{
k: "$_id.raceEthnicity",
v: "$count"
}
]
]
}
]
}
}
},
// Sum each known ethnicity field per year/month.
{
"$group": {
"_id": {
year: "$_id.year",
month: "$_id.month",
},
"Asian": {
"$sum": "$Asian"
},
"Multiracial": {
"$sum": "$Multiracial"
},
"White": {
"$sum": "$White"
}
}
}
])
Below is the mongo playground link. I have taken the current result of your pipeline as input to my query.
Try it here
I want to get the data for each month. In my collection the data is stored like this:
"patient" : [
{
"status" : 'arrived',
start_time: '2017-08-17T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-08-16T07:17:00.000Z
},
{
"status" : 'arrived',
start_time: '2017-07-12T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-07-05T08:10:00.000Z
},
{
"status" : 'arrived',
start_time: '2017-06-02T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-05-05T08:16:00.000Z
}
]
etc.
I want the number of patients for each month (Jan to Dec), like this:
{
"month" : 8,
"count" : 2
}
and so on for each month, 1 to 12.
I assume the patient array is associated with a customer and the dates are stored as MongoDB ISODate values.
So, the actual document would look like this:
{
name: "stackOverflow",
"patient" : [
{
"status" : 'arrived',
"start_time": ISODate("2017-08-17T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-08-16T07:17:00.000Z")
},
{
"status" : 'arrived',
"start_time": ISODate("2017-07-12T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-07-05T08:10:00.000Z")
},
{
"status" : 'arrived',
"start_time": ISODate("2017-06-02T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-05-05T08:16:00.000Z")
}
]
}
here is a sample query which you can try -
// Counts the patient entries per calendar month for each named document and
// returns them as one array per name, ordered by month.
db.test.aggregate([
  // One document per patient entry.
  { $unwind: "$patient" },
  // Count entries per (name, month of start_time).
  {
    $group: {
      _id: { name: "$name", month: { $month: "$patient.start_time" } },
      count: { $sum: 1 }
    }
  },
  // $group does not order its output and $push keeps arrival order, so sort
  // here to get the months in ascending order in the final array.
  { $sort: { "_id.month": 1 } },
  // Collect the monthly counts into one array per name.
  {
    $group: {
      _id: "$_id.name",
      patient: { $push: { month: "$_id.month", count: "$count" } }
    }
  }
])
Sample output:
{
"_id" : "stackOverflow",
"patient" : [
{
"month" : 5,
"count" : 1
},
{
"month" : 6,
"count" : 1
},
{
"month" : 7,
"count" : 2
},
{
"month" : 8,
"count" : 2
}
]
}
You can adapt the query to your use case. Hope this helps!
This is my code:-
// Attempt to count 'arrived' appointments per month from a start date onwards.
db.appointments.aggregate( [
// Keep only the three fields used below (_id is kept by default).
{
$project:
{
"patient_id": 1,
"start_time": 1,
"status": 1
}
},
// `startdate` is defined outside this snippet. The lower bound is an ISO
// string, so start_time is presumably stored as a string too. TODO confirm.
{
$match: {
'start_time' : { $gte: startdate.toISOString() },
'status': { $eq: 'arrived' }
} ,
},
// NOTE(review): "$appointments.start_time" points at a field named
// `appointments`, which the $project above does not output; the projected
// field is the top-level `start_time`. Grouping on "$_id" also puts every
// document in its own group, so `count` can never exceed 1.
{ $group: {
_id: {id: "$_id", start_time: {$month: "$appointments.start_time"}},
count: { $sum: 1}
}}
])
When I used this :-
// NOTE(review): $month needs a date value, but the output shown further down
// has start_time as a string. It would have to be converted first (e.g. with
// $toDate or $dateFromString). Verify the stored type.
{ $group: {
_id: {id: "$_id", start_time: {$month: "$start_time"}},
count: { $sum: 1}
}
}
it shows this error message:
{"name":"MongoError","message":"can't convert from BSON type missing to Date","ok":0,"errmsg":"can't convert from BSON type missing to Date","code":16006,"codeName":"Location16006"}
And when I comment it out, it shows this output:
:[{"count":{"_id":"595b6f95ab43ec1f6c92b898","patient_id":"595649904dbe9525c0e036ef","start_time":"2017-07-04T10:35:00.000Z","status":"arrived"}},
{"count":{"_id":"595dff870960d425d4f14633","patient_id":"5956478b4dbe9525c0e036ea","start_time":"2017-03-08T09:14:00.000Z","status":"arrived"}},{"count":{"_id":"595dffaa0960d425d4f14634","patient_id":"595649904dbe9525c0e036ef","start_time":"2017-03-17T09:15:00.000Z","status":"arrived"}},{"count":{"_id":"595dffcf0960d425d4f14635","patient_id":"595648394dbe9525c0e036ec","start_time":"2017-06-08T09:15:00.000Z","status":"arrived"}},{"count":{"_id":"595dfffb0960d425d4f14636","patient_id":"5956478b4dbe9525c0e036ea","start_time":"2017-06-20T09:16:00.000Z","status":"arrived"}},{"count":{"_id":"595e00160960d425d4f14637","patient_id":"5959ea7f80388b19e0b57817","start_time":"2017-08-17T09:17:00.000Z","status":"arrived"}}]}
const group = {
$group: {
_id: { month: { $month: "$createdAt" } },
count: { $sum: 1 },
},
};
const groups = {
$group: {
_id: null,
patient: { $push: { month: '$_id.month', count: '$count' } },
},
};
return db.Patient.aggregate([group, groups]);