Get Highest Value By Hour in MongoDB for given Date Range? - mongodb

I have the following document structure in MongoDB :
{
"_id" : ObjectId("5c1b7451b1829b69963029ea"),
"duration" : 92,
"accountId" : ObjectId("9aafe7b01cf4560c9bb5d68"),
"createdAt" : ISODate("2018-12-20T10:52:01.560Z"),
"__v" : 0,
},
{
"_id" : ObjectId("5c1b7451b1829b69963029ea"),
"duration" : 192,
"accountId" : ObjectId("9aafe7b01cf4560c9bb5d68"),
"createdAt" : ISODate("2018-12-20T11:52:01.560Z"),
"__v" : 0,
}
Now I want to get the highest sum of duration for the day with corresponding hour. Technically, something like this :
{
"readableDate" : "2018-12-20",
"hour" : 11,
"total" : 192
}
Where total is the hourly total which is HIGHEST for that particular day.
The query which I have tried is as follows :
db.getCollection('operational_details').aggregate(
{"$match": {"accountId": ObjectId("9aafe7b01cf4560c9bb5d68"),
"createdAt": {"$gte": ISODate("2019-06-01T10:30:29.725Z"),
"$lte": ISODate("2019-06-04T10:30:29.725Z")},
}},
{ "$project": {
"date": {"$dateToString": {"format": "%Y-%m-%d", "date": "$createdAt"}},
"hour": {"$hour":"$createdAt"},
"total":{"$sum": "$duration"} }
},
{ "$group":{
"_id": { "hour":"$hour","date":"$date"},
"max": {"$max": "$total"}
}})
Hope I am clear with my example. TIA

Please try the below
working playground link
//Array
[
{
"_id": ObjectId("5cf4f20243f560e1e0a77014"),
"duration": 92,
"accountId": ObjectId("6ef4f20243f560e1e0a77015"),
"createdAt": ISODate("2018-12-20T10:52:10.320Z")
},
{
"_id": ObjectId("5cf4f21843f560e1e0a7701a"),
"duration": 192,
"accountId": ObjectId("6ef4f20243f560e1e0a77015"),
"createdAt": ISODate("2018-12-20T11:52:11.123Z")
}
]
//Script
// Returns, for every calendar day in the matched range, the hour whose summed
// "duration" is the highest, shaped as { readableDate, hour, total }.
db.collection.aggregate([
  // Keep only the requested account and date range.
  {
    "$match": {
      "accountId": ObjectId("6ef4f20243f560e1e0a77015"),
      "createdAt": {
        "$gte": ISODate("2018-12-19T10:30:00.000Z"),
        "$lte": ISODate("2018-12-21T10:30:00.000Z")
      }
    }
  },
  // Sum the durations per (day, hour) bucket. No timezone is passed to
  // $dateToString / $hour, so both are evaluated in UTC.
  {
    "$group": {
      "_id": {
        "date": {
          "$dateToString": {
            "format": "%Y-%m-%d",
            "date": "$createdAt"
          }
        },
        "hour": {
          "$hour": "$createdAt"
        }
      },
      "total": {
        "$sum": "$duration"
      }
    }
  },
  // Put the largest hourly total first; the hour is a tie-breaker so that the
  // result is deterministic when two hours have the same total.
  {
    "$sort": {
      "total": -1,
      "_id.hour": 1
    }
  },
  // One document per day: because of the preceding $sort, $first picks the
  // hour with the highest total for that day.
  {
    "$group": {
      "_id": "$_id.date",
      "hour": {
        "$first": "$_id.hour"
      },
      "total": {
        "$first": "$total"
      }
    }
  },
  // Shape the output in a single $project.
  {
    "$project": {
      "_id": 0,
      "readableDate": "$_id",
      "hour": 1,
      "total": 1
    }
  },
  // Return the days in chronological order.
  {
    "$sort": {
      "readableDate": 1
    }
  }
])
//Result
[
{
"hour": 11,
"readableDate": "2018-12-20",
"total": 192
}
]

Related

Sort multiple levels of array after group in Mongo Java

I have documents with below schema
id :
currencyCode : "USD"
businessDayStartDate : ""
hourZoneNumber : 1
customerCount : 0
itemQuantity : 4
nodeId : "STORE_DEV"
endpointId : "998"
amount : 4
I am trying to find documents that match nodeId and trying to aggregate customerCount, itemQuantity and amount for each hourZoneNumber.
Below is the query
db.getCollection("xxx").aggregate([
{ "$match": { "nodeId": { "$in":["STORE_DEV_1","STORE_DEV_2"] }, "businessDayStartDate" : { "$gte": "2022-03-04" , "$lte": "2022-03-07" } }},
{ "$group": {
"_id": {
"nodeId": "$nodeId",
"endpointId": "$endpointId",
"hourZoneNumber": "$hourZoneNumber"
},
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
},
{ "$group": {
"_id": {
"nodeId": "$_id.nodeId",
"endpointId": "$_id.endpointId"
},
"hourZones": {
"$addToSet": {
"hourZoneNumber": "$_id.hourZoneNumber",
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
}
}
},
{ "$group": {
"_id": "$_id.nodeId",
"endpoints": {
"$addToSet": {
"endpointId": "$_id.endpointId",
"hourZones": "$hourZones"
}
},
"total": {
"$addToSet": {
"customerCount": { "$sum": "$hourZones.customerCount" },
"itemQuantity" : { "$sum": "$hourZones.itemQuantity" },
"amount" : { "$sum": "$hourZones.amount" }
}
}
}
},
{
$project: {
_id: 0,
nodeId: "$_id",
endpoints: 1,
hourZones: 1,
total: 1
}
}
])
Output is as below:
{
nodeId: 'STORE_DEV_2',
endpoints: [
{ endpointId: '998',
hourZones:
[
{ hourZoneNumber: 1,
customerCount: 0,
itemQuantity: 4,
amount: Decimal128("4") }
] } ],
total: [ { customerCount: 0, itemQuantity: 4, amount: Decimal128("4") } ],
}
{
nodeId: 'STORE_DEV_1',
endpoints:
[ { endpointId: '999',
hourZones:
[ { hourZoneNumber: 2,
customerCount: 2,
itemQuantity: 4,
amount: Decimal128("4") },
{ hourZoneNumber: 1,
customerCount: 4,
itemQuantity: 8,
amount: Decimal128("247.56") } ] } ],
total:
[ { customerCount: 6,
itemQuantity: 12,
amount: Decimal128("251.56") } ]
}
I want the output to be sorted as : First sort by nodeId, then by endpointId within the endpoints and lastly by hourZoneNumber within hourZones.
How do I do this ? I tried using sort() with all the three fields. But it did not work. Also, can someone please confirm if there is any better way than the above code, as I am new to Mongo DB.
Edit:
Please find sample input data at https://mongoplayground.net/p/FYm3QMMgrNI
Since you already have the separated data at the beginning, it is simply a matter of saving these values through the grouping and then sorting by them in the end.
Edit: In order to sort each inner array, we use $push instead of $addToSet inside the $group and $sort before each $group:
// Aggregates customerCount / itemQuantity / amount per hour zone and nests the
// result as nodeId -> endpoints -> hourZones, with every level sorted:
// documents by nodeId, endpoints by endpointId, hourZones by hourZoneNumber.
db.collection.aggregate([
  {
    "$match": {
      "nodeId": {"$in": ["STORE_DEV_TTEC", "STORE_DEV_TTEZ"]},
      "businessDayStartDate": {"$gte": "2022-03-04", "$lte": "2022-03-07"}
    }
  },
  // Level 1: one document per (nodeId, endpointId, hourZoneNumber).
  {
    "$group": {
      "_id": {
        "nodeId": "$nodeId",
        "endpointId": "$endpointId",
        "hourZoneNumber": "$hourZoneNumber"
      },
      "customerCount": {"$sum": "$customerCount"},
      "itemQuantity": {"$sum": "$itemQuantity"},
      "amount": {"$sum": "$amount"}
    }
  },
  // $group does not order its output, so sorting is done AFTER it and right
  // before the $group that pushes the hour zones into an array.
  {
    "$sort": {"_id.nodeId": 1, "_id.endpointId": 1, "_id.hourZoneNumber": 1}
  },
  // Level 2: one document per (nodeId, endpointId). $push (unlike $addToSet)
  // keeps the incoming order, so hourZones ends up sorted by hourZoneNumber.
  // The per-endpoint sums are carried along so a per-node total can be built.
  {
    "$group": {
      "_id": {
        "nodeId": "$_id.nodeId",
        "endpointId": "$_id.endpointId"
      },
      "hourZones": {
        "$push": {
          "hourZoneNumber": "$_id.hourZoneNumber",
          "customerCount": "$customerCount",
          "itemQuantity": "$itemQuantity",
          "amount": "$amount"
        }
      },
      "customerCount": {"$sum": "$customerCount"},
      "itemQuantity": {"$sum": "$itemQuantity"},
      "amount": {"$sum": "$amount"}
    }
  },
  // Order the endpoints before they are pushed into the per-node array.
  {
    "$sort": {"_id.nodeId": 1, "_id.endpointId": 1}
  },
  // Level 3: one document per nodeId, with its endpoints sorted by endpointId.
  {
    "$group": {
      "_id": "$_id.nodeId",
      "endpoints": {
        "$push": {
          "endpointId": "$_id.endpointId",
          "hourZones": "$hourZones"
        }
      },
      "customerCount": {"$sum": "$customerCount"},
      "itemQuantity": {"$sum": "$itemQuantity"},
      "amount": {"$sum": "$amount"}
    }
  },
  // At this point the node id lives in _id (nodeId is only created by the
  // $project below), so the top-level sort has to use _id.
  {
    "$sort": {"_id": 1}
  },
  // Final shape. "total" is a single object holding the sums over all
  // endpoints and hour zones of the node.
  {
    "$project": {
      "_id": 0,
      "nodeId": "$_id",
      "endpoints": 1,
      "total": {
        "customerCount": "$customerCount",
        "itemQuantity": "$itemQuantity",
        "amount": "$amount"
      }
    }
  }
])
You can see it here

need to convert the data in another format

We have Data:
[
{
"_id": ObjectId("5f87e152219aaf1f9404ef3f"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": ISODate("2020-09-08T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
},
{
"_id": ObjectId("5f87e1e2219aaf1f9404eff5"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 494217.606225681,
"count": 1285.0,
"date": ISODate("2020-09-09T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
}
]
I run the following query on the data above and get the result shown after it:
db.collection.aggregate([
{
"$project": {
"year": {
"$year": "$date"
},
"month": {
"$month": "$date"
},
"dayOfMonth": {
"$dayOfMonth": "$date"
},
"average": "$average",
"count": "$count",
"Symbol": 1
}
},
{
"$group": {
"_id": {
year: "$year",
month: "$month",
dayOfMonth: "$dayOfMonth"
},
"data": {
"$push": "$$ROOT"
}
}
},
{
"$project": {
"average": {
"$divide": [
{
"$reduce": {
"input": "$data",
"initialValue": 0,
"in": {
"$add": [
"$$value",
{
"$multiply": [
"$$this.count",
"$$this.average"
]
}
]
}
}
},
{
$reduce: {
input: "$data",
initialValue: 0,
in: {
"$add": [
"$$value",
"$$this.count"
]
}
}
}
]
}
}
}
])
I am getting output :
[{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 8
},
"average" : 574998.153846154
},
{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 9
},
"average" : 494217.606225681
}]
But I need the result formatted like this, with each date as the key and its average as the value:
{
2020-09-08T18:30:00.000Z : 574998.153846154,
2020-09-09T18:30:00.000Z : 494217.606225681
}
Thanks in advance.
You can use $dateFromString to create the date you want.
Also, you need $concat and $toString to parse the numbers to string and concat into a single string.
After that, using $group you can collect all the values you need into a single array. Because you want the date to be the KEY, it is necessary to create the fields k and v and convert the date back to a string.
With the values together, $arrayToObject lets you create the schema you want (date: average), and $replaceRoot promotes the resulting object so that only those values remain at the top level.
To do this you need to add this query at the end of your aggregation.
// Stage 1: rebuild a date from the grouped year/month/dayOfMonth parts.
// The parts are converted to strings, joined as "day-month-year", and parsed
// with the matching "%d-%m-%Y" format in the given timezone.
{
"$set": {
"date": { "$dateFromString": { "dateString": {
"$concat": [
{ "$toString": "$_id.dayOfMonth" }, "-",
{ "$toString": "$_id.month" }, "-",
{ "$toString": "$_id.year" }
] },
"format": "%d-%m-%Y", "timezone": "Europe/Madrid"
} } }
},
// Stage 2: collapse everything into one document ("_id": null) holding an
// array of { k, v } pairs: k is the date as a string, v is the average.
{
"$group": {
"_id": null,
"date": { "$push": { "k": { "$toString": "$date" }, "v": "$average" } }
}
},
// Stage 3: turn the { k, v } array into an object and make that object the
// whole output document.
{
"$replaceRoot": { "newRoot": { "$arrayToObject": "$date" } }
}
This query adds a new field called date, like this:
"date": ISODate("2020-09-07T22:00:00Z")
I've used Europe/Madrid as the timezone, but you can choose whichever timezone gives you the desired date.
Example here.
The output is:
{
"2020-09-07T22:00:00.000Z": 574998.153846154,
"2020-09-08T22:00:00.000Z": 494217.606225681
}
Using America/New_York as timezone:
{
"2020-09-08T04:00:00.000Z": 574998.153846154,
"2020-09-09T04:00:00.000Z": 494217.606225681
}

MongoDB Nested GroupBy

I have a sample data like below:
(Every floor has multiple sensor data)
{"Floor_Id": "Galileo_001",
"name": "Forklifts",
"Sensor_data": [{
"Floor_Id": "Galileo_001",
"Floor_name": "Forklifts",
"Name": "forkLift_002",
"Asset_Id": 123,
"Load": 1.7096133,
"Timestamp": 1537878750996
},
{
"Floor_Id": "Galileo_001",
"Floor_name": "Forklifts",
"Name": "forkLift_003",
"Asset_Id": 456,
"Load": 1.7096133,
"Timestamp": 1537878750996,
},
{
"Floor_Id": "Galileo_001",
"Floor_name": "Forklifts",
"Name": "forkLift_005 ",
"Asset_Id": 127,
"Load": 1.7096133,
"Timestamp": 1537878750996
},
{
"Floor_Id": "Galileo_001",
"Floor_name": "Forklifts",
"Name": "forkLift_001",
"Asset_Id": 157,
"Load": 1.7096133,
"Timestamp": 1537878750996,
}
]}
For the response, I need total load calculated for every floor and individual load for every day. The desired response is like below:
{
"TotalLoad": 3214,
"Floor_Id": "Galileo_001",
"LoadUnit": "Kgs",
"AssetStatus": [{
"TotalLoad": 200,
"LoadUnit": "Kgs",
"Date": "1539588994"
}, {
"TotalLoad": 400,
"LoadUnit": "Kgs",
"Date": "1539475200"
}, {
"TotalLoad": 100,
"LoadUnit": "Kgs",
"Date": "1539388800"
}]
}
I am writing the below Mongo aggregation:
db.sensordata.aggregate([{"$unwind" : "$Sensor_data" },
{"$group": {
"_id": {"Floor_Id": "$Floor_Id",
"DailyDate":{"$dateFromParts":{
"year":{"$year":{"$add": [new Date("1970-01-01"), "$Sensor_data.Timestamp"]}},
"month":{"$month":{"$add": [new Date("1970-01-01"), "$Sensor_data.Timestamp"]}},
"day":{"$dayOfMonth":{"$add": [new Date("1970-01-01"), "$Sensor_data.Timestamp"]}}
}
}
},
"AssetLoad": {"$sum": "$Sensor_data.Load" }
}
},
{
"$group" : {
"_id": {"Floor_Id": "$_id.Floor_Id"},
"TotalLoad": { "$sum": "$Sensor_data.Load" },
"AssetStatus":{
"$push":{
"TotalLoad": "$AssetLoad",
"LoadUnit": "Kgs",
"Date": "$_id.DailyDate"
}
}
}
}
])
Problem:
For the total load calculated for every floor, I am getting 0.
{
"_id" : {
"Floor_Id" : "Galileo_001"
},
"TotalLoad" : 0,
"AssetStatus" : [
{
"TotalLoad" : 8.5480665,
"LoadUnit" : "Kgs",
"Date" : ISODate("2018-09-25T00:00:00.000Z")
}
]
}
What am I doing wrong here?
How can I get the desired output?
You're getting zero because the documents reaching the second $group stage no longer contain the field Sensor_data.Load (the first $group outputs only _id and AssetLoad), so $sum finds nothing to add and returns 0.
Replace the expression
"TotalLoad": { "$sum": "$Sensor_data.Load" },
with the pipeline-changed field AssetLoad
"TotalLoad": { "$sum": "$AssetLoad" },
To calculate the number of sensors each floor has in total, your first pipeline stage needs to calculate the size of the
sensor data array using $size and store it in a new field using $addFields; then retain that field through the subsequent $group stages by using the $first operator.
Amend your pipeline to the following:
// Per floor: the total load, the number of sensor readings, and the load per
// calendar day (AssetStatus).
db.sensordata.aggregate([
  // Count the sensor entries BEFORE $unwind flattens the array. The field
  // path needs the "$" prefix; without it $size receives a plain string.
  { "$addFields": {
    "TotalSensors": { "$size": "$Sensor_data" }
  } },
  { "$unwind": "$Sensor_data" },
  // First level: sum the load per (floor, day). Adding the numeric Timestamp
  // to the epoch date treats it as milliseconds and yields a Date, which is
  // then truncated to midnight through $dateFromParts.
  { "$group": {
    "_id": {
      "Floor_Id": "$Floor_Id",
      "DailyDate": {
        "$dateFromParts": {
          "year": { "$year": {
            "$add": [new Date("1970-01-01"), "$Sensor_data.Timestamp"]
          } },
          "month": { "$month": {
            "$add": [new Date("1970-01-01"), "$Sensor_data.Timestamp"]
          } },
          "day": { "$dayOfMonth": {
            "$add": [new Date("1970-01-01"), "$Sensor_data.Timestamp"]
          } }
        }
      }
    },
    "AssetLoad": { "$sum": "$Sensor_data.Load" },
    "TotalSensors": { "$first": "$TotalSensors" }
  } },
  // Second level: roll the daily sums up per floor. Sensor_data no longer
  // exists after the first $group, so the floor total must be built from
  // AssetLoad.
  { "$group": {
    "_id": { "Floor_Id": "$_id.Floor_Id" },
    "TotalLoad": { "$sum": "$AssetLoad" },
    "AssetStatus": {
      "$push": {
        "TotalLoad": "$AssetLoad",
        "LoadUnit": "Kgs",
        "Date": "$_id.DailyDate"
      }
    },
    "TotalSensors": { "$first": "$TotalSensors" }
  } }
])

mongo aggregation framework group by quarter/half year/year

I have a database with this schema structure :
{
"name" : "Carl",
"city" : "paris",
"time" : "1-2018",
"notes" : [
"A",
"A",
"B",
"C",
"D"
]
}
And this query using the aggregation framework :
db.getCollection('collection').aggregate(
[{
"$match": {
"$and": [{
"$or": [ {
"time": "1-2018"
}, {
"time": "2-2018"
} ]
}, {
"name": "Carl"
}, {
"city": "paris"
}]
}
}, {
"$unwind": "$notes"
}, {
"$group": {
"_id": {
"notes": "$notes",
"time": "$time"
},
"count": {
"$sum": 1
}
}
}
, {
"$group": {
"_id": "$_id.time",
"count": {
"$sum": 1
}
}
}, {
"$project": {
"_id": 0,
"time": "$_id",
"count": 1
}
}])
It is working correctly and I'm getting these results:
{
"count" : 4.0,
"time" : "2-2018"
}
{
"count" : 4.0,
"time" : "1-2018"
}
My issue is that i'd like to keep the same match stage and i'd like to group by quarter.
Here the result i'd like to have :
{
"count" : 8.0,
"time" : "1-2018" // here quarter 1
}
Thanks

How to compare and count each value of element with condition in mongoDB pipeline after unwinding?

This is my command I ran in tools->command
{
aggregate : "hashtags",
pipeline:
[
{$unwind:"$time"},
{$match:{"$time":{$gte:NumberInt(1450854385), $lte:NumberInt(1450854385)}}},
{$group:{"_id":"$word","count":{$sum:1}}}
]
}
which gave us this result
Response from server:
{
"result": [
{
"_id": "dear",
"count": NumberInt(1)
},
{
"_id": "ghost",
"count": NumberInt(1)
},
{
"_id": "rat",
"count": NumberInt(1)
},
{
"_id": "police",
"count": NumberInt(1)
},
{
"_id": "bugs",
"count": NumberInt(3)
},
{
"_id": "dog",
"count": NumberInt(2)
},
{
"_id": "batman",
"count": NumberInt(9)
},
{
"_id": "ear",
"count": NumberInt(1)
}
],
"ok": 1
}
The documents are in collection 'hashtags'
The documents inserted are as shown below
1.
{
"_id": ObjectId("567a483bf0058ed6755ab3de"),
"hash_count": NumberInt(1),
"msgids": [
"1583"
],
"time": [
NumberInt(1450854385)
],
"word": "ghost"
}
2.
{
"_id": ObjectId("5679485ff0058ed6755ab3dd"),
"hash_count": NumberInt(1),
"msgids": [
"1563"
],
"time": [
NumberInt(1450788886)
],
"word": "dear"
}
3.
{
"_id": ObjectId("567941aaf0058ed6755ab3dc"),
"hash_count": NumberInt(9),
"msgids": [
"1555",
"1556",
"1557",
"1558",
"1559",
"1561",
"1562",
"1584",
"1585"
],
"time": [
NumberInt(1450787170),
NumberInt(1450787292),
NumberInt(1450787307),
NumberInt(1450787333),
NumberInt(1450787354),
NumberInt(1450787526),
NumberInt(1450787615),
NumberInt(1450855148),
NumberInt(1450855155)
],
"word": "batman"
}
4.
{
"_id": ObjectId("567939cdf0058ed6755ab3d9"),
"hash_count": NumberInt(3),
"msgids": [
"1551",
"1552",
"1586"
],
"time": [
NumberInt(1450785157),
NumberInt(1450785194),
NumberInt(1450856188)
],
"word": "bugs"
}
So I want to count the number of values in the field 'time' which comes in between two limits
such as this
foreach word
{
foreach time
{
if((a<time)&&(time<b))
word[count]++
}
}
but my query is just giving output of the total size of array 'time'.
What is the correct query?
for eg
if lower bound is 1450787615 and upper bound is 1450855155
there are 3 values in 'time'. for word 'batman'
The answer should be
{
"_id": "batman",
"count": NumberInt(3)
},
for batman.Thank you.
Use the following aggregation pipeline:
// Counts, per word, how many entries of the "time" array fall inside the
// inclusive range [1450787615, 1450855155].
db.hashtags.aggregate([
// Coarse pre-filter on whole documents, applied before the array is unwound.
// It does not filter individual array entries; that happens in the second
// $match below.
{
"$match": {
"time": {
"$gte": 1450787615, "$lte": 1450855155
}
}
},
// Emit one document per entry of the "time" array.
{ "$unwind": "$time" },
// "time" is now a single value, so this keeps only the entries in range.
{
"$match": {
"time": {
"$gte": 1450787615, "$lte": 1450855155
}
}
},
// Count the remaining entries per word.
{
"$group": {
"_id": "$word",
"count": {
"$sum": 1
}
}
}
])
For the given sample documents, this will yield:
/* 0 */
{
"result" : [
{
"_id" : "batman",
"count" : 3
},
{
"_id" : "dear",
"count" : 1
},
{
"_id" : "ghost",
"count" : 1
}
],
"ok" : 1
}